From: <ku...@us...> - 2008-04-22 15:23:48
|
Revision: 810 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=810&view=rev Author: kurzum Date: 2008-04-22 08:23:38 -0700 (Tue, 22 Apr 2008) Log Message: ----------- moved some classes to new package scripts because they are neither utility classes nor test classes, but scripts. Added Paths: ----------- trunk/src/dl-learner/org/dllearner/scripts/ trunk/src/dl-learner/org/dllearner/scripts/CloseOntology.java trunk/src/dl-learner/org/dllearner/scripts/CrossValidation.java trunk/src/dl-learner/org/dllearner/scripts/NT2RDF.java trunk/src/dl-learner/org/dllearner/scripts/SPARQLPreparation.java Removed Paths: ------------- trunk/src/dl-learner/org/dllearner/test/SPARQLPreparation.java trunk/src/dl-learner/org/dllearner/utilities/CloseOntology.java trunk/src/dl-learner/org/dllearner/utilities/CrossValidation.java trunk/src/dl-learner/org/dllearner/utilities/NT2RDF.java Copied: trunk/src/dl-learner/org/dllearner/scripts/CloseOntology.java (from rev 808, trunk/src/dl-learner/org/dllearner/utilities/CloseOntology.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/CloseOntology.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/CloseOntology.java 2008-04-22 15:23:38 UTC (rev 810) @@ -0,0 +1,66 @@ +package org.dllearner.scripts; + +import java.io.File; +import java.net.URI; +import java.util.HashSet; +import java.util.Set; + +import org.dllearner.core.KnowledgeSource; +import org.dllearner.kb.OWLFile; +import org.dllearner.reasoning.OWLAPIReasoner; +import org.dllearner.utilities.OntologyCloserOWLAPI; + +/** + * Script for closing an ontology OWLAPI produces extensive filesizes, when + * exporting output file ist named like input file, but recieves a + * "_closedConcise" at the end. 
+ * + * Counts all roles of individuals and adds an Intersection (Concise) of + * ExactCardinalityRestriction to the ABox + * + */ +public class CloseOntology { + + /** + * @param argument0 + * simply the path to the owl ontology "examples/test.owl" + */ + public static void main(String[] args) { + String ontopath=""; + //ontopath="examples/carcinogenesis/carcinogenesis.owl"; + // inputURI + //ontopath = args[0]; + File file = new File(ontopath); + URI inputURI = file.toURI(); + + // outputURI + String ending = ontopath.substring(ontopath.lastIndexOf(".") + 1); + ontopath = ontopath.replace("." + ending, "_closedConcise." + ending); + file = new File(ontopath); + URI outputURI = file.toURI(); + + try { + // initializing reasoner + OWLFile owlFile = new OWLFile(); + owlFile.setURL(inputURI.toURL()); + Set<KnowledgeSource> ks = new HashSet<KnowledgeSource>(); + ks.add(owlFile); + OWLAPIReasoner owlapireasoner = new OWLAPIReasoner(ks); + owlapireasoner.init(); + + // close + OntologyCloserOWLAPI oc = new OntologyCloserOWLAPI(owlapireasoner); + oc.testForTransitiveProperties(true); + System.out.println("Attempting to close"); + oc.applyNumberRestrictionsConcise(); + System.out.println("Finished, preparing output"); + + // save + oc.writeOWLFile(outputURI); + + } catch (Exception e) { + e.printStackTrace(); + } + } + +} Copied: trunk/src/dl-learner/org/dllearner/scripts/CrossValidation.java (from rev 808, trunk/src/dl-learner/org/dllearner/utilities/CrossValidation.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/CrossValidation.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/CrossValidation.java 2008-04-22 15:23:38 UTC (rev 810) @@ -0,0 +1,318 @@ +/** + * Copyright (C) 2007-2008, Jens Lehmann + * + * This file is part of DL-Learner. 
+ * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.scripts; + +import java.io.File; +import java.text.DecimalFormat; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import java.util.Set; + +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.SimpleLayout; +import org.dllearner.cli.Start; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.ComponentManager; +import org.dllearner.core.LearningAlgorithm; +import org.dllearner.core.LearningProblem; +import org.dllearner.core.ReasoningService; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Individual; +import org.dllearner.learningproblems.PosNegLP; +import org.dllearner.learningproblems.PosOnlyLP; +import org.dllearner.utilities.Datastructures; +import org.dllearner.utilities.Helper; +import org.dllearner.utilities.Stat; + +/** + * Performs cross validation for the given problem. Supports + * k-fold cross-validation and leave-one-out cross-validation. 
+ * + * @author Jens Lehmann + * + */ +public class CrossValidation { + + private static Logger logger = Logger.getRootLogger(); + + public static void main(String[] args) { + File file = new File(args[0]); + + boolean leaveOneOut = false; + int folds = 10; + + // use second argument as number of folds; if not specified + // leave one out cross validation is used + if(args.length > 1) + folds = Integer.parseInt(args[1]); + else + leaveOneOut = true; + + if(folds < 2) { + System.out.println("At least 2 fold needed."); + System.exit(0); + } + + // create logger (a simple logger which outputs + // its messages to the console) + SimpleLayout layout = new SimpleLayout(); + ConsoleAppender consoleAppender = new ConsoleAppender(layout); + logger.removeAllAppenders(); + logger.addAppender(consoleAppender); + logger.setLevel(Level.WARN); + // disable OWL API info output + java.util.logging.Logger.getLogger("").setLevel(java.util.logging.Level.WARNING); + + new CrossValidation(file, folds, leaveOneOut); + + } + + public CrossValidation(File file, int folds, boolean leaveOneOut) { + + DecimalFormat df = new DecimalFormat(); + ComponentManager cm = ComponentManager.getInstance(); + + // the first read of the file is used to detect the examples + // and set up the splits correctly according to our validation + // method + Start start = null; + try { + start = new Start(file); + } catch (ComponentInitException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + LearningProblem lp = start.getLearningProblem(); + ReasoningService rs = start.getReasoningService(); + + // the training and test sets used later on + List<Set<Individual>> trainingSetsPos = new LinkedList<Set<Individual>>(); + List<Set<Individual>> trainingSetsNeg = new LinkedList<Set<Individual>>(); + List<Set<Individual>> testSetsPos = new LinkedList<Set<Individual>>(); + List<Set<Individual>> testSetsNeg = new LinkedList<Set<Individual>>(); + + if(lp instanceof PosNegLP) { + + // get examples and 
shuffle them to + Set<Individual> posExamples = ((PosNegLP)lp).getPositiveExamples(); + List<Individual> posExamplesList = new LinkedList<Individual>(posExamples); + Collections.shuffle(posExamplesList, new Random(1)); + Set<Individual> negExamples = ((PosNegLP)lp).getNegativeExamples(); + List<Individual> negExamplesList = new LinkedList<Individual>(negExamples); + Collections.shuffle(negExamplesList, new Random(2)); + + // sanity check whether nr. of folds makes sense for this benchmark + if(!leaveOneOut && (posExamples.size()<folds && negExamples.size()<folds)) { + System.out.println("The number of folds is higher than the number of " + + "positive/negative examples. This can result in empty test sets. Exiting."); + System.exit(0); + } + + if(leaveOneOut) { + // note that leave-one-out is not identical to k-fold with + // k = nr. of examples in the current implementation, because + // with n folds and n examples there is no guarantee that a fold + // is never empty (this is an implementation issue) + int nrOfExamples = posExamples.size() + negExamples.size(); + for(int i = 0; i < nrOfExamples; i++) { + // ... + } + System.out.println("Leave-one-out not supported yet."); + System.exit(1); + } else { + // calculating where to split the sets, ; note that we split + // positive and negative examples separately such that the + // distribution of positive and negative examples remains similar + // (note that there better but more complex ways to implement this, + // which guarantee that the sum of the elements of a fold for pos + // and neg differs by at most 1 - it can differ by 2 in our implementation, + // e.g. with 3 folds, 4 pos. examples, 4 neg. 
examples) + int[] splitsPos = calculateSplits(posExamples.size(),folds); + int[] splitsNeg = calculateSplits(negExamples.size(),folds); + +// System.out.println(splitsPos[0]); +// System.out.println(splitsNeg[0]); + + // calculating training and test sets + for(int i=0; i<folds; i++) { + Set<Individual> testPos = getTestingSet(posExamplesList, splitsPos, i); + Set<Individual> testNeg = getTestingSet(negExamplesList, splitsNeg, i); + testSetsPos.add(i, testPos); + testSetsNeg.add(i, testNeg); + trainingSetsPos.add(i, getTrainingSet(posExamples, testPos)); + trainingSetsNeg.add(i, getTrainingSet(negExamples, testNeg)); + } + + } + + } else if(lp instanceof PosOnlyLP) { + System.out.println("Cross validation for positive only learning not supported yet."); + System.exit(0); + // Set<Individual> posExamples = ((PosOnlyLP)lp).getPositiveExamples(); + // int[] splits = calculateSplits(posExamples.size(),folds); + } else { + System.out.println("Cross validation for learning problem " + lp + " not supported."); + System.exit(0); + } + + // statistical values + Stat runtime = new Stat(); + Stat accuracy = new Stat(); + Stat length = new Stat(); + + // run the algorithm + for(int currFold=0; currFold<folds; currFold++) { + // we always perform a full initialisation to make sure that + // no objects are reused + try { + start = new Start(file); + } catch (ComponentInitException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + lp = start.getLearningProblem(); + Set<String> pos = Datastructures.individualSetToStringSet(trainingSetsPos.get(currFold)); + Set<String> neg = Datastructures.individualSetToStringSet(trainingSetsNeg.get(currFold)); + cm.applyConfigEntry(lp, "positiveExamples", pos); + cm.applyConfigEntry(lp, "negativeExamples", neg); +// System.out.println("pos: " + pos.size()); +// System.out.println("neg: " + neg.size()); +// System.exit(0); + + // es fehlt init zwischendurch + + LearningAlgorithm la = start.getLearningAlgorithm(); + // init 
again, because examples have changed + try { + la.init(); + } catch (ComponentInitException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + long algorithmStartTime = System.nanoTime(); + la.start(); + long algorithmDuration = System.nanoTime() - algorithmStartTime; + runtime.addNumber(algorithmDuration/(double)1000000000); + + Description concept = la.getBestSolution(); + + Set<Individual> tmp = rs.instanceCheck(concept, testSetsPos.get(currFold)); + Set<Individual> tmp2 = Helper.difference(testSetsPos.get(currFold), tmp); + Set<Individual> tmp3 = rs.instanceCheck(concept, testSetsNeg.get(currFold)); + + System.out.println("test set errors pos: " + tmp2); + System.out.println("test set errors neg: " + tmp3); + + // calculate training accuracies + int trainingCorrectPosClassified = getCorrectPosClassified(rs, concept, trainingSetsPos.get(currFold)); + int trainingCorrectNegClassified = getCorrectNegClassified(rs, concept, trainingSetsNeg.get(currFold)); + int trainingCorrectExamples = trainingCorrectPosClassified + trainingCorrectNegClassified; + double trainingAccuracy = 100*((double)trainingCorrectExamples/(trainingSetsPos.get(currFold).size()+ + trainingSetsNeg.get(currFold).size())); + + // calculate test accuracies + int correctPosClassified = getCorrectPosClassified(rs, concept, testSetsPos.get(currFold)); + int correctNegClassified = getCorrectNegClassified(rs, concept, testSetsNeg.get(currFold)); + int correctExamples = correctPosClassified + correctNegClassified; + double currAccuracy = 100*((double)correctExamples/(testSetsPos.get(currFold).size()+ + testSetsNeg.get(currFold).size())); + accuracy.addNumber(currAccuracy); + + length.addNumber(concept.getLength()); + + System.out.println("fold " + currFold + " (" + file + "):"); + System.out.println(" training: " + pos.size() + " positive and " + neg.size() + " negative examples"); + System.out.println(" testing: " + correctPosClassified + "/" + testSetsPos.get(currFold).size() + " 
correct positives, " + + correctNegClassified + "/" + testSetsNeg.get(currFold).size() + " correct negatives"); + System.out.println(" concept: " + concept); + System.out.println(" accuracy: " + df.format(currAccuracy) + "% (" + df.format(trainingAccuracy) + "% on training set)"); + System.out.println(" length: " + df.format(concept.getLength())); + System.out.println(" runtime: " + df.format(algorithmDuration/(double)1000000000) + "s"); + + // free all resources + start.getReasoningService().releaseKB(); + cm.freeAllComponents(); + } + + System.out.println(); + System.out.println("Finished " + folds + "-folds cross-validation on " + file + "."); + System.out.println("runtime: " + statOutput(df, runtime, "s")); + System.out.println("length: " + statOutput(df, length, "")); + System.out.println("accuracy: " + statOutput(df, accuracy, "%")); + + } + + private int getCorrectPosClassified(ReasoningService rs, Description concept, Set<Individual> testSetPos) { + return rs.instanceCheck(concept, testSetPos).size(); + } + + private int getCorrectNegClassified(ReasoningService rs, Description concept, Set<Individual> testSetNeg) { + return testSetNeg.size() - rs.instanceCheck(concept, testSetNeg).size(); + } + + private Set<Individual> getTestingSet(List<Individual> examples, int[] splits, int fold) { + int fromIndex; + // we either start from 0 or after the last fold ended + if(fold == 0) + fromIndex = 0; + else + fromIndex = splits[fold-1]; + // the split corresponds to the ends of the folds + int toIndex = splits[fold]; + +// System.out.println("from " + fromIndex + " to " + toIndex); + + Set<Individual> testingSet = new HashSet<Individual>(); + // +1 because 2nd element is exclusive in subList method + testingSet.addAll(examples.subList(fromIndex, toIndex)); + return testingSet; + } + + private Set<Individual> getTrainingSet(Set<Individual> examples, Set<Individual> testingSet) { + return Helper.difference(examples, testingSet); + } + + // takes nr. 
of examples and the nr. of folds for this examples; + // returns an array which says where each fold ends, i.e. + // splits[i] is the index of the last element of fold i in the examples + private int[] calculateSplits(int nrOfExamples, int folds) { + int[] splits = new int[folds]; + for(int i=1; i<=folds; i++) { + // we always round up to the next integer + splits[i-1] = (int)Math.ceil(i*nrOfExamples/(double)folds); + } + return splits; + } + + private String statOutput(DecimalFormat df, Stat stat, String unit) { + String str = "av. " + df.format(stat.getMean()) + unit; + str += " (deviation " + df.format(stat.getStandardDeviation()) + unit + "; "; + str += "min " + df.format(stat.getMin()) + unit + "; "; + str += "max " + df.format(stat.getMax()) + unit + ")"; + return str; + } + +} Copied: trunk/src/dl-learner/org/dllearner/scripts/NT2RDF.java (from rev 808, trunk/src/dl-learner/org/dllearner/utilities/NT2RDF.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/NT2RDF.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/NT2RDF.java 2008-04-22 15:23:38 UTC (rev 810) @@ -0,0 +1,35 @@ +package org.dllearner.scripts; +import java.io.File; +import java.net.URI; + +import org.semanticweb.owl.apibinding.OWLManager; +import org.semanticweb.owl.io.RDFXMLOntologyFormat; +import org.semanticweb.owl.model.OWLOntology; +import org.semanticweb.owl.model.OWLOntologyManager; +public class NT2RDF { + + + public static void main(String[] args) { + try { + String ontopath=args[0]; + URI inputURI = new File(ontopath).toURI(); + + // outputURI + String ending = ontopath.substring(ontopath.lastIndexOf(".") + 1); + ontopath = ontopath.replace("." 
+ ending, ".rdf" ); + URI outputURI = new File(ontopath).toURI(); + + OWLOntologyManager manager = OWLManager.createOWLOntologyManager(); + OWLOntology ontology = manager.loadOntologyFromPhysicalURI(inputURI); + manager.saveOntology(ontology, new RDFXMLOntologyFormat(), outputURI); + // Remove the ontology from the manager + manager.removeOntology(ontology.getURI()); + } + catch (Exception e) { + System.out.println("The ontology could not be created: " + e.getMessage()); + } + + } + } + + Copied: trunk/src/dl-learner/org/dllearner/scripts/SPARQLPreparation.java (from rev 809, trunk/src/dl-learner/org/dllearner/test/SPARQLPreparation.java) =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/SPARQLPreparation.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/SPARQLPreparation.java 2008-04-22 15:23:38 UTC (rev 810) @@ -0,0 +1,176 @@ +package org.dllearner.scripts; + +import java.util.Random; +import java.util.SortedSet; +import java.util.TreeSet; + +import javax.sound.midi.SysexMessage; + +import org.apache.log4j.ConsoleAppender; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.SimpleLayout; +import org.dllearner.kb.sparql.Cache; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; +import org.dllearner.kb.sparql.configuration.SparqlEndpoint; +import org.dllearner.utilities.ConfWriter; +import org.dllearner.utilities.JenaResultSetConvenience; +import org.dllearner.utilities.LearnSparql; +import org.dllearner.utilities.SimpleClock; + +import com.hp.hpl.jena.query.ResultSet; + +public class SPARQLPreparation { + + static Cache c; + static SparqlEndpoint se; + private static Logger logger = Logger.getRootLogger(); + /** + * @param args + */ + public static void main(String[] args) { + init(); + try { + + + + SimpleClock sc=new SimpleClock(); + SortedSet<String> concepts = new TreeSet<String>(); + 
//concepts.add("\"http://dbpedia.org/class/yago/Person100007846\""); + concepts.add("\"http://dbpedia.org/class/yago/FieldMarshal110086821\""); + SortedSet<String> posExamples = new TreeSet<String>(); + SortedSet<String> negExamples = new TreeSet<String>(); + String url = "http://dbpedia.openlinksw.com:8890/sparql"; + //HashMap<String, ResultSet> result = new HashMap<String, ResultSet>(); + //HashMap<String, String> result2 = new HashMap<String, String>(); + + //System.out.println(concepts.first()); + posExamples = new JenaResultSetConvenience(queryConcept(concepts.first(),0)) + .getStringListForVariable("subject"); + + for (String string : posExamples) { + negExamples.addAll( getObjects(string)); + //if(neg.size()>=1)System.out.println(neg); + } + + /*for (String string2 : negExamples) { + if(posExamples.contains(string2)){ + System.out.println(string2); + negExamples.remove(string2); + }; + }*/ + //System.out.println(negExamples.size()); + negExamples.removeAll(posExamples); + posExamples=shrink(posExamples,5); + negExamples=shrink(negExamples,posExamples.size()); + //System.out.println(posExamples.first())); + //System.out.println(posExamples.size()); + //System.out.println(negExamples.size()); + + // + new ConfWriter().writeSPARQL("aaa.conf", posExamples, negExamples, url, new TreeSet<String>()); + new LearnSparql().learn(posExamples, negExamples, "http://dbpedia.openlinksw.com:8890/sparql", new TreeSet<String>()); + + sc.printAndSet("Finished"); + } catch (Exception e) { + e.printStackTrace(); + + } + + } + + /*************************************************************************** + * *********************OLDCODE String + * conj="(\"http://dbpedia.org/class/yago/Person100007846\" AND + * \"http://dbpedia.org/class/yago/Head110162991\")"; + * + * + * concepts.add("EXISTS \"http://dbpedia.org/property/disambiguates\".TOP"); + * concepts.add("EXISTS + * \"http://dbpedia.org/property/successor\".\"http://dbpedia.org/class/yago/Person100007846\""); + * 
concepts.add("EXISTS \"http://dbpedia.org/property/successor\"."+conj); + * //concepts.add("ALL \"http://dbpedia.org/property/disambiguates\".TOP"); + * //concepts.add("ALL + * \"http://dbpedia.org/property/successor\".\"http://dbpedia.org/class/yago/Person100007846\""); + * concepts.add("\"http://dbpedia.org/class/yago/Person100007846\""); + * concepts.add(conj); + * concepts.add("(\"http://dbpedia.org/class/yago/Person100007846\" OR + * \"http://dbpedia.org/class/yago/Head110162991\")"); + * + * //concepts.add("NOT \"http://dbpedia.org/class/yago/Person100007846\""); + * + * for (String kbsyntax : concepts) { + * result.put(kbsyntax,queryConcept(kbsyntax)); } + * System.out.println("************************"); for (String string : + * result.keySet()) { System.out.println("KBSyntayString: "+string); + * System.out.println("Query:\n"+result.get(string).hasNext()); + * System.out.println("************************"); } + **************************************************************************/ + + static SortedSet<String> getObjects(String subject) { + // SortedSet<String> result = new TreeSet<String>(); + + String query = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. 
\n" + + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n" + + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" + + "}"; + //System.out.println(query); + String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); + //System.out.println(JSON); + ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); + JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); + return rsc.getStringListForVariable("o"); + } + + public static ResultSet queryConcept(String concept,int limit) { + ResultSet rs = null; + try { + String query = SparqlQueryDescriptionConvertVisitor + .getSparqlQuery(concept,limit); + + SparqlQuery sq = new SparqlQuery(query, se); + String JSON = c.executeSparqlQuery(sq); + //System.out.println(JSON); + rs = SparqlQuery.JSONtoResultSet(JSON); + + } catch (Exception e) { + e.printStackTrace(); + } + + return rs; + } + + public static void init() { + SparqlQueryDescriptionConvertVisitor.debug_flag = false; + c = new Cache("cache"); + se = SparqlEndpoint.dbpediaEndpoint(); + // create logger (a simple logger which outputs + // its messages to the console) + SimpleLayout layout = new SimpleLayout(); + ConsoleAppender consoleAppender = new ConsoleAppender(layout); + logger.removeAllAppenders(); + logger.addAppender(consoleAppender); + logger.setLevel(Level.DEBUG); + + + } + + public static SortedSet<String> shrink(SortedSet<String> s, int limit) { + SortedSet<String> ret = new TreeSet<String>(); + Random r = new Random(); + double treshold = ((double)limit)/s.size(); + //System.out.println("treshold"+howmany); + //System.out.println("treshold"+allRetrieved.size()); + System.out.println("treshold"+treshold); + + for (String oneInd : s) { + if(r.nextDouble()<treshold) { + ret.add(oneInd); + + } + } + return ret; + } + +} Deleted: trunk/src/dl-learner/org/dllearner/test/SPARQLPreparation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/SPARQLPreparation.java 2008-04-22 
15:19:07 UTC (rev 809) +++ trunk/src/dl-learner/org/dllearner/test/SPARQLPreparation.java 2008-04-22 15:23:38 UTC (rev 810) @@ -1,176 +0,0 @@ -package org.dllearner.test; - -import java.util.Random; -import java.util.SortedSet; -import java.util.TreeSet; - -import javax.sound.midi.SysexMessage; - -import org.apache.log4j.ConsoleAppender; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.apache.log4j.SimpleLayout; -import org.dllearner.kb.sparql.Cache; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor; -import org.dllearner.kb.sparql.configuration.SparqlEndpoint; -import org.dllearner.utilities.ConfWriter; -import org.dllearner.utilities.JenaResultSetConvenience; -import org.dllearner.utilities.LearnSparql; -import org.dllearner.utilities.SimpleClock; - -import com.hp.hpl.jena.query.ResultSet; - -public class SPARQLPreparation { - - static Cache c; - static SparqlEndpoint se; - private static Logger logger = Logger.getRootLogger(); - /** - * @param args - */ - public static void main(String[] args) { - init(); - try { - - - - SimpleClock sc=new SimpleClock(); - SortedSet<String> concepts = new TreeSet<String>(); - //concepts.add("\"http://dbpedia.org/class/yago/Person100007846\""); - concepts.add("\"http://dbpedia.org/class/yago/FieldMarshal110086821\""); - SortedSet<String> posExamples = new TreeSet<String>(); - SortedSet<String> negExamples = new TreeSet<String>(); - String url = "http://dbpedia.openlinksw.com:8890/sparql"; - //HashMap<String, ResultSet> result = new HashMap<String, ResultSet>(); - //HashMap<String, String> result2 = new HashMap<String, String>(); - - //System.out.println(concepts.first()); - posExamples = new JenaResultSetConvenience(queryConcept(concepts.first(),0)) - .getStringListForVariable("subject"); - - for (String string : posExamples) { - negExamples.addAll( getObjects(string)); - //if(neg.size()>=1)System.out.println(neg); - } - - /*for (String 
string2 : negExamples) { - if(posExamples.contains(string2)){ - System.out.println(string2); - negExamples.remove(string2); - }; - }*/ - //System.out.println(negExamples.size()); - negExamples.removeAll(posExamples); - posExamples=shrink(posExamples,5); - negExamples=shrink(negExamples,posExamples.size()); - //System.out.println(posExamples.first())); - //System.out.println(posExamples.size()); - //System.out.println(negExamples.size()); - - // - new ConfWriter().writeSPARQL("aaa.conf", posExamples, negExamples, url, new TreeSet<String>()); - new LearnSparql().learn(posExamples, negExamples, "http://dbpedia.openlinksw.com:8890/sparql", new TreeSet<String>()); - - sc.printAndSet("Finished"); - } catch (Exception e) { - e.printStackTrace(); - - } - - } - - /*************************************************************************** - * *********************OLDCODE String - * conj="(\"http://dbpedia.org/class/yago/Person100007846\" AND - * \"http://dbpedia.org/class/yago/Head110162991\")"; - * - * - * concepts.add("EXISTS \"http://dbpedia.org/property/disambiguates\".TOP"); - * concepts.add("EXISTS - * \"http://dbpedia.org/property/successor\".\"http://dbpedia.org/class/yago/Person100007846\""); - * concepts.add("EXISTS \"http://dbpedia.org/property/successor\"."+conj); - * //concepts.add("ALL \"http://dbpedia.org/property/disambiguates\".TOP"); - * //concepts.add("ALL - * \"http://dbpedia.org/property/successor\".\"http://dbpedia.org/class/yago/Person100007846\""); - * concepts.add("\"http://dbpedia.org/class/yago/Person100007846\""); - * concepts.add(conj); - * concepts.add("(\"http://dbpedia.org/class/yago/Person100007846\" OR - * \"http://dbpedia.org/class/yago/Head110162991\")"); - * - * //concepts.add("NOT \"http://dbpedia.org/class/yago/Person100007846\""); - * - * for (String kbsyntax : concepts) { - * result.put(kbsyntax,queryConcept(kbsyntax)); } - * System.out.println("************************"); for (String string : - * result.keySet()) { 
System.out.println("KBSyntayString: "+string); - * System.out.println("Query:\n"+result.get(string).hasNext()); - * System.out.println("************************"); } - **************************************************************************/ - - static SortedSet<String> getObjects(String subject) { - // SortedSet<String> result = new TreeSet<String>(); - - String query = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n" - + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n" - + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" - + "}"; - //System.out.println(query); - String JSON = (c.executeSparqlQuery(new SparqlQuery(query, se))); - //System.out.println(JSON); - ResultSet rs =SparqlQuery.JSONtoResultSet(JSON); - JenaResultSetConvenience rsc = new JenaResultSetConvenience(rs); - return rsc.getStringListForVariable("o"); - } - - public static ResultSet queryConcept(String concept,int limit) { - ResultSet rs = null; - try { - String query = SparqlQueryDescriptionConvertVisitor - .getSparqlQuery(concept,limit); - - SparqlQuery sq = new SparqlQuery(query, se); - String JSON = c.executeSparqlQuery(sq); - //System.out.println(JSON); - rs = SparqlQuery.JSONtoResultSet(JSON); - - } catch (Exception e) { - e.printStackTrace(); - } - - return rs; - } - - public static void init() { - SparqlQueryDescriptionConvertVisitor.debug_flag = false; - c = new Cache("cache"); - se = SparqlEndpoint.dbpediaEndpoint(); - // create logger (a simple logger which outputs - // its messages to the console) - SimpleLayout layout = new SimpleLayout(); - ConsoleAppender consoleAppender = new ConsoleAppender(layout); - logger.removeAllAppenders(); - logger.addAppender(consoleAppender); - logger.setLevel(Level.DEBUG); - - - } - - public static SortedSet<String> shrink(SortedSet<String> s, int limit) { - SortedSet<String> ret = new TreeSet<String>(); - Random r = new Random(); - double treshold = ((double)limit)/s.size(); - //System.out.println("treshold"+howmany); 
- //System.out.println("treshold"+allRetrieved.size()); - System.out.println("treshold"+treshold); - - for (String oneInd : s) { - if(r.nextDouble()<treshold) { - ret.add(oneInd); - - } - } - return ret; - } - -} Deleted: trunk/src/dl-learner/org/dllearner/utilities/CloseOntology.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/CloseOntology.java 2008-04-22 15:19:07 UTC (rev 809) +++ trunk/src/dl-learner/org/dllearner/utilities/CloseOntology.java 2008-04-22 15:23:38 UTC (rev 810) @@ -1,65 +0,0 @@ -package org.dllearner.utilities; - -import java.io.File; -import java.net.URI; -import java.util.HashSet; -import java.util.Set; - -import org.dllearner.core.KnowledgeSource; -import org.dllearner.kb.OWLFile; -import org.dllearner.reasoning.OWLAPIReasoner; - -/** - * Script for closing an ontology OWLAPI produces extensive filesizes, when - * exporting output file ist named like input file, but recieves a - * "_closedConcise" at the end. - * - * Counts all roles of individuals and adds an Intersection (Concise) of - * ExactCardinalityRestriction to the ABox - * - */ -public class CloseOntology { - - /** - * @param argument0 - * simply the path to the owl ontology "examples/test.owl" - */ - public static void main(String[] args) { - String ontopath=""; - //ontopath="examples/carcinogenesis/carcinogenesis.owl"; - // inputURI - //ontopath = args[0]; - File file = new File(ontopath); - URI inputURI = file.toURI(); - - // outputURI - String ending = ontopath.substring(ontopath.lastIndexOf(".") + 1); - ontopath = ontopath.replace("." + ending, "_closedConcise." 
+ ending); - file = new File(ontopath); - URI outputURI = file.toURI(); - - try { - // initializing reasoner - OWLFile owlFile = new OWLFile(); - owlFile.setURL(inputURI.toURL()); - Set<KnowledgeSource> ks = new HashSet<KnowledgeSource>(); - ks.add(owlFile); - OWLAPIReasoner owlapireasoner = new OWLAPIReasoner(ks); - owlapireasoner.init(); - - // close - OntologyCloserOWLAPI oc = new OntologyCloserOWLAPI(owlapireasoner); - oc.testForTransitiveProperties(true); - System.out.println("Attempting to close"); - oc.applyNumberRestrictionsConcise(); - System.out.println("Finished, preparing output"); - - // save - oc.writeOWLFile(outputURI); - - } catch (Exception e) { - e.printStackTrace(); - } - } - -} Deleted: trunk/src/dl-learner/org/dllearner/utilities/CrossValidation.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/CrossValidation.java 2008-04-22 15:19:07 UTC (rev 809) +++ trunk/src/dl-learner/org/dllearner/utilities/CrossValidation.java 2008-04-22 15:23:38 UTC (rev 810) @@ -1,315 +0,0 @@ -/** - * Copyright (C) 2007-2008, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- * - */ -package org.dllearner.utilities; - -import java.io.File; -import java.text.DecimalFormat; -import java.util.Collections; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Random; -import java.util.Set; - -import org.apache.log4j.ConsoleAppender; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.apache.log4j.SimpleLayout; -import org.dllearner.cli.Start; -import org.dllearner.core.ComponentInitException; -import org.dllearner.core.ComponentManager; -import org.dllearner.core.LearningAlgorithm; -import org.dllearner.core.LearningProblem; -import org.dllearner.core.ReasoningService; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.Individual; -import org.dllearner.learningproblems.PosNegLP; -import org.dllearner.learningproblems.PosOnlyLP; - -/** - * Performs cross validation for the given problem. Supports - * k-fold cross-validation and leave-one-out cross-validation. - * - * @author Jens Lehmann - * - */ -public class CrossValidation { - - private static Logger logger = Logger.getRootLogger(); - - public static void main(String[] args) { - File file = new File(args[0]); - - boolean leaveOneOut = false; - int folds = 10; - - // use second argument as number of folds; if not specified - // leave one out cross validation is used - if(args.length > 1) - folds = Integer.parseInt(args[1]); - else - leaveOneOut = true; - - if(folds < 2) { - System.out.println("At least 2 fold needed."); - System.exit(0); - } - - // create logger (a simple logger which outputs - // its messages to the console) - SimpleLayout layout = new SimpleLayout(); - ConsoleAppender consoleAppender = new ConsoleAppender(layout); - logger.removeAllAppenders(); - logger.addAppender(consoleAppender); - logger.setLevel(Level.WARN); - // disable OWL API info output - java.util.logging.Logger.getLogger("").setLevel(java.util.logging.Level.WARNING); - - new CrossValidation(file, folds, 
leaveOneOut); - - } - - public CrossValidation(File file, int folds, boolean leaveOneOut) { - - DecimalFormat df = new DecimalFormat(); - ComponentManager cm = ComponentManager.getInstance(); - - // the first read of the file is used to detect the examples - // and set up the splits correctly according to our validation - // method - Start start = null; - try { - start = new Start(file); - } catch (ComponentInitException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - LearningProblem lp = start.getLearningProblem(); - ReasoningService rs = start.getReasoningService(); - - // the training and test sets used later on - List<Set<Individual>> trainingSetsPos = new LinkedList<Set<Individual>>(); - List<Set<Individual>> trainingSetsNeg = new LinkedList<Set<Individual>>(); - List<Set<Individual>> testSetsPos = new LinkedList<Set<Individual>>(); - List<Set<Individual>> testSetsNeg = new LinkedList<Set<Individual>>(); - - if(lp instanceof PosNegLP) { - - // get examples and shuffle them to - Set<Individual> posExamples = ((PosNegLP)lp).getPositiveExamples(); - List<Individual> posExamplesList = new LinkedList<Individual>(posExamples); - Collections.shuffle(posExamplesList, new Random(1)); - Set<Individual> negExamples = ((PosNegLP)lp).getNegativeExamples(); - List<Individual> negExamplesList = new LinkedList<Individual>(negExamples); - Collections.shuffle(negExamplesList, new Random(2)); - - // sanity check whether nr. of folds makes sense for this benchmark - if(!leaveOneOut && (posExamples.size()<folds && negExamples.size()<folds)) { - System.out.println("The number of folds is higher than the number of " - + "positive/negative examples. This can result in empty test sets. Exiting."); - System.exit(0); - } - - if(leaveOneOut) { - // note that leave-one-out is not identical to k-fold with - // k = nr. 
of examples in the current implementation, because - // with n folds and n examples there is no guarantee that a fold - // is never empty (this is an implementation issue) - int nrOfExamples = posExamples.size() + negExamples.size(); - for(int i = 0; i < nrOfExamples; i++) { - // ... - } - System.out.println("Leave-one-out not supported yet."); - System.exit(1); - } else { - // calculating where to split the sets, ; note that we split - // positive and negative examples separately such that the - // distribution of positive and negative examples remains similar - // (note that there better but more complex ways to implement this, - // which guarantee that the sum of the elements of a fold for pos - // and neg differs by at most 1 - it can differ by 2 in our implementation, - // e.g. with 3 folds, 4 pos. examples, 4 neg. examples) - int[] splitsPos = calculateSplits(posExamples.size(),folds); - int[] splitsNeg = calculateSplits(negExamples.size(),folds); - -// System.out.println(splitsPos[0]); -// System.out.println(splitsNeg[0]); - - // calculating training and test sets - for(int i=0; i<folds; i++) { - Set<Individual> testPos = getTestingSet(posExamplesList, splitsPos, i); - Set<Individual> testNeg = getTestingSet(negExamplesList, splitsNeg, i); - testSetsPos.add(i, testPos); - testSetsNeg.add(i, testNeg); - trainingSetsPos.add(i, getTrainingSet(posExamples, testPos)); - trainingSetsNeg.add(i, getTrainingSet(negExamples, testNeg)); - } - - } - - } else if(lp instanceof PosOnlyLP) { - System.out.println("Cross validation for positive only learning not supported yet."); - System.exit(0); - // Set<Individual> posExamples = ((PosOnlyLP)lp).getPositiveExamples(); - // int[] splits = calculateSplits(posExamples.size(),folds); - } else { - System.out.println("Cross validation for learning problem " + lp + " not supported."); - System.exit(0); - } - - // statistical values - Stat runtime = new Stat(); - Stat accuracy = new Stat(); - Stat length = new Stat(); - - // run 
the algorithm - for(int currFold=0; currFold<folds; currFold++) { - // we always perform a full initialisation to make sure that - // no objects are reused - try { - start = new Start(file); - } catch (ComponentInitException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - lp = start.getLearningProblem(); - Set<String> pos = Datastructures.individualSetToStringSet(trainingSetsPos.get(currFold)); - Set<String> neg = Datastructures.individualSetToStringSet(trainingSetsNeg.get(currFold)); - cm.applyConfigEntry(lp, "positiveExamples", pos); - cm.applyConfigEntry(lp, "negativeExamples", neg); -// System.out.println("pos: " + pos.size()); -// System.out.println("neg: " + neg.size()); -// System.exit(0); - - // es fehlt init zwischendurch - - LearningAlgorithm la = start.getLearningAlgorithm(); - // init again, because examples have changed - try { - la.init(); - } catch (ComponentInitException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - long algorithmStartTime = System.nanoTime(); - la.start(); - long algorithmDuration = System.nanoTime() - algorithmStartTime; - runtime.addNumber(algorithmDuration/(double)1000000000); - - Description concept = la.getBestSolution(); - - Set<Individual> tmp = rs.instanceCheck(concept, testSetsPos.get(currFold)); - Set<Individual> tmp2 = Helper.difference(testSetsPos.get(currFold), tmp); - Set<Individual> tmp3 = rs.instanceCheck(concept, testSetsNeg.get(currFold)); - - System.out.println("test set errors pos: " + tmp2); - System.out.println("test set errors neg: " + tmp3); - - // calculate training accuracies - int trainingCorrectPosClassified = getCorrectPosClassified(rs, concept, trainingSetsPos.get(currFold)); - int trainingCorrectNegClassified = getCorrectNegClassified(rs, concept, trainingSetsNeg.get(currFold)); - int trainingCorrectExamples = trainingCorrectPosClassified + trainingCorrectNegClassified; - double trainingAccuracy = 
100*((double)trainingCorrectExamples/(trainingSetsPos.get(currFold).size()+ - trainingSetsNeg.get(currFold).size())); - - // calculate test accuracies - int correctPosClassified = getCorrectPosClassified(rs, concept, testSetsPos.get(currFold)); - int correctNegClassified = getCorrectNegClassified(rs, concept, testSetsNeg.get(currFold)); - int correctExamples = correctPosClassified + correctNegClassified; - double currAccuracy = 100*((double)correctExamples/(testSetsPos.get(currFold).size()+ - testSetsNeg.get(currFold).size())); - accuracy.addNumber(currAccuracy); - - length.addNumber(concept.getLength()); - - System.out.println("fold " + currFold + " (" + file + "):"); - System.out.println(" training: " + pos.size() + " positive and " + neg.size() + " negative examples"); - System.out.println(" testing: " + correctPosClassified + "/" + testSetsPos.get(currFold).size() + " correct positives, " - + correctNegClassified + "/" + testSetsNeg.get(currFold).size() + " correct negatives"); - System.out.println(" concept: " + concept); - System.out.println(" accuracy: " + df.format(currAccuracy) + "% (" + df.format(trainingAccuracy) + "% on training set)"); - System.out.println(" length: " + df.format(concept.getLength())); - System.out.println(" runtime: " + df.format(algorithmDuration/(double)1000000000) + "s"); - - // free all resources - start.getReasoningService().releaseKB(); - cm.freeAllComponents(); - } - - System.out.println(); - System.out.println("Finished " + folds + "-folds cross-validation on " + file + "."); - System.out.println("runtime: " + statOutput(df, runtime, "s")); - System.out.println("length: " + statOutput(df, length, "")); - System.out.println("accuracy: " + statOutput(df, accuracy, "%")); - - } - - private int getCorrectPosClassified(ReasoningService rs, Description concept, Set<Individual> testSetPos) { - return rs.instanceCheck(concept, testSetPos).size(); - } - - private int getCorrectNegClassified(ReasoningService rs, Description concept, 
Set<Individual> testSetNeg) { - return testSetNeg.size() - rs.instanceCheck(concept, testSetNeg).size(); - } - - private Set<Individual> getTestingSet(List<Individual> examples, int[] splits, int fold) { - int fromIndex; - // we either start from 0 or after the last fold ended - if(fold == 0) - fromIndex = 0; - else - fromIndex = splits[fold-1]; - // the split corresponds to the ends of the folds - int toIndex = splits[fold]; - -// System.out.println("from " + fromIndex + " to " + toIndex); - - Set<Individual> testingSet = new HashSet<Individual>(); - // +1 because 2nd element is exclusive in subList method - testingSet.addAll(examples.subList(fromIndex, toIndex)); - return testingSet; - } - - private Set<Individual> getTrainingSet(Set<Individual> examples, Set<Individual> testingSet) { - return Helper.difference(examples, testingSet); - } - - // takes nr. of examples and the nr. of folds for this examples; - // returns an array which says where each fold ends, i.e. - // splits[i] is the index of the last element of fold i in the examples - private int[] calculateSplits(int nrOfExamples, int folds) { - int[] splits = new int[folds]; - for(int i=1; i<=folds; i++) { - // we always round up to the next integer - splits[i-1] = (int)Math.ceil(i*nrOfExamples/(double)folds); - } - return splits; - } - - private String statOutput(DecimalFormat df, Stat stat, String unit) { - String str = "av. 
" + df.format(stat.getMean()) + unit; - str += " (deviation " + df.format(stat.getStandardDeviation()) + unit + "; "; - str += "min " + df.format(stat.getMin()) + unit + "; "; - str += "max " + df.format(stat.getMax()) + unit + ")"; - return str; - } - -} Deleted: trunk/src/dl-learner/org/dllearner/utilities/NT2RDF.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/NT2RDF.java 2008-04-22 15:19:07 UTC (rev 809) +++ trunk/src/dl-learner/org/dllearner/utilities/NT2RDF.java 2008-04-22 15:23:38 UTC (rev 810) @@ -1,35 +0,0 @@ -package org.dllearner.utilities; -import java.io.File; -import java.net.URI; - -import org.semanticweb.owl.apibinding.OWLManager; -import org.semanticweb.owl.io.RDFXMLOntologyFormat; -import org.semanticweb.owl.model.OWLOntology; -import org.semanticweb.owl.model.OWLOntologyManager; -public class NT2RDF { - - - public static void main(String[] args) { - try { - String ontopath=args[0]; - URI inputURI = new File(ontopath).toURI(); - - // outputURI - String ending = ontopath.substring(ontopath.lastIndexOf(".") + 1); - ontopath = ontopath.replace("." + ending, ".rdf" ); - URI outputURI = new File(ontopath).toURI(); - - OWLOntologyManager manager = OWLManager.createOWLOntologyManager(); - OWLOntology ontology = manager.loadOntologyFromPhysicalURI(inputURI); - manager.saveOntology(ontology, new RDFXMLOntologyFormat(), outputURI); - // Remove the ontology from the manager - manager.removeOntology(ontology.getURI()); - } - catch (Exception e) { - System.out.println("The ontology could not be created: " + e.getMessage()); - } - - } - } - - This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |