From: <ku...@us...> - 2010-02-13 19:33:57
Revision: 2034
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2034&view=rev
Author:   kurzum
Date:     2010-02-13 19:33:50 +0000 (Sat, 13 Feb 2010)

Log Message:
-----------
finished script for iterative learning

Modified Paths:
--------------
    trunk/src/dl-learner/org/dllearner/examples/KRKOntologyTBox.java
    trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java
    trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java
    trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java
    trunk/src/dl-learner/org/dllearner/utilities/examples/Examples.java

Added Paths:
-----------
    trunk/src/dl-learner/org/dllearner/scripts/tiger/GlobalTest.java

Modified: trunk/src/dl-learner/org/dllearner/examples/KRKOntologyTBox.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/examples/KRKOntologyTBox.java	2010-02-13 17:45:31 UTC (rev 2033)
+++ trunk/src/dl-learner/org/dllearner/examples/KRKOntologyTBox.java	2010-02-13 19:33:50 UTC (rev 2034)
@@ -70,11 +70,13 @@
     public void addConcept(String concept){
         try{
             //make Description
-            Description d = KBParser.parseConcept(concept,ontologyURI.toString()+"#");
-            //d.addChild(oecr);
+            KBParser.internalNamespace = ontologyURI.toString()+"#";
+            Description d = KBParser.parseConcept(concept);
             kb.addTBoxAxiom(new EquivalentClassesAxiom(getAtomicConcept("test"),d));
-        }catch (Exception e) {e.printStackTrace();}
+        }catch (Exception e) {
+            e.printStackTrace();
+        }
     }

     public void initOntologyTBox(){

Modified: trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java	2010-02-13 17:45:31 UTC (rev 2033)
+++ trunk/src/dl-learner/org/dllearner/kb/sparql/SparqlQueryDescriptionConvertVisitor.java	2010-02-13 19:33:50 UTC (rev 2034)
@@ -30,9 +30,12 @@
 import org.apache.log4j.Logger;
 import org.dllearner.algorithms.gp.ADC;
 import org.dllearner.core.ComponentManager;
+import org.dllearner.core.owl.Constant;
+import org.dllearner.core.owl.Datatype;
 import org.dllearner.core.owl.DatatypeExactCardinalityRestriction;
 import org.dllearner.core.owl.DatatypeMaxCardinalityRestriction;
 import org.dllearner.core.owl.DatatypeMinCardinalityRestriction;
+import org.dllearner.core.owl.DatatypeProperty;
 import org.dllearner.core.owl.DatatypeSomeRestriction;
 import org.dllearner.core.owl.DatatypeValueRestriction;
 import org.dllearner.core.owl.Description;
@@ -49,7 +52,9 @@
 import org.dllearner.core.owl.ObjectProperty;
 import org.dllearner.core.owl.ObjectSomeRestriction;
 import org.dllearner.core.owl.ObjectValueRestriction;
+import org.dllearner.core.owl.StringValueRestriction;
 import org.dllearner.core.owl.Thing;
+import org.dllearner.core.owl.TypedConstant;
 import org.dllearner.core.owl.Union;
 import org.dllearner.parser.KBParser;
 import org.dllearner.parser.ParseException;
@@ -229,15 +234,43 @@
         s.clear();
//        s.add("(\"http://nlp2rdf.org/ontology/Sentence\" AND (EXISTS \"http://nlp2rdf.org/ontology/syntaxTreeHasPart\".\"http://nachhalt.sfb632.uni-potsdam.de/owl/stts.owl#Pronoun\" AND EXISTS \"http://nlp2rdf.org/ontology/syntaxTreeHasPart\".\"http://nlp2rdf.org/ontology/sentencefinalpunctuation_tag\"))");
-        s.add("(\"http://nlp2rdf.org/ontology/Sentence\" AND (\"http://nlp2rdf.org/ontology/hasLemma\" VALUE \"test\" )");
+//        s.add("(\"http://nlp2rdf.org/ontology/Sentence\" AND (\"http://nlp2rdf.org/ontology/hasLemma\" VALUE \"test\" )");
(\"http://nlp2rdf.org/ontology/hasLemma\" VALUE \"test\" )"); + String prefix = "http://nlp2rdf.org/ontology/"; + String test = "(\"Sentence\" AND (EXISTS \"syntaxTreeHasPart\".\"VVPP\" AND EXISTS \"syntaxTreeHasPart\".(\"stts:AuxilliaryVerb\" AND \"hasLemma\" = werden)))"; + + ObjectProperty stp = new ObjectProperty(prefix+"syntaxTreeHasPart"); + DatatypeProperty dtp = new DatatypeProperty(prefix+"hasLemma"); + StringValueRestriction svr = new StringValueRestriction(dtp,"werden" ); + Intersection inner = new Intersection(new NamedClass(prefix+"Auxillary"), svr); + Intersection middle = new Intersection( + new ObjectSomeRestriction(stp, new NamedClass(prefix+"VVPP")), + new ObjectSomeRestriction(stp, inner)); + Intersection outer = new Intersection( + new NamedClass(prefix+"Sentence"), + middle + ); + + System.out.println(outer.toKBSyntaxString(null,null)); + System.out.println(test); + +// s.add(outer.toKBSyntaxString(null,null)); + SparqlQueryDescriptionConvertVisitor testVisitor = new SparqlQueryDescriptionConvertVisitor(); + String q = testVisitor.getSparqlQuery(outer.toKBSyntaxString()); + System.out.println(q); + if (true) { + System.exit(0); + } + // <http://nlp2rdf.org/ontology/sentencefinalpunctuation_tag> String query = ""; SparqlQueryDescriptionConvertVisitor visit = new SparqlQueryDescriptionConvertVisitor(); - visit.setLabels(true); - visit.setDistinct(true); - visit.setClassToSubclassesVirtuoso(subclassMap); + visit.setLabels(false); + visit.setDistinct(false); +// visit.setClassToSubclassesVirtuoso(subclassMap); + + for (String kbsyntax : s) { query = visit.getSparqlQuery(kbsyntax); result.put(kbsyntax, query); Modified: trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java 2010-02-13 17:45:31 UTC (rev 2033) +++ trunk/src/dl-learner/org/dllearner/scripts/tiger/ExperimentConfig.java 2010-02-13 19:33:50 UTC (rev 2034) @@ -8,11 +8,12 @@ public int resultLimit = -1; public int splits = 5; - public int initialsplits = 10; - public int iteration = 5; + public int initialsplits = 30; + public int iteration = 1; public boolean useStartClass = true; - public int noise = 5; + public boolean searchTree = false; + public int noise = 0; //sets ValueFrequency treshold and maxExecution time public boolean adaptive = true; public int maxExecutionTime = 40; Added: trunk/src/dl-learner/org/dllearner/scripts/tiger/GlobalTest.java =================================================================== --- trunk/src/dl-learner/org/dllearner/scripts/tiger/GlobalTest.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/scripts/tiger/GlobalTest.java 2010-02-13 19:33:50 UTC (rev 2034) @@ -0,0 +1,407 @@ +package org.dllearner.scripts.tiger; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URL; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.dllearner.algorithms.refinement2.ROLComponent2; +import org.dllearner.algorithms.refinement2.ROLearner2; +import org.dllearner.core.ComponentPool; +import org.dllearner.core.EvaluatedDescription; +import org.dllearner.core.KnowledgeSource; +import 
+import org.dllearner.core.LearningProblem;
+import org.dllearner.core.ReasonerComponent;
+import org.dllearner.core.configurators.ComponentFactory;
+import org.dllearner.core.owl.Description;
+import org.dllearner.core.owl.Individual;
+import org.dllearner.kb.OWLFile;
+import org.dllearner.kb.sparql.Cache;
+import org.dllearner.kb.sparql.SPARQLTasks;
+import org.dllearner.kb.sparql.SparqlEndpoint;
+import org.dllearner.kb.sparql.SparqlKnowledgeSource;
+import org.dllearner.kb.sparql.SparqlQuery;
+import org.dllearner.kb.sparql.SparqlQueryDescriptionConvertVisitor;
+import org.dllearner.learningproblems.PosNegLPStandard;
+import org.dllearner.reasoning.FastInstanceChecker;
+import org.dllearner.refinementoperators.RhoDRDown;
+import org.dllearner.utilities.Helper;
+import org.dllearner.utilities.JamonMonitorLogger;
+import org.dllearner.utilities.examples.ExMakerFixedSize;
+import org.dllearner.utilities.examples.ExampleDataCollector;
+import org.dllearner.utilities.examples.Examples;
+
+import com.jamonapi.Monitor;
+
+public class GlobalTest {
+    private static final Logger logger = Logger.getLogger(GlobalTest.class);
+
+    static DecimalFormat df = new DecimalFormat("00.###%");
+
+    static String backgroundXML = "files/tiger.noSchema.noImports.rdf";
+    static String propertiesXML = "files/propertiesOnly.rdf";
+    static String sentenceXMLFolder = "files/tiger/";
+    static String sentenceprefix = "http://nlp2rdf.org/ontology/s";
+    static String prefix = "http://nlp2rdf.org/ontology/";
+
+    static String active = "files/active_all_sentenceNumbers.txt";
+    static String passiveNoZU = "files/passive_noZuInf_sentenceNumbers.txt";
+    static String passiveWithZu = "files/passive_zuInf_sentenceNumbers.txt";
+    static String test_has_pos = "files/test_has_pos.txt";
+    static String test_has_neg = "files/test_has_neg.txt";
+
+    static SparqlEndpoint sparqlEndpoint;
+    static SPARQLTasks sparqlTasks;
+
+    static String sparqlEndpointURL = "http://db0.aksw.org:8893/sparql";
+    static String graph = "http://nlp2rdf.org/tiger";
+    static String rulegraph = "http://nlp2rdf.org/schema/rules1";
+
+
+    final static boolean debug = false;
+    //no randomization in examples
+    final static boolean randomizedebug = !debug;
+
+    public static void main(String[] args) {
+        LogHelper.initLoggers();
+        Logger.getLogger(Cache.class).setLevel(Level.INFO);
+        Logger.getLogger(ComponentPool.class).setLevel(Level.INFO);
+        Logger.getLogger(ROLearner2.class).setLevel(Level.TRACE);
+        Logger.getLogger(RhoDRDown.class).setLevel(Level.TRACE);
+        Logger.getLogger(SparqlQuery.class).setLevel(Level.INFO);
+
+        try {
+            sparqlEndpoint = new SparqlEndpoint(new URL(sparqlEndpointURL), new ArrayList<String>(Arrays
+                    .asList(new String[] { graph })), new ArrayList<String>());
+            sparqlTasks = new SPARQLTasks(Cache.getDefaultCache(), sparqlEndpoint);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+
+        Examples allExamples = new Examples();
+        SortedSet<String> positives;
+        SortedSet<String> negatives;
+
+//        positives = read(passiveWithZu);
+        positives = read(passiveNoZU);
+        negatives = read(active);
+
+        //removing overlap
+        positives.removeAll(negatives);
+        negatives.removeAll(positives);
+
+//        System.out.println(Helper.intersection(passiveZuInfSentences, activeSentences));
+//        System.out.println(Helper.intersection(passiveZuInfSentences, passiveNoZuSentences));
+//        System.out.println(Helper.intersection(activeSentences, passiveNoZuSentences));
+        allExamples.addPosTrain(positives);
+        allExamples.addNegTrain(negatives);
+
+        logger.debug("All examples \n"+allExamples);
+
+        ExperimentConfig config = new ExperimentConfig();
+        firstContact( allExamples, config);
+        JamonMonitorLogger.writeHTMLReport("log/tiger.html");
+        //retrieved becomes the new Examples, as the intersection with all
+        //and the ones chosen so far
+        //then select splits and
+        //add pos and neg again
+
+    }
+
+    public static void firstContact(Examples allExamples, ExperimentConfig config){
+        ExMakerFixedSize fs = new ExMakerFixedSize(allExamples, randomizedebug);
+        Examples learn = fs.select(config.initialsplits, config.initialsplits);
+        logger.debug("Initial training set \n"+learn);
+//        System.out.println(learn.getPosTrain());
+//        System.out.println(learn.getNegTrain());
+//        if (true) {
+//            System.exit(0);
+//        }
+//        int size = 0;
+        for(int i = 0 ; config.stopCondition(i, learn) ;i++ ) {
+            /*LEARNING*/
+            EvaluatedDescription ed = learn(learn, config);
+
+            /*RETRIEVING*/
+            SortedSet<String> retrieved = getSentences(ed, config.resultLimit);
+            logger.debug("Retrieved "+retrieved.size()+" sentences");
+
+            /*MASHING*/
+            //set of all positives intersected with the retrieved ones
+            SortedSet<String> posAsPos = Helper.intersection(retrieved, allExamples.getPosTrain());
+            logger.debug("Number of retrieved positives: "+posAsPos.size());
+            logger.debug("Number of total positives: "+allExamples.getPosTrain().size());
+            results(posAsPos, retrieved, allExamples);
+
+            //set of all positives intersected with the retrieved ones
+            SortedSet<String> negAsPos = Helper.intersection(retrieved, allExamples.getNegTrain());
+            logger.debug("Number of retrieved negatives: "+negAsPos.size());
+            logger.debug("Total: "+posAsPos.size()+" + "+negAsPos.size() +" = "+retrieved.size());
+
+//            if(retrieved.size()!=(posAsPos.size()+negAsPos.size())){
+//                logger.warn("sets are wrong");
+//                System.exit(0);
+//            }
+
+            Examples newlyFound = new Examples();
+            newlyFound.addPosTrain(Helper.intersection(retrieved, learn.getPosTest()));
+            newlyFound.addNegTrain(Helper.intersection(retrieved, learn.getNegTest()));
+            //validate here
+
+            fs = new ExMakerFixedSize(newlyFound, randomizedebug);
+            newlyFound = fs.select(config.splits, config.splits);
+
+            learn.addPosTrain(newlyFound.getPosTrain());
+            learn.addNegTrain(newlyFound.getNegTrain());
+            logger.debug("Next training set \n"+learn);
+//            size = learn.getPosTrain().size() + learn.getNegTrain().size();
+
+        }
+
+    }
+
+    private static void results(SortedSet<String> posAsPos, SortedSet<String> retrieved, Examples allExamples) {
+        double precision = precision( posAsPos.size(), retrieved.size());
+        double recall = recall( posAsPos.size(),allExamples.getPosTrain().size());
+        logger.info("F-Measure: "+df.format( (2*precision*recall)/(precision+recall)) );
+
+    }
+
+    public static double precision( int posAsPos, int retrieved){
+        double precision = ((double)posAsPos)/((double)retrieved);
+        logger.info("Precision: "+df.format(precision));
+        return precision;
+    }
+    public static double recall( int posAsPos, int allPositives){
+        double recall = ((double)posAsPos)/((double)allPositives);
+
+        logger.info("Recall: "+df.format(recall));
+        return recall;
+
+    }
+
+    private static Set<KnowledgeSource> _getOWL(Examples ex) throws Exception{
+        Set<KnowledgeSource> tmp = new HashSet<KnowledgeSource>();
+        List<URL> urls = new ArrayList<URL>();
+        urls.addAll(ExampleDataCollector.convert(sentenceXMLFolder, ex.getPosTrain()));
+        urls.addAll(ExampleDataCollector.convert(sentenceXMLFolder, ex.getNegTrain()));
+        urls.add(new File(backgroundXML).toURI().toURL());
+
+        for (URL u : urls) {
+            OWLFile ks = ComponentFactory.getOWLFile(u);
+            tmp.add(ks);
+        }
+        return tmp;
+    }
+    @SuppressWarnings("unused")
+    private static Set<KnowledgeSource> _getSPARQL(Examples ex) throws Exception{
+        Set<KnowledgeSource> tmp = new HashSet<KnowledgeSource>();
+
+        Set<String> examples = new TreeSet<String>();
+        examples.addAll(ex.getPosTrain());
+        examples.addAll(ex.getNegTrain());
+        SparqlKnowledgeSource ks = ComponentFactory.getSparqlKnowledgeSource(new URL(sparqlEndpointURL), examples);
+        ks.getConfigurator().setUrl(new URL(sparqlEndpointURL));
+        ks.getConfigurator().setDefaultGraphURIs(new HashSet<String>(Arrays.asList(new String[]{graph})));
+        ks.getConfigurator().setInstances(examples);
+        ks.getConfigurator().setDissolveBlankNodes(false);
+        ks.getConfigurator().setRecursionDepth(2);
+        ks.getConfigurator().setDissolveBlankNodes(false);
+        ks.getConfigurator().setCloseAfterRecursion(true);
+        ks.getConfigurator().setGetAllSuperClasses(true);
+        ks.getConfigurator().setGetPropertyInformation(false);
+        ks.getConfigurator().setUseLits(true);
+//        ks.getConfigurator().
+        OWLFile ks2 = ComponentFactory.getOWLFile(new File(propertiesXML).toURI().toURL());
+        tmp.add(ks);
+        tmp.add(ks2);
+
+        return tmp;
+    }
+
+    //test if virtuoso is correct
+    public static void validate(Description d, Examples newlyFound){
+        try {
+            ExMakerFixedSize fs = new ExMakerFixedSize(newlyFound);
+            Examples tmp = fs.select(100, 100);
+            FastInstanceChecker fc = _getFastInstanceChecker(tmp);
+            SortedSet<Individual> inds = fc.getIndividuals(d);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    public static FastInstanceChecker _getFastInstanceChecker(Examples ex)throws Exception{
+        Set<KnowledgeSource> tmp = _getOWL(ex);
+//        Set<KnowledgeSource> tmp = _getSPARQL(ex);
+
+
+        FastInstanceChecker rc = ComponentFactory.getFastInstanceChecker(tmp);
+        for (KnowledgeSource ks : tmp) {
+            ks.init();
+        }
+        rc.init();
+        return rc;
+    }
+
+    public static EvaluatedDescription learn(Examples ex, ExperimentConfig config) {
+        Monitor init = JamonMonitorLogger.getTimeMonitor(GlobalTest.class, "init").start();
+
+        EvaluatedDescription result = null;
+
+        try {
+            FastInstanceChecker rc = _getFastInstanceChecker(ex);
+            PosNegLPStandard lp = ComponentFactory
+                    .getPosNegLPStandard(rc, ex.getPosTrain(), ex.getNegTrain());
+            LearningAlgorithm la = _getROLLearner(lp, rc, config, ex);
+            lp.init();
+            la.init();
+            init.stop();
+            Monitor learning = JamonMonitorLogger.getTimeMonitor(GlobalTest.class, "learning")
+                    .start();
+            la.start();
+            learning.stop();
+
+            result = la.getCurrentlyBestEvaluatedDescription();
+            logger.debug(PrefixMap.toKBSyntaxString(result.getDescription()));
+            logger.debug(PrefixMap.toManchesterSyntaxString(result.getDescription()));
+
+        } catch (Exception e) {
+            e.printStackTrace();
+            System.exit(0);
+        }
+        return result;
+    }
+
+    public static SortedSet<String> getSentences(EvaluatedDescription ed, int resultLimit) {
+        SortedSet<String> result = new TreeSet<String>();
+        SparqlQueryDescriptionConvertVisitor visit = new SparqlQueryDescriptionConvertVisitor();
+        visit.setDistinct(true);
+        visit.setLabels(false);
+        visit.setLimit(resultLimit);
+        String sparqlQuery = "";
+        try {
+            sparqlQuery = visit.getSparqlQuery(ed.getDescription());
+        } catch (Exception e1) {
+            e1.printStackTrace();
+        }
+        logger.debug(PrefixMap.toKBSyntaxString(ed.getDescription()));
+        sparqlQuery = " \n define input:inference \"" + rulegraph + "\" \n" + "" + sparqlQuery;
+        logger.debug(sparqlQuery);
+
+        Monitor m = JamonMonitorLogger.getTimeMonitor(GlobalTest.class, "sparqlquery").start();
+        result.addAll(sparqlTasks.queryAsSet(sparqlQuery, "subject"));
+        m.stop();
+        logger.debug("query avg: " + ((double)m.getAvg() / (double)1000)+ " seconds (last: "+((double)m.getLastValue() / (double)1000)+")");
+        if(result.isEmpty()){
+
+            logger.error("sparql query returned no results ");
+            logger.error(sparqlQuery);
+            System.exit(0);
+        }
+        return result;
+    }
+
+    private static LearningAlgorithm _getROLLearner(LearningProblem lp, ReasonerComponent rc, ExperimentConfig config, Examples ex)
+            throws Exception {
+
+        int maxExecutionTime = config.maxExecutionTime;
+        int valueFrequencyThreshold = config.valueFrequencyThreshold;
+        if(config.adaptive){
+            maxExecutionTime = 2 * ex.sizeOfTrainingSets();
+            valueFrequencyThreshold = ex.getPosTrain().size();
+//            valueFrequencyThreshold = (int) Math.floor(0.8d*((double)ex.getPosTrain().size()));
+
+        }
+
+        ROLComponent2 la = ComponentFactory.getROLComponent2(lp, rc);
+        la.getConfigurator().setUseExistsConstructor(true);
+
+        la.getConfigurator().setUseAllConstructor(false);
+        la.getConfigurator().setUseCardinalityRestrictions(false);
+        la.getConfigurator().setUseNegation(false);
+        la.getConfigurator().setUseHasValueConstructor(false);
+        la.getConfigurator().setUseDataHasValueConstructor(true);
+        la.getConfigurator().setValueFrequencyThreshold(valueFrequencyThreshold);
+
+        la.getConfigurator().setIgnoredConcepts(new HashSet<String>(Arrays.asList(new String[]{
+                "http://nlp2rdf.org/ontology/sentencefinalpunctuation_tag",
+                "http://nlp2rdf.org/ontology/comma_tag",
+                "http://nachhalt.sfb632.uni-potsdam.de/owl/stts.owl#SentenceFinalPunctuation"
+        })));
+
+        la.getConfigurator().setNoisePercentage(config.noise);
+        la.getConfigurator().setTerminateOnNoiseReached(true);
+        la.getConfigurator().setMaxExecutionTimeInSeconds(maxExecutionTime);
+
+        if(config.useStartClass){
+            la.getConfigurator().setStartClass(prefix + "Sentence");
+        }
+
+        la.getConfigurator().setWriteSearchTree(config.searchTree);
+        la.getConfigurator().setSearchTreeFile("log/searchTreeTiger.txt");
+        la.getConfigurator().setReplaceSearchTree(true);
+        return la;
+    }
+
+    public static SortedSet<String> read(String f) {
+        SortedSet<String> result = new TreeSet<String>();
+        BufferedReader in = null;
+        try {
+            in = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
+
+            String line;
+            while ((line = in.readLine()) != null) {
+                try {
+                    line = line.trim();
+                    Integer.parseInt(line);
+                    if (!result.add(sentenceprefix + line)) {
+                        logger.error("reading failed");
+                        System.exit(0);
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.exit(0);
+                }
+            }
+
+        } catch (Exception e) {
+            e.printStackTrace();
+            logger.error("Could not read examples from: " + f);
+            System.exit(0);
+
+        } finally {
+            try {
+                in.close();
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+        logger.info("read " + result.size() + " lines from " + f);
+
+        return result;
+    }
+
+}

Modified: trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java	2010-02-13 17:45:31 UTC (rev 2033)
+++ trunk/src/dl-learner/org/dllearner/scripts/tiger/TestIterativeLearning.java	2010-02-13 19:33:50 UTC (rev 2034)
@@ -26,6 +26,8 @@
 import org.dllearner.core.LearningProblem;
 import org.dllearner.core.ReasonerComponent;
 import org.dllearner.core.configurators.ComponentFactory;
+import org.dllearner.core.owl.Description;
+import org.dllearner.core.owl.Individual;
 import org.dllearner.kb.OWLFile;
 import org.dllearner.kb.sparql.Cache;
 import org.dllearner.kb.sparql.SPARQLTasks;
@@ -36,6 +38,7 @@
 import org.dllearner.learningproblems.PosNegLPStandard;
 import org.dllearner.reasoning.FastInstanceChecker;
 import org.dllearner.refinementoperators.RhoDRDown;
+import org.dllearner.utilities.Files;
 import org.dllearner.utilities.Helper;
 import org.dllearner.utilities.JamonMonitorLogger;
 import org.dllearner.utilities.examples.ExMakerFixedSize;
@@ -47,7 +50,10 @@
 public class TestIterativeLearning {
     private static final Logger logger = Logger.getLogger(TestIterativeLearning.class);

-    static String backgroundXML = "files/tiger.noSchema.noImports.rdf";
+    static DecimalFormat df = new DecimalFormat("00.###%");
+
+//    static String backgroundXML = "files/tiger.noSchema.noImports.rdf";
+    static String backgroundXML = "files/tiger_trimmed_toPOS.rdf";
     static String propertiesXML = "files/propertiesOnly.rdf";
     static String sentenceXMLFolder = "files/tiger/";
     static String sentenceprefix = "http://nlp2rdf.org/ontology/s";
@@ -68,6 +74,8 @@

+
+
     final static boolean debug = false;
     //no randomization in examples
     final static boolean randomizedebug = !debug;
@@ -133,7 +141,7 @@
             EvaluatedDescription ed = learn(learn, config);

             /*RETRIEVING*/
-            SortedSet<String> retrieved = getSentences(ed, config.resultLimit);
+            SortedSet<String> retrieved = getSentences(ed, config.resultLimit, learn);
             logger.debug("Retrieved "+retrieved.size()+" sentences");

@@ -142,23 +150,34 @@
             SortedSet<String> posAsPos = Helper.intersection(retrieved, allExamples.getPosTrain());
             logger.debug("Number of retrieved positives: "+posAsPos.size());
             logger.debug("Number of total positives: "+allExamples.getPosTrain().size());
-            precision( posAsPos.size(), retrieved.size());
+            results(posAsPos, retrieved, allExamples);

             //set of all positives intersected with the retrieved ones
-            recall( posAsPos.size(),allExamples.getPosTrain().size());
-
             SortedSet<String> negAsPos = Helper.intersection(retrieved, allExamples.getNegTrain());
             logger.debug("Number of retrieved negatives: "+negAsPos.size());
             logger.debug("Total: "+posAsPos.size()+" + "+negAsPos.size() +" = "+retrieved.size());
+            //not covered
+
+
             // if(retrieved.size()!=(posAsPos.size()+negAsPos.size())){
             // logger.warn("sets are wrong");
             // System.exit(0);
             // }

             Examples newlyFound = new Examples();
-            newlyFound.addPosTrain(Helper.intersection(retrieved, learn.getPosTest()));
-            newlyFound.addNegTrain(Helper.intersection(retrieved, learn.getNegTest()));
+            SortedSet<String> discoveredPosInStore = Helper.intersection(retrieved, learn.getPosTest());
+            SortedSet<String> misclassifiedNegInStore = Helper.intersection(retrieved, learn.getNegTest());
+            newlyFound.addPosTrain(discoveredPosInStore);
+            newlyFound.addNegTrain(misclassifiedNegInStore);
+            int print = 5;
+            logger.info("Discovered "+discoveredPosInStore.size()+" positive sentences in store (printing "+print+"):");
+            _getLabels(discoveredPosInStore, print);
+            logger.info("Misclassified "+misclassifiedNegInStore.size()+" negative sentences in store (printing "+print+"):");
+            _getLabels(misclassifiedNegInStore, print);
+
+
+
             fs = new ExMakerFixedSize(newlyFound, randomizedebug);
             newlyFound = fs.select(config.splits, config.splits);
@@ -175,20 +194,33 @@

     }

-    public static void precision( int posAsPos, int retrieved){
-        logger.info("Precision: "+DecimalFormat.getPercentInstance().format(((double)posAsPos)/(double)retrieved)+"%");
+    private static void results(SortedSet<String> posAsPos, SortedSet<String> retrieved, Examples allExamples) {
+        double precision = precision( posAsPos.size(), retrieved.size());
+        double recall = recall( posAsPos.size(),allExamples.getPosTrain().size());
+        logger.info("F-Measure: "+df.format( (2*precision*recall)/(precision+recall)) );
+
     }
-    public static void recall( int posAsPos, int allPositives){
-        logger.info("Recall: "+DecimalFormat.getPercentInstance().format(((double)posAsPos)/(double)allPositives)+"%");
+
+    public static double precision( int posAsPos, int retrieved){
+        double precision = ((double)posAsPos)/((double)retrieved);
+        logger.info("Precision: "+df.format(precision));
+        return precision;
+    }
+    public static double recall( int posAsPos, int allPositives){
+        double recall = ((double)posAsPos)/((double)allPositives);
+        logger.info("Recall: "+df.format(recall));
+        return recall;
+
     }

     private static Set<KnowledgeSource> _getOWL(Examples ex) throws Exception{
         Set<KnowledgeSource> tmp = new HashSet<KnowledgeSource>();
         List<URL> urls = new ArrayList<URL>();
+        urls.add(new File(backgroundXML).toURI().toURL());
         urls.addAll(ExampleDataCollector.convert(sentenceXMLFolder, ex.getPosTrain()));
         urls.addAll(ExampleDataCollector.convert(sentenceXMLFolder, ex.getNegTrain()));
-        urls.add(new File(backgroundXML).toURI().toURL());
+

         for (URL u : urls) {
             OWLFile ks = ComponentFactory.getOWLFile(u);
@@ -222,25 +254,42 @@
         return tmp;
     }

-    public static EvaluatedDescription learn(Examples ex, ExperimentConfig config) {
-        Monitor init = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "init").start();
-
-        EvaluatedDescription result = null;
-
+    //test if virtuoso is correct
+    public static void validate(Description d, Examples newlyFound){
         try {
+            ExMakerFixedSize fs = new ExMakerFixedSize(newlyFound);
+            Examples tmp = fs.select(100, 100);
+            FastInstanceChecker fc = _getFastInstanceChecker(tmp);
+            @SuppressWarnings("unused")
+            SortedSet<Individual> inds = fc.getIndividuals(d);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    public static FastInstanceChecker _getFastInstanceChecker(Examples ex)throws Exception{
         Set<KnowledgeSource> tmp = _getOWL(ex);
//        Set<KnowledgeSource> tmp = _getSPARQL(ex);

         FastInstanceChecker rc = ComponentFactory.getFastInstanceChecker(tmp);
-        PosNegLPStandard lp = ComponentFactory
-                .getPosNegLPStandard(rc, ex.getPosTrain(), ex.getNegTrain());
-        LearningAlgorithm la = _getROLLearner(lp, rc, config, ex);
-
         for (KnowledgeSource ks : tmp) {
             ks.init();
         }
         rc.init();
+        return rc;
+    }
+
+    public static EvaluatedDescription learn(Examples ex, ExperimentConfig config) {
+        Monitor init = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "init").start();
+
+        EvaluatedDescription result = null;
+
+        try {
+            FastInstanceChecker rc = _getFastInstanceChecker(ex);
+            PosNegLPStandard lp = ComponentFactory
+                    .getPosNegLPStandard(rc, ex.getPosTrain(), ex.getNegTrain());
+            LearningAlgorithm la = _getROLLearner(lp, rc, config, ex);
             lp.init();
             la.init();
             init.stop();
@@ -260,33 +309,64 @@
         return result;
     }

-    public static SortedSet<String> getSentences(EvaluatedDescription ed, int resultLimit) {
+    public static SortedSet<String> getSentences(EvaluatedDescription ed, int resultLimit, Examples justforFindingTheBug) {
         SortedSet<String> result = new TreeSet<String>();
         SparqlQueryDescriptionConvertVisitor visit = new SparqlQueryDescriptionConvertVisitor();
         visit.setDistinct(true);
         visit.setLabels(false);
         visit.setLimit(resultLimit);
-        String sparqlQuery = "";
+        String sparqlQueryGood = "";
+        String sparqlQueryBad = "";
         try {
-            sparqlQuery = visit.getSparqlQuery(ed.getDescription());
+            sparqlQueryGood = visit.getSparqlQuery(ed.getDescription().toKBSyntaxString());
+            sparqlQueryBad = visit.getSparqlQuery(ed.getDescription());
+            if(!sparqlQueryGood.equals(sparqlQueryBad)){
+                String file = "errorDescription/"+System.currentTimeMillis();
+                justforFindingTheBug.writeExamples(file);
+                Files.appendFile(new File(file), "\n\n/**\nGood:\n"+sparqlQueryGood+"\nBad:\n"+sparqlQueryBad+"**/");
+            }
+
         } catch (Exception e1) {
             e1.printStackTrace();
         }
-        logger.debug(PrefixMap.toKBSyntaxString(ed.getDescription()));
-        sparqlQuery = " \n define input:inference \"" + rulegraph + "\" \n" + "" + sparqlQuery;
-        logger.debug(sparqlQuery);
+        logger.debug("USING CONCEPT: "+PrefixMap.toKBSyntaxString(ed.getDescription()));
+        sparqlQueryGood = " \n define input:inference \"" + rulegraph + "\" \n" + "" + sparqlQueryGood;
+        logger.debug(sparqlQueryGood);

         Monitor m = JamonMonitorLogger.getTimeMonitor(TestIterativeLearning.class, "sparqlquery").start();
-        result.addAll(sparqlTasks.queryAsSet(sparqlQuery, "subject"));
+        result.addAll(sparqlTasks.queryAsSet(sparqlQueryGood, "subject"));
+
         m.stop();
         logger.debug("query avg: " + ((double)m.getAvg() / (double)1000)+ " seconds (last: "+((double)m.getLastValue() / (double)1000)+")");
         if(result.isEmpty()){
             logger.error("sparql query returned no results ");
-            logger.error(sparqlQuery);
+            logger.error(sparqlQueryGood);
             System.exit(0);
         }
         return result;
     }
+
+    private static void _getLabels(SortedSet<String> sentenceURIs, int limit){
+        int i = 0;
+        for (String sentenceURI : sentenceURIs) {
+            if(i>=limit){
+                break;
+            }
+            i++;
+            _getLabel(sentenceURI);
+        }
+    }
+
+    private static void _getLabel(String sentenceURI){
+        String query = "SELECT * FROM <"+graph+"> " +
+                "{ <"+sentenceURI+"> rdfs:label ?label . }";
+        SortedSet<String> s = sparqlTasks.queryAsSet(query, "label");
+        if(s.isEmpty()){
+            logger.warn("no label for "+sentenceURI);
+        }else{
+            logger.debug(sentenceURI.replace(prefix, "")+" "+s.first());
+        }
+    }

     private static LearningAlgorithm _getROLLearner(LearningProblem lp, ReasonerComponent rc, ExperimentConfig config, Examples ex)
             throws Exception {
@@ -295,7 +375,8 @@
         int valueFrequencyThreshold = config.valueFrequencyThreshold;
         if(config.adaptive){
             maxExecutionTime = 2 * ex.sizeOfTrainingSets();
-            valueFrequencyThreshold = (int) Math.floor(0.8d*((double)ex.getPosTrain().size()));
+            valueFrequencyThreshold = ex.getPosTrain().size();
+//            valueFrequencyThreshold = (int) Math.floor(0.8d*((double)ex.getPosTrain().size()));

         }
@@ -324,9 +405,9 @@
             la.getConfigurator().setStartClass(prefix + "Sentence");
         }

-        la.getConfigurator().setWriteSearchTree(false);
-        la.getConfigurator().setSearchTreeFile("log/searchTree.txt");
-        la.getConfigurator().setReplaceSearchTree(false);
+        la.getConfigurator().setWriteSearchTree(config.searchTree);
+        la.getConfigurator().setSearchTreeFile("log/searchTreeTiger.txt");
+        la.getConfigurator().setReplaceSearchTree(true);
         return la;
     }

Modified: trunk/src/dl-learner/org/dllearner/utilities/examples/Examples.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/utilities/examples/Examples.java	2010-02-13 17:45:31 UTC (rev 2033)
+++ trunk/src/dl-learner/org/dllearner/utilities/examples/Examples.java	2010-02-13 19:33:50 UTC (rev 2034)
@@ -26,6 +26,7 @@
 import java.util.TreeSet;

 import org.apache.log4j.Logger;
+import org.dllearner.utilities.URLencodeUTF8;

 /**
  * a container for examples
@@ -154,12 +155,19 @@
     public void writeExamples(String filename) {
         try {
             FileWriter a = new FileWriter(filename, false);
+
+            StringBuffer buffer = new StringBuffer();
+            buffer.append("\n\n\n\n\n");
             for (String s : posTrain) {
-                a.write("+\"" + s + "\"\n");
+                a.write("import(\""+URLencodeUTF8.encode(s)+"\");\n");
+                buffer.append("+\"" + s + "\"\n");
             }
             for (String s : negTrain) {
-                a.write("-\"" + s + "\"\n");
+                a.write("import(\""+URLencodeUTF8.encode(s)+"\");\n");
+                buffer.append("-\"" + s + "\"\n");
             }
+
+            a.write(buffer.toString());
             a.flush();
             a.close();
             logger.info("wrote examples to " + filename);
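
Aside (not part of the commit): the evaluation added in results()/precision()/recall() above is the standard precision/recall/F-measure computation over the retrieved sentence set. A minimal standalone sketch, with illustrative class name and example counts only, might look like this:

import java.text.DecimalFormat;

// Illustrative sketch of the metrics used by results(); names and numbers are hypothetical.
public class RetrievalMetricsSketch {
    private static final DecimalFormat df = new DecimalFormat("00.###%");

    // fraction of retrieved sentences that are known positives
    static double precision(int posAsPos, int retrieved) {
        return retrieved == 0 ? 0d : (double) posAsPos / (double) retrieved;
    }

    // fraction of all known positives that were retrieved
    static double recall(int posAsPos, int allPositives) {
        return allPositives == 0 ? 0d : (double) posAsPos / (double) allPositives;
    }

    // harmonic mean of precision and recall
    static double fMeasure(double p, double r) {
        return (p + r) == 0d ? 0d : (2 * p * r) / (p + r);
    }

    public static void main(String[] args) {
        // e.g. 40 of 50 retrieved sentences are true positives, 80 positives exist in total
        double p = precision(40, 50);   // 0.8
        double r = recall(40, 80);      // 0.5
        System.out.println("Precision: " + df.format(p));
        System.out.println("Recall: " + df.format(r));
        System.out.println("F-Measure: " + df.format(fMeasure(p, r)));
    }
}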