From: <jen...@us...> - 2010-07-14 15:39:57
|
Revision: 2199 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2199&view=rev Author: jenslehmann Date: 2010-07-14 15:39:50 +0000 (Wed, 14 Jul 2010) Log Message: ----------- - fixed bug in description minimizer - fixed bug in evaluated set - CELOE now defaults to not suppressing expressions, which contain an expression already on the suggestion list - wrote unit tests for minimizer and sub expression checks - extended unit tests for heuristics - implemented search tree option for CELOE Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/algorithms/celoe/CELOE.java trunk/src/dl-learner/org/dllearner/core/configurators/CELOEConfigurator.java trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java trunk/src/dl-learner/org/dllearner/test/junit/TestOntologies.java trunk/src/dl-learner/org/dllearner/utilities/owl/DescriptionMinimizer.java trunk/src/dl-learner/org/dllearner/utilities/owl/EvaluatedDescriptionSet.java Added Paths: ----------- trunk/src/dl-learner/org/dllearner/test/junit/ClassExpressionTests.java Removed Paths: ------------- trunk/src/dl-learner/org/dllearner/test/junit/MinimizeTests.java Modified: trunk/src/dl-learner/org/dllearner/algorithms/celoe/CELOE.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/celoe/CELOE.java 2010-07-14 10:08:19 UTC (rev 2198) +++ trunk/src/dl-learner/org/dllearner/algorithms/celoe/CELOE.java 2010-07-14 15:39:50 UTC (rev 2199) @@ -19,6 +19,7 @@ */ package org.dllearner.algorithms.celoe; +import java.io.File; import java.text.DecimalFormat; import java.util.Collection; import java.util.Iterator; @@ -30,6 +31,7 @@ import java.util.TreeSet; import org.apache.log4j.Logger; +import org.dllearner.algorithms.refinement2.ExampleBasedNode; import org.dllearner.core.ComponentInitException; import org.dllearner.core.EvaluatedDescription; import org.dllearner.core.LearningAlgorithm; @@ -39,6 +41,7 @@ import org.dllearner.core.options.BooleanConfigOption; import org.dllearner.core.options.CommonConfigOptions; import org.dllearner.core.options.ConfigOption; +import org.dllearner.core.options.StringConfigOption; import org.dllearner.core.owl.ClassHierarchy; import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Individual; @@ -53,6 +56,7 @@ import org.dllearner.refinementoperators.OperatorInverter; import org.dllearner.refinementoperators.RefinementOperator; import org.dllearner.refinementoperators.RhoDRDown; +import org.dllearner.utilities.Files; import org.dllearner.utilities.Helper; import org.dllearner.utilities.owl.ConceptComparator; import org.dllearner.utilities.owl.ConceptTransformation; @@ -119,8 +123,13 @@ // important parameters private double noise; private double maxDepth; - private boolean filterFollowsFromKB; + private boolean filterFollowsFromKB; + // less important parameters + // forces that one solution cannot be subexpression of another expression; this option is useful to get diversity + // but it can also suppress quite useful expressions + private boolean forceMutualDifference = false; + // utility variables private String baseURI; private Map<String, String> prefixes; @@ -169,6 +178,9 @@ options.add(CommonConfigOptions.getInstanceBasedDisjoints()); options.add(new BooleanConfigOption("filterDescriptionsFollowingFromKB", "If true, then the results will not contain suggestions, which already follow logically from the knowledge base. Be careful, since this requires a potentially expensive consistency check for candidate solutions.", false)); options.add(new BooleanConfigOption("reuseExistingDescription", "If true, the algorithm tries to find a good starting point close to an existing definition/super class of the given class in the knowledge base.", false)); + options.add(new BooleanConfigOption("writeSearchTree", "specifies whether to write a search tree", false)); + options.add(new StringConfigOption("searchTreeFile","file to use for the search tree", "log/searchTree.txt")); + options.add(new BooleanConfigOption("replaceSearchTree","specifies whether to replace the search tree in the log file after each run or append the new search tree", false)); return options; } @@ -193,6 +205,9 @@ operator = new RhoDRDown(reasoner, classHierarchy, startClass, configurator); baseURI = reasoner.getBaseURI(); prefixes = reasoner.getPrefixes(); + if(configurator.getWriteSearchTree()) { + Files.clearFile(new File(configurator.getSearchTreeFile())); + } bestEvaluatedDescriptions = new EvaluatedDescriptionSet(configurator.getMaxNrOfResults()); @@ -374,6 +389,24 @@ updateMinMaxHorizExp(nextNode); + // writing the search tree (if configured) + if (configurator.getWriteSearchTree()) { + String treeString = "best node: " + bestEvaluatedDescriptions.getBest() + "\n"; + if (refinements.size() > 1) { + treeString += "all expanded nodes:\n"; + for (Description n : refinements) { + treeString += " " + n + "\n"; + } + } + treeString += startNode.toTreeString(baseURI); + treeString += "\n"; + + if (configurator.getReplaceSearchTree()) + Files.createFile(new File(configurator.getSearchTreeFile()), treeString); + else + Files.appendFile(new File(configurator.getSearchTreeFile()), treeString); + } + // System.out.println(loop); loop++; } @@ -486,6 +519,8 @@ return true; } +// System.out.println("description " + description + " accuracy " + accuracy); + // maybe add to best descriptions (method keeps set size fixed); // we need to make sure that this does not get called more often than // necessary since rewriting is expensive @@ -498,30 +533,42 @@ (accuracy >= accThreshold && description.getLength() < worst.getDescriptionLength())); } +// System.out.println(isCandidate); + // System.out.println("Test4 " + new Date()); if(isCandidate) { + Description niceDescription = rewriteNode(node); ConceptTransformation.transformToOrderedForm(niceDescription, descriptionComparator); // Description niceDescription = node.getDescription(); // another test: none of the other suggested descriptions should be // a subdescription of this one unless accuracy is different + // => comment: on the one hand, this appears to be too strict, because once A is a solution then everything containing + // A is not a candidate; on the other hand this suppresses many meaningless extensions of A boolean shorterDescriptionExists = false; - for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet()) { - if(Math.abs(ed.getAccuracy()-accuracy) <= 0.00001 && ConceptTransformation.isSubdescription(niceDescription, ed.getDescription())) { - shorterDescriptionExists = true; - break; - } + if(forceMutualDifference) { + for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet()) { + if(Math.abs(ed.getAccuracy()-accuracy) <= 0.00001 && ConceptTransformation.isSubdescription(niceDescription, ed.getDescription())) { +// System.out.println("shorter: " + ed.getDescription()); + shorterDescriptionExists = true; + break; + } + } } +// System.out.println("shorter description? " + shorterDescriptionExists + " nice: " + niceDescription); + if(!shorterDescriptionExists) { if(!filterFollowsFromKB || !((ClassLearningProblem)learningProblem).followsFromKB(niceDescription)) { +// System.out.println("Test2"); bestEvaluatedDescriptions.add(niceDescription, accuracy, learningProblem); // System.out.println("acc: " + accuracy); // System.out.println(bestEvaluatedDescriptions); } } +// System.out.println(bestEvaluatedDescriptions.getSet().size()); } // System.out.println("Test5 " + new Date()); Modified: trunk/src/dl-learner/org/dllearner/core/configurators/CELOEConfigurator.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/configurators/CELOEConfigurator.java 2010-07-14 10:08:19 UTC (rev 2198) +++ trunk/src/dl-learner/org/dllearner/core/configurators/CELOEConfigurator.java 2010-07-14 15:39:50 UTC (rev 2199) @@ -219,6 +219,33 @@ public boolean getReuseExistingDescription() { return (Boolean) ComponentManager.getInstance().getConfigOptionValue(cELOE, "reuseExistingDescription") ; } +/** +* writeSearchTree specifies whether to write a search tree. +* mandatory: false| reinit necessary: true +* default value: false +* @return boolean +**/ +public boolean getWriteSearchTree() { +return (Boolean) ComponentManager.getInstance().getConfigOptionValue(cELOE, "writeSearchTree") ; +} +/** +* searchTreeFile file to use for the search tree. +* mandatory: false| reinit necessary: true +* default value: log/searchTree.txt +* @return String +**/ +public String getSearchTreeFile() { +return (String) ComponentManager.getInstance().getConfigOptionValue(cELOE, "searchTreeFile") ; +} +/** +* replaceSearchTree specifies whether to replace the search tree in the log file after each run or append the new search tree. +* mandatory: false| reinit necessary: true +* default value: false +* @return boolean +**/ +public boolean getReplaceSearchTree() { +return (Boolean) ComponentManager.getInstance().getConfigOptionValue(cELOE, "replaceSearchTree") ; +} /** * @param useAllConstructor specifies whether the universal concept constructor is used in the learning algorithm. @@ -382,6 +409,33 @@ ComponentManager.getInstance().applyConfigEntry(cELOE, "reuseExistingDescription", reuseExistingDescription); reinitNecessary = true; } +/** +* @param writeSearchTree specifies whether to write a search tree. +* mandatory: false| reinit necessary: true +* default value: false +**/ +public void setWriteSearchTree(boolean writeSearchTree) { +ComponentManager.getInstance().applyConfigEntry(cELOE, "writeSearchTree", writeSearchTree); +reinitNecessary = true; +} +/** +* @param searchTreeFile file to use for the search tree. +* mandatory: false| reinit necessary: true +* default value: log/searchTree.txt +**/ +public void setSearchTreeFile(String searchTreeFile) { +ComponentManager.getInstance().applyConfigEntry(cELOE, "searchTreeFile", searchTreeFile); +reinitNecessary = true; +} +/** +* @param replaceSearchTree specifies whether to replace the search tree in the log file after each run or append the new search tree. +* mandatory: false| reinit necessary: true +* default value: false +**/ +public void setReplaceSearchTree(boolean replaceSearchTree) { +ComponentManager.getInstance().applyConfigEntry(cELOE, "replaceSearchTree", replaceSearchTree); +reinitNecessary = true; +} /** * true, if this component needs reinitializsation. Added: trunk/src/dl-learner/org/dllearner/test/junit/ClassExpressionTests.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/junit/ClassExpressionTests.java (rev 0) +++ trunk/src/dl-learner/org/dllearner/test/junit/ClassExpressionTests.java 2010-07-14 15:39:50 UTC (rev 2199) @@ -0,0 +1,87 @@ +/** + * Copyright (C) 2007-2010, Jens Lehmann + * + * This file is part of DL-Learner. + * + * DL-Learner is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * DL-Learner is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ +package org.dllearner.test.junit; + +import static org.junit.Assert.*; + +import org.dllearner.core.ReasonerComponent; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Intersection; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectAllRestriction; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.parser.KBParser; +import org.dllearner.parser.ParseException; +import org.dllearner.test.junit.TestOntologies.TestOntology; +import org.dllearner.utilities.owl.ConceptTransformation; +import org.dllearner.utilities.owl.DescriptionMinimizer; +import org.junit.Test; + +/** + * Tests on class expressins, e.g. transformations or tests on them. + * + * @author Jens Lehmann + * + */ +public class ClassExpressionTests { + + @Test + public void minimizeTest1() throws ParseException { + ReasonerComponent reasoner = TestOntologies.getTestOntology(TestOntology.FATHER_OE); + DescriptionMinimizer minimizer = new DescriptionMinimizer(reasoner); + Description d = KBParser.parseConcept("(\"http://example.com/father#male\" AND (\"http://example.com/father#male\" OR EXISTS \"http://example.com/father#hasChild\".TOP))"); + Description minD = minimizer.minimize(d); + assertTrue(minD.toString().equals("http://example.com/father#male")); + } + + @Test + public void minimizeTest2() throws ParseException { + // this tests for a bug, when in A AND A AND SOMETHING, both A were removed because they subsume + // each other, while in fact only one A should be removed + ReasonerComponent reasoner = TestOntologies.getTestOntology(TestOntology.MDM); + DescriptionMinimizer minimizer = new DescriptionMinimizer(reasoner); + NamedClass nc = new NamedClass("http://acl/BMV#MedicalThings"); + ObjectProperty op = new ObjectProperty("http://acl/BMV#refersSubstance"); + Description tmp1 = new ObjectAllRestriction(op,nc); + Description d = new Intersection(nc,tmp1); + Description d2 = new Intersection(nc,nc,tmp1); + Description minD = minimizer.minimizeClone(d); + Description minD2 = minimizer.minimizeClone(d2); + + assertEquals(minD.toString(),minD2.toString()); + } + + @Test + public void subExpressionTest1() throws ParseException { + Description d = KBParser.parseConcept("(\"http://example.com/father#male\" AND (\"http://example.com/father#male\" OR EXISTS \"http://example.com/father#hasChild\".TOP))"); + Description d2 = KBParser.parseConcept("EXISTS \"http://example.com/father#hasChild\".TOP"); + assertTrue(ConceptTransformation.isSubdescription(d, d2)); + + Description d3 = KBParser.parseConcept("(\"http://example.com/test#A\" AND (\"http://example.com/father#A\" AND EXISTS \"http://example.com/father#hasChild\".TOP))"); + Description d4 = KBParser.parseConcept("EXISTS \"http://example.com/father#hasChild\".TOP"); + assertTrue(ConceptTransformation.isSubdescription(d3, d4)); + + // related to http://sourceforge.net/tracker/?func=detail&atid=986319&aid=3029181&group_id=203619 + Description d5 = KBParser.parseConcept("(\"http://acl/BMV#MedicalThings\" AND (\"http://acl/BMV#MedicalThings\" AND ALL \"http://acl/BMV#refersSubstance\".\"http://acl/BMV#MedicalThings\"))"); + Description d6 = KBParser.parseConcept("ALL \"http://acl/BMV#refersSubstance\".\"http://acl/BMV#MedicalThings\""); + assertTrue(ConceptTransformation.isSubdescription(d5, d6)); + + } +} Modified: trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java 2010-07-14 10:08:19 UTC (rev 2198) +++ trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java 2010-07-14 15:39:50 UTC (rev 2199) @@ -115,6 +115,7 @@ // TODO: test super class learning + // TODO: test noise parameter } // the class learning problem provides several ways to get the accuracy of a description, this method Deleted: trunk/src/dl-learner/org/dllearner/test/junit/MinimizeTests.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/junit/MinimizeTests.java 2010-07-14 10:08:19 UTC (rev 2198) +++ trunk/src/dl-learner/org/dllearner/test/junit/MinimizeTests.java 2010-07-14 15:39:50 UTC (rev 2199) @@ -1,46 +0,0 @@ -/** - * Copyright (C) 2007-2009, Jens Lehmann - * - * This file is part of DL-Learner. - * - * DL-Learner is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. - * - * DL-Learner is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - */ -package org.dllearner.test.junit; - -import org.dllearner.core.ReasonerComponent; -import org.dllearner.core.owl.Description; -import org.dllearner.parser.KBParser; -import org.dllearner.parser.ParseException; -import org.dllearner.test.junit.TestOntologies.TestOntology; -import org.dllearner.utilities.owl.DescriptionMinimizer; -import org.junit.Test; - -/** - * Tests for minimizing class descriptions. - * - * @author Jens Lehmann - * - */ -public class MinimizeTests { - - @Test - public void minimizeTest1() throws ParseException { - ReasonerComponent reasoner = TestOntologies.getTestOntology(TestOntology.FATHER_OE); - DescriptionMinimizer minimizer = new DescriptionMinimizer(reasoner); - Description d = KBParser.parseConcept("(\"http://example.com/father#male\" AND (\"http://example.com/father#male\" OR EXISTS \"http://example.com/father#hasChild\".TOP))"); - Description minD = minimizer.minimize(d); - assert(minD.toString().equals("http://example.com/father#male")); - } -} Modified: trunk/src/dl-learner/org/dllearner/test/junit/TestOntologies.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/junit/TestOntologies.java 2010-07-14 10:08:19 UTC (rev 2198) +++ trunk/src/dl-learner/org/dllearner/test/junit/TestOntologies.java 2010-07-14 15:39:50 UTC (rev 2199) @@ -41,7 +41,7 @@ */ public final class TestOntologies { - public enum TestOntology { EMPTY, SIMPLE, SIMPLE_NO_DR, SIMPLE_NO_DISJOINT, SIMPLE_NO_DR_DISJOINT, SIMPLE2, SIMPLE3, R1SUBR2, DATA1, FIVE_ROLES, FATHER, FATHER_OE, CARCINOGENESIS, EPC_OE, KRK_ZERO_ONE, DBPEDIA_OWL, TRAINS_OWL, RHO1, SWORE }; + public enum TestOntology { EMPTY, SIMPLE, SIMPLE_NO_DR, SIMPLE_NO_DISJOINT, SIMPLE_NO_DR_DISJOINT, SIMPLE2, SIMPLE3, R1SUBR2, DATA1, FIVE_ROLES, FATHER, FATHER_OE, CARCINOGENESIS, EPC_OE, KRK_ZERO_ONE, DBPEDIA_OWL, TRAINS_OWL, RHO1, SWORE, MDM }; public static ReasonerComponent getTestOntology(TestOntology ont) { String kbString = ""; @@ -134,6 +134,8 @@ owlFile = "examples/cross-benchmark/trains/trains.owl"; } else if(ont.equals(TestOntology.SWORE)) { owlFile = "examples/swore/swore.rdf"; + } else if(ont.equals(TestOntology.MDM)) { + owlFile = "resources/test/MDM0.73.owl"; } try { Modified: trunk/src/dl-learner/org/dllearner/utilities/owl/DescriptionMinimizer.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/owl/DescriptionMinimizer.java 2010-07-14 10:08:19 UTC (rev 2198) +++ trunk/src/dl-learner/org/dllearner/utilities/owl/DescriptionMinimizer.java 2010-07-14 15:39:50 UTC (rev 2199) @@ -146,52 +146,95 @@ } return description; } else if(description instanceof Intersection || description instanceof Union) { - List<Integer> toRemove = new LinkedList<Integer>(); - // intersection + if(description instanceof Intersection) { - // in an intersection, we have that D1 \sqcap D2 \equiv D1 if - // D1 \sqsubseteq D2; this means we first check whether the - // first element in a union is superclass of any other element in the - // union; if yes, then we delete it and proceed to the next element for(int i=0; i<children.size(); i++) { - for(int j=0; j<children.size(); j++) { + for(int j=0; j<children.size(); j++) { if(i != j && isSubclassOf(children.get(j), children.get(i))) { - toRemove.add(i-toRemove.size()); - break; + // remove element because it is super class of another element + children.remove(i); + // we apply the minimization procedure again after removal of the element + // (this is not the fastest implementation but avoids bugs as in the previous code) + if(children.size()==1) { + return minimize(children.get(0)); + } else { + return minimize(description); + } } } - } - // union + } } else { - // in a union, we have that D1 \sqcup D2 \equiv D2 if - // D1 \sqsubseteq D2; this means we first check whether the - // first element in a union is subclass of any other element in the - // union; if yes, then we delete it and proceed to the next element - // (note the difference to intersection) for(int i=0; i<children.size(); i++) { for(int j=0; j<children.size(); j++) { if(i != j && isSubclassOf(children.get(i), children.get(j))) { - toRemove.add(i-toRemove.size()); - break; + children.remove(i); + if(children.size()==1) { + return minimize(children.get(0)); + } else { + return minimize(description); + } } } - } - } - // if all elements are superfluous wrt. another element, then we need - // to keep at least one - if(toRemove.size() == children.size()) { - return children.get(0); - } else { - // remove all elements according to remove list - for(int pos : toRemove) { - children.remove(pos); } - // dissolve intersection with only one element - if(children.size()==1) { - return children.get(0); - } - return description; } + + // no subclass relationships => description is already minimal + return description; + + // the code below is buggy because in "A AND A AND C", it removes both As + +// List<Integer> toRemove = new LinkedList<Integer>(); +// // intersection +// if(description instanceof Intersection) { +// // in an intersection, we have that D1 \sqcap D2 \equiv D1 if +// // D1 \sqsubseteq D2; this means we first check whether the +// // first element in an intersection is subclass of any other element in the +// // intersection; if yes, then we delete it and proceed to the next element +// for(int i=0; i<children.size(); i++) { +// for(int j=0; j<children.size(); j++) { +// if(i!=j) +// System.out.println(children.get(i) + " -- " + children.get(j)); +// if(i != j && isSubclassOf(children.get(j), children.get(i))) { +// System.out.println("sub"); +// toRemove.add(i-toRemove.size()); +// break; +// } +// } +// } +// // union +// } else { +// // in a union, we have that D1 \sqcup D2 \equiv D2 if +// // D1 \sqsubseteq D2; this means we first check whether the +// // first element in a union is subclass of any other element in the +// // union; if yes, then we delete it and proceed to the next element +// // (note the difference to intersection) +// for(int i=0; i<children.size(); i++) { +// for(int j=0; j<children.size(); j++) { +// if(i != j && isSubclassOf(children.get(i), children.get(j))) { +// toRemove.add(i-toRemove.size()); +// break; +// } +// } +// } +// } +// +// System.out.println("to remove: " + toRemove); +// +// // if all elements are superfluous wrt. another element, then we need +// // to keep at least one +// if(toRemove.size() == children.size()) { +// return children.get(0); +// } else { +// // remove all elements according to remove list +// for(int pos : toRemove) { +// children.remove(pos); +// } +// // dissolve intersection with only one element +// if(children.size()==1) { +// return children.get(0); +// } +// return description; +// } } else { throw new Error("Cannot minimize description " + description + "."); } Modified: trunk/src/dl-learner/org/dllearner/utilities/owl/EvaluatedDescriptionSet.java =================================================================== --- trunk/src/dl-learner/org/dllearner/utilities/owl/EvaluatedDescriptionSet.java 2010-07-14 10:08:19 UTC (rev 2198) +++ trunk/src/dl-learner/org/dllearner/utilities/owl/EvaluatedDescriptionSet.java 2010-07-14 15:39:50 UTC (rev 2199) @@ -51,7 +51,9 @@ } public void add(Description description, double accuracy, LearningProblem problem) { - if(set.size()==0 || getWorst().getAccuracy() <= accuracy) { + // bug http://sourceforge.net/tracker/?func=detail&atid=986319&aid=3029181&group_id=203619 + // -> set should be filled up to max size before we compare acc. with the worst result + if(set.size()<maxSize || getWorst().getAccuracy() <= accuracy) { set.add(problem.evaluate(description)); } if(set.size()>maxSize) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |