From: <jen...@us...> - 2008-03-19 08:11:25
|
Revision: 715 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=715&view=rev Author: jenslehmann Date: 2008-03-19 01:11:19 -0700 (Wed, 19 Mar 2008) Log Message: ----------- redundancy check bug fixes changes in heuristic Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-03-17 10:21:51 UTC (rev 714) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-03-19 08:11:19 UTC (rev 715) @@ -115,7 +115,7 @@ // the divide&conquer approach in many ILP programs using a // clause by clause search; after a period of time the candidate // set is reduced to focus CPU time on the most promising concepts - private boolean useCandidateReduction = false; + private boolean useCandidateReduction = true; private int candidatePostReductionSize = 30; // setting to true gracefully stops the algorithm @@ -244,24 +244,36 @@ } public void start() { + + // TODO: write a JUnit test for this problem (long-lasting or infinite loops because + // redundant children of a node are called recursively after when the node is extended + // twice) /* // String conceptStr = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND (>= 2 \"http://dl-learner.org/carcinogenesis#hasStructure\".\"http://dl-learner.org/carcinogenesis#Ar_halide\" OR ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE) AND >= 5 \"http://dl-learner.org/carcinogenesis#hasBond\". TOP)))"; - String conceptStr = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE) AND (\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE)))"; +// String conceptStr = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE) AND (\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE)))"; + String conceptStr = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND (>= 3 \"http://dl-learner.org/carcinogenesis#hasStructure\".\"http://dl-learner.org/carcinogenesis#Halide\" OR ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE) AND ALL \"http://dl-learner.org/carcinogenesis#hasAtom\".TOP)))"; + String conceptStr2 = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND (>= 4 \"http://dl-learner.org/carcinogenesis#hasStructure\".\"http://dl-learner.org/carcinogenesis#Halide\" OR ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE) AND ALL \"http://dl-learner.org/carcinogenesis#hasAtom\".TOP)))"; try { NamedClass struc = new NamedClass("http://dl-learner.org/carcinogenesis#Compound"); Description d = KBParser.parseConcept(conceptStr); + Description d2 = KBParser.parseConcept(conceptStr2); // SortedSet<Description> ds = (SortedSet<Description>) operator.refine(d,15,null,struc); // System.out.println(ds); - System.out.println(RhoDRDown.checkIntersection((Intersection)d)); +// System.out.println(RhoDRDown.checkIntersection((Intersection)d)); Set<Individual> coveredNegatives = rs.instanceCheck(d, learningProblem.getNegativeExamples()); Set<Individual> coveredPositives = rs.instanceCheck(d, learningProblem.getPositiveExamples()); ExampleBasedNode ebn = new ExampleBasedNode(d); ebn.setCoveredExamples(coveredPositives, coveredNegatives); - extendNodeProper(ebn,15); + properRefinements.add(d2); + extendNodeProper(ebn,13); + extendNodeProper(ebn,14); + for(Description refinement: ebn.getChildConcepts()) + System.out.println("refinement: " + refinement); + // Individual i = new Individual("http://dl-learner.org/carcinogenesis#d101"); // for(Individual i : learningProblem.getPositiveExamples()) // rs.instanceCheck(ds.last(), i); @@ -308,7 +320,7 @@ // print statistics at most once a second currentTime = System.nanoTime(); - if(currentTime - lastPrintTime > 1000000000) { + if(currentTime - lastPrintTime > 3000000000l) { printStatistics(false); lastPrintTime = currentTime; logger.debug("--- loop " + loop + " started ---"); @@ -327,7 +339,7 @@ // Logger.getRootLogger().setLevel(Level.TRACE); } - System.out.println("next expanded: " + candidates.last().getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); +// System.out.println("next expanded: " + candidates.last().getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); // chose best node according to heuristics bestNode = candidates.last(); // extend best node @@ -434,8 +446,11 @@ childConceptsDeletionTimeNs += System.nanoTime() - childConceptsDeletionTimeNsStart; -// if(refinements.size()<30) -// System.out.println("refinements: " + refinements); +// if(refinements.size()<30) { +//// System.out.println("refinements: " + refinements); +// for(Description refinement: refinements) +// System.out.println("refinement: " + refinement); +// } long evaluateSetCreationTimeNsStart = System.nanoTime(); @@ -463,7 +478,7 @@ propernessTestsAvoidedByShortConceptConstruction++; propernessDetected = true; - System.out.println("refinement " + refinement + " can be shortened"); +// System.out.println("refinement " + refinement + " can be shortened"); // System.exit(0); } } @@ -517,7 +532,7 @@ } evaluateSetCreationTimeNs += System.nanoTime() - evaluateSetCreationTimeNsStart; -// System.out.println("intermediate 1"); +// System.out.println("intermediate 1 " + node.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI)); // System.out.println(toEvaluateConcepts.size()); @@ -678,8 +693,17 @@ for(Description refinement : refinements) { // for(int i=0; i<=recDepth; i++) // System.out.print(" "); -// System.out.println("call: " + refinement + " [maxLength " + maxLength + "]"); - extendNodeProper(node, refinement, maxLength, recDepth+1); +// System.out.println("call: " + refinement + " [maxLength " + maxLength + ", rec depth " + recDepth + "]"); + + // check for redundancy (otherwise we may run into very time-intensive loops, + // see planned JUnit test case $x) + + long redundancyCheckTimeNsStart = System.nanoTime(); + boolean redundant = properRefinements.contains(refinement); + redundancyCheckTimeNs += System.nanoTime() - redundancyCheckTimeNsStart; + + if(!redundant) + extendNodeProper(node, refinement, maxLength, recDepth+1); // for(int i=0; i<=recDepth; i++) // System.out.print(" "); // System.out.println("finished: " + refinement + " [maxLength " + maxLength + "]"); Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java 2008-03-17 10:21:51 UTC (rev 714) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/MultiHeuristic.java 2008-03-19 08:11:19 UTC (rev 715) @@ -79,6 +79,9 @@ private double gainBonusFactor; private double nodeChildPenalty = 0.0001; private double startNodeBonus = 1.0; + // penalise errors on positive examples harder than on negative examples + // (positive weight = 1) + private double negativeWeight = 0.8; // examples private int nrOfNegativeExamples; @@ -113,11 +116,11 @@ } public double getNodeScore(ExampleBasedNode node) { - double accuracy = getAccuracy(node.getCoveredPositives().size(),node.getCoveredNegatives().size()); + double accuracy = getWeightedAccuracy(node.getCoveredPositives().size(),node.getCoveredNegatives().size()); ExampleBasedNode parent = node.getParent(); double gain = 0; if(parent != null) { - double parentAccuracy = getAccuracy(parent.getCoveredPositives().size(),parent.getCoveredNegatives().size()); + double parentAccuracy = getWeightedAccuracy(parent.getCoveredPositives().size(),parent.getCoveredNegatives().size()); gain = accuracy - parentAccuracy; } else { accuracy += startNodeBonus; @@ -126,9 +129,8 @@ return accuracy + gainBonusFactor * gain - expansionPenaltyFactor * he - nodeChildPenalty * node.getChildren().size(); } - private double getAccuracy(int coveredPositives, int coveredNegatives) { - return (coveredPositives + nrOfNegativeExamples - coveredNegatives)/(double)nrOfExamples; - + private double getWeightedAccuracy(int coveredPositives, int coveredNegatives) { + return (coveredPositives + negativeWeight * (nrOfNegativeExamples - coveredNegatives))/(double)nrOfExamples; } public static double getNodeScore(ExampleBasedNode node, int nrOfPositiveExamples, int nrOfNegativeExamples) { Modified: trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java =================================================================== --- trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java 2008-03-17 10:21:51 UTC (rev 714) +++ trunk/src/dl-learner/org/dllearner/examples/Carcinogenesis.java 2008-03-19 08:11:19 UTC (rev 715) @@ -123,6 +123,9 @@ private static boolean learnCarcinogenic = true; private static boolean useNewGroups = true; + private static boolean createPTE1Conf = false; + private static boolean createPTE2Conf = false; + /** * @param args * No arguments supported. @@ -133,14 +136,13 @@ public static void main(String[] args) throws FileNotFoundException, IOException, ParseException { - // TODO: newgroups are not mapped currently String[] files = new String[] { "newgroups.pl", "ames.pl", "atoms.pl", "bonds.pl", "gentoxprops.pl", "ind_nos.pl", "ind_pos.pl"}; // "pte2/canc_nos.pl", "pte2/pte2ames.pl", "pte2/pte2atoms.pl", // "pte2/pte2bonds.pl", "pte2/pte2gentox.pl", "pte2/pte2ind_nos.pl", "pte2/pte2newgroups.pl" // "train.b" => not a pure Prolog file but Progol/Aleph specific // }; - File owlFile = new File("examples/carcinogenesis/pte.owl"); + File owlFile = new File("examples/carcinogenesis/carcinogenesis.owl"); Program program = null; long startTime, duration; @@ -281,7 +283,6 @@ // generating test examples for PTE-1 // => put all in one file, because they were used as training for PTE-2 File confPTE1File = new File("examples/carcinogenesis/testpte1.conf"); - Files.clearFile(confPTE1File); File testPTE1Positives = new File(prologDirectory + "pte1.f"); File testPTE1Negatives = new File(prologDirectory + "pte1.n"); @@ -289,16 +290,20 @@ List<Individual> negPTE1Examples = getExamples(testPTE1Negatives); appendPosExamples(confTrainFile, posPTE1Examples); appendNegExamples(confTrainFile, negPTE1Examples); - Files.clearFile(confPTE1File); - Files.appendFile(confPTE1File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n"); - appendPosExamples(confPTE1File, posPTE1Examples); - appendNegExamples(confPTE1File, negPTE1Examples); + if(createPTE1Conf) { + Files.clearFile(confPTE1File); + Files.appendFile(confPTE1File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n"); + appendPosExamples(confPTE1File, posPTE1Examples); + appendNegExamples(confPTE1File, negPTE1Examples); + } // create a PTE-2 test file - File confPTE2File = new File("examples/carcinogenesis/testpte2.conf"); - Files.clearFile(confPTE2File); - Files.appendFile(confPTE2File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n"); - Files.appendFile(confPTE2File, getPTE2Examples()); + if(createPTE2Conf) { + File confPTE2File = new File("examples/carcinogenesis/testpte2.conf"); + Files.clearFile(confPTE2File); + Files.appendFile(confPTE2File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n"); + Files.appendFile(confPTE2File, getPTE2Examples()); + } } Modified: trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java =================================================================== --- trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java 2008-03-17 10:21:51 UTC (rev 714) +++ trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java 2008-03-19 08:11:19 UTC (rev 715) @@ -325,10 +325,10 @@ return true; // earyl abort: e.g. >= 10 hasStructure.Methyl; // if there are 11 fillers and 2 are not Methyl, the result is false - } /* else { + } else { if(roleFillers.size() - index < number) return false; - }*/ + } } return false; } else if (description instanceof ObjectMaxCardinalityRestriction) { @@ -363,10 +363,10 @@ return false; // earyl abort: e.g. <= 5 hasStructure.Methyl; // if there are 6 fillers and 2 are not Methyl, the result is true - } /* else { + } else { if(roleFillers.size() - index <= number) return true; - } */ + } } return true; } else if (description instanceof BooleanValueRestriction) { Modified: trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java =================================================================== --- trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java 2008-03-17 10:21:51 UTC (rev 714) +++ trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java 2008-03-19 08:11:19 UTC (rev 715) @@ -92,6 +92,10 @@ // maximum number of fillers for eeach role private Map<ObjectProperty,Integer> maxNrOfFillers = new TreeMap<ObjectProperty,Integer>(); + // limit for cardinality restrictions (this makes sense if we e.g. have compounds with up to + // more than 200 atoms but we are only interested in atoms with certain characteristics and do + // not want something like e.g. >= 204 hasAtom.NOT Carbon-87; which blows up the search space + private int cardinalityLimit = 5; // start concept (can be used to start from an arbitrary concept, needs // to be Thing or NamedClass), note that when you use e.g. Compound as @@ -105,7 +109,7 @@ private int topRefinementsLength = 0; private Map<NamedClass, Integer> topARefinementsLength = new TreeMap<NamedClass, Integer>(); // M is finite and this value is the maximum length of any value in M - private static int mMaxLength = 3; + private static int mMaxLength = 4; // the sets M_\top and M_A private Map<Integer,SortedSet<Description>> m = new TreeMap<Integer,SortedSet<Description>>(); @@ -194,15 +198,22 @@ } // determine the maximum number of fillers for each role + // (up to a specified cardinality maximum) + if(useCardinalityRestrictions) { for(ObjectProperty op : rs.getAtomicRoles()) { int maxFillers = 0; Map<Individual,SortedSet<Individual>> opMembers = rs.getRoleMembers(op); for(SortedSet<Individual> inds : opMembers.values()) { if(inds.size()>maxFillers) maxFillers = inds.size(); + if(maxFillers >= cardinalityLimit) { + maxFillers = cardinalityLimit; + break; + } } maxNrOfFillers.put(op, maxFillers); } + } /* String conceptStr = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND (>= 2 \"http://dl-learner.org/carcinogenesis#hasStructure\".\"http://dl-learner.org/carcinogenesis#Ar_halide\" OR ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE) AND >= 5 \"http://dl-learner.org/carcinogenesis#hasBond\". TOP)))"; @@ -412,23 +423,41 @@ // rule 4: ALL r.D => <= (maxFillers-1) r.D // (length increases by 1 so we have to check whether max length is sufficient) - if(useCardinalityRestrictions) { - if(maxLength > description.getLength() && maxNrOfFillers.get(ar)>1) { - ObjectMaxCardinalityRestriction max = new ObjectMaxCardinalityRestriction(maxNrOfFillers.get(ar)-1,role,description.getChild(0)); - refinements.add(max); - } - } + // => commented out because this is acutally not a downward refinement +// if(useCardinalityRestrictions) { +// if(maxLength > description.getLength() && maxNrOfFillers.get(ar)>1) { +// ObjectMaxCardinalityRestriction max = new ObjectMaxCardinalityRestriction(maxNrOfFillers.get(ar)-1,role,description.getChild(0)); +// refinements.add(max); +// } +// } } else if (description instanceof ObjectCardinalityRestriction) { + ObjectPropertyExpression role = ((ObjectCardinalityRestriction)description).getRole(); + Description range = opRanges.get(role); + int number = ((ObjectCardinalityRestriction)description).getCardinality(); if(description instanceof ObjectMaxCardinalityRestriction) { - // <= x r.C => <= (x-1) r.C + // rule 1: <= x r.C => <= x r.D + tmp = refine(description.getChild(0), maxLength-3, null, range); + + for(Description d : tmp) { + refinements.add(new ObjectMaxCardinalityRestriction(number,role,d)); + } + + // rule 2: <= x r.C => <= (x-1) r.C ObjectMaxCardinalityRestriction max = (ObjectMaxCardinalityRestriction) description; - int number = max.getNumber(); - if(number > 0) +// int number = max.getNumber(); + if(number > 1) refinements.add(new ObjectMaxCardinalityRestriction(number-1,max.getRole(),max.getChild(0))); + } else if(description instanceof ObjectMinCardinalityRestriction) { + tmp = refine(description.getChild(0), maxLength-3, null, range); + + for(Description d : tmp) { + refinements.add(new ObjectMinCardinalityRestriction(number,role,d)); + } + // >= x r.C => >= (x+1) r.C ObjectMinCardinalityRestriction min = (ObjectMinCardinalityRestriction) description; - int number = min.getNumber(); +// int number = min.getNumber(); if(number < maxNrOfFillers.get(min.getRole())) refinements.add(new ObjectMinCardinalityRestriction(number+1,min.getRole(),min.getChild(0))); } @@ -723,7 +752,7 @@ long mComputationTimeStartNs = System.nanoTime(); // initialise all possible lengths (1 to 3) - for(int i=1; i<=3; i++) { + for(int i=1; i<=mMaxLength; i++) { m.put(i, new TreeSet<Description>(conceptComparator)); } @@ -777,6 +806,15 @@ m.put(3,m3); + SortedSet<Description> m4 = new TreeSet<Description>(conceptComparator); + if(useCardinalityRestrictions) { + for(ObjectProperty r : rs.getMostGeneralRoles()) { + int maxFillers = maxNrOfFillers.get(r); + m4.add(new ObjectMaxCardinalityRestriction(maxFillers-1, r, new Thing())); + } + } + m.put(4,m4); + mComputationTimeNs += System.nanoTime() - mComputationTimeStartNs; } @@ -866,6 +904,15 @@ mA.get(nc).put(3,m3); + SortedSet<Description> m4 = new TreeSet<Description>(conceptComparator); + if(useCardinalityRestrictions) { + for(ObjectProperty r : mgr.get(nc)) { + int maxFillers = maxNrOfFillers.get(r); + m4.add(new ObjectMaxCardinalityRestriction(maxFillers-1, r, new Thing())); + } + } + mA.get(nc).put(4,m4); + mComputationTimeNs += System.nanoTime() - mComputationTimeStartNs; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |