From: <jen...@us...> - 2008-02-07 17:36:37
|
Revision: 515 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=515&view=rev Author: jenslehmann Date: 2008-02-07 09:36:33 -0800 (Thu, 07 Feb 2008) Log Message: ----------- started better example and noise handling Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedNode.java trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedNode.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedNode.java 2008-02-07 17:11:15 UTC (rev 514) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedNode.java 2008-02-07 17:36:33 UTC (rev 515) @@ -39,9 +39,9 @@ */ public class ExampleBasedNode { - // TODO: add example based variables here - @SuppressWarnings({"unused"}) + // example based variables here private Set<Individual> coveredPositives; + private Set<Individual> coveredNegatives; // TOP ist einfach das TOP-Konzept, also das einzige welches nicht evaluiert wird public enum QualityEvaluationMethod { TOP, REASONER, TOO_WEAK_LIST, OVERLY_GENERAL_LIST }; @@ -190,5 +190,21 @@ public void setQualityEvaluationMethod(QualityEvaluationMethod qualityEvaluationMethod) { this.qualityEvaluationMethod = qualityEvaluationMethod; } + + public Set<Individual> getCoveredPositives() { + return coveredPositives; + } + + public void setCoveredPositives(Set<Individual> coveredPositives) { + this.coveredPositives = coveredPositives; + } + + public Set<Individual> getCoveredNegatives() { + return coveredNegatives; + } + + public void setCoveredNegatives(Set<Individual> coveredNegatives) { + this.coveredNegatives = coveredNegatives; + } } Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java 2008-02-07 17:11:15 UTC (rev 514) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java 2008-02-07 17:36:33 UTC (rev 515) @@ -250,6 +250,7 @@ Helper.checkConcepts(rs, allowedConcepts); usedConcepts = allowedConcepts; } else if(ignoredConcepts != null) { + System.out.println(ignoredConcepts); usedConcepts = Helper.computeConceptsUsingIgnoreList(rs, ignoredConcepts); } else { usedConcepts = Helper.computeConcepts(rs); @@ -292,7 +293,7 @@ algHeuristic, // usedConcepts, // usedRoles, - noisePercentage, + noisePercentage/(double)100, writeSearchTree, replaceSearchTree, searchTreeFile, Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java =================================================================== --- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-02-07 17:11:15 UTC (rev 514) +++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java 2008-02-07 17:36:33 UTC (rev 515) @@ -21,6 +21,7 @@ import java.io.File; import java.text.DecimalFormat; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -28,6 +29,7 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.dllearner.algorithms.refinement.RefinementOperator; import org.dllearner.algorithms.refinement.RhoDown; @@ -35,6 +37,7 @@ import org.dllearner.core.ReasoningService; import org.dllearner.core.Score; import org.dllearner.core.dl.Concept; +import org.dllearner.core.dl.Individual; import org.dllearner.core.dl.MultiConjunction; import org.dllearner.core.dl.MultiDisjunction; import org.dllearner.core.dl.Top; @@ -56,6 +59,15 @@ * can be misclassified (everything above is considered a too weak * concept wrt. the noise percentage). * + * TODO: Solve subsumption problems. It is easy to implement approximate fast + * instance checks. However, subsumption is difficult to optimise. It should by + * analysed whether it is reasonable to perform instance comparisions instead + * of subsumption checks, e.g. a concept is only included in the search tree + * when it covers strictly less examples than its parent. Note, that this does + * not mean that concepts not bearing an advantage in classification are thrown + * away. They are just handled like improper refinements. [TODO: How does this + * differ from not checking subsumption at all?] + * * @author Jens Lehmann * */ @@ -68,8 +80,15 @@ private ReasoningService rs; private PosNegLP learningProblem; private PosOnlyDefinitionLP posOnlyLearningProblem; - private boolean posOnly = false; + private boolean posOnly = false; + private int nrOfExamples; + private int nrOfPositiveExamples; + private int nrOfNegativeExamples; + // noise regulates how many positives can be misclassified and when the algorithm terminates + private double noise = 0.0; + private int allowedMisclassifications = 0; + // search tree options private boolean writeSearchTree; private File searchTreeFile; @@ -153,7 +172,7 @@ ExampleBasedHeuristic heuristic, // Set<AtomicConcept> allowedConcepts, // Set<AtomicRole> allowedRoles, - double noisePercentage, + double noise, boolean writeSearchTree, boolean replaceSearchTree, File searchTreeFile, @@ -162,39 +181,56 @@ boolean useShortConceptConstruction ) { if(learningProblem instanceof PosNegLP) { - this.learningProblem = (PosNegLP) learningProblem; + PosNegLP lp = (PosNegLP) learningProblem; + this.learningProblem = lp; posOnly = false; + nrOfPositiveExamples = lp.getPositiveExamples().size(); + nrOfNegativeExamples = lp.getNegativeExamples().size(); } else if(learningProblem instanceof PosOnlyDefinitionLP) { - this.posOnlyLearningProblem = (PosOnlyDefinitionLP) learningProblem; + PosOnlyDefinitionLP lp = (PosOnlyDefinitionLP) learningProblem; + this.posOnlyLearningProblem = lp; posOnly = true; + nrOfPositiveExamples = lp.getPositiveExamples().size(); + nrOfNegativeExamples = lp.getPseudoNegatives().size(); } + nrOfExamples = nrOfPositiveExamples + nrOfNegativeExamples; this.rs = rs; this.operator = (RhoDown) operator; // initialise candidate set with heuristic as ordering candidates = new TreeSet<ExampleBasedNode>(heuristic); - // this.noisePercentage ... + this.noise = noise; this.writeSearchTree = writeSearchTree; this.replaceSearchTree = replaceSearchTree; this.searchTreeFile = searchTreeFile; this.useTooWeakList = useTooWeakList; this.useOverlyGeneralList = useOverlyGeneralList; this.useShortConceptConstruction = useShortConceptConstruction; + + logger.setLevel(Level.DEBUG); } public void start() { + + // calculate quality threshold required for a solution + allowedMisclassifications = (int) Math.round(noise * nrOfExamples); + // start search with most general concept Top top = new Top(); ExampleBasedNode topNode = new ExampleBasedNode(top); // top covers all negatives int coveredNegativeExamples = getNumberOfNegatives(); topNode.setCoveredNegativeExamples(coveredNegativeExamples); + topNode.setCoveredPositives(learningProblem.getPositiveExamples()); + topNode.setCoveredNegatives(learningProblem.getNegativeExamples()); candidates.add(topNode); candidatesStable.add(topNode); + // note that TOP may already be a solution - solutionFound = (coveredNegativeExamples == 0); - solutions = new LinkedList<Concept>(); - if(solutionFound) - solutions.add(top); + ExampleBasedNode bestNode = topNode; +// solutionFound = (coveredNegativeExamples == 0); +// solutions = new LinkedList<Concept>(); +// if(solutionFound) +// solutions.add(top); int loop = 0; @@ -202,14 +238,17 @@ while(!solutionFound && !stop) { - // besten Knoten nach Heuristik auswählen - ExampleBasedNode bestNode = candidates.last(); - // besten Knoten erweitern - // newCandidates = new TreeSet<Node>(nodeComparator); + printStatistics(false); + + // chose best node according to heuristics + bestNode = candidates.last(); + // extend best node newCandidates.clear(); + // TODO: why is the best node tempoariliy removed from the candidates set? candidates.remove(bestNode); extendNodeProper(bestNode, bestNode.getHorizontalExpansion()+1); candidates.add(bestNode); + // newCandidates has been filled during node expansion candidates.addAll(newCandidates); candidatesStable.addAll(newCandidates); @@ -373,7 +412,7 @@ propernessTestsAvoidedByTooWeakList++; conceptTestsTooWeakList++; propernessDetected = true; - tooWeakList.add(refinement); + // tooWeakList.add(refinement); // Knoten wird direkt erzeugt (es ist buganfällig zwei Plätze // zu haben, an denen Knoten erzeugt werden, aber es erscheint @@ -436,40 +475,12 @@ // schon existiert if(nonRedundant) { - // Knoten erzeugen + // newly created node ExampleBasedNode newNode = new ExampleBasedNode(refinement); // die -1 ist wichtig, da sonst keine gleich langen Refinements // für den neuen Knoten erlaubt wären z.B. person => male newNode.setHorizontalExpansion(refinement.getLength()-1); - - // hier finden Tests statt, die Retrieval-Anfrage vermeiden sollen - /* - Integer n = evaluationCache.get(concept); - // Konzept gefunden - if(n!=null) { - // Knoten erzeugen - Node newNode = new Node(refinement); - newNode.setHorizontalExpansion(refinement.getLength()-1); - node.addChild(newNode); - - // too weak - if(n==-1) { - newNode.setTooWeak(true); - // nicht too weak - } else { - // feststellen, ob proper => geht so nicht - // gleiche covered negatives bedeutet nicht improper - boolean proper = (n==node.getCoveredNegativeExamples()); - newNode.setCoveredNegativeExamples(n); - - } - // Konzept nicht gefunden => muss ausgewertet werden - } else { - toEvaluateConcepts.add(refinement); - } - */ - boolean qualityKnown = false; int quality = -2; @@ -487,9 +498,59 @@ if(!qualityKnown) { long propCalcReasoningStart2 = System.nanoTime(); conceptTestsReasoner++; - quality = coveredNegativesOrTooWeak(refinement); + + // quality = coveredNegativesOrTooWeak(refinement); + + // determine individuals which have not been covered yet (more efficient than full retrieval) + Set<Individual> coveredPositives = node.getCoveredPositives(); + Set<Individual> newlyCoveredPositives = new HashSet<Individual>(); + + // calculate how many pos. examples are not covered by the + // parent node of the refinement + int misclassifications = nrOfPositiveExamples - coveredPositives.size(); + + // iterate through all covered examples (examples which are not + // covered do not need to be tested, because they remain uncovered) + // TODO: DIG will be slow if we send each reasoner request individually + // (however if we send everything in one request, too many instance checks + // are performed => rely on fast instance checker [still to implement]) + for(Individual i : coveredPositives) { + // TODO: move code to a separate function + if(quality != -1) { + boolean covered = rs.instanceCheck(refinement, i); + if(!covered) + misclassifications++; + else + newlyCoveredPositives.add(i); + + if(misclassifications > allowedMisclassifications) + quality = -1; + + } + } + + Set<Individual> newlyCoveredNegatives = null; + if(quality != -1) { + Set<Individual> coveredNegatives = node.getCoveredNegatives(); + newlyCoveredNegatives = new HashSet<Individual>(); + + for(Individual i : coveredNegatives) { + boolean covered = rs.instanceCheck(refinement, i); + if(covered) + newlyCoveredNegatives.add(i); + } + } + propernessCalcReasoningTimeNs += System.nanoTime() - propCalcReasoningStart2; newNode.setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.REASONER); + if(quality != -1) { + // quality is the number of misclassifications (if it is not too weak) + quality = (nrOfPositiveExamples - newlyCoveredPositives.size()) + + newlyCoveredNegatives.size(); + newNode.setCoveredNegatives(newlyCoveredNegatives); + newNode.setCoveredPositives(newlyCoveredPositives); + } + } if(quality == -1) { @@ -520,66 +581,6 @@ } - /* - Iterator<Concept> it = refinements.iterator(); - while(it.hasNext()) { - Concept refinement = it.next(); - if(refinement.getLength()>node.getHorizontalExpansion()) { - // Test auf properness - long propCalcReasoningStart = System.nanoTime(); - boolean isProper = !learningProblem.getReasoningService().subsumes(refinement, concept); - propernessCalcReasoningTimeNs += System.nanoTime() - propCalcReasoningStart; - - if(isProper) { - long redundancyCheckTimeNsStart = System.nanoTime(); - boolean nonRedundant = properRefinements.add(refinement); - redundancyCheckTimeNs += System.nanoTime() - redundancyCheckTimeNsStart; - - if(!nonRedundant) - redundantConcepts++; - - // es wird nur ein neuer Knoten erzeugt, falls das Konzept nicht - // schon existiert - if(nonRedundant) { - - // Knoten erzeugen - Node newNode = new Node(refinement); - // die -1 ist wichtig, da sonst keine gleich langen Refinements - // für den neuen Knoten erlaubt wären z.B. person => male - newNode.setHorizontalExpansion(refinement.getLength()-1); - node.addChild(newNode); - - // Qualität des Knotens auswerten - long propCalcReasoningStart2 = System.nanoTime(); - int quality = learningProblem.coveredNegativeExamplesOrTooWeak(refinement); - propernessCalcReasoningTimeNs += System.nanoTime() - propCalcReasoningStart2; - - if(quality == -1) { - newNode.setTooWeak(true); - } else { - // Lösung gefunden - if(quality == 0) { - solutionFound = true; - solutions.add(refinement); - } - - newNode.setCoveredNegativeExamples(quality); - newCandidates.add(newNode); - - // System.out.print("."); - } - } - - // jedes proper Refinement wird gelöscht - it.remove(); - - } - } - } - */ - - - // es sind jetzt noch alle Konzepte übrig, die improper refinements sind // auf jedem dieser Konzepte wird die Funktion erneut aufgerufen, da sich // proper refinements ergeben könnten @@ -662,6 +663,7 @@ + conceptTestsTooWeakList + "/" + conceptTestsOverlyGeneralList + "/" + redundantConcepts); } + @SuppressWarnings({"unused"}) private int coveredNegativesOrTooWeak(Concept concept) { if(posOnly) return posOnlyLearningProblem.coveredPseudoNegativeExamplesOrTooWeak(concept); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |