From: <jen...@us...> - 2009-03-20 13:38:20
|
Revision: 1656 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1656&view=rev Author: jenslehmann Date: 2009-03-20 13:38:09 +0000 (Fri, 20 Mar 2009) Log Message: ----------- implemented accuracy approximation based on draft article Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-03-20 11:21:30 UTC (rev 1655) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-03-20 13:38:09 UTC (rev 1656) @@ -40,6 +40,8 @@ import org.dllearner.core.owl.NamedClass; import org.dllearner.utilities.Helper; +import com.jamonapi.MonitorFactory; + /** * The problem of learning the description of an existing class * in an OWL ontology. @@ -190,60 +192,112 @@ int maxNotCovered = (int) Math.ceil(minAccuracy*classInstances.size()); int instancesCovered = 0; int instancesNotCovered = 0; + int total = 0; + boolean estimatedA = false; + double lowerBorderA = 0; + int lowerEstimateA = 0; + double upperBorderA = 1; + int upperEstimateA = classInstances.size(); + for(Individual ind : classInstances) { if(reasoner.hasType(description, ind)) { instancesCovered++; -// System.out.println("covered"); } else { -// System.out.println(ind + " not covered."); instancesNotCovered ++; if(instancesNotCovered > maxNotCovered) { return -1; } } + + // approximation step (starting after 10 tests) + total = instancesCovered + instancesNotCovered; + if(total > 10) { + // compute confidence interval + double p1 = p1(instancesCovered, total); + double p2 = p3(p1, total); + lowerBorderA = Math.max(0, p1 - p2); + upperBorderA = Math.min(1, p1 + p2); + double size = upperBorderA - lowerBorderA; + // if the interval has a size smaller than 10%, we can be confident + if(size < 0.1) { + 
// we have to distinguish the cases that the accuracy limit is + // below, within, or above the limit and that the mean is below + // or above the limit + double mean = instancesCovered/(double)total; + + // if the mean is greater than the required minimum, we can accept; + // we also accept if the interval is small and close to the minimum + // (worst case is to accept a few inaccurate descriptions) + if(mean > minAccuracy || (upperBorderA > mean && size < 0.03)) { + instancesCovered = (int) (instancesCovered/(double)total * classInstances.size()); + upperEstimateA = (int) upperBorderA * classInstances.size(); + lowerEstimateA = (int) lowerBorderA * classInstances.size(); + estimatedA = true; + break; + } + + // reject only if the upper border is far away (we are very + // certain not to lose a potential solution) + if(upperBorderA + 0.1 < minAccuracy) { + return -1; + } + } + } } double coverage = instancesCovered/(double)classInstances.size(); + MonitorFactory.add("estimatedA","count", estimatedA ? 
1 : 0); + MonitorFactory.add("aInstances","count", total); + // we know that a definition candidate is always subclass of the - // intersection of all super classes, so we test only the relevent instances + // intersection of all super classes, so we test only the relevant instances // (leads to undesired effects for descriptions not following this rule, // but improves performance a lot); // for learning a superclass of a defined class, similar observations apply; - // we only test 10 * instances covered; while this is only an - // approximation, it is unlikely that further tests will have any - // significant impact on the overall accuracy - int maxTests = 10 * instancesCovered; -// int tests = Math.min(maxTests, superClassInstances.size()); + int testsPerformed = 0; int instancesDescription = 0; + boolean estimatedB = false; for(Individual ind : superClassInstances) { - -// System.out.println(ind); - + if(reasoner.hasType(description, ind)) { -// System.out.println("ind: " + ind); instancesDescription++; } testsPerformed++; - if(testsPerformed > maxTests) { -// System.out.println(testsPerformed); -// System.out.println("estimating accuracy by random sampling"); - // estimate for the number of instances of the description - instancesDescription = (int) (instancesDescription/(double)testsPerformed * superClassInstances.size()); - break; + if(testsPerformed > 10) { + + // compute confidence interval + double p1 = p1(instancesDescription, testsPerformed); + double p2 = p3(p1, testsPerformed); + double lowerBorder = Math.max(0, p1 - p2); + double upperBorder = Math.min(1, p1 + p2); + int lowerEstimate = (int) lowerBorder * superClassInstances.size(); + int upperEstimate = (int) upperBorder * superClassInstances.size(); + + double size; + if(estimatedA) { +// size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) + Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) + Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate))); + size = 
getAccuracy(upperBorderA, upperEstimateA/(upperEstimateA+lowerEstimate)) - getAccuracy(lowerBorderA, lowerEstimateA/(lowerEstimateA+upperEstimate)); + } else { +// size = 1/(coverageFactor+1) * (coverageFactor * coverage + Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + Math.sqrt(instancesCovered/(instancesCovered+upperEstimate))); + size = getAccuracy(coverage, instancesCovered/(instancesCovered+lowerEstimate)) - getAccuracy(coverage, instancesCovered/(instancesCovered+upperEstimate)); + } + + if(size < 0.1) { + estimatedB = true; + // calculate total number of instances + instancesDescription = (int) (instancesDescription/(double)testsPerformed * superClassInstances.size()); + break; + } } } -// System.out.println(description); -// System.out.println("A and C: " + instancesCovered); -// System.out.println("instances description: " + instancesDescription); - // since we measured/estimated accuracy only on instances outside A (superClassInstances // does not include instances of A), we need to add it in the denominator double protusion = instancesCovered/(double)(instancesDescription+instancesCovered); @@ -251,14 +305,9 @@ protusion = 0; } -// System.out.println(description); -// System.out.println(instancesDescription); -// System.out.println("prot: " + protusion); - -// double acc = (coverageFactor * coverage + protusion) / (coverageFactor + 1); - -// System.out.println("acc: " + acc); - + MonitorFactory.add("estimatedB","count", estimatedB ? 
1 : 0); + MonitorFactory.add("bInstances","count", testsPerformed); + return getAccuracy(coverage, protusion); } @@ -275,11 +324,29 @@ return -1; } } - + + // computes accuracy from coverage and protusion (changing this function may + // make it necessary to change the appoximation too) private double getAccuracy(double coverage, double protusion) { return (coverageFactor * coverage + Math.sqrt(protusion)) / (coverageFactor + 1); } + // see paper: expression used in confidence interval estimation + private static double p3(double p1, int total) { + return 1.96 * Math.sqrt(p1*(1-p1)/(total+4)); + } + + // see paper: expression used in confidence interval estimation +// private static double p2(int success, int total) { +// double p1 = p1(success, total); +// return 1.96 * Math.sqrt(p1*(1-p1)/(total+4)); +// } + + // see paper: p' + private static double p1(int success, int total) { + return (success+2)/(double)(total+4); + } + /** * @return the classToDescribe */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-03-20 14:15:20
|
Revision: 1657 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1657&view=rev Author: jenslehmann Date: 2009-03-20 14:15:15 +0000 (Fri, 20 Mar 2009) Log Message: ----------- tested and completed accuracy approximation Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-03-20 13:38:09 UTC (rev 1656) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-03-20 14:15:15 UTC (rev 1657) @@ -231,8 +231,8 @@ // (worst case is to accept a few inaccurate descriptions) if(mean > minAccuracy || (upperBorderA > mean && size < 0.03)) { instancesCovered = (int) (instancesCovered/(double)total * classInstances.size()); - upperEstimateA = (int) upperBorderA * classInstances.size(); - lowerEstimateA = (int) lowerBorderA * classInstances.size(); + upperEstimateA = (int) (upperBorderA * classInstances.size()); + lowerEstimateA = (int) (lowerBorderA * classInstances.size()); estimatedA = true; break; } @@ -248,8 +248,8 @@ double coverage = instancesCovered/(double)classInstances.size(); - MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0); - MonitorFactory.add("aInstances","count", total); +// MonitorFactory.add("estimatedA","count", estimatedA ? 
1 : 0); +// MonitorFactory.add("aInstances","count", total); // we know that a definition candidate is always subclass of the // intersection of all super classes, so we test only the relevant instances @@ -260,7 +260,7 @@ int testsPerformed = 0; int instancesDescription = 0; - boolean estimatedB = false; +// boolean estimatedB = false; for(Individual ind : superClassInstances) { @@ -277,20 +277,24 @@ double p2 = p3(p1, testsPerformed); double lowerBorder = Math.max(0, p1 - p2); double upperBorder = Math.min(1, p1 + p2); - int lowerEstimate = (int) lowerBorder * superClassInstances.size(); - int upperEstimate = (int) upperBorder * superClassInstances.size(); + int lowerEstimate = (int) (lowerBorder * superClassInstances.size()); + int upperEstimate = (int) (upperBorder * superClassInstances.size()); double size; if(estimatedA) { // size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) + Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) + Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate))); - size = getAccuracy(upperBorderA, upperEstimateA/(upperEstimateA+lowerEstimate)) - getAccuracy(lowerBorderA, lowerEstimateA/(lowerEstimateA+upperEstimate)); + size = getAccuracy(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getAccuracy(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate)); } else { // size = 1/(coverageFactor+1) * (coverageFactor * coverage + Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + Math.sqrt(instancesCovered/(instancesCovered+upperEstimate))); - size = getAccuracy(coverage, instancesCovered/(instancesCovered+lowerEstimate)) - getAccuracy(coverage, instancesCovered/(instancesCovered+upperEstimate)); + size = getAccuracy(coverage, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getAccuracy(coverage, instancesCovered/(double)(instancesCovered+upperEstimate)); } if(size < 0.1) { - estimatedB = true; +// System.out.println(instancesDescription + " of " + 
testsPerformed); +// System.out.println("interval from " + lowerEstimate + " to " + upperEstimate); +// System.out.println("size: " + size); + +// estimatedB = true; // calculate total number of instances instancesDescription = (int) (instancesDescription/(double)testsPerformed * superClassInstances.size()); break; @@ -305,8 +309,8 @@ protusion = 0; } - MonitorFactory.add("estimatedB","count", estimatedB ? 1 : 0); - MonitorFactory.add("bInstances","count", testsPerformed); +// MonitorFactory.add("estimatedB","count", estimatedB ? 1 : 0); +// MonitorFactory.add("bInstances","count", testsPerformed); return getAccuracy(coverage, protusion); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-03-23 14:03:37
|
Revision: 1659 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1659&view=rev Author: jenslehmann Date: 2009-03-23 14:03:26 +0000 (Mon, 23 Mar 2009) Log Message: ----------- fixed incorrect handling of borderline case in accuracy computation Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-03-23 10:35:06 UTC (rev 1658) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-03-23 14:03:26 UTC (rev 1659) @@ -149,7 +149,7 @@ } double coverage = coveredInstances.size()/(double)classInstances.size(); - double protusion = additionalInstances.size() == 0 ? 0 : coveredInstances.size()/(double)(coveredInstances.size()+additionalInstances.size()); + double protusion = (additionalInstances.size() + coveredInstances.size() == 0) ? 0 : coveredInstances.size()/(double)(coveredInstances.size()+additionalInstances.size()); return new ClassScore(coveredInstances, coverage, additionalInstances, protusion, getAccuracy(coverage, protusion)); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-03-28 10:50:36
|
Revision: 1671 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1671&view=rev Author: jenslehmann Date: 2009-03-28 10:50:31 +0000 (Sat, 28 Mar 2009) Log Message: ----------- small improvement Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-03-27 21:43:54 UTC (rev 1670) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-03-28 10:50:31 UTC (rev 1671) @@ -153,7 +153,9 @@ double coverage = coveredInstances.size()/(double)classInstances.size(); double protusion = (additionalInstances.size() + coveredInstances.size() == 0) ? 0 : coveredInstances.size()/(double)(coveredInstances.size()+additionalInstances.size()); - boolean isConsistent = isConsistent(description); + // for each description with less than 100% coverage, we check whether it + // leads to an inconsistent knowledge base + boolean isConsistent = coverage >= 0.999999 || isConsistent(description); return new ClassScore(coveredInstances, coverage, additionalInstances, protusion, getAccuracy(coverage, protusion), isConsistent); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-06-18 11:55:20
|
Revision: 1805 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1805&view=rev Author: jenslehmann Date: 2009-06-18 11:53:56 +0000 (Thu, 18 Jun 2009) Log Message: ----------- small fix Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-06-18 07:53:24 UTC (rev 1804) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-06-18 11:53:56 UTC (rev 1805) @@ -345,6 +345,7 @@ } public double getAccuracyOrTooWeakExact(Description description, double noise) { + // overhang int additionalInstances = 0; for(Individual ind : superClassInstances) { @@ -367,7 +368,8 @@ return -1; } - double protusion = additionalInstances == 0 ? 0 : coveredInstances/(double)(coveredInstances+additionalInstances); +// double protusion = additionalInstances == 0 ? 0 : coveredInstances/(double)(coveredInstances+additionalInstances); + double protusion = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); return getAccuracy(coverage, protusion); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-12-19 09:53:06
|
Revision: 1933 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1933&view=rev Author: jenslehmann Date: 2009-12-19 09:52:58 +0000 (Sat, 19 Dec 2009) Log Message: ----------- implemented Jaccard distance metric for accuracy computation Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-12-17 15:18:17 UTC (rev 1932) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-12-19 09:52:58 UTC (rev 1933) @@ -42,6 +42,7 @@ import org.dllearner.core.owl.Individual; import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.SubClassAxiom; +import org.dllearner.utilities.Helper; /** * The problem of learning the description of an existing class @@ -58,13 +59,14 @@ private NamedClass classToDescribe; private List<Individual> classInstances; + private TreeSet<Individual> classInstancesSet; private boolean equivalence = true; private ClassLearningProblemConfigurator configurator; // approximation of accuracy +- 0.05 % private double approx = 0.05; private boolean useApproximations; - private boolean useFMeasure; +// private boolean useFMeasure; // factor for higher weight on coverage (needed for subclass learning) private double coverageFactor; @@ -72,6 +74,9 @@ // instances of super classes excluding instances of the class itself private List<Individual> superClassInstances; + private enum HeuristicType { PRED_ACC, OWN, JACCARD, FMEASURE, GEN_FMEASURE }; + private HeuristicType heuristic = HeuristicType.OWN; + @Override public ClassLearningProblemConfigurator getConfigurator(){ return configurator; @@ -95,7 +100,7 @@ DoubleConfigOption approxAccuracy = new DoubleConfigOption("approxAccuracy", "accuracy of the approximation (only for 
expert use)", 0.05); options.add(approxAccuracy); StringConfigOption accMethod = new StringConfigOption("accuracyMethod", "Specifies, which method/function to use for computing accuracy.","standard"); // or domain/range of a property. - accMethod.setAllowedValues(new String[] {"standard", "fmeasure", "predacc"}); + accMethod.setAllowedValues(new String[] {"standard", "fmeasure", "predacc", "generalised_fmeasure", "jaccard"}); options.add(accMethod); return options; } @@ -108,7 +113,25 @@ public void init() throws ComponentInitException { classToDescribe = new NamedClass(configurator.getClassToDescribe().toString()); useApproximations = configurator.getUseApproximations(); - useFMeasure = configurator.getAccuracyMethod().equals("fmeasure"); + + String accM = configurator.getAccuracyMethod(); + if(accM.equals("standard")) { + heuristic = HeuristicType.OWN; + } else if(accM.equals("fmeasure")) { + heuristic = HeuristicType.FMEASURE; + } else if(accM.equals("generalised_fmeasure")) { + heuristic = HeuristicType.GEN_FMEASURE; + } else if(accM.equals("jaccard")) { + heuristic = HeuristicType.JACCARD; + } else if(accM.equals("predacc")) { + heuristic = HeuristicType.PRED_ACC; + } + + if(useApproximations && !(heuristic.equals(HeuristicType.OWN) || heuristic.equals(HeuristicType.FMEASURE))) { + throw new ComponentInitException("Approximations only supported for F-Measure or Standard-Measure. 
It is unsupported for \"" + accM + ".\""); + } + +// useFMeasure = configurator.getAccuracyMethod().equals("fmeasure"); approx = configurator.getApproxAccuracy(); if(!reasoner.getNamedClasses().contains(classToDescribe)) { @@ -116,6 +139,7 @@ } classInstances = new LinkedList<Individual>(reasoner.getIndividuals(classToDescribe)); + classInstancesSet = new TreeSet<Individual>(classInstances); equivalence = (configurator.getType().equals("equivalence")); if(equivalence) { @@ -188,7 +212,17 @@ // we check whether the axiom already follows from the knowledge base boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe); - double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, protusion); + double acc = 0; + if(heuristic.equals(HeuristicType.FMEASURE)) { + acc = getFMeasure(coverage, protusion); + } else if(heuristic.equals(HeuristicType.OWN)) { + acc = getAccuracy(coverage, protusion); + } else { + // TODO: some superfluous instance checks are required to compute accuracy => + // move accuracy computation here if possible + acc = getAccuracyOrTooWeakExact(description, 1); + } +// double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, protusion); return new ClassScore(coveredInstances, coverage, additionalInstances, protusion, acc, isConsistent, followsFromKB); } @@ -339,10 +373,10 @@ double size; if(estimatedA) { // size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) + Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) + Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate))); - size = useFMeasure ? 
getFMeasure(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getFMeasure(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate)) : getAccuracy(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getAccuracy(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate)); + size = heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getFMeasure(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate)) : getAccuracy(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getAccuracy(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate)); } else { // size = 1/(coverageFactor+1) * (coverageFactor * coverage + Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + Math.sqrt(instancesCovered/(instancesCovered+upperEstimate))); - size = useFMeasure ? getFMeasure(recall, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getFMeasure(recall, instancesCovered/(double)(instancesCovered+upperEstimate)) : getAccuracy(recall, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getAccuracy(recall, instancesCovered/(double)(instancesCovered+upperEstimate)); + size = heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getFMeasure(recall, instancesCovered/(double)(instancesCovered+upperEstimate)) : getAccuracy(recall, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getAccuracy(recall, instancesCovered/(double)(instancesCovered+upperEstimate)); } if(size < 0.1) { @@ -375,34 +409,64 @@ // System.out.println("standard acc: " + getAccuracy(recall, precision)); // return getAccuracy(recall, precision); - return useFMeasure ? getFMeasure(recall, precision) : getAccuracy(recall, precision); + return heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure(recall, precision) : getAccuracy(recall, precision); } public double getAccuracyOrTooWeakExact(Description description, double noise) { - int additionalInstances = 0; - for(Individual ind : superClassInstances) { - if(reasoner.hasType(description, ind)) { - additionalInstances++; + if(heuristic.equals(HeuristicType.JACCARD)) { + + // computing R(C) restricted to relevant instances + TreeSet<Individual> additionalInstancesSet = new TreeSet<Individual>(); + for(Individual ind : superClassInstances) { + if(reasoner.hasType(description, ind)) { + additionalInstancesSet.add(ind); + } } - } - - int coveredInstances = 0; - for(Individual ind : classInstances) { - if(reasoner.hasType(description, ind)) { - coveredInstances++; + + // computing R(A) + TreeSet<Individual> coveredInstancesSet = new TreeSet<Individual>(); + for(Individual ind : classInstances) { + if(reasoner.hasType(description, ind)) { + coveredInstancesSet.add(ind); + } + } + + // for Jaccard: covered instances is the intersection of the sets + // R(A) and R(C); + Set<Individual> union = Helper.union(classInstancesSet, additionalInstancesSet); + return (1 - (union.size() - coveredInstancesSet.size()) / (double) union.size()); + + } else if (heuristic.equals(HeuristicType.OWN) || heuristic.equals(HeuristicType.FMEASURE)) { + + // computing R(C) restricted to relevant instances + int additionalInstances = 0; + for(Individual ind : superClassInstances) { + if(reasoner.hasType(description, ind)) { + additionalInstances++; + } } + + // computing R(A) + int coveredInstances = 0; + for(Individual ind : classInstances) { + if(reasoner.hasType(description, ind)) { + coveredInstances++; + } + } + + double recall = coveredInstances/(double)classInstances.size(); + + if(recall < 1 - noise) { + return -1; + } + + double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); + + return heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure(recall, precision) : getAccuracy(recall, precision); } - double recall = coveredInstances/(double)classInstances.size(); - - if(recall < 1 - noise) { - return -1; - } - - double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); - - return useFMeasure ? getFMeasure(recall, precision) : getAccuracy(recall, precision); + throw new Error("ClassLearningProblem error: not implemented"); } // @Deprecated @@ -478,6 +542,13 @@ return 0; } + @SuppressWarnings("unused") + private double getInverseJaccardDistance(TreeSet<Individual> set1, TreeSet<Individual> set2) { + Set<Individual> intersection = Helper.intersection(set1, set2); + Set<Individual> union = Helper.union(set1, set2); + return 1 - (union.size() - intersection.size()) / (double) union.size(); + } + // computes accuracy from coverage and protusion (changing this function may // make it necessary to change the appoximation too) private double getAccuracy(double coverage, double protusion) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-12-19 10:54:53
|
Revision: 1934 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1934&view=rev Author: jenslehmann Date: 2009-12-19 10:54:30 +0000 (Sat, 19 Dec 2009) Log Message: ----------- implemented predictive accuracy for class learning (for evaluation purposes) Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-12-19 09:52:58 UTC (rev 1933) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-12-19 10:54:30 UTC (rev 1934) @@ -100,7 +100,7 @@ DoubleConfigOption approxAccuracy = new DoubleConfigOption("approxAccuracy", "accuracy of the approximation (only for expert use)", 0.05); options.add(approxAccuracy); StringConfigOption accMethod = new StringConfigOption("accuracyMethod", "Specifies, which method/function to use for computing accuracy.","standard"); // or domain/range of a property. 
- accMethod.setAllowedValues(new String[] {"standard", "fmeasure", "predacc", "generalised_fmeasure", "jaccard"}); + accMethod.setAllowedValues(new String[] {"standard", "fmeasure", "pred_acc", "generalised_fmeasure", "jaccard"}); options.add(accMethod); return options; } @@ -123,7 +123,7 @@ heuristic = HeuristicType.GEN_FMEASURE; } else if(accM.equals("jaccard")) { heuristic = HeuristicType.JACCARD; - } else if(accM.equals("predacc")) { + } else if(accM.equals("pred_acc")) { heuristic = HeuristicType.PRED_ACC; } @@ -437,7 +437,7 @@ Set<Individual> union = Helper.union(classInstancesSet, additionalInstancesSet); return (1 - (union.size() - coveredInstancesSet.size()) / (double) union.size()); - } else if (heuristic.equals(HeuristicType.OWN) || heuristic.equals(HeuristicType.FMEASURE)) { + } else if (heuristic.equals(HeuristicType.OWN) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) { // computing R(C) restricted to relevant instances int additionalInstances = 0; @@ -462,8 +462,17 @@ } double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); - - return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) : getAccuracy(recall, precision); + + if(heuristic.equals(HeuristicType.OWN)) { + return getAccuracy(recall, precision); + } else if(heuristic.equals(HeuristicType.FMEASURE)) { + return getFMeasure(recall, precision); + } else if(heuristic.equals(HeuristicType.PRED_ACC)) { + // correctly classified divided by all examples + return (coveredInstances + superClassInstances.size() - additionalInstances) / (double) (classInstances.size() + superClassInstances.size()); + } + +// return heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure(recall, precision) : getAccuracy(recall, precision); } throw new Error("ClassLearningProblem error: not implemented"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2009-12-21 13:21:55
|
Revision: 1939 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1939&view=rev Author: jenslehmann Date: 2009-12-21 13:21:47 +0000 (Mon, 21 Dec 2009) Log Message: ----------- implemented generalised F-measure (preliminary) Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-12-21 12:12:05 UTC (rev 1938) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-12-21 13:21:47 UTC (rev 1939) @@ -37,10 +37,12 @@ import org.dllearner.core.options.StringConfigOption; import org.dllearner.core.options.URLConfigOption; import org.dllearner.core.owl.Axiom; +import org.dllearner.core.owl.ClassAssertionAxiom; import org.dllearner.core.owl.Description; import org.dllearner.core.owl.EquivalentClassesAxiom; import org.dllearner.core.owl.Individual; import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.Negation; import org.dllearner.core.owl.SubClassAxiom; import org.dllearner.utilities.Helper; @@ -73,7 +75,13 @@ // instances of super classes excluding instances of the class itself private List<Individual> superClassInstances; + // instances of super classes including instances of the class itself + private List<Individual> classAndSuperClassInstances; + // specific variables for generalised F-measure +// private Set<Individual> dcPos; => not need, is the same as classInstances + private TreeSet<Individual> negatedClassInstances; + private enum HeuristicType { PRED_ACC, OWN, JACCARD, FMEASURE, GEN_FMEASURE }; private HeuristicType heuristic = HeuristicType.OWN; @@ -155,6 +163,9 @@ for(Description superClass : superClasses) { superClassInstancesTmp.retainAll(reasoner.getIndividuals(superClass)); } + // we create one list, which 
includes instances of the class (an instance of the class is also instance of all super classes) ... + classAndSuperClassInstances = new LinkedList<Individual>(superClassInstancesTmp); + // ... and a second list not including them superClassInstancesTmp.removeAll(classInstances); // since we use the instance list for approximations, we want to avoid // any bias through URI names, so we shuffle the list once pseudo-randomly @@ -163,7 +174,17 @@ Collections.shuffle(classInstances, rand); Collections.shuffle(superClassInstances, rand); - System.out.println(classInstances.size() + " " + superClassInstances.size()); + if(heuristic.equals(HeuristicType.GEN_FMEASURE)) { + Description classToDescribeNeg = new Negation(classToDescribe); + negatedClassInstances = new TreeSet<Individual>(); + for(Individual ind : superClassInstances) { + if(reasoner.hasType(classToDescribeNeg, ind)) { + negatedClassInstances.add(ind); + } + } + } + +// System.out.println(classInstances.size() + " " + superClassInstances.size()); } /** @@ -412,6 +433,11 @@ return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) : getAccuracy(recall, precision); } + ////// + // TODO: Adaption to super class learning case needs to be made for each heuristic! + // TODO: noise parameter is not used by some heuristics + ////// + public double getAccuracyOrTooWeakExact(Description description, double noise) { if(heuristic.equals(HeuristicType.JACCARD)) { @@ -473,6 +499,38 @@ } // return heuristic.equals(HeuristicType.FMEASURE) ? 
getFMeasure(recall, precision) : getAccuracy(recall, precision); + } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) { + + // implementation is based on: + // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf + // default negation should be turned off when using fast instance checker + + // compute I_C (negated and non-negated concepts separately) + TreeSet<Individual> icPos = new TreeSet<Individual>(); + TreeSet<Individual> icNeg = new TreeSet<Individual>(); + Description descriptionNeg = new Negation(description); + // loop through all relevant instances + for(Individual ind : classAndSuperClassInstances) { + if(reasoner.hasType(description, ind)) { + icPos.add(ind); + } else if(reasoner.hasType(descriptionNeg, ind)) { + icNeg.add(ind); + } + } + + // semantic precision + // first compute I_C \cap Cn(DC) + // => TODO: we ignore Cn for now, because it is not clear how to implement it + Set<Individual> tmp1Pos = Helper.intersection(icPos, classInstancesSet); + Set<Individual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances); + int tmp1Size = tmp1Pos.size() + tmp1Neg.size(); + + // Cn(I_C) \cap D_C is the same set if we ignore Cn ... + + double prec = tmp1Size / (double) (icPos.size() + icNeg.size()); + double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size()); + + return getFMeasure(rec,prec); } throw new Error("ClassLearningProblem error: not implemented"); @@ -565,7 +623,7 @@ } private double getFMeasure(double recall, double precision) { - return 2 * precision * recall / (precision + recall); + return (precision + recall == 0) ? 0 : 2 * precision * recall / (precision + recall); } // see paper: expression used in confidence interval estimation This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2010-01-01 14:11:16
|
Revision: 1979 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1979&view=rev Author: jenslehmann Date: 2010-01-01 14:11:08 +0000 (Fri, 01 Jan 2010) Log Message: ----------- bug fix Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-12-31 12:40:18 UTC (rev 1978) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2010-01-01 14:11:08 UTC (rev 1979) @@ -615,7 +615,7 @@ // return -1; // } // we only return too weak if there is no recall - if(rec <= 0.01) { + if(rec <= 0.0000001) { return -1; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |