From: <jen...@us...> - 2009-11-23 14:35:30
|
Revision: 1914 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1914&view=rev Author: jenslehmann Date: 2009-11-23 14:35:21 +0000 (Mon, 23 Nov 2009) Log Message: ----------- added ability to optimise towards F-measure in class learning problems Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java Modified: trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java 2009-11-16 22:09:55 UTC (rev 1913) +++ trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java 2009-11-23 14:35:21 UTC (rev 1914) @@ -81,6 +81,15 @@ public boolean getUseApproximations() { return (Boolean) ComponentManager.getInstance().getConfigOptionValue(classLearningProblem, "useApproximations") ; } +/** +* accuracyMethod Specifies, which method/function to use for computing accuracy.. +* mandatory: false| reinit necessary: true +* default value: standard +* @return String +**/ +public String getAccuracyMethod() { +return (String) ComponentManager.getInstance().getConfigOptionValue(classLearningProblem, "accuracyMethod") ; +} /** * @param classToDescribe class of which a description should be learned. @@ -108,6 +117,15 @@ ComponentManager.getInstance().applyConfigEntry(classLearningProblem, "useApproximations", useApproximations); reinitNecessary = true; } +/** +* @param accuracyMethod Specifies, which method/function to use for computing accuracy.. +* mandatory: false| reinit necessary: true +* default value: standard +**/ +public void setAccuracyMethod(String accuracyMethod) { +ComponentManager.getInstance().applyConfigEntry(classLearningProblem, "accuracyMethod", accuracyMethod); +reinitNecessary = true; +} /** * true, if this component needs reinitializsation. Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-11-16 22:09:55 UTC (rev 1913) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2009-11-23 14:35:21 UTC (rev 1914) @@ -51,6 +51,10 @@ */ public class ClassLearningProblem extends LearningProblem { + // TODO: naming needs to be cleaned up for consistency: + // coverage => recall + // protusion => precision + private NamedClass classToDescribe; private List<Individual> classInstances; private boolean equivalence = true; @@ -59,6 +63,7 @@ private static final double approx = 0.05; private boolean useApproximations; + private boolean useFMeasure; // factor for higher weight on coverage (needed for subclass learning) private double coverageFactor; @@ -86,6 +91,9 @@ options.add(type); BooleanConfigOption approx = new BooleanConfigOption("useApproximations", "whether to use stochastic approximations for computing accuracy", true); options.add(approx); + StringConfigOption accMethod = new StringConfigOption("accuracyMethod", "Specifies, which method/function to use for computing accuracy.","standard"); // or domain/range of a property. + accMethod.setAllowedValues(new String[] {"standard", "fmeasure", "predacc"}); + options.add(accMethod); return options; } @@ -97,6 +105,7 @@ public void init() throws ComponentInitException { classToDescribe = new NamedClass(configurator.getClassToDescribe().toString()); useApproximations = configurator.getUseApproximations(); + useFMeasure = configurator.getAccuracyMethod().equals("fmeasure"); if(!reasoner.getNamedClasses().contains(classToDescribe)) { throw new ComponentInitException("The class \"" + configurator.getClassToDescribe() + "\" does not exist. Make sure you spelled it correctly."); @@ -173,7 +182,8 @@ // we check whether the axiom already follows from the knowledge base boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe); - return new ClassScore(coveredInstances, coverage, additionalInstances, protusion, getAccuracy(coverage, protusion), isConsistent, followsFromKB); + double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, protusion); + return new ClassScore(coveredInstances, coverage, additionalInstances, protusion, acc, isConsistent, followsFromKB); } public boolean isEquivalenceProblem() { @@ -210,11 +220,19 @@ @Override public double getAccuracyOrTooWeak(Description description, double noise) { - if(useApproximations) { - return getAccuracyOrTooWeakApprox(description, noise); - } else { - return getAccuracyOrTooWeakExact(description, noise); - } +// if(useFMeasure) { +// if(useApproximations) { +// return getFMeasureOrTooWeakApprox(description, noise); +// } else { +// return getFMeasureOrTooWeakExact(description, noise); +// } +// } else { + if(useApproximations) { + return getAccuracyOrTooWeakApprox(description, noise); + } else { + return getAccuracyOrTooWeakExact(description, noise); + } +// } } // instead of using the standard operation, we use optimisation @@ -278,7 +296,7 @@ } } - double coverage = instancesCovered/(double)classInstances.size(); + double recall = instancesCovered/(double)classInstances.size(); // MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0); // MonitorFactory.add("aInstances","count", total); @@ -318,7 +336,7 @@ size = getAccuracy(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getAccuracy(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate)); } else { // size = 1/(coverageFactor+1) * (coverageFactor * coverage + Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + Math.sqrt(instancesCovered/(instancesCovered+upperEstimate))); - size = getAccuracy(coverage, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getAccuracy(coverage, instancesCovered/(double)(instancesCovered+upperEstimate)); + size = getAccuracy(recall, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getAccuracy(recall, instancesCovered/(double)(instancesCovered+upperEstimate)); } if(size < 0.1) { @@ -336,20 +354,26 @@ // since we measured/estimated accuracy only on instances outside A (superClassInstances // does not include instances of A), we need to add it in the denominator - double protusion = instancesCovered/(double)(instancesDescription+instancesCovered); + double precision = instancesCovered/(double)(instancesDescription+instancesCovered); if(instancesCovered + instancesDescription == 0) { - protusion = 0; + precision = 0; } // MonitorFactory.add("estimatedB","count", estimatedB ? 1 : 0); // MonitorFactory.add("bInstances","count", testsPerformed); - return getAccuracy(coverage, protusion); + // debug code to compare the two measures +// System.out.println("recall: " + recall); +// System.out.println("precision: " + precision); +// System.out.println("F-measure: " + getFMeasure(recall, precision)); +// System.out.println("standard acc: " + getAccuracy(recall, precision)); + +// return getAccuracy(recall, precision); + return useFMeasure ? getFMeasure(recall, precision) : getAccuracy(recall, precision); } public double getAccuracyOrTooWeakExact(Description description, double noise) { - // overhang int additionalInstances = 0; for(Individual ind : superClassInstances) { if(reasoner.hasType(description, ind)) { @@ -357,7 +381,6 @@ } } - // coverage int coveredInstances = 0; for(Individual ind : classInstances) { if(reasoner.hasType(description, ind)) { @@ -365,16 +388,15 @@ } } - double coverage = coveredInstances/(double)classInstances.size(); + double recall = coveredInstances/(double)classInstances.size(); - if(coverage < 1 - noise) { + if(recall < 1 - noise) { return -1; } -// double protusion = additionalInstances == 0 ? 0 : coveredInstances/(double)(coveredInstances+additionalInstances); - double protusion = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); + double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); - return getAccuracy(coverage, protusion); + return useFMeasure ? getFMeasure(recall, precision) : getAccuracy(recall, precision); } // @Deprecated @@ -389,12 +411,77 @@ // } // } + // please note that getting recall and precision wastes some computational + // resource, because both methods need to compute the covered instances + public double getRecall(Description description) { + int coveredInstances = 0; + for(Individual ind : classInstances) { + if(reasoner.hasType(description, ind)) { + coveredInstances++; + } + } + return coveredInstances/(double)classInstances.size(); + } + + public double getPrecision(Description description) { + + int additionalInstances = 0; + for(Individual ind : superClassInstances) { + if(reasoner.hasType(description, ind)) { + additionalInstances++; + } + } + + int coveredInstances = 0; + for(Individual ind : classInstances) { + if(reasoner.hasType(description, ind)) { + coveredInstances++; + } + } + + return (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); + } + + public double getPredictiveAccuracy() { + return 0; + } + + // see http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf + // for all methods below (currently dummies) + public double getMatchRate() { + return 0; + } + + public double getOmissionError() { + return 0; + } + + public double getInductionRate() { + return 0; + } + + public double getComissionError() { + return 0; + } + + public double getGeneralisedRecall() { + return 0; + } + + public double getGeneralisedPrecision() { + return 0; + } + // computes accuracy from coverage and protusion (changing this function may // make it necessary to change the appoximation too) private double getAccuracy(double coverage, double protusion) { return (coverageFactor * coverage + Math.sqrt(protusion)) / (coverageFactor + 1); } + private double getFMeasure(double recall, double precision) { + return 2 * precision * recall / (precision + recall); + } + // see paper: expression used in confidence interval estimation private static double p3(double p1, int total) { return 1.96 * Math.sqrt(p1*(1-p1)/(total+4)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |