[DL-Learner SVN] SF.net SVN: dl-learner:[1914] trunk/src/dl-learner/org/dllearner

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 1914
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1914&view=rev
Author:   jenslehmann
Date:     2009-11-23 14:35:21 +0000 (Mon, 23 Nov 2009)

Log Message:
-----------
added ability to optimise towards F-measure in class learning problems

Modified Paths:
--------------
    trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java
    trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java

Modified: trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java
===================================================================

--- trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java	2009-11-16 22:09:55 UTC (rev 1913)
+++ trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java	2009-11-23 14:35:21 UTC (rev 1914)
@@ -81,6 +81,15 @@
 public boolean getUseApproximations() {
 return (Boolean) ComponentManager.getInstance().getConfigOptionValue(classLearningProblem,  "useApproximations") ;
 }
+/**
+* Returns the value of the "accuracyMethod" option, which selects the method/function used for computing accuracy.
+* mandatory: false | reinit necessary: true
+* default value: standard
+* @return the configured accuracy method as a String
+**/
+public String getAccuracyMethod() {
+return (String) ComponentManager.getInstance().getConfigOptionValue(classLearningProblem,  "accuracyMethod") ;
+}
 
 /**
 * @param classToDescribe class of which a description should be learned.
@@ -108,6 +117,15 @@
 ComponentManager.getInstance().applyConfigEntry(classLearningProblem, "useApproximations", useApproximations);
 reinitNecessary = true;
 }
+/**
+* @param accuracyMethod the method/function to use for computing accuracy.
+* mandatory: false | reinit necessary: true
+* default value: standard
+**/
+public void setAccuracyMethod(String accuracyMethod) {
+ComponentManager.getInstance().applyConfigEntry(classLearningProblem, "accuracyMethod", accuracyMethod);
+reinitNecessary = true;
+}
 
 /**
 * true, if this component needs reinitializsation.

Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java	2009-11-16 22:09:55 UTC (rev 1913)
+++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java	2009-11-23 14:35:21 UTC (rev 1914)
@@ -51,6 +51,10 @@
  */
 public class ClassLearningProblem extends LearningProblem {
 
+	// TODO: naming needs to be cleaned up for consistency:
+	// coverage => recall
+	// protusion => precision
+	
 	private NamedClass classToDescribe;
 	private List<Individual> classInstances;
 	private boolean equivalence = true;
@@ -59,6 +63,7 @@
 	private static final double approx = 0.05;
 	
 	private boolean useApproximations;
+	private boolean useFMeasure;
 	
 	// factor for higher weight on coverage (needed for subclass learning)
 	private double coverageFactor;
@@ -86,6 +91,9 @@
 		options.add(type);	
 		BooleanConfigOption approx = new BooleanConfigOption("useApproximations", "whether to use stochastic approximations for computing accuracy", true);
 		options.add(approx);
+		StringConfigOption accMethod = new StringConfigOption("accuracyMethod", "Specifies, which method/function to use for computing accuracy.","standard"); //  or domain/range of a property.
+		accMethod.setAllowedValues(new String[] {"standard", "fmeasure", "predacc"});
+		options.add(accMethod);
 		return options;
 	}
 
@@ -97,6 +105,7 @@
 	public void init() throws ComponentInitException {
 		classToDescribe = new NamedClass(configurator.getClassToDescribe().toString());
 		useApproximations = configurator.getUseApproximations();
+		useFMeasure = configurator.getAccuracyMethod().equals("fmeasure");
 		
 		if(!reasoner.getNamedClasses().contains(classToDescribe)) {
 			throw new ComponentInitException("The class \"" + configurator.getClassToDescribe() + "\" does not exist. Make sure you spelled it correctly.");
@@ -173,7 +182,8 @@
 		// we check whether the axiom already follows from the knowledge base
 		boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe);
 		
-		return new ClassScore(coveredInstances, coverage, additionalInstances, protusion, getAccuracy(coverage, protusion), isConsistent, followsFromKB);
+		double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, protusion);
+		return new ClassScore(coveredInstances, coverage, additionalInstances, protusion, acc, isConsistent, followsFromKB);
 	}	
 	
 	public boolean isEquivalenceProblem() {
@@ -210,11 +220,19 @@
 
 	@Override
 	public double getAccuracyOrTooWeak(Description description, double noise) {
-		if(useApproximations) {
-			return getAccuracyOrTooWeakApprox(description, noise);
-		} else {
-			return getAccuracyOrTooWeakExact(description, noise);
-		}
+//		if(useFMeasure) {
+//			if(useApproximations) {
+//				return getFMeasureOrTooWeakApprox(description, noise);
+//			} else {
+//				return getFMeasureOrTooWeakExact(description, noise);
+//			}
+//		} else {
+			if(useApproximations) {
+				return getAccuracyOrTooWeakApprox(description, noise);
+			} else {
+				return getAccuracyOrTooWeakExact(description, noise);
+			}			
+//		}
 	}
 	
 	// instead of using the standard operation, we use optimisation
@@ -278,7 +296,7 @@
 			}
 		}	
 		
-		double coverage = instancesCovered/(double)classInstances.size();
+		double recall = instancesCovered/(double)classInstances.size();
 		
 //		MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0);
 //		MonitorFactory.add("aInstances","count", total);
@@ -318,7 +336,7 @@
 					size = getAccuracy(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getAccuracy(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate));					
 				} else {
 //					size = 1/(coverageFactor+1) * (coverageFactor * coverage + Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + Math.sqrt(instancesCovered/(instancesCovered+upperEstimate)));
-					size = getAccuracy(coverage, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getAccuracy(coverage, instancesCovered/(double)(instancesCovered+upperEstimate));
+					size = getAccuracy(recall, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getAccuracy(recall, instancesCovered/(double)(instancesCovered+upperEstimate));
 				}
 				
 				if(size < 0.1) {
@@ -336,20 +354,26 @@
 		
 		// since we measured/estimated accuracy only on instances outside A (superClassInstances
 		// does not include instances of A), we need to add it in the denominator
-		double protusion = instancesCovered/(double)(instancesDescription+instancesCovered);
+		double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
 		if(instancesCovered + instancesDescription == 0) {
-			protusion = 0;
+			precision = 0;
 		}
 		
 //		MonitorFactory.add("estimatedB","count", estimatedB ? 1 : 0);
 //		MonitorFactory.add("bInstances","count", testsPerformed);		
 	
-		return getAccuracy(coverage, protusion);		
+		// debug code to compare the two measures
+//		System.out.println("recall: " + recall);
+//		System.out.println("precision: " + precision);
+//		System.out.println("F-measure: " + getFMeasure(recall, precision));
+//		System.out.println("standard acc: " + getAccuracy(recall, precision));
+		
+//		return getAccuracy(recall, precision);
+		return useFMeasure ? getFMeasure(recall, precision) : getAccuracy(recall, precision);
 	}
 	
 	public double getAccuracyOrTooWeakExact(Description description, double noise) {
 
-		// overhang
 		int additionalInstances = 0;
 		for(Individual ind : superClassInstances) {
 			if(reasoner.hasType(description, ind)) {
@@ -357,7 +381,6 @@
 			}
 		}
 		
-		// coverage
 		int coveredInstances = 0;
 		for(Individual ind : classInstances) {
 			if(reasoner.hasType(description, ind)) {
@@ -365,16 +388,15 @@
 			}
 		}
 		
-		double coverage = coveredInstances/(double)classInstances.size();
+		double recall = coveredInstances/(double)classInstances.size();
 		
-		if(coverage < 1 - noise) {
+		if(recall < 1 - noise) {
 			return -1;
 		}
 		
-//		double protusion = additionalInstances == 0 ? 0 : coveredInstances/(double)(coveredInstances+additionalInstances);
-		double protusion = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);
+		double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);
 		
-		return getAccuracy(coverage, protusion);		
+		return useFMeasure ? getFMeasure(recall, precision) : getAccuracy(recall, precision);		
 	}
 	
 //	@Deprecated
@@ -389,12 +411,77 @@
 //		}
 //	}
 	
+	// Recall: fraction of classInstances that reasoner.hasType reports as instances of the description.
+	// Note: calling getRecall and getPrecision separately is wasteful, since each recounts the covered instances.
+	public double getRecall(Description description) {
+		int coveredInstances = 0;
+		for(Individual ind : classInstances) {
+			if(reasoner.hasType(description, ind)) {
+				coveredInstances++;
+			}
+		}		
+		return coveredInstances/(double)classInstances.size(); // NOTE(review): NaN (0/0.0) if classInstances is empty
+	}
+	
+	public double getPrecision(Description description) {
+		// Precision: covered classInstances divided by all covered individuals (classInstances + superClassInstances).
+		int additionalInstances = 0;
+		for(Individual ind : superClassInstances) {
+			if(reasoner.hasType(description, ind)) {
+				additionalInstances++;
+			}
+		}
+		// instances of the class itself that the description covers
+		int coveredInstances = 0;
+		for(Individual ind : classInstances) {
+			if(reasoner.hasType(description, ind)) {
+				coveredInstances++;
+			}
+		}
+		// defined as 0 when the description covers no individual at all, which avoids a 0/0 division
+		return (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);
+	}
+	
+	public double getPredictiveAccuracy() { // NOTE(review): constant result - looks like an unimplemented placeholder
+		return 0;
+	}
+	
+	// Measures from http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
+	// All methods below are currently dummies: each one just returns 0.
+	public double getMatchRate() {
+		return 0;
+	}
+	
+	public double getOmissionError() {
+		return 0;
+	}
+	
+	public double getInductionRate() {
+		return 0;
+	}
+	
+	public double getComissionError() {
+		return 0;
+	}
+	
+	public double getGeneralisedRecall() {
+		return 0;
+	}	
+	
+	public double getGeneralisedPrecision() {
+		return 0;
+	}		
+	
 	// computes accuracy from coverage and protusion (changing this function may
 	// make it necessary to change the appoximation too)
 	private double getAccuracy(double coverage, double protusion) {
 		return (coverageFactor * coverage + Math.sqrt(protusion)) / (coverageFactor + 1);
 	}
 	
+	private double getFMeasure(double recall, double precision) { // F-measure (F1): harmonic mean of precision and recall
+		return (precision + recall == 0) ? 0 : 2 * precision * recall / (precision + recall); // 0 instead of NaN (0/0) when both are 0
+	}
+	
 	// see paper: expression used in confidence interval estimation
 	private static double p3(double p1, int total) {
 		return 1.96 * Math.sqrt(p1*(1-p1)/(total+4));


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.