[DL-Learner SVN] SF.net SVN: dl-learner:[2197] trunk/src/dl-learner/org/dllearner

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 2197
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2197&view=rev
Author:   jenslehmann
Date:     2010-07-14 08:58:23 +0000 (Wed, 14 Jul 2010)

Log Message:
-----------
- further work on unit tests for the heuristics of the class learning problem
- the maximum execution time for computing a heuristic value can now be configured in the class learning problem (this prevents the algorithm from getting stuck when a heuristic value is very expensive to compute)

Modified Paths:
--------------
    trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java
    trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java
    trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java
    trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java

Modified: trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java
===================================================================

--- trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java	2010-07-13 17:02:04 UTC (rev 2196)
+++ trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java	2010-07-14 08:58:23 UTC (rev 2197)
@@ -108,6 +108,15 @@
 public boolean getCheckConsistency() {
 return (Boolean) ComponentManager.getInstance().getConfigOptionValue(classLearningProblem,  "checkConsistency") ;
 }
+/**
+* maxExecutionTimeInSeconds algorithm will stop after specified seconds.
+* mandatory: false| reinit necessary: true
+* default value: 10
+* @return int 
+**/
+public int getMaxExecutionTimeInSeconds() {
+return (Integer) ComponentManager.getInstance().getConfigOptionValue(classLearningProblem,  "maxExecutionTimeInSeconds") ;
+}
 
 /**
 * @param classToDescribe class of which a description should be learned.
@@ -162,6 +171,15 @@
 ComponentManager.getInstance().applyConfigEntry(classLearningProblem, "checkConsistency", checkConsistency);
 reinitNecessary = true;
 }
+/**
+* @param maxExecutionTimeInSeconds algorithm will stop after specified seconds.
+* mandatory: false| reinit necessary: true
+* default value: 10
+**/
+public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds) {
+ComponentManager.getInstance().applyConfigEntry(classLearningProblem, "maxExecutionTimeInSeconds", maxExecutionTimeInSeconds);
+reinitNecessary = true;
+}
 
 /**
 * true, if this component needs reinitializsation.

Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java	2010-07-13 17:02:04 UTC (rev 2196)
+++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java	2010-07-14 08:58:23 UTC (rev 2197)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) 2007-2009, Jens Lehmann
+ * Copyright (C) 2007-2010, Jens Lehmann
  *
  * This file is part of DL-Learner.
  * 
@@ -34,6 +34,7 @@
 import org.dllearner.core.ReasonerComponent;
 import org.dllearner.core.configurators.ClassLearningProblemConfigurator;
 import org.dllearner.core.options.BooleanConfigOption;
+import org.dllearner.core.options.CommonConfigOptions;
 import org.dllearner.core.options.ConfigOption;
 import org.dllearner.core.options.DoubleConfigOption;
 import org.dllearner.core.options.StringConfigOption;
@@ -45,6 +46,7 @@
 import org.dllearner.core.owl.NamedClass;
 import org.dllearner.core.owl.Negation;
 import org.dllearner.core.owl.SubClassAxiom;
+import org.dllearner.learningproblems.Heuristics.HeuristicType;
 import org.dllearner.utilities.Helper;
 
 /**
@@ -59,36 +61,28 @@
 	private static Logger logger = Logger.getLogger(ClassLearningProblem.class);
     private long nanoStartTime;
 	private static int maxExecutionTimeInSeconds = 10;
-
-	// TODO: naming needs to be cleaned up for consistency:
-	// coverage => recall
-	// protusion => precision
 	
 	private NamedClass classToDescribe;
 	private List<Individual> classInstances;
 	private TreeSet<Individual> classInstancesSet;
 	private boolean equivalence = true;
 	private ClassLearningProblemConfigurator configurator;
-	// approximation of accuracy +- 0.05 %
+	// approximation of accuracy
 	private double approx = 0.05;
 	
 	private boolean useApproximations;
-//	private boolean useFMeasure;
 	
-	// factor for higher weight on coverage (needed for subclass learning)
+	// factor for higher weight on recall (needed for subclass learning)
 	private double coverageFactor;
 	
 	// instances of super classes excluding instances of the class itself
 	private List<Individual> superClassInstances;
 	// instances of super classes including instances of the class itself
 	private List<Individual> classAndSuperClassInstances;
-	
 	// specific variables for generalised F-measure
-//	private Set<Individual> dcPos; => not need, is the same as classInstances
 	private TreeSet<Individual> negatedClassInstances;
 	
-	private enum HeuristicType { PRED_ACC, OWN, JACCARD, FMEASURE, GEN_FMEASURE };
-	private HeuristicType heuristic = HeuristicType.OWN;
+	private HeuristicType heuristic = HeuristicType.AMEASURE;
 	
 	@Override
 	public ClassLearningProblemConfigurator getConfigurator(){
@@ -116,7 +110,8 @@
 		accMethod.setAllowedValues(new String[] {"standard", "fmeasure", "pred_acc", "generalised_fmeasure", "jaccard"});
 		options.add(accMethod);
 		BooleanConfigOption consistency = new BooleanConfigOption("checkConsistency", "Specify whether to check consistency for solution candidates. This is convenient for user interfaces, but can be performance intensive.", true);
-		options.add(consistency);		
+		options.add(consistency);	
+		options.add(CommonConfigOptions.maxExecutionTimeInSeconds(10));
 		return options;
 	}
 
@@ -131,7 +126,7 @@
 		
 		String accM = configurator.getAccuracyMethod();
 		if(accM.equals("standard")) {
-			heuristic = HeuristicType.OWN;
+			heuristic = HeuristicType.AMEASURE;
 		} else if(accM.equals("fmeasure")) {
 			heuristic = HeuristicType.FMEASURE;
 		} else if(accM.equals("generalised_fmeasure")) {
@@ -142,7 +137,7 @@
 			heuristic = HeuristicType.PRED_ACC;
 		}
 		
-		if(useApproximations && !(heuristic.equals(HeuristicType.OWN) || heuristic.equals(HeuristicType.FMEASURE))) {
+		if(useApproximations && !(heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE))) {
 			throw new ComponentInitException("Approximations only supported for F-Measure or Standard-Measure. It is unsupported for \"" + accM + ".\"");
 		}
 		
@@ -161,6 +156,7 @@
 		
 		classInstancesSet = new TreeSet<Individual>(classInstances);
 		equivalence = (configurator.getType().equals("equivalence"));
+		maxExecutionTimeInSeconds = configurator.getMaxExecutionTimeInSeconds();
 		
 		if(equivalence) {
 			coverageFactor = 1;
@@ -199,25 +195,13 @@
 		
 //		System.out.println(classInstances.size() + " " + superClassInstances.size());
 	}
-	
-	/**
-	 * Computes the fraction of the instances of the class to learn, which 
-	 * is covered by the given description.
-	 * @param description The description for which to compute coverage.
-	 * @return The class coverage (between 0 and 1).
-	 */
-//	public double getCoverage(Description description) {
-//		int instancesCovered = 0;
-//		for(Individual instance : classInstances) {
-//			if(reasoner.hasType(description, instance)) {
-//				instancesCovered++;
-//			}
-//		}
-//		return instancesCovered/(double)classInstances.size();
-//	}
-	
+		
 	@Override
 	public ClassScore computeScore(Description description) {
+		
+		// TODO: reuse code to ensure that we never return inconsistent results
+		// between getAccuracy, getAccuracyOrTooWeak and computeScore
+		
 		// overhang
 		Set<Individual> additionalInstances = new TreeSet<Individual>();
 		for(Individual ind : superClassInstances) {
@@ -234,16 +218,16 @@
 			}
 		}
 		
-		double coverage = coveredInstances.size()/(double)classInstances.size();
-		double protusion = (additionalInstances.size() + coveredInstances.size() == 0) ? 0 : coveredInstances.size()/(double)(coveredInstances.size()+additionalInstances.size());
+		double recall = coveredInstances.size()/(double)classInstances.size();
+		double precision = (additionalInstances.size() + coveredInstances.size() == 0) ? 0 : coveredInstances.size()/(double)(coveredInstances.size()+additionalInstances.size());
 		// for each description with less than 100% coverage, we check whether it is
 		// leads to an inconsistent knowledge base
 		
 		double acc = 0;
 		if(heuristic.equals(HeuristicType.FMEASURE)) {
-			acc = getFMeasure(coverage, protusion);
-		} else if(heuristic.equals(HeuristicType.OWN)) {
-			acc = getAccuracy(coverage, protusion);
+			acc = getFMeasure(recall, precision);
+		} else if(heuristic.equals(HeuristicType.AMEASURE)) {
+			acc = getAccuracy(recall, precision);
 		} else {
 			// TODO: some superfluous instance checks are required to compute accuracy => 
 			// move accuracy computation here if possible 
@@ -264,10 +248,10 @@
 			boolean isConsistent = followsFromKB || isConsistent(description);
 			
 //			double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, protusion);
-			return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), coverage, additionalInstances, protusion, acc, isConsistent, followsFromKB);
+			return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc, isConsistent, followsFromKB);
 		
 		} else {
-			return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), coverage, additionalInstances, protusion, acc);
+			return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc);
 		}
 	}	
 	
@@ -280,27 +264,8 @@
 	 */
 	@Override
 	public double getAccuracy(Description description) {
-		
-		// overhang
-		int additionalInstances = 0;
-		for(Individual ind : superClassInstances) {
-			if(reasoner.hasType(description, ind)) {
-				additionalInstances++;
-			}
-		}
-		
-		// coverage
-		int coveredInstances = 0;
-		for(Individual ind : classInstances) {
-			if(reasoner.hasType(description, ind)) {
-				coveredInstances++;
-			}
-		}
-		
-		double coverage = coveredInstances/(double)classInstances.size();
-		double protusion = additionalInstances == 0 ? 0 : coveredInstances/(double)(coveredInstances+additionalInstances);
-		
-		return getAccuracy(coverage, protusion);
+		// a noise value of 1.0 means that we never return too weak (-1.0) 
+		return getAccuracyOrTooWeak(description, 1.0);
 	}
 
 	@Override
@@ -501,14 +466,10 @@
 				}
 			}
 					
-			// TODO: easier computation |R(A) \cap R(C)| / |R(A) \cup R(C)|
-			
-			// for Jaccard: covered instances is the intersection of the sets
-			// R(A) and R(C); 
 			Set<Individual> union = Helper.union(classInstancesSet, additionalInstancesSet);
-			return (1 - (union.size() - coveredInstancesSet.size()) / (double) union.size());
+			return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size());
 			
-		} else if (heuristic.equals(HeuristicType.OWN) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) {
+		} else if (heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) {
 			
 			// computing R(C) restricted to relevant instances
 			int additionalInstances = 0;
@@ -542,7 +503,7 @@
 			double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);
 
 			
-			if(heuristic.equals(HeuristicType.OWN)) {
+			if(heuristic.equals(HeuristicType.AMEASURE)) {
 				// best reachable concept has same recall and precision 1:
 				// 1/t+1 * (t*r + 1)
 				if((coverageFactor*recall+1)/(double)(coverageFactor+1) <(1-noise)) {

Modified: trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java	2010-07-13 17:02:04 UTC (rev 2196)
+++ trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java	2010-07-14 08:58:23 UTC (rev 2197)
@@ -29,6 +29,8 @@
  */
 public class Heuristics {
 
+	public static enum HeuristicType { PRED_ACC, AMEASURE, JACCARD, FMEASURE, GEN_FMEASURE };	
+	
 	/**
 	 * Computes F1-Score.
 	 * @param recall Recall.
@@ -164,4 +166,10 @@
 		return (noise * nrOfPositiveExamples) < nrOfNegClassifiedPositives;
 	}
 	
+	// TODO: approximate the F-measure from the already measured |R(A) \cap R(C)| and |R(C) \ R(A)|
+	public double getFMeasureApproximation() {
+		// TODO: return mean and interval length?
+		return 0;
+	}
+	
 }

Modified: trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java	2010-07-13 17:02:04 UTC (rev 2196)
+++ trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java	2010-07-14 08:58:23 UTC (rev 2197)
@@ -19,10 +19,23 @@
  */
 package org.dllearner.test.junit;
 
+import static org.junit.Assert.*;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+
+import org.dllearner.core.ComponentInitException;
+import org.dllearner.core.ComponentManager;
+import org.dllearner.core.KnowledgeSource;
+import org.dllearner.core.ReasonerComponent;
 import org.dllearner.core.owl.ClassAssertionAxiom;
+import org.dllearner.core.owl.Description;
 import org.dllearner.core.owl.Individual;
 import org.dllearner.core.owl.KB;
 import org.dllearner.core.owl.NamedClass;
+import org.dllearner.kb.KBFile;
+import org.dllearner.learningproblems.ClassLearningProblem;
+import org.dllearner.reasoning.OWLAPIReasoner;
 import org.junit.Test;
 
 /**
@@ -33,22 +46,80 @@
  */
 public class HeuristicTests {
 
+	// when comparing heuristic values, this is the maximum allowed difference between actual and returned value
+	// (floating-point rounding errors are always possible, so we cannot assume that actual and returned values are exactly equal)
+	private static double delta = 0.000001;
+	
 	@Test
-	public void classLearningTests() {
+	public void classLearningTests() throws ComponentInitException, MalformedURLException {
 		// create artificial ontology
 		KB kb = new KB();
+		String ns = "http://dl-learner.org/junit/";
 		NamedClass[] nc = new NamedClass[5];
 		for(int i=0; i<5; i++) {
-			nc[i] = new NamedClass("A" + i);
+			nc[i] = new NamedClass(ns + "A" + i);
 		}
-		Individual[] ind = new Individual[5];
+		Individual[] ind = new Individual[100];
 		for(int i=0; i<100; i++) {
-			ind[i] = new Individual("a" + i);
+			ind[i] = new Individual(ns + "i" + i);
 		}
 		
+		// A0 (nc[0]) has instances i0 to i19
 		for(int i=0; i<20; i++) {
 			kb.addAxiom(new ClassAssertionAxiom(nc[0],ind[i]));
 		}
+		
+		// A1 (nc[1]) has instances i10 to i29
+		for(int i=10; i<30; i++) {
+			kb.addAxiom(new ClassAssertionAxiom(nc[1],ind[i]));
+		}
+		
+		ComponentManager cm = ComponentManager.getInstance();
+		KnowledgeSource ks = new KBFile(kb);
+		ReasonerComponent reasoner = cm.reasoner(OWLAPIReasoner.class, ks);
+		ClassLearningProblem problem = cm.learningProblem(ClassLearningProblem.class, reasoner);
+		ks.init();
+		reasoner.init();
+
+		// evaluate A1 (nc[1]) wrt. A0 (nc[0]) using Jaccard
+		HeuristicTests.configureClassLP(problem, nc[0], "jaccard");
+		// the value should be 10 (i10-i19) divided by 30 (i0-i29)
+		assertEqualsClassLP(problem, nc[1], 1/(double)3);
+
 	}
 	
+	// the class learning problem provides several ways to get the accuracy of a description; this method
+	// tests all of them
+	private static void assertEqualsClassLP(ClassLearningProblem problem, Description description, double accuracy) {
+		assertEquals(accuracy, problem.getAccuracy(description), delta);
+		assertEquals(accuracy, problem.getAccuracyOrTooWeak(description, 1.0), delta);
+		assertEquals(accuracy, problem.computeScore(description).getAccuracy(), delta);
+		assertEquals(accuracy, problem.evaluate(description).getAccuracy(), delta);
+	}
+	
+	// convenience method to set the learning problem to a desired configuration (approximations disabled)
+	private static void configureClassLP(ClassLearningProblem problem, NamedClass classToDescribe, String accuracyMethod) throws ComponentInitException {
+		try {
+			problem.getConfigurator().setClassToDescribe(new URL(classToDescribe.getName()));
+		} catch (MalformedURLException e) {
+			e.printStackTrace();
+		}
+		problem.getConfigurator().setAccuracyMethod(accuracyMethod);
+		problem.getConfigurator().setUseApproximations(false);
+		problem.init();		
+	}
+	
+	// convenience method to set the learning problem to a desired configuration
+	private static void configureClassLP(ClassLearningProblem problem, NamedClass classToDescribe, String accuracyMethod, boolean useApproximations, double approxAccuracy) throws ComponentInitException {
+		try {
+			problem.getConfigurator().setClassToDescribe(new URL(classToDescribe.getName()));
+		} catch (MalformedURLException e) {
+			e.printStackTrace();
+		}
+		problem.getConfigurator().setAccuracyMethod(accuracyMethod);
+		problem.getConfigurator().setUseApproximations(useApproximations);
+		problem.getConfigurator().setApproxAccuracy(approxAccuracy);
+		problem.init();		
+	}
+	
 }


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.