[DL-Learner SVN] SF.net SVN: dl-learner:[2335] trunk

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 2335
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2335&view=rev
Author:   jenslehmann
Date:     2010-09-24 12:08:03 +0000 (Fri, 24 Sep 2010)

Log Message:
-----------
implemented method for approximating predictive accuracy (experimental)

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/ocel/OCEL.java
    trunk/components-core/src/main/java/org/dllearner/learningproblems/ClassLearningProblem.java
    trunk/components-core/src/main/java/org/dllearner/learningproblems/Heuristics.java
    trunk/components-core/src/main/java/org/dllearner/learningproblems/PosNegLPStandard.java
    trunk/interfaces/src/main/java/org/dllearner/cli/ConfMapper.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java
===================================================================

--- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java	2010-09-24 09:26:02 UTC (rev 2334)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java	2010-09-24 12:08:03 UTC (rev 2335)
@@ -215,6 +215,7 @@
 		
 		// we put important parameters in class variables
 		noise = configurator.getNoisePercentage()/100d;
+//		System.out.println("noise " + noise);
 		maxDepth = configurator.getMaxDepth();
 		// (filterFollowsFromKB is automatically set to false if the problem
 		// is not a class learning problem
@@ -343,6 +344,7 @@
 		
 		int loop = 0;
 		while (!terminationCriteriaSatisfied()) {
+//			System.out.println("loop " + loop);
 			
 			if(!singleSuggestionMode && bestEvaluatedDescriptions.getBestAccuracy() > highestAccuracy) {
 				highestAccuracy = bestEvaluatedDescriptions.getBestAccuracy();

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/ocel/OCEL.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/ocel/OCEL.java	2010-09-24 09:26:02 UTC (rev 2334)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/ocel/OCEL.java	2010-09-24 12:08:03 UTC (rev 2335)
@@ -49,6 +49,7 @@
 import org.dllearner.core.owl.ObjectProperty;
 import org.dllearner.learningproblems.EvaluatedDescriptionPosNeg;
 import org.dllearner.learningproblems.PosNegLP;
+import org.dllearner.learningproblems.PosNegLPStandard;
 import org.dllearner.learningproblems.PosOnlyLP;
 import org.dllearner.learningproblems.ScorePosNeg;
 import org.dllearner.reasoning.ReasonerType;
@@ -338,6 +339,16 @@
 			}
 		}
 		
+		// warn the user if he/she sets any non-standard heuristic, because it will just be ignored
+		if(learningProblem instanceof PosNegLPStandard) {
+			if(((PosNegLPStandard)learningProblem).getConfigurator().getUseApproximations()) {
+				System.err.println("You actived approximations for the considered learning problem, but OCEL does not support it. Option will be ignored. (Recommendation: Use CELOE instead.)");
+			}
+			if(!((PosNegLPStandard)learningProblem).getConfigurator().getAccuracyMethod().equals("predacc")) {
+				System.err.println("You have chosen a non-standard (predictive accuracy) heuristic in your learning problem, but OCEL does not support it. Option will be ignored. (Recommendation: Use CELOE instead.)");
+			}
+		}
+		
 		// compute used concepts/roles from allowed/ignored
 		// concepts/roles
 		if(allowedConcepts != null) {

Modified: trunk/components-core/src/main/java/org/dllearner/learningproblems/ClassLearningProblem.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/learningproblems/ClassLearningProblem.java	2010-09-24 09:26:02 UTC (rev 2334)
+++ trunk/components-core/src/main/java/org/dllearner/learningproblems/ClassLearningProblem.java	2010-09-24 12:08:03 UTC (rev 2335)
@@ -141,7 +141,11 @@
 			heuristic = HeuristicType.PRED_ACC;
 		}
 		
-		if(useApproximations && !(heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE))) {
+		if(useApproximations && heuristic.equals(HeuristicType.PRED_ACC)) {
+			System.err.println("Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. If you consider to use it for anything serious, please extend the unit tests at org.dllearner.test.junit.HeuristicTests first to verify that it works.");
+		}		
+		
+		if(useApproximations && !(heuristic.equals(HeuristicType.PRED_ACC) || heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE))) {
 			throw new ComponentInitException("Approximations only supported for F-Measure or Standard-Measure. It is unsupported for \"" + accM + ".\"");
 		}
 		

Modified: trunk/components-core/src/main/java/org/dllearner/learningproblems/Heuristics.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/learningproblems/Heuristics.java	2010-09-24 09:26:02 UTC (rev 2334)
+++ trunk/components-core/src/main/java/org/dllearner/learningproblems/Heuristics.java	2010-09-24 12:08:03 UTC (rev 2335)
@@ -93,23 +93,21 @@
 		return elementsIntersection / (double) elementsUnion;
 	}
 	
-	public double getPredictiveAccuracy(int nrOfExamples, int nrOfPosClassifiedPositives, int nrOfNegClassifiedNegatives) {
+	public static double getPredictiveAccuracy(int nrOfExamples, int nrOfPosClassifiedPositives, int nrOfNegClassifiedNegatives) {
 		return (nrOfPosClassifiedPositives + nrOfNegClassifiedNegatives) / (double) nrOfExamples;
 	}	
 
-	public double getPredictiveAccuracy(int nrOfExamples, int nrOfPosClassifiedPositives, int nrOfNegClassifiedNegatives, double beta) {
-//		return (nrOfPosClassifiedPositives + nrOfNegClassifiedNegatives) / (double) nrOfExamples;
-		return 0;
+	public static double getPredictiveAccuracy(int nrOfPosExamples, int nrOfNegExamples, int nrOfPosClassifiedPositives, int nrOfNegClassifiedNegatives, double beta) {
+		return (nrOfPosClassifiedPositives + beta * nrOfNegClassifiedNegatives) / (double) (nrOfPosExamples + beta * nrOfNegExamples);
 	}		
 	
-	public double getPredictiveAccuracy2(int nrOfExamples, int nrOfPosClassifiedPositives, int nrOfPosClassifiedNegatives) {
+	public static double getPredictiveAccuracy2(int nrOfExamples, int nrOfPosClassifiedPositives, int nrOfPosClassifiedNegatives) {
 		return (nrOfPosClassifiedPositives + nrOfExamples - nrOfPosClassifiedNegatives) / (double) nrOfExamples;
 	}
 	
-	public double getPredictiveAccuracy2(int nrOfExamples, int nrOfPosClassifiedPositives, int nrOfNegClassifiedNegatives, double beta) {
-//		return (nrOfPosClassifiedPositives + nrOfNegClassifiedNegatives) / (double) nrOfExamples;
-		return 0;
-	}	
+	public static double getPredictiveAccuracy2(int nrOfPosExamples, int nrOfNegExamples, int nrOfPosClassifiedPositives, int nrOfNegClassifiedNegatives, double beta) {
+		return (nrOfPosClassifiedPositives + beta * nrOfNegClassifiedNegatives) / (double) (nrOfPosExamples + beta * nrOfNegExamples);
+	}
 	
 	/**
 	 * Computes the 95% confidence interval of an experiment with boolean outcomes,
@@ -176,7 +174,7 @@
 	 * @param beta Weights precision and recall. If beta is >1, then recall is more important
 	 * than precision.
 	 * @param nrOfRelevantInstances Number of relevant instances, i.e. number of instances, which
-	 * would have been tested without approximations.
+	 * would have been tested without approximations. TODO: relevant = pos + neg examples?
 	 * @param nrOfInstanceChecks Performed instance checks for the approximation.
 	 * @param nrOfSuccessfulInstanceChecks Number of successful performed instance checks.
 	 * @return A two element array, where the first element is the computed F-beta score and the
@@ -262,4 +260,25 @@
 		return ret;
 	}
 	
+	// WARNING: unstable/untested
+	// uses the following formula: (|R(C) \cap E^+| + beta * |E^- \ R(C)|) / (|E^+|+|E^-|)
+	// approximates |R(C) \cap E^+| and beta * |E^- \ R(C)| separately; and adds their lower and upper borders (pessimistic estimate)
+	// TODO: only works well if there are many negatives at the moment, so speedup is not great
+	public static double[] getPredAccApproximation(int nrOfPositiveExamples, int nrOfNegativeExamples, double beta, int nrOfPosExampleInstanceChecks, int nrOfSuccessfulPosExampleChecks, int nrOfNegExampleInstanceChecks, int nrOfNegativeNegExampleChecks) {
+		// compute both 95% confidence intervals
+		double[] intervalPos = Heuristics.getConfidenceInterval95Wald(nrOfPosExampleInstanceChecks, nrOfSuccessfulPosExampleChecks);
+		double[] intervalNeg = Heuristics.getConfidenceInterval95Wald(nrOfNegExampleInstanceChecks, nrOfNegativeNegExampleChecks);
+		// multiply by number of instances from which the random samples are drawn
+		double lowerBorder = intervalPos[0] * nrOfPositiveExamples + beta * intervalNeg[0] * nrOfNegativeExamples;
+		double upperBorder = intervalNeg[1] * nrOfPositiveExamples + beta * intervalNeg[1] * nrOfNegativeExamples;
+		double predAccLow = lowerBorder / (double) (nrOfPositiveExamples + beta * nrOfNegativeExamples);
+		double predAccHigh = upperBorder / (double) (nrOfPositiveExamples + beta * nrOfNegativeExamples);
+		double diff = predAccHigh - predAccLow;
+		// return interval length and center
+		double[] ret = new double[2];
+		ret[0] = predAccLow + 0.5 * diff;
+		ret[1] = diff;
+		return ret;
+	}
+	
 }

Modified: trunk/components-core/src/main/java/org/dllearner/learningproblems/PosNegLPStandard.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/learningproblems/PosNegLPStandard.java	2010-09-24 09:26:02 UTC (rev 2334)
+++ trunk/components-core/src/main/java/org/dllearner/learningproblems/PosNegLPStandard.java	2010-09-24 12:08:03 UTC (rev 2335)
@@ -20,6 +20,7 @@
 package org.dllearner.learningproblems;
 
 import java.util.Collection;
+import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.Set;
 import java.util.SortedSet;
@@ -34,6 +35,7 @@
 import org.dllearner.core.options.StringConfigOption;
 import org.dllearner.core.owl.Description;
 import org.dllearner.core.owl.Individual;
+import org.dllearner.learningproblems.Heuristics.HeuristicType;
 import org.dllearner.utilities.Helper;
 
 /**
@@ -56,10 +58,12 @@
 	// approximation and F-measure
 	// (taken from class learning => super class instances corresponds to negative examples
 	// and class instances to positive examples)
-	private double approx = 0.05;
+	private double approxDelta = 0.05;
 	private boolean useApproximations;
-	private boolean useFMeasure;	
+//	private boolean useFMeasure;	
 	
+	private HeuristicType heuristic = HeuristicType.PRED_ACC;
+	
 	@Override
 	public PosNegLPStandardConfigurator getConfigurator() {
 		return configurator;
@@ -81,14 +85,32 @@
 	public void init() {
 		super.init();
 		useApproximations = configurator.getUseApproximations();
-		useFMeasure = configurator.getAccuracyMethod().equals("fmeasure");
+		approxDelta = configurator.getApproxAccuracy();
 		
-		if((!useApproximations && useFMeasure) || (useApproximations && !useFMeasure)) {
-			System.err.println("Currently F measure can only be used in combination with approximated reasoning.");
-			System.exit(0);
+		String accM = configurator.getAccuracyMethod();
+		if(accM.equals("standard")) {
+			heuristic = HeuristicType.AMEASURE;
+		} else if(accM.equals("fmeasure")) {
+			heuristic = HeuristicType.FMEASURE;
+		} else if(accM.equals("generalised_fmeasure")) {
+			heuristic = HeuristicType.GEN_FMEASURE;
+		} else if(accM.equals("jaccard")) {
+			heuristic = HeuristicType.JACCARD;
+		} else if(accM.equals("pred_acc")) {
+			heuristic = HeuristicType.PRED_ACC;
 		}
 		
-		approx = configurator.getApproxAccuracy();
+//		useFMeasure = configurator.getAccuracyMethod().equals("fmeasure");
+		
+//		if((!useApproximations && useFMeasure) || (useApproximations && !useFMeasure)) {
+//			System.err.println("Currently F measure can only be used in combination with approximated reasoning.");
+//			System.exit(0);
+//		}
+		
+		if(useApproximations && heuristic.equals(HeuristicType.PRED_ACC)) {
+			System.err.println("Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. If you consider to use it for anything serious, please extend the unit tests at org.dllearner.test.junit.HeuristicTests first and verify that it works.");
+		}
+		
 	}
 	
 	/*
@@ -127,7 +149,7 @@
 	 */
 	@Override
 	public int coveredNegativeExamplesOrTooWeak(Description concept) {
-
+		
 		if (useRetrievalForClassification) {
 			SortedSet<Individual> posClassified = reasoner.getIndividuals(concept);
 			SortedSet<Individual> negAsPos = Helper.intersection(negativeExamples, posClassified);
@@ -267,7 +289,9 @@
 	 */
 	@Override
 	public double getAccuracy(Description description) {
-		
+		// a noise value of 1.0 means that we never return too weak (-1.0) 
+		return getAccuracyOrTooWeak(description, 1.0);		
+		/*				
 		int coveredPos = 0;
 		int coveredNeg = 0;
 		
@@ -283,10 +307,14 @@
 		}
 		
 		return coveredPos + negativeExamples.size() - coveredNeg / (double) allExamples.size();
+		*/
 	}
 
 	@Override
-	public double getAccuracyOrTooWeak(Description description, double noise) {
+	public double getAccuracyOrTooWeak(Description description, double noise) {	
+		// delegates to the appropriate methods
+		return useApproximations ? getAccuracyOrTooWeakApprox(description, noise) : getAccuracyOrTooWeakExact(description, noise);			
+		/*
 		if(useApproximations) {
 			if(useFMeasure) {
 				return getFMeasureOrTooWeakApprox(description, noise);
@@ -295,9 +323,145 @@
 			}
 		} else {
 			return getPredAccuracyOrTooWeakExact(description, noise);
-		}			
+		}	
+		*/		
 	}	
 	
+	public double getAccuracyOrTooWeakApprox(Description description, double noise) {
+		if(heuristic.equals(HeuristicType.PRED_ACC)) {
+			int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());
+			
+			int notCoveredPos = 0;
+//			int notCoveredNeg = 0;
+			
+			int posClassifiedAsPos = 0;
+			int negClassifiedAsNeg = 0;
+			
+			int nrOfPosChecks = 0;
+			int nrOfNegChecks = 0;
+			
+			// special case: we test positive and negative examples in turn
+			Iterator<Individual> itPos = positiveExamples.iterator();
+			Iterator<Individual> itNeg = negativeExamples.iterator();
+			
+			do {
+				// in each loop we pick 0 or 1 positives and 0 or 1 negative
+				// and classify it
+				
+				if(itPos.hasNext()) {
+					Individual posExample = itPos.next();
+//					System.out.println(posExample);
+					
+					if(reasoner.hasType(description, posExample)) {
+						posClassifiedAsPos++;
+					} else {
+						notCoveredPos++;
+					}
+					nrOfPosChecks++;
+					
+					// take noise into account
+					if(notCoveredPos > maxNotCovered) {
+						return -1;
+					}
+				}
+				
+				if(itNeg.hasNext()) {
+					Individual negExample = itNeg.next();
+					if(!reasoner.hasType(description, negExample)) {
+						negClassifiedAsNeg++;
+					}
+					nrOfNegChecks++;
+				}
+			
+				// compute how accurate our current approximation is and return if it is sufficiently accurate
+				double approx[] = Heuristics.getPredAccApproximation(positiveExamples.size(), negativeExamples.size(), 1, nrOfPosChecks, posClassifiedAsPos, nrOfNegChecks, negClassifiedAsNeg);
+				if(approx[1]<approxDelta) {
+//					System.out.println(approx[0]);
+					return approx[0];
+				}
+				
+			} while(itPos.hasNext() || itNeg.hasNext());
+			
+			double ret = Heuristics.getPredictiveAccuracy(positiveExamples.size(), negativeExamples.size(), posClassifiedAsPos, negClassifiedAsNeg, 1);
+			return ret;
+					
+		} else if(heuristic.equals(HeuristicType.FMEASURE)) {
+			// we abort when there are too many uncovered positives
+			int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());
+			int instancesCovered = 0;
+			int instancesNotCovered = 0;
+			
+			for(Individual ind : positiveExamples) {
+				if(reasoner.hasType(description, ind)) {
+					instancesCovered++;
+				} else {
+					instancesNotCovered ++;
+					if(instancesNotCovered > maxNotCovered) {
+						return -1;
+					}
+				}
+			}	
+			
+			double recall = instancesCovered/(double)positiveExamples.size();
+			
+			int testsPerformed = 0;
+			int instancesDescription = 0;
+			
+			for(Individual ind : negativeExamples) {
+
+				if(reasoner.hasType(description, ind)) {
+					instancesDescription++;
+				}
+				testsPerformed++;
+				
+				// check whether approximation is sufficiently accurate
+				double[] approx = Heuristics.getFScoreApproximation(instancesCovered, recall, 1, negativeExamples.size(), testsPerformed, instancesDescription);
+				if(approx[1]<approxDelta) {
+					return approx[0];
+				}
+				
+			}		
+			
+			// standard computation (no approximation)
+			double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
+//			if(instancesCovered + instancesDescription == 0) {
+//				precision = 0;
+//			}
+			return Heuristics.getFScore(recall, precision, 1);			
+		} else {
+			throw new Error("Approximation for " + heuristic + " not implemented.");
+		}
+	}
+	
+	public double getAccuracyOrTooWeakExact(Description description, double noise) {
+		if(heuristic.equals(HeuristicType.PRED_ACC)) {
+			return getPredAccuracyOrTooWeakExact(description, noise);
+		} else if(heuristic.equals(HeuristicType.FMEASURE)) {
+			// computing R(C) restricted to relevant instances
+			int additionalInstances = 0;
+			for(Individual ind : negativeExamples) {
+				if(reasoner.hasType(description, ind)) {
+					additionalInstances++;
+				}
+			}
+			
+			// computing R(A)
+			int coveredInstances = 0;
+			for(Individual ind : positiveExamples) {
+				if(reasoner.hasType(description, ind)) {
+					coveredInstances++;
+				}
+			}
+			
+			double recall = coveredInstances/(double)positiveExamples.size();
+			double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);			
+			
+			return Heuristics.getFScore(recall, precision);
+		} else {
+			throw new Error("Heuristic " + heuristic + " not implemented.");
+		}		
+	}
+	
 	/* (non-Javadoc)
 	 * @see org.dllearner.core.LearningProblem#getAccuracyOrTooWeak(org.dllearner.core.owl.Description, double)
 	 */
@@ -358,7 +522,9 @@
 	}
 	
 	// instead of using the standard operation, we use optimisation
-	// and approximation here
+	// and approximation here;
+	// now deprecated because the Heuristics helper class is used
+	@Deprecated
 	public double getFMeasureOrTooWeakApprox(Description description, double noise) {
 		// we abort when there are too many uncovered positives
 		int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());
@@ -392,7 +558,7 @@
 				upperBorderA = Math.min(1, p1 + p2);
 				double size = upperBorderA - lowerBorderA;
 				// if the interval has a size smaller than 10%, we can be confident
-				if(size < 2 * approx) {
+				if(size < 2 * approxDelta) {
 					// we have to distinguish the cases that the accuracy limit is
 					// below, within, or above the limit and that the mean is below
 					// or above the limit

Modified: trunk/interfaces/src/main/java/org/dllearner/cli/ConfMapper.java
===================================================================
--- trunk/interfaces/src/main/java/org/dllearner/cli/ConfMapper.java	2010-09-24 09:26:02 UTC (rev 2334)
+++ trunk/interfaces/src/main/java/org/dllearner/cli/ConfMapper.java	2010-09-24 12:08:03 UTC (rev 2335)
@@ -101,6 +101,7 @@
 		learningAlgorithmMapping.put("gp", GP.class);
 		learningAlgorithmMapping.put("refinement", ROLearner.class);
 		learningAlgorithmMapping.put("refexamples", OCEL.class);
+		learningAlgorithmMapping.put("ocel", OCEL.class);
 		learningAlgorithmMapping.put("el", ELLearningAlgorithm.class);
 		learningAlgorithmMapping.put("disjunctiveEL", ELLearningAlgorithmDisjunctive.class);
 		learningAlgorithmMapping.put("celoe", CELOE.class);


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.