[DL-Learner SVN] SF.net SVN: dl-learner:[2206] trunk/src/dl-learner/org/dllearner

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 2206
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2206&view=rev
Author:   jenslehmann
Date:     2010-07-20 08:06:33 +0000 (Tue, 20 Jul 2010)

Log Message:
-----------
heuristic approximation tests

Modified Paths:
--------------
    trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java
    trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java

Modified: trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java
===================================================================

--- trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java	2010-07-17 15:26:41 UTC (rev 2205)
+++ trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java	2010-07-20 08:06:33 UTC (rev 2206)
@@ -120,7 +120,7 @@
 	 * @return A two element double array, where element 0 is the lower border and element
 	 * 1 the upper border of the 95% confidence interval.
 	 */
-	public static double[] getConfidenceInterval95Wald(int success, int total) {
+	public static double[] getConfidenceInterval95Wald(int total, int success) {
 		if(success > total || total < 1) {
 			throw new IllegalArgumentException();
 		}
@@ -178,13 +178,13 @@
 	 * @param nrOfRelevantInstances Number of relevant instances, i.e. number of instances, which
 	 * would have been tested without approximations.
 	 * @param nrOfInstanceChecks Performed instance checks for the approximation.
-	 * @param successfulInstanceChecks Number of successful performed instance checks.
+	 * @param nrOfSuccessfulInstanceChecks Number of successfully performed instance checks.
 	 * @return A two element array, where the first element is the computed F-beta score and the
 	 * second element is the length of the 95% confidence interval around it.
 	 */
-	public static double[] getFMeasureApproximation(int nrOfPosClassifiedPositives, double recall, double beta, int nrOfRelevantInstances, int nrOfInstanceChecks, int successfulInstanceChecks) {
+	public static double[] getFMeasureApproximation(int nrOfPosClassifiedPositives, double recall, double beta, int nrOfRelevantInstances, int nrOfInstanceChecks, int nrOfSuccessfulInstanceChecks) {
 		// compute 95% confidence interval
-		double[] interval = Heuristics.getConfidenceInterval95Wald(successfulInstanceChecks, nrOfInstanceChecks);
+		double[] interval = Heuristics.getConfidenceInterval95Wald(nrOfInstanceChecks, nrOfSuccessfulInstanceChecks);
 		// multiply by number of instances from which the random samples are drawn
 		double lowerBorder = interval[0] * nrOfRelevantInstances;
 		double upperBorder = interval[1] * nrOfRelevantInstances;
@@ -203,4 +203,29 @@
 		return ret;
 	}
 	
+	public static double[] getAMeasureApproximationStep1(double beta, int nrOfPosExamples, int nrOfInstanceChecks, int nrOfSuccessfulInstanceChecks) {
+		// the method is just a wrapper around a single confidence interval approximation;
+		// method approximates t * a / |R(A)|
+		double[] interval = Heuristics.getConfidenceInterval95Wald(nrOfInstanceChecks, nrOfSuccessfulInstanceChecks);
+		double diff = beta * (interval[1] - interval[0]);
+		double ret[] = new double[2];
+		ret[0] = beta * interval[0] + 0.5*diff;
+		ret[1] = diff;
+		return ret;
+	}
+	
+	public static double[] getAMeasureApproximationStep2(int nrOfPosClassifiedPositives, double[] recallInterval, double beta, int nrOfRelevantInstances, int nrOfInstanceChecks, int nrOfSuccessfulInstanceChecks) {
+		// TODO: code untested
+		double[] interval = Heuristics.getConfidenceInterval95Wald(nrOfInstanceChecks, nrOfSuccessfulInstanceChecks);
+		double precisionLowerBorder = nrOfPosClassifiedPositives / interval[1] * nrOfRelevantInstances;
+		double precisionUpperBorder = nrOfPosClassifiedPositives / interval[0] * nrOfRelevantInstances;
+		double lowerBorder = Heuristics.getAScore(recallInterval[0] / beta, precisionLowerBorder, beta);
+		double upperBorder = Heuristics.getAScore(recallInterval[0] / beta, precisionUpperBorder, beta);
+		double diff = upperBorder - lowerBorder;
+		double ret[] = new double[2];
+		ret[0] = lowerBorder + 0.5*diff;
+		ret[1] = diff;
+		return ret;
+	}
+	
 }

Modified: trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java	2010-07-17 15:26:41 UTC (rev 2205)
+++ trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java	2010-07-20 08:06:33 UTC (rev 2206)
@@ -36,6 +36,7 @@
 import org.dllearner.core.owl.Thing;
 import org.dllearner.kb.KBFile;
 import org.dllearner.learningproblems.ClassLearningProblem;
+import org.dllearner.learningproblems.Heuristics;
 import org.dllearner.reasoning.OWLAPIReasoner;
 import org.junit.Test;
 
@@ -157,6 +158,23 @@
 
 	}
 	
+	@Test
+	public void approximationTests() {
+		// perform F-Measure example in ontology engineering paper, which was computed on paper
+		// TODO: compute again, because the unit test fails (probably rounding errors)
+		double[] approx1 = Heuristics.getFMeasureApproximation(800, 0.8, 1, 10000, 41, 31);
+		assertEquals(0.0505, approx1[1], delta);
+		double[] approx2 = Heuristics.getFMeasureApproximation(800, 0.8, 1, 10000, 42, 32);
+		assertEquals(0.1699, approx2[0], delta);
+		assertEquals(0.0489, approx2[1], delta);
+		
+		// perform A-Measure example in ontology engineering paper
+		// setup: 1000 class instances, 10000 relevant instances, delta=0.10
+		// input1: 90 out of 95 tests => no success para 1, 91 out of 96 => success
+		// input2: using estimation from input 1, 32 out of 64 => success
+		// overall accuracy: 64%
+	}
+	
 	// the class learning problem provides several ways to get the accuracy of a description, this method
 	// tests all of those
 	private static void assertEqualsClassLP(ClassLearningProblem problem, Description description, double accuracy) {


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.