From: <jen...@us...> - 2010-07-20 08:06:42
|
Revision: 2206 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2206&view=rev Author: jenslehmann Date: 2010-07-20 08:06:33 +0000 (Tue, 20 Jul 2010) Log Message: ----------- heuristic approximation tests Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java Modified: trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java 2010-07-17 15:26:41 UTC (rev 2205) +++ trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java 2010-07-20 08:06:33 UTC (rev 2206) @@ -120,7 +120,7 @@ * @return A two element double array, where element 0 is the lower border and element * 1 the upper border of the 95% confidence interval. */ - public static double[] getConfidenceInterval95Wald(int success, int total) { + public static double[] getConfidenceInterval95Wald(int total, int success) { if(success > total || total < 1) { throw new IllegalArgumentException(); } @@ -178,13 +178,13 @@ * @param nrOfRelevantInstances Number of relevant instances, i.e. number of instances, which * would have been tested without approximations. * @param nrOfInstanceChecks Performed instance checks for the approximation. - * @param successfulInstanceChecks Number of successful performed instance checks. + * @param nrOfSuccessfulInstanceChecks Number of successful performed instance checks. * @return A two element array, where the first element is the computed F-beta score and the * second element is the length of the 95% confidence interval around it. */ - public static double[] getFMeasureApproximation(int nrOfPosClassifiedPositives, double recall, double beta, int nrOfRelevantInstances, int nrOfInstanceChecks, int successfulInstanceChecks) { + public static double[] getFMeasureApproximation(int nrOfPosClassifiedPositives, double recall, double beta, int nrOfRelevantInstances, int nrOfInstanceChecks, int nrOfSuccessfulInstanceChecks) { // compute 95% confidence interval - double[] interval = Heuristics.getConfidenceInterval95Wald(successfulInstanceChecks, nrOfInstanceChecks); + double[] interval = Heuristics.getConfidenceInterval95Wald(nrOfInstanceChecks, nrOfSuccessfulInstanceChecks); // multiply by number of instances from which the random samples are drawn double lowerBorder = interval[0] * nrOfRelevantInstances; double upperBorder = interval[1] * nrOfRelevantInstances; @@ -203,4 +203,29 @@ return ret; } + public static double[] getAMeasureApproximationStep1(double beta, int nrOfPosExamples, int nrOfInstanceChecks, int nrOfSuccessfulInstanceChecks) { + // the method is just a wrapper around a single confidence interval approximation; + // method approximates t * a / |R(A)| + double[] interval = Heuristics.getConfidenceInterval95Wald(nrOfInstanceChecks, nrOfSuccessfulInstanceChecks); + double diff = beta * (interval[1] - interval[0]); + double ret[] = new double[2]; + ret[0] = beta * interval[0] + 0.5*diff; + ret[1] = diff; + return ret; + } + + public static double[] getAMeasureApproximationStep2(int nrOfPosClassifiedPositives, double[] recallInterval, double beta, int nrOfRelevantInstances, int nrOfInstanceChecks, int nrOfSuccessfulInstanceChecks) { + // TODO: code untested + double[] interval = Heuristics.getConfidenceInterval95Wald(nrOfInstanceChecks, nrOfSuccessfulInstanceChecks); + double precisionLowerBorder = nrOfPosClassifiedPositives / interval[1] * nrOfRelevantInstances; + double precisionUpperBorder = nrOfPosClassifiedPositives / interval[0] * nrOfRelevantInstances; + double lowerBorder = Heuristics.getAScore(recallInterval[0] / beta, precisionLowerBorder, beta); + double upperBorder = Heuristics.getAScore(recallInterval[0] / beta, precisionUpperBorder, beta); + double diff = upperBorder - lowerBorder; + double ret[] = new double[2]; + ret[0] = lowerBorder + 0.5*diff; + ret[1] = diff; + return ret; + } + } Modified: trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java 2010-07-17 15:26:41 UTC (rev 2205) +++ trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java 2010-07-20 08:06:33 UTC (rev 2206) @@ -36,6 +36,7 @@ import org.dllearner.core.owl.Thing; import org.dllearner.kb.KBFile; import org.dllearner.learningproblems.ClassLearningProblem; +import org.dllearner.learningproblems.Heuristics; import org.dllearner.reasoning.OWLAPIReasoner; import org.junit.Test; @@ -157,6 +158,23 @@ } + @Test + public void approximationTests() { + // perform F-Measure example in ontology engineering paper, which was computed on paper + // TODO: compute again, because unit tests fails (probably rounding errors) + double[] approx1 = Heuristics.getFMeasureApproximation(800, 0.8, 1, 10000, 41, 31); + assertEquals(0.0505, approx1[1], delta); + double[] approx2 = Heuristics.getFMeasureApproximation(800, 0.8, 1, 10000, 42, 32); + assertEquals(0.1699, approx2[0], delta); + assertEquals(0.0489, approx2[1], delta); + + // perform A-Measure example in ontology engineering paper + // setup: 1000 class instances, 10000 relevant instances, delta=0.10 + // input1: 90 out of 95 tests => no success para 1, 91 out of 96 => success + // input2: using estimation from input 1, 32 out of 64 => success + // overall accuracy: 64% + } + // the class learning problem provides several ways to get the accuracy of a description, this method // tests all of those private static void assertEqualsClassLP(ClassLearningProblem problem, Description description, double accuracy) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |