[DL-Learner SVN] SF.net SVN: dl-learner:[2196] trunk/src/dl-learner/org/dllearner

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 2196
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2196&view=rev
Author:   jenslehmann
Date:     2010-07-13 17:02:04 +0000 (Tue, 13 Jul 2010)

Log Message:
-----------
- prepared class which encapsulates heuristic computations to
  avoid bugs by using them incorrectly in various pieces of
  code
- started unit test for heuristics

Added Paths:
-----------
    trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java
    trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java

Added: trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java
===================================================================

--- trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java	                        (rev 0)
+++ trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java	2010-07-13 17:02:04 UTC (rev 2196)
@@ -0,0 +1,167 @@
+/**
+ * Copyright (C) 2007-2010, Jens Lehmann
+ *
+ * This file is part of DL-Learner.
+ * 
+ * DL-Learner is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * DL-Learner is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package org.dllearner.learningproblems;
+
+/**
+ * Implementation of various heuristics. The methods can be used in learning
+ * problems and various evaluation scripts. They are verified in unit tests
+ * and, thus, should be fairly stable.
+ * 
+ * @author Jens Lehmann
+ * 
+ */
+public class Heuristics {
+
+	/**
+	 * Computes F1-Score.
+	 * @param recall Recall.
+	 * @param precision Precision.
+	 * @return Harmonic mean of precision and recall.
+	 */
+	public static double getFScore(double recall, double precision) {
+		return (precision + recall == 0) ? 0 :
+			  ( 2 * (precision * recall) / (double) (precision + recall) ); 		
+	}
+	
+	/**
+	 * Computes F-beta-Score.
+	 * @param recall Recall.
+	 * @param precision Precision.
+	 * @param beta Weights precision and recall. If beta is >1, then recall is more important
+	 * than precision.
+	 * @return Harmonic mean of precision and recall weighted by beta.
+	 */
+	public static double getFScore(double recall, double precision, double beta) {
+		return (precision + recall == 0) ? 0 :
+			  ( (1+Math.sqrt(beta)) * (precision * recall)
+					/ (Math.sqrt(beta) * precision + recall) ); 		
+	}	
+
+	/**
+	 * Computes arithmetic mean of precision and recall, which is called "A-Score"
+	 * here (A=arithmetic), but is not an established notion in machine learning.  
+	 * @param recall Recall.
+	 * @param precision Precison.
+	 * @return Arithmetic mean of precision and recall.
+	 */
+	public static double getAScore(double recall, double precision) {
+		return (recall + precision) / (double) 2;
+	}
+
+	/**
+	 * Computes arithmetic mean of precision and recall, which is called "A-Score"
+	 * here (A=arithmetic), but is not an established notion in machine learning.  
+	 * @param recall Recall.
+	 * @param precision Precison.
+	 * @param beta Weights precision and recall. If beta is >1, then recall is more important
+	 * than precision.
+	 * @return Arithmetic mean of precision and recall.
+	 */
+	public static double getAScore(double recall, double precision, double beta) {
+		return (beta * recall + precision) / (beta + 1);
+	}
+	
+	/**
+	 * Computes the Jaccard coefficient of two sets.
+	 * @param elementsIntersection Number of elements in the intersection of the two sets.
+	 * @param elementsUnion Number of elements in the union of the two sets.
+	 * @return #intersection divided by #union.
+	 */
+	public static double getJaccardCoefficient(int elementsIntersection, int elementsUnion) {
+		if(elementsIntersection > elementsUnion || elementsUnion < 1) {
+			throw new IllegalArgumentException();
+		}
+		return elementsIntersection / (double) elementsUnion;
+	}
+	
+	public double getPredictiveAccuracy(int nrOfExamples, int nrOfPosClassifiedPositives, int nrOfNegClassifiedNegatives) {
+		return (nrOfPosClassifiedPositives + nrOfNegClassifiedNegatives) / (double) nrOfExamples;
+	}	
+
+	public double getPredictiveAccuracy(int nrOfExamples, int nrOfPosClassifiedPositives, int nrOfNegClassifiedNegatives, double beta) {
+//		return (nrOfPosClassifiedPositives + nrOfNegClassifiedNegatives) / (double) nrOfExamples;
+		return 0;
+	}		
+	
+	public double getPredictiveAccuracy2(int nrOfExamples, int nrOfPosClassifiedPositives, int nrOfPosClassifiedNegatives) {
+		return (nrOfPosClassifiedPositives + nrOfExamples - nrOfPosClassifiedNegatives) / (double) nrOfExamples;
+	}
+	
+	public double getPredictiveAccuracy2(int nrOfExamples, int nrOfPosClassifiedPositives, int nrOfNegClassifiedNegatives, double beta) {
+//		return (nrOfPosClassifiedPositives + nrOfNegClassifiedNegatives) / (double) nrOfExamples;
+		return 0;
+	}	
+	
+	/**
+	 * Computes the 95% confidence interval of an experiment with boolean outcomes,
+	 * e.g. heads or tails coin throws. It uses the very efficient, but still accurate
+	 * Wald method. 
+	 * @param success Number of successes, e.g. number of times the coin shows head.
+	 * @param total Total number of tries, e.g. total number of times the coin was thrown.
+	 * @return A two element double array, where element 0 is the lower border and element
+	 * 1 the upper border of the 95% confidence interval.
+	 */
+	public static double[] getConfidenceInterval95Wald(int success, int total) {
+		if(success > total || total < 1) {
+			throw new IllegalArgumentException();
+		}
+		double[] ret = new double[2];
+		double p1 = (success+2)/(double)(total+4);
+		double p2 = 1.96 * Math.sqrt(p1*(1-p1)/(total+4));
+		ret[0] = Math.max(0, p1 - p2);
+		ret[1] = Math.min(1, p1 + p2);
+		return ret;
+	}
+	
+	/**
+	 * Computes whether a hypothesis is too weak, i.e. it has more errors on the positive examples
+	 * than allowed by the noise parameter.
+	 * @param nrOfPositiveExamples The number of positive examples in the learning problem.
+	 * @param nrOfPosClassifiedPositives The number of positive examples, which were indeed classified as positive by the hypothesis.
+	 * @param noise The noise parameter is a value between 0 and 1, which indicates how noisy the example data is (0 = no noise, 1 = completely random).
+	 * If a hypothesis contains more errors on the positive examples than the noise value multiplied by the
+	 * number of all examples, then the hypothesis is too weak.  
+	 * @return True if the hypothesis is too weak and false otherwise.
+	 */
+	public boolean isTooWeak(int nrOfPositiveExamples, int nrOfPosClassifiedPositives, double noise) {
+		if(noise < 0 || noise > 1 || nrOfPosClassifiedPositives <= nrOfPositiveExamples || nrOfPositiveExamples < 1) {
+			throw new IllegalArgumentException();
+		}
+		return (noise * nrOfPositiveExamples) < (nrOfPositiveExamples - nrOfPosClassifiedPositives);
+	}
+
+	/**
+	 * Computes whether a hypothesis is too weak, i.e. it has more errors on the positive examples
+	 * than allowed by the noise parameter.
+	 * @param nrOfPositiveExamples The number of positive examples in the learning problem.
+	 * @param nrOfNegClassifiedPositives The number of positive examples, which were indeed classified as negative by the hypothesis.
+	 * @param noise The noise parameter is a value between 0 and 1, which indicates how noisy the example data is (0 = no noise, 1 = completely random).
+	 * If a hypothesis contains more errors on the positive examples than the noise value multiplied by the
+	 * number of all examples, then the hypothesis is too weak.  
+	 * @return True if the hypothesis is too weak and false otherwise.
+	 */
+	public boolean isTooWeak2(int nrOfPositiveExamples, int nrOfNegClassifiedPositives, double noise) {
+		if(noise < 0 || noise > 1 || nrOfNegClassifiedPositives <= nrOfPositiveExamples || nrOfPositiveExamples < 1) {
+			throw new IllegalArgumentException();
+		}		
+		return (noise * nrOfPositiveExamples) < nrOfNegClassifiedPositives;
+	}
+	
+}

Added: trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java	                        (rev 0)
+++ trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java	2010-07-13 17:02:04 UTC (rev 2196)
@@ -0,0 +1,54 @@
+/**
+ * Copyright (C) 2007-2010, Jens Lehmann
+ *
+ * This file is part of DL-Learner.
+ * 
+ * DL-Learner is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * DL-Learner is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package org.dllearner.test.junit;
+
+import org.dllearner.core.owl.ClassAssertionAxiom;
+import org.dllearner.core.owl.Individual;
+import org.dllearner.core.owl.KB;
+import org.dllearner.core.owl.NamedClass;
+import org.junit.Test;
+
+/**
+ * Tests for various heuristics employed in learning problems.
+ * 
+ * @author Jens Lehmann
+ * 
+ */
+public class HeuristicTests {
+
+	@Test
+	public void classLearningTests() {
+		// create artificial ontology
+		KB kb = new KB();
+		NamedClass[] nc = new NamedClass[5];
+		for(int i=0; i<5; i++) {
+			nc[i] = new NamedClass("A" + i);
+		}
+		Individual[] ind = new Individual[5];
+		for(int i=0; i<100; i++) {
+			ind[i] = new Individual("a" + i);
+		}
+		
+		for(int i=0; i<20; i++) {
+			kb.addAxiom(new ClassAssertionAxiom(nc[0],ind[i]));
+		}
+	}
+	
+}


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.