[DL-Learner SVN] SF.net SVN: dl-learner:[1625] trunk

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 1625
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=1625&view=rev
Author:   jenslehmann
Date:     2009-02-24 16:08:18 +0000 (Tue, 24 Feb 2009)

Log Message:
-----------
- class description evaluation for ontology engineering switched to instance checks limited to relevant instances
- intelligent random sampling for accuracy estimation, so that the algorithm scales to larger ontologies; instance checks are only performed until a reasonable estimate of the accuracy can be calculated
- fix for a difficult bug in the heuristic, caused by double occurrences of nodes due to very small node score changes after a node is added to the candidate set
- some smaller fixes

Modified Paths:
--------------
    trunk/examples/epc/rs004_oe.conf
    trunk/src/dl-learner/org/dllearner/algorithms/celoe/CELOE.java
    trunk/src/dl-learner/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java
    trunk/src/dl-learner/org/dllearner/algorithms/celoe/OENode.java
    trunk/src/dl-learner/org/dllearner/algorithms/refinement2/ROLearner2.java
    trunk/src/dl-learner/org/dllearner/core/configurators/CELOEConfigurator.java
    trunk/src/dl-learner/org/dllearner/core/configurators/FastInstanceCheckerConfigurator.java
    trunk/src/dl-learner/org/dllearner/core/options/CommonConfigOptions.java
    trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java
    trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java
    trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java
    trunk/src/dl-learner/org/dllearner/test/junit/RefinementOperatorTests.java
    trunk/src/dl-learner/org/dllearner/test/junit/TestOntologies.java
    trunk/src/dl-learner/org/dllearner/utilities/owl/EvaluatedDescriptionSet.java

Modified: trunk/examples/epc/rs004_oe.conf
===================================================================

--- trunk/examples/epc/rs004_oe.conf	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/examples/epc/rs004_oe.conf	2009-02-24 16:08:18 UTC (rev 1625)
@@ -5,3 +5,4 @@
 classLearning.classToDescribe = "http://localhost/aris/sap_model.owl#EPC_RS004"; 
 
 algorithm = celoe;
+celoe.maxExecutionTimeInSeconds = 20;

Modified: trunk/src/dl-learner/org/dllearner/algorithms/celoe/CELOE.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/algorithms/celoe/CELOE.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/algorithms/celoe/CELOE.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -28,6 +28,8 @@
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import javax.sound.midi.SysexMessage;
+
 import org.apache.log4j.Logger;
 import org.dllearner.core.ComponentInitException;
 import org.dllearner.core.EvaluatedDescription;
@@ -45,6 +47,8 @@
 import org.dllearner.core.owl.Restriction;
 import org.dllearner.core.owl.Thing;
 import org.dllearner.learningproblems.ClassLearningProblem;
+import org.dllearner.parser.KBParser;
+import org.dllearner.parser.ParseException;
 import org.dllearner.refinementoperators.RefinementOperator;
 import org.dllearner.refinementoperators.RhoDRDown;
 import org.dllearner.utilities.owl.ConceptComparator;
@@ -52,6 +56,9 @@
 import org.dllearner.utilities.owl.DescriptionMinimizer;
 import org.dllearner.utilities.owl.EvaluatedDescriptionSet;
 
+import com.jamonapi.Monitor;
+import com.jamonapi.MonitorFactory;
+
 /**
  * The CELOE (Class Expression Learner for Ontology Engineering) algorithm.
  * It adapts and extends the standard supervised learning algorithm for the
@@ -78,11 +85,13 @@
 	private TreeSet<OENode> nodes;
 	// root of search tree
 	private OENode startNode;
+	// the class with which we start the refinement process
+	private Description startClass;
 	
 	// all descriptions in the search tree plus those which were too weak (for fast redundancy check)
 	private TreeSet<Description> descriptions;
 	
-	private EvaluatedDescriptionSet bestEvaluatedDescriptions = new EvaluatedDescriptionSet(LearningAlgorithm.MAX_NR_OF_RESULTS);
+	private EvaluatedDescriptionSet bestEvaluatedDescriptions;
 	
 	private NamedClass classToDescribe;
 	private boolean isEquivalenceProblem;
@@ -99,6 +108,9 @@
 	private DecimalFormat dfPercent = new DecimalFormat("0.00%");
 	private ConceptComparator descriptionComparator = new ConceptComparator();
 	
+	// statistical variables
+	private int descriptionTests = 0;
+	
 	@Override
 	public Configurator getConfigurator() {
 		return configurator;
@@ -130,7 +142,8 @@
 		options.add(CommonConfigOptions.useDoubleDatatypes());
 		options.add(CommonConfigOptions.maxExecutionTimeInSeconds(10));
 		options.add(CommonConfigOptions.getNoisePercentage());
-		options.add(CommonConfigOptions.getMaxDepth(4));
+		options.add(CommonConfigOptions.getMaxDepth(7));
+		options.add(CommonConfigOptions.maxNrOfResults(10));
 		return options;
 	}
 	
@@ -147,11 +160,28 @@
 		
 		minimizer = new DescriptionMinimizer(reasoner);
 		
+		// start class: intersection of super classes for definitions (since it needs to
+		// capture all instances), but owl:Thing for learning subclasses (since it is
+		// superfluous to add super classes in this case)
+		if(isEquivalenceProblem) {
+			Set<Description> superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe);
+			if(superClasses.size() > 1) {
+				startClass = new Intersection(new LinkedList<Description>(superClasses));
+			} else {
+				startClass = (Description) superClasses.toArray()[0];
+			}
+			
+		} else {
+			startClass = Thing.instance;
+		}		
+		
 		// create refinement operator
-		operator = new RhoDRDown(reasoner, classHierarchy, configurator);
+		operator = new RhoDRDown(reasoner, classHierarchy, startClass, configurator);
 		baseURI = reasoner.getBaseURI();
 		prefixes = reasoner.getPrefixes();
 		
+		 bestEvaluatedDescriptions = new EvaluatedDescriptionSet(configurator.getMaxNrOfResults());
+			
 		// we put important parameters in class variables
 		minAcc = configurator.getNoisePercentage()/100d;
 		maxDepth = configurator.getMaxDepth();
@@ -184,25 +214,25 @@
 		reset();
 		nanoStartTime = System.nanoTime();
 		
+		// test
+//		Description testD = null;
+//		try {
+////			testD = KBParser.parseConcept("(\"EPC\" AND EXISTS hasModelElements.(\"Function\" AND ALL previousObjects.BOTTOM))", "http://localhost/aris/sap_model.owl#");
+//			testD = KBParser.parseConcept("(\"EPC\" AND EXISTS hasModelElements.(\"Function\" AND ALL nextObject.BOTTOM))", "http://localhost/aris/sap_model.owl#");
+//		} catch (ParseException e) {
+//			// TODO Auto-generated catch block
+//			e.printStackTrace();
+//		}
+//		double val = learningProblem.getAccuracyOrTooWeak(testD, minAcc);
+//		System.out.println(testD);
+//		System.out.println(val);
+//		System.out.println(testD.getDepth());
+//		System.exit(0);
+		
 		// highest accuracy so far
 		double highestAccuracy = 0.0;
 		OENode bestNode;
-		
-		// start class: intersection of super classes for definitions (since it needs to
-		// capture all instances), but owl:Thing for learning subclasses (since it is
-		// superfluous to add super classes in this case)
-		Description startClass;
-		if(isEquivalenceProblem) {
-			Set<Description> superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe);
-			if(superClasses.size() > 1) {
-				startClass = new Intersection(new LinkedList<Description>(superClasses));
-			} else {
-				startClass = (Description) superClasses.toArray()[0];
-			}
-			
-		} else {
-			startClass = Thing.instance;
-		}
+
 		addNode(startClass, null);
 		
 		int loop = 0;
@@ -220,7 +250,9 @@
 			int horizExp = bestNode.getHorizontalExpansion();
 			
 			// apply operator
-			TreeSet<Description> refinements = refineNode(bestNode); 
+			Monitor mon = MonitorFactory.start("refineNode");
+			TreeSet<Description> refinements = refineNode(bestNode);
+			mon.stop();
 				
 			while(refinements.size() != 0) {
 				// pick element from set
@@ -230,8 +262,10 @@
 				// we ignore all refinements with lower length and too high depth
 				// (this also avoids duplicate node children)
 				if(length > horizExp && refinement.getDepth() <= maxDepth) {
-		
+					
+					Monitor mon2 = MonitorFactory.start("addNode");
 					boolean added = addNode(refinement, bestNode);
+					mon2.stop();
 					
 					// if refinements have the same length, we apply the operator again
 					// (descending the subsumption hierarchy)
@@ -249,9 +283,9 @@
 		}
 
 		if (stop) {
-			logger.info("Algorithm stopped.\n");
+			logger.info("Algorithm stopped ("+descriptionTests+" descriptions tested).\n");
 		} else {
-			logger.info("Algorithm terminated succesfully.\n");
+			logger.info("Algorithm terminated succesfully ("+descriptionTests+" descriptions tested).\n");
 		}
 		
 		// print solution(s)
@@ -266,10 +300,14 @@
 	// expand node horizontically
 	private TreeSet<Description> refineNode(OENode node) {
 		// we have to remove and add the node since its heuristic evaluation changes through the expansion
+		// (you *must not* include any criteria in the heuristic which are modified outside of this method,
+		// otherwise you may see rarely occuring but critical false ordering in the nodes set)
 		nodes.remove(node);
+//		System.out.println("refining: " + node);
 		int horizExp = node.getHorizontalExpansion();
 		TreeSet<Description> refinements = (TreeSet<Description>) operator.refine(node.getDescription(), horizExp+1);
 		node.incHorizontalExpansion();
+		node.setRefinementCount(refinements.size());
 		nodes.add(node);
 		return refinements;
 	}
@@ -291,6 +329,8 @@
 		
 		// quality of description (return if too weak)
 		double accuracy = learningProblem.getAccuracyOrTooWeak(description, minAcc);
+		descriptionTests++;
+//		System.out.println(description + " " + accuracy);
 		if(accuracy == -1) {
 			return false;
 		}
@@ -309,7 +349,7 @@
 		// maybe add to best descriptions (method keeps set size fixed);
 		// we need to make sure that this does not get called more often than
 		// necessary since rewriting is expensive
-		boolean isCandidate = (bestEvaluatedDescriptions.size()==0);
+		boolean isCandidate = !bestEvaluatedDescriptions.isFull();
 		if(!isCandidate) {
 			EvaluatedDescription worst = bestEvaluatedDescriptions.getWorst();
 			double accThreshold = worst.getAccuracy();
@@ -391,6 +431,7 @@
 		nodes = new TreeSet<OENode>(new OEHeuristicRuntime());
 		descriptions = new TreeSet<Description>(new ConceptComparator());
 		bestEvaluatedDescriptions.getSet().clear();
+		descriptionTests = 0;
 	}
 	
 	@Override
@@ -419,15 +460,15 @@
 	}	
 	
 	private String getSolutionString() {
-		int max = 10;
+//		int max = 10;
 		int current = 1;
 		String str = "";
 		for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet().descendingSet()) {
 			str += current + ": " + descriptionToString(ed.getDescription()) + " " + dfPercent.format(ed.getAccuracy()) + "\n";
 			current++;
-			if(current == max) {
-				break;
-			}
+//			if(current == max) {
+//				break;
+//			}
 		}
 		return str;
 	}

Modified: trunk/src/dl-learner/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/algorithms/celoe/OEHeuristicRuntime.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -35,21 +35,28 @@
 	private double expansionPenaltyFactor = 0.1;
 	// bonus for being better than parent node
 	private double gainBonusFactor = 0.3;
-	// penalty if a node has very many children since exploring such a node is
-	// computationally very expensive
-	private double nodeChildPenalty = 0.0005;
+	// penalty if a node description has very many refinements since exploring 
+	// such a node is computationally very expensive
+	private double nodeRefinementPenalty = 0.0001;
 	// syntactic comparison as final comparison criterion
 	private ConceptComparator conceptComparator = new ConceptComparator();
 	
 	@Override
 	public int compare(OENode node1, OENode node2) {
+//		System.out.println("node1 " + node1);
+//		System.out.println("score: " + getNodeScore(node1));
+//		System.out.println("node2 " + node2);
+//		System.out.println("score: " + getNodeScore(node2));
+		
 		double diff = getNodeScore(node1) - getNodeScore(node2);
-		if(diff>0)
+		
+		if(diff>0) {		
 			return 1;
-		else if(diff<0)
+		} else if(diff<0) {
 			return -1;
-		else
+		} else {
 			return conceptComparator.compare(node1.getDescription(), node2.getDescription());
+		}
 	}
 
 	public double getNodeScore(OENode node) {
@@ -63,7 +70,7 @@
 		// penalty for horizontal expansion
 		score -= node.getHorizontalExpansion() * expansionPenaltyFactor;
 		// penalty for having many child nodes (stuck prevention)
-		score -= node.getChildren().size() * nodeChildPenalty;
+		score -= node.getRefinementCount() * nodeRefinementPenalty;
 		return score;
 	}	
 }

Modified: trunk/src/dl-learner/org/dllearner/algorithms/celoe/OENode.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/algorithms/celoe/OENode.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/algorithms/celoe/OENode.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -53,8 +53,13 @@
 	private OENode parent;
 	private List<OENode> children = new LinkedList<OENode>();
 	
-	DecimalFormat dfPercent = new DecimalFormat("0.00%");
+	// the refinement count corresponds to the number of refinements of the
+	// description in this node - it is a better heuristic indicator than child count
+	// (and avoids the problem that adding children changes the heuristic value)
+	private int refinementCount = 0;
 	
+	private static DecimalFormat dfPercent = new DecimalFormat("0.00%");
+	
 	public OENode(OENode parentNode, Description description, double accuracy) {
 		this.parent = parentNode;
 		this.description = description;
@@ -113,10 +118,16 @@
 		String ret = description.toString(baseURI,null) + " [";
 		ret += "acc:" + dfPercent.format(accuracy) + ", ";
 		ret += "he:" + horizontalExpansion + ", ";
-		ret += "c:" + children.size() + "]";
+		ret += "c:" + children.size() + ", ";
+		ret += "ref:" + refinementCount + "]";
 		return ret;
-	}	
+	}
 	
+	@Override
+	public String toString() {
+		return getShortDescription(null);
+	}
+	
 	public String toTreeString() {
 		return toTreeString(0, null).toString();
 	}
@@ -136,5 +147,19 @@
 			treeString.append(child.toTreeString(depth+1,baseURI));
 		}
 		return treeString;
+	}
+
+	/**
+	 * @return the refinementCount
+	 */
+	public int getRefinementCount() {
+		return refinementCount;
+	}
+
+	/**
+	 * @param refinementCount the refinementCount to set
+	 */
+	public void setRefinementCount(int refinementCount) {
+		this.refinementCount = refinementCount;
 	}	
 }

Modified: trunk/src/dl-learner/org/dllearner/algorithms/refinement2/ROLearner2.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/algorithms/refinement2/ROLearner2.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/algorithms/refinement2/ROLearner2.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -544,11 +544,12 @@
 
 		printStatistics(true);
 
+		int conceptTests = conceptTestsReasoner + conceptTestsTooWeakList + conceptTestsOverlyGeneralList;
 		if (stop) {
-			logger.info("Algorithm stopped.\n");
+			logger.info("Algorithm stopped ("+conceptTests+" descriptions tested).\n");
 		} else {
-			logger.info("Algorithm terminated succesfully.\n");
-		}
+			logger.info("Algorithm terminated succesfully ("+conceptTests+" descriptions tested).\n");
+		}		
 
 		totalLearningTime.stop();
 		isRunning = false;

Modified: trunk/src/dl-learner/org/dllearner/core/configurators/CELOEConfigurator.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/core/configurators/CELOEConfigurator.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/core/configurators/CELOEConfigurator.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -140,7 +140,7 @@
 /**
 * maxExecutionTimeInSeconds algorithm will stop after specified seconds.
 * mandatory: false| reinit necessary: true
-* default value: 0
+* default value: 10
 * @return int 
 **/
 public int getMaxExecutionTimeInSeconds() {
@@ -158,12 +158,21 @@
 /**
 * maxDepth maximum depth of description.
 * mandatory: false| reinit necessary: true
-* default value: 3
+* default value: 4
 * @return int 
 **/
 public int getMaxDepth() {
 return (Integer) ComponentManager.getInstance().getConfigOptionValue(cELOE,  "maxDepth") ;
 }
+/**
+* maxNrOfResults Sets the maximum number of results one is interested in. (Setting this to a lower value may increase performance as the learning algorithm has to store/evaluate/beautify less descriptions)..
+* mandatory: false| reinit necessary: true
+* default value: 10
+* @return int 
+**/
+public int getMaxNrOfResults() {
+return (Integer) ComponentManager.getInstance().getConfigOptionValue(cELOE,  "maxNrOfResults") ;
+}
 
 /**
 * @param useAllConstructor specifies whether the universal concept constructor is used in the learning algorithm.
@@ -249,7 +258,7 @@
 /**
 * @param maxExecutionTimeInSeconds algorithm will stop after specified seconds.
 * mandatory: false| reinit necessary: true
-* default value: 0
+* default value: 10
 **/
 public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds) {
 ComponentManager.getInstance().applyConfigEntry(cELOE, "maxExecutionTimeInSeconds", maxExecutionTimeInSeconds);
@@ -267,12 +276,21 @@
 /**
 * @param maxDepth maximum depth of description.
 * mandatory: false| reinit necessary: true
-* default value: 3
+* default value: 4
 **/
 public void setMaxDepth(int maxDepth) {
 ComponentManager.getInstance().applyConfigEntry(cELOE, "maxDepth", maxDepth);
 reinitNecessary = true;
 }
+/**
+* @param maxNrOfResults Sets the maximum number of results one is interested in. (Setting this to a lower value may increase performance as the learning algorithm has to store/evaluate/beautify less descriptions)..
+* mandatory: false| reinit necessary: true
+* default value: 10
+**/
+public void setMaxNrOfResults(int maxNrOfResults) {
+ComponentManager.getInstance().applyConfigEntry(cELOE, "maxNrOfResults", maxNrOfResults);
+reinitNecessary = true;
+}
 
 /**
 * true, if this component needs reinitializsation.

Modified: trunk/src/dl-learner/org/dllearner/core/configurators/FastInstanceCheckerConfigurator.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/core/configurators/FastInstanceCheckerConfigurator.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/core/configurators/FastInstanceCheckerConfigurator.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -73,7 +73,7 @@
 /**
 * forallRetrievalSemantics This option controls how to interpret the all quantifier in orall r.C. The standard option isto return all those which do not have an r-filler not in C. The domain semantics is to use thosewhich are in the domain of r and do not have an r-filler not in C. The forallExists semantics is touse those which have at least one r-filler and do not have an r-filler not in C..
 * mandatory: false| reinit necessary: true
-* default value: forallExists
+* default value: standard
 * @return String 
 **/
 public String getForallRetrievalSemantics() {
@@ -101,7 +101,7 @@
 /**
 * @param forallRetrievalSemantics This option controls how to interpret the all quantifier in orall r.C. The standard option isto return all those which do not have an r-filler not in C. The domain semantics is to use thosewhich are in the domain of r and do not have an r-filler not in C. The forallExists semantics is touse those which have at least one r-filler and do not have an r-filler not in C..
 * mandatory: false| reinit necessary: true
-* default value: forallExists
+* default value: standard
 **/
 public void setForallRetrievalSemantics(String forallRetrievalSemantics) {
 ComponentManager.getInstance().applyConfigEntry(fastInstanceChecker, "forallRetrievalSemantics", forallRetrievalSemantics);

Modified: trunk/src/dl-learner/org/dllearner/core/options/CommonConfigOptions.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/core/options/CommonConfigOptions.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/core/options/CommonConfigOptions.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -19,8 +19,10 @@
  */
 package org.dllearner.core.options;
 
+import org.dllearner.core.LearningAlgorithm;
 
 
+
 /**
  * Contains methods for creating common configuration options, i.e. options
  * which are or may be of use for several components. 
@@ -169,6 +171,13 @@
 		return new IntegerConfigOption("guaranteeXgoodDescriptions", "algorithm will run until X good (100%) concept descritpions are found",guaranteeXgoodDescriptionsDefault);
 	}
 	
+	public static IntegerConfigOption maxNrOfResults(int defaultValue) {
+		IntegerConfigOption opt = new IntegerConfigOption("maxNrOfResults", "Sets the maximum number of results one is interested in. (Setting this to a lower value may increase performance as the learning algorithm has to store/evaluate/beautify less descriptions).", defaultValue);
+		opt.setLowerLimit(1);
+		opt.setUpperLimit(LearningAlgorithm.MAX_NR_OF_RESULTS);
+		return opt;
+	}
+	
 	public static IntegerConfigOption maxClassDescriptionTests() {
 		return new IntegerConfigOption("maxClassDescriptionTests", "The maximum number of candidate hypothesis the algorithm is allowed to test (0 = no limit). The algorithm will stop afterwards. " +
 				"(The real number of tests can be slightly higher, because this criterion usually won't be checked after each single test.)",maxClassDescriptionTestsDefault);

Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -20,7 +20,10 @@
 package org.dllearner.learningproblems;
 
 import java.util.Collection;
+import java.util.Collections;
 import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
 
@@ -51,6 +54,9 @@
 	private boolean equivalence = true;
 	private ClassLearningProblemConfigurator configurator;
 	
+	// instances of super classes excluding instances of the class itself
+	private List<Individual> superClassInstances;
+	
 	@Override
 	public ClassLearningProblemConfigurator getConfigurator(){
 		return configurator;
@@ -83,6 +89,20 @@
 		
 		classInstances = reasoner.getIndividuals(classToDescribe);
 		equivalence = (configurator.getType().equals("equivalence"));
+		
+		// we compute the instances of the super class to perform
+		// optimisations later on
+		Set<Description> superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe);
+		TreeSet<Individual> superClassInstancesTmp = new TreeSet<Individual>(reasoner.getIndividuals());
+		for(Description superClass : superClasses) {
+			superClassInstancesTmp.retainAll(reasoner.getIndividuals(superClass));
+		}
+		superClassInstancesTmp.removeAll(classInstances);
+		// since we use the instance list for approximations, we want to avoid
+		// any bias through URI names, so we shuffle the list once pseudo-randomly
+		superClassInstances = new LinkedList<Individual>(superClassInstancesTmp);
+		Random rand = new Random(1);
+		Collections.shuffle(superClassInstances, rand);
 	}
 	
 	/**
@@ -148,11 +168,88 @@
 		return 0.5d * (coverage + protusion);
 	}
 
+	@Override
+	public double getAccuracyOrTooWeak(Description description, double minAccuracy) {
+		// instead of using the standard operation, we use optimisation
+		// and approximation here
+		
+		// we abort when there are too many uncovered positives
+		int maxNotCovered = (int) Math.ceil(minAccuracy*classInstances.size());
+		int instancesCovered = 0;
+		int instancesNotCovered = 0;
+		
+		for(Individual ind : classInstances) {
+			if(reasoner.hasType(description, ind)) {
+				instancesCovered++;
+//				System.out.println("covered");
+			} else {
+//				System.out.println(ind + " not covered.");
+				instancesNotCovered ++;
+				if(instancesNotCovered > maxNotCovered) {
+					return -1;
+				}
+			}
+		}	
+		
+		double coverage = instancesCovered/(double)classInstances.size();
+		
+		// we know that a definition candidate is always subclass of the
+		// intersection of all super classes, so we test only the relevent instances
+		// (leads to undesired effects for descriptions not following this rule,
+		// but improves performance a lot);
+		// for learning a superclass of a defined class, similar observations apply;
+
+		// we only test 10 * instances covered; while this is only an
+		// approximation, it is unlikely that further tests will have any
+		// significant impact on the overall accuracy
+		int maxTests = 10 * instancesCovered;
+//		int tests = Math.min(maxTests, superClassInstances.size());
+		int testsPerformed = 0;
+		int instancesDescription = 0;
+		
+		for(Individual ind : superClassInstances) {
+			
+//			System.out.println(ind);
+			
+			if(reasoner.hasType(description, ind)) {
+//				System.out.println("ind: " + ind);
+				instancesDescription++;
+			}
+			
+			testsPerformed++;
+			
+			if(testsPerformed > maxTests) {
+//				System.out.println(testsPerformed);
+//				System.out.println("estimating accuracy by random sampling");
+				// estimate for the number of instances of the description
+				instancesDescription = (int) (instancesDescription/(double)testsPerformed * superClassInstances.size());
+				break;
+			}
+		}
+		
+//		System.out.println(description);
+//		System.out.println("A and C: " + instancesCovered);
+//		System.out.println("instances description: " + instancesDescription);
+		
+		// since we measured/estimated accuracy only on instances outside A (superClassInstances
+		// does not include instances of A), we need to add it in the denominator
+		double protusion = instancesCovered/(double)(instancesDescription+instancesCovered);
+		
+//		System.out.println(description);
+//		System.out.println(instancesDescription);
+//		System.out.println("prot: " + protusion);
+		
+		double acc =  0.5d * (coverage + protusion);
+		
+//		System.out.println("acc: " + acc);
+		
+		return acc;
+	}	
+	
 	/* (non-Javadoc)
 	 * @see org.dllearner.core.LearningProblem#getAccuracyOrTooWeak(org.dllearner.core.owl.Description, double)
 	 */
-	@Override
-	public double getAccuracyOrTooWeak(Description description, double minAccuracy) {
+	public double getAccuracyOrTooWeakStandard(Description description, double minAccuracy) {
 		// since we have to perform a retrieval operation anyway, we cannot easily
 		// get a benefit from the accuracy limit
 		double accuracy = getAccuracy(description);

Modified: trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/reasoning/FastInstanceChecker.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -295,7 +295,7 @@
 			// the union, we return true
 			List<Description> children = description.getChildren();
 			for (Description child : children) {
-				if (hasType(child, individual)) {
+				if (hasTypeImpl(child, individual)) {
 					return true;
 				}
 			}
@@ -305,7 +305,7 @@
 			// the union, we return true
 			List<Description> children = description.getChildren();
 			for (Description child : children) {
-				if (!hasType(child, individual)) {
+				if (!hasTypeImpl(child, individual)) {
 					return false;
 				}
 			}
@@ -330,7 +330,7 @@
 				return false;
 			}
 			for (Individual roleFiller : roleFillers) {
-				if (hasType(child, roleFiller)) {
+				if (hasTypeImpl(child, roleFiller)) {
 					return true;
 				}
 			}
@@ -355,7 +355,7 @@
 				return true;
 			}
 			for (Individual roleFiller : roleFillers) {
-				if (!hasType(child, roleFiller)) {
+				if (!hasTypeImpl(child, roleFiller)) {
 					return false;
 				}
 			}
@@ -395,7 +395,7 @@
 			int index = 0;
 			for (Individual roleFiller : roleFillers) {
 				index++;
-				if (hasType(child, roleFiller)) {
+				if (hasTypeImpl(child, roleFiller)) {
 					nrOfFillers++;
 					if (nrOfFillers == number) {
 						return true;
@@ -438,7 +438,7 @@
 			int index = 0;
 			for (Individual roleFiller : roleFillers) {
 				index++;
-				if (hasType(child, roleFiller)) {
+				if (hasTypeImpl(child, roleFiller)) {
 					nrOfFillers++;
 					if (nrOfFillers > number) {
 						return false;

Modified: trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/refinementoperators/RhoDRDown.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -187,9 +187,10 @@
 		this.rs = reasoningService;
 	}
 	
-	public RhoDRDown(ReasonerComponent reasoner, ClassHierarchy subHierarchy, RefinementOperatorConfigurator configurator) {
+	public RhoDRDown(ReasonerComponent reasoner, ClassHierarchy subHierarchy, Description startClass, RefinementOperatorConfigurator configurator) {
 		this.rs = reasoner;
 		this.subHierarchy = subHierarchy;
+		this.startClass = startClass;
 		useCardinalityRestrictions = configurator.getUseCardinalityRestrictions();
 		// TODO add more options from configurator object
 		init();

Modified: trunk/src/dl-learner/org/dllearner/test/junit/RefinementOperatorTests.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/test/junit/RefinementOperatorTests.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/test/junit/RefinementOperatorTests.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -35,6 +35,7 @@
 import org.dllearner.parser.ParseException;
 import org.dllearner.reasoning.OWLAPIReasoner;
 import org.dllearner.refinementoperators.RhoDRDown;
+import org.dllearner.test.junit.TestOntologies.TestOntology;
 import org.junit.Test;
 
 /**
@@ -94,6 +95,26 @@
 		}
 	}
 	
+	@Test
+	public void rhoDRDownTest2() throws ParseException {
+		ReasonerComponent reasoner = TestOntologies.getTestOntology(TestOntology.EPC_OE);
+		baseURI = reasoner.getBaseURI();
+		
+		RhoDRDown op = new RhoDRDown(reasoner);
+		Description concept = KBParser.parseConcept("(\"http://localhost/aris/sap_model.owl#EPC\" AND EXISTS \"http://localhost/aris/sap_model.owl#hasModelElements\".\"http://localhost/aris/sap_model.owl#Object\")");
+		Set<Description> results = op.refine(concept, 6);
+
+		for(Description result : results) {
+			System.out.println(result);
+		}
+			
+		int desiredResultSize = 141;
+		if(results.size() != desiredResultSize) {
+			System.out.println(results.size() + " results found, but should be " + desiredResultSize + ".");
+		}
+		assertTrue(results.size()==desiredResultSize);
+	}
+	
 	private String uri(String name) {
 		return "\""+baseURI+name+"\"";
 	}

Modified: trunk/src/dl-learner/org/dllearner/test/junit/TestOntologies.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/test/junit/TestOntologies.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/test/junit/TestOntologies.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -41,7 +41,7 @@
  */
 public final class TestOntologies {
 
-	public enum TestOntology { EMPTY, SIMPLE, SIMPLE_NO_DR, SIMPLE_NO_DISJOINT, SIMPLE_NO_DR_DISJOINT, SIMPLE2, SIMPLE3, R1SUBR2, DATA1, FIVE_ROLES, FATHER_OE };
+	public enum TestOntology { EMPTY, SIMPLE, SIMPLE_NO_DR, SIMPLE_NO_DISJOINT, SIMPLE_NO_DR_DISJOINT, SIMPLE2, SIMPLE3, R1SUBR2, DATA1, FIVE_ROLES, FATHER_OE, CARCINOGENESIS, EPC_OE };
 	
 	public static ReasonerComponent getTestOntology(TestOntology ont) {
 		String kbString = "";
@@ -111,6 +111,10 @@
 			kbString += "r5(a,b).\n";
 		} else if(ont.equals(TestOntology.FATHER_OE)) {
 			owlFile = "examples/family/father_oe.owl";
+		} else if(ont.equals(TestOntology.CARCINOGENESIS)) {
+			owlFile = "examples/carcinogenesis/carcinogenesis.owl";
+		} else if(ont.equals(TestOntology.EPC_OE)) {
+			owlFile = "examples/epc/sap_epc_oe.owl";
 		}
 		
 		try {	

Modified: trunk/src/dl-learner/org/dllearner/utilities/owl/EvaluatedDescriptionSet.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/utilities/owl/EvaluatedDescriptionSet.java	2009-02-23 16:08:34 UTC (rev 1624)
+++ trunk/src/dl-learner/org/dllearner/utilities/owl/EvaluatedDescriptionSet.java	2009-02-24 16:08:18 UTC (rev 1625)
@@ -112,4 +112,11 @@
 	public String toString() {
 		return set.toString();
 	}
+
+	/**
+	 * @return the maxSize
+	 */
+	public int getMaxSize() {
+		return maxSize;
+	}
 }


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.