From: <jen...@us...> - 2008-01-24 12:53:01
Revision: 426
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=426&view=rev
Author:   jenslehmann
Date:     2008-01-24 04:52:57 -0800 (Thu, 24 Jan 2008)

Log Message:
-----------
- cleanup in new learning algorithm
- php-client renamed to php-client-old as it is no longer working with the
  current DL-Learner web service

Modified Paths:
--------------
    trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java
    trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java

Added Paths:
-----------
    trunk/src/php-client-old/

Removed Paths:
-------------
    trunk/src/php-client/

Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java	2008-01-24 10:13:14 UTC (rev 425)
+++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLComponent.java	2008-01-24 12:52:57 UTC (rev 426)
@@ -104,6 +104,7 @@
     private boolean useAllConstructor = true;
     private boolean useExistsConstructor = true;
     private boolean useNegation = true;
+    private double noisePercentage = 0.0;
 
     // Variablen zur Einstellung der Protokollierung
     // boolean quiet = false;
@@ -159,7 +160,11 @@
         options.add(CommonConfigOptions.ignoredRoles());
         options.add(CommonConfigOptions.useAllConstructor());
         options.add(CommonConfigOptions.useExistsConstructor());
-        options.add(CommonConfigOptions.useNegation());
+        options.add(CommonConfigOptions.useNegation());
+        DoubleConfigOption noisePercentage = new DoubleConfigOption("noisePercentage", "the (approximated) percentage of noise within the examples");
+        noisePercentage.setLowerLimit(0.0);
+        noisePercentage.setUpperLimit(1.0);
+        options.add(noisePercentage);
 
         return options;
     }
@@ -208,6 +213,8 @@
             useExistsConstructor = (Boolean) entry.getValue();
         } else if(name.equals("useNegation")) {
             useNegation = (Boolean) entry.getValue();
+        } else if(name.equals("noisePercentage")) {
+            noisePercentage = (Double) entry.getValue();
         }
     }
 
@@ -282,8 +289,9 @@
                 learningProblem,
                 operator,
                 algHeuristic,
-                usedConcepts,
-                usedRoles,
+                // usedConcepts,
+                // usedRoles,
+                noisePercentage,
                 writeSearchTree,
                 replaceSearchTree,
                 searchTreeFile,
@@ -291,6 +299,9 @@
                 useOverlyGeneralList,
                 useShortConceptConstruction
         );
+        // note: used concepts and roles do not need to be passed
+        // as argument, because it is sufficient to prepare the
+        // concept and role hierarchy accordingly
    }
 
     public static String getName() {
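For orientation, the noisePercentage option registered above is described in the TODO added to ExampleBasedROLearner (below) as the fraction of positive examples a candidate concept may misclassify before it is treated as too weak. The following minimal sketch shows how such a bound could be derived from the option value; the class and method names are hypothetical illustrations, not DL-Learner code:

// Illustrative sketch only: turns a noise percentage in [0.0, 1.0] into the
// maximum number of positive examples a candidate may misclassify.
// All names here are hypothetical and not part of the DL-Learner API.
public class NoiseThresholdSketch {

    public static int allowedMisclassifications(double noisePercentage, int nrOfPositiveExamples) {
        if (noisePercentage < 0.0 || noisePercentage > 1.0) {
            throw new IllegalArgumentException("noisePercentage must be within [0.0, 1.0]");
        }
        // round down: with 5% noise and 42 positive examples, at most 2 may be missed
        return (int) Math.floor(noisePercentage * nrOfPositiveExamples);
    }

    public static void main(String[] args) {
        System.out.println(allowedMisclassifications(0.05, 42)); // prints 2
    }
}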
Modified: trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java
===================================================================
--- trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java	2008-01-24 10:13:14 UTC (rev 425)
+++ trunk/src/dl-learner/org/dllearner/algorithms/refexamples/ExampleBasedROLearner.java	2008-01-24 12:52:57 UTC (rev 426)
@@ -21,7 +21,6 @@
 import java.io.File;
 
 import java.text.DecimalFormat;
-import java.util.Comparator;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
@@ -29,13 +28,12 @@
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.apache.log4j.Logger;
 import org.dllearner.algorithms.refinement.RefinementOperator;
 import org.dllearner.algorithms.refinement.RhoDown;
 import org.dllearner.core.LearningProblem;
 import org.dllearner.core.ReasoningService;
 import org.dllearner.core.Score;
-import org.dllearner.core.dl.AtomicConcept;
-import org.dllearner.core.dl.AtomicRole;
 import org.dllearner.core.dl.Concept;
 import org.dllearner.core.dl.MultiConjunction;
 import org.dllearner.core.dl.MultiDisjunction;
@@ -51,98 +49,86 @@
  * Implements the example based refinement operator learning
  * approach.
  * 
+ * TODO: Implement noise handling (here and/or in heuristics). When the
+ * noise level in the examples is set to a certain percentage, we
+ * compute the number of positive examples corresponding to this
+ * percentage. This is the maximum number of positive examples, which
+ * can be misclassified (everything above is considered a too weak
+ * concept wrt. the noise percentage).
+ * 
  * @author Jens Lehmann
  * 
  */
 public class ExampleBasedROLearner {
 
-    // all configuration options, which were passed to the
-    // learning algorithms
+    private static Logger logger = Logger
+            .getLogger(ExampleBasedROLearner.class);
+    
+    // basic setup: learning problem and reasoning service
+    private ReasoningService rs;
+    private PosNegLP learningProblem;
+    private PosOnlyDefinitionLP posOnlyLearningProblem;
+    private boolean posOnly = false;
+    
+    // search tree options
     private boolean writeSearchTree;
     private File searchTreeFile;
     private boolean replaceSearchTree = false;
-    Set<AtomicConcept> allowedConcepts;
-    Set<AtomicRole> allowedRoles;
-    Set<AtomicConcept> ignoredConcepts;
-    Set<AtomicRole> ignoredRoles;
-    // these are computed as the result of the previous four settings
-    Set<AtomicConcept> usedConcepts;
-    Set<AtomicRole> usedRoles;
+    
+    // constructs to improve performance
     private boolean useTooWeakList = true;
     private boolean useOverlyGeneralList = true;
     private boolean useShortConceptConstruction = true;
-    private double horizontalExpansionFactor = 0.6;
-    
-    private boolean quiet = false;
-    
+    
+    // setting to true gracefully stops the algorithm
     private boolean stop = false;
-    private ReasoningService rs;
+    
+    // solution protocol
+    private boolean solutionFound = false;
+    private List<Concept> solutions = new LinkedList<Concept>();
     
-    private PosNegLP learningProblem;
-    private PosOnlyDefinitionLP posOnlyLearningProblem;
-    private boolean posOnly = false;
+    // used refinement operator and heuristic (exchangeable)
+    private RhoDown operator;
+    // private ExampleBasedHeuristic heuristic;
     
-    // non-configuration variables
-    
-    // Lösungen protokollieren
-    boolean solutionFound = false;
-    List<Concept> solutions = new LinkedList<Concept>();
+    // specifies whether to compute and log benchmark information
+    private boolean computeBenchmarkInformation = false;
     
-    // verwendeter Refinement-Operator (momentan werden für Statistik RhoDown-spezifische
-    // Sachen abgefragt)
-    // RefinementOperator operator;
-    RhoDown operator;
-    
-    // Variablen zur Einstellung der Protokollierung
-    // boolean quiet = false;
-    boolean showBenchmarkInformation = false;
-    // boolean createTreeString = false;
-    // String searchTree = new String();
-    
-    // Konfiguration des Algorithmus
-    // Faktor für horizontale Erweiterung (notwendig für completeness)
-    // double horizontalExpansionFactor = 0.6;
-    
-    
-    
-    private Comparator<ExampleBasedNode> nodeComparator;
+    // comparator used to maintain a stable ordering of nodes, i.e.
+    // an ordering which does not change during the run of the algorithm
     private NodeComparatorStable nodeComparatorStable = new NodeComparatorStable();
+    // stable candidate set; it has no functional part in the algorithm,
+    // but is a list of the currently best concepts found
+    private TreeSet<ExampleBasedNode> candidatesStable = new TreeSet<ExampleBasedNode>(nodeComparatorStable);
+    
+    // comparator used to create ordered sets of concepts
     private ConceptComparator conceptComparator = new ConceptComparator();
-    DecimalFormat df = new DecimalFormat();
-    // Menge von Kandidaten für Refinement
-    // (wird für Direktzugriff auf Baumknoten verwendet)
+    
+    // utility variables
+    private DecimalFormat df = new DecimalFormat();
+    
+    // candidates for refinement (used for directly accessing
+    // nodes in the search tree)
     private TreeSet<ExampleBasedNode> candidates;
-    // während eines Durchlaufs neu gefundene Knoten
+    
+    // new nodes found during a run of the algorithm
     private List<ExampleBasedNode> newCandidates = new LinkedList<ExampleBasedNode>();
-    // stabiles candidate set, da sich die Knoten nach dem einfügen nicht
-    // verschieben können => das Set enthält nicht die aktuellen horizontal
-    // expansions, es dient nur dazu die besten Konzepte zu speichern; hat also
-    // keine Funktion im Algorithmus
-    private TreeSet<ExampleBasedNode> candidatesStable = new TreeSet<ExampleBasedNode>(nodeComparatorStable);
-    // vorhandene Konzepte, die irgendwann als proper eingestuft worden
+    
+    // all concepts which have been evaluated as being proper refinements
     private SortedSet<Concept> properRefinements = new TreeSet<Concept>(conceptComparator);
-    // speichert Konzept und deren Evaluierung, um sie leicht wiederzufinden für
-    // Strategien wie Konzeptverkürzung etc.
-    // Zahl = covered negatives, -1 = too weak
-    // private Map<Concept, Integer> evaluationCache = new TreeMap<Concept, Integer>(conceptComparator);
-    // Blacklists
+    
+    // blacklists
     private SortedSet<Concept> tooWeakList = new TreeSet<Concept>(conceptComparator);
     private SortedSet<Concept> overlyGeneralList = new TreeSet<Concept>(conceptComparator);
+    // set of expanded nodes (TODO: better explanation)
     TreeSet<ExampleBasedNode> expandedNodes = new TreeSet<ExampleBasedNode>(nodeComparatorStable);
-    // statistische Variablen
+    
+    // statistic variables
     private int maxRecDepth = 0;
     private int maxNrOfRefinements = 0;
     private int maxNrOfChildren = 0;
     private int redundantConcepts = 0;
-    int maximumHorizontalExpansion;
-    int minimumHorizontalExpansion;
-    // private int propernessTests = 0;
     private int propernessTestsReasoner = 0;
     private int propernessTestsAvoidedByShortConceptConstruction = 0;
     private int propernessTestsAvoidedByTooWeakList = 0;
@@ -150,7 +136,7 @@
     private int conceptTestsOverlyGeneralList = 0;
     private int conceptTestsReasoner = 0;
 
-    // Zeitvariablen
+    // time variables
     private long algorithmStartTime;
     private long propernessCalcTimeNs = 0;
     private long propernessCalcReasoningTimeNs = 0;
@@ -159,15 +145,14 @@
     private long redundancyCheckTimeNs = 0;
     private long evaluateSetCreationTimeNs = 0;
     private long improperConceptsRemovalTimeNs = 0;
-    long someTimeNs = 0;
-    int someCount = 0;
 
     public ExampleBasedROLearner(
             LearningProblem learningProblem,
             RefinementOperator operator,
             ExampleBasedHeuristic heuristic,
-            Set<AtomicConcept> allowedConcepts,
-            Set<AtomicRole> allowedRoles,
+            // Set<AtomicConcept> allowedConcepts,
+            // Set<AtomicRole> allowedRoles,
+            double noisePercentage,
             boolean writeSearchTree,
             boolean replaceSearchTree,
            File searchTreeFile,
@@ -183,8 +168,9 @@
             posOnly = true;
         }
 
+        // this.heuristic = heuristic;
         // candidate sets entsprechend der gewählten Heuristik initialisieren
-        candidates = new TreeSet<ExampleBasedNode>(nodeComparator);
+        candidates = new TreeSet<ExampleBasedNode>(heuristic);
         // newCandidates = new TreeSet<Node>(nodeComparator);
     }
 
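The hunk above orders the working candidate set by the exchangeable heuristic, while candidatesStable (see the field comments earlier in this diff) keeps a separate ordering that must not change while the algorithm runs. A stable ordering can be achieved by comparing only values that are fixed once a node has been evaluated and by breaking ties on the concept itself. The sketch below illustrates that idea with hypothetical stand-in classes; it is not the actual NodeComparatorStable implementation:

import java.util.Comparator;

// Hypothetical stand-in for a search tree node: only values that are fixed
// after evaluation are exposed to the comparator.
class NodeSketch {
    final int coveredNegatives;   // fixed once the node has been evaluated
    final String conceptString;   // canonical string form of the concept

    NodeSketch(int coveredNegatives, String conceptString) {
        this.coveredNegatives = coveredNegatives;
        this.conceptString = conceptString;
    }
}

// A "stable" comparator: its result never changes during the run because it
// ignores mutable node state such as the horizontal expansion.
class StableNodeComparatorSketch implements Comparator<NodeSketch> {
    public int compare(NodeSketch n1, NodeSketch n2) {
        if (n1.coveredNegatives != n2.coveredNegatives) {
            // fewer covered negatives = better node = placed later in an ascending set
            return (n1.coveredNegatives < n2.coveredNegatives) ? 1 : -1;
        }
        // tie-break on the concept so that distinct nodes never compare as equal
        return n1.conceptString.compareTo(n2.conceptString);
    }
}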
@@ -208,22 +194,11 @@
         solutions.add(top);
 
         int loop = 0;
-        
-        // Voreinstellung für horizontal expansion
-        maximumHorizontalExpansion = 0;
-        minimumHorizontalExpansion = 0;
 
         algorithmStartTime = System.nanoTime();
 
-        // TODO: effizienter Traversal der Subsumption-Hierarchie
-        // TODO: Äquivalenzen nutzen
-        // TODO: Gibt es auch eine andere Abbruchbedingung? Es könnte sein, dass irgendwann keine
-        // proper refinements mehr gefunden werden, aber wie stelle man das fest?
-        while(!solutionFound && !stop) {
+        while(!solutionFound && !stop) {
 
-            if(!quiet)
-                printStatistics(false);
-            
             // besten Knoten nach Heuristik auswählen
             ExampleBasedNode bestNode = candidates.last();
             // besten Knoten erweitern
@@ -234,67 +209,7 @@
             candidates.add(bestNode);
             candidates.addAll(newCandidates);
             candidatesStable.addAll(newCandidates);
-            
-            // minimum horizontal expansion berechnen
-            if(bestNode.getHorizontalExpansion()>maximumHorizontalExpansion)
-                maximumHorizontalExpansion = bestNode.getHorizontalExpansion();
-            minimumHorizontalExpansion = (int) Math.floor(horizontalExpansionFactor*maximumHorizontalExpansion);
-            
-            // neu: es werden solange Knoten erweitert bis wirklich jeder Knoten die
-            // notwendige minimum horizontal expansion hat
-            boolean nodesExpanded;
-            do {
-                nodesExpanded = false;
-                
-                
-                // es darf nicht candidatesStable geklont werden, da diese Menge nicht
-                // aktualisiert wird, also die falschen horizontal expansions vorliegen
-                // TODO: bei Tests war die Performance der clone-Operation ganz gut, aber
-                // es skaliert natürlich nicht so gut mit größer werdenden candidate set
-                // => Lösung ist vielleicht einfach einen Iterator zu verwenden und das
-                // aktuelle Konzept gleich hier zu löschen (wird dann bei expansion wieder
-                // hinzugefügt)
-                // TreeSet<Node> candidatesClone = (TreeSet<Node>) candidates.clone();
-                newCandidates.clear();
-                
-                
-                // for(Node candidate : candidatesClone) {
-                Iterator<ExampleBasedNode> it = candidates.iterator();
-                List<ExampleBasedNode> changedNodes = new LinkedList<ExampleBasedNode>();
-                while(it.hasNext()){
-                    ExampleBasedNode candidate = it.next();
-                    // alle Kandidaten, die nicht too weak sind und unter minimumHorizontalExpansion
-                    // liegen, werden erweitert
-                    if(!candidate.isTooWeak() && candidate.getHorizontalExpansion()<minimumHorizontalExpansion) {
-                        // Vorsicht, falls candidates irgendwann in extendProper benutzt
-                        // werden sollten! Es könnten auf diese Weise Knoten fehlen
-                        // (momentan wird candidates nur zur Auswahl des besten Knotens
-                        // benutzt).
-                        it.remove();
-                        extendNodeProper(candidate, minimumHorizontalExpansion);
-                        nodesExpanded = true;
-                        
-                        changedNodes.add(candidate);
-                    }
-                }
-                
-                long someTimeStart = System.nanoTime();
-                someCount++;
-                // geänderte temporär entfernte Knoten wieder hinzufügen
-                candidates.addAll(changedNodes);
-                // neu gefundene Knoten hinzufügen
-                candidates.addAll(newCandidates);
-                candidatesStable.addAll(newCandidates);
-                someTimeNs += System.nanoTime() - someTimeStart;
-                
-            } while(nodesExpanded && !stop);
-            
-            //System.out.println("candidate set:");
-            //for(Node n : candidates) {
-            //    System.out.println(n);
-            //}
-            
 
             if(writeSearchTree) {
                 // String treeString = "";
                 String treeString = "best expanded node: " + bestNode+ "\n";
@@ -305,13 +220,9 @@
                     }
                 }
                 expandedNodes.clear();
-                treeString += "horizontal expansion: " + minimumHorizontalExpansion + " to " + maximumHorizontalExpansion + "\n";
                 treeString += topNode.getTreeString();
                 treeString += "\n";
-                // System.out.println(treeString);
-                // searchTree += treeString + "\n";
-                // TODO: ev. immer nur einen search tree speichern und den an die
-                // Datei anhängen => spart Speicher
+                
                 if(replaceSearchTree)
                     Files.createFile(searchTreeFile, treeString);
                 else
@@ -321,38 +232,16 @@
 
             // Anzahl Schleifendurchläufe
             loop++;
-            if(!quiet)
-                System.out.println("--- loop " + loop + " finished ---");
+            logger.debug("--- loop " + loop + " finished ---");
         }
 
-        // Suchbaum in Datei schreiben
-//        if(writeSearchTree)
-//            Files.createFile(searchTreeFile, searchTree);
-        
-        // Ergebnisausgabe
-        /*
-        System.out.println("candidate set:");
-        for(Node n : candidates) {
-            System.out.println(n);
-        }*/
-        
-        // Set<Concept> solutionsSorted = new TreeSet(conceptComparator);
-        // solutionsSorted.addAll(solutions);
-        
-        // System.out.println("retrievals:");
-        // for(Concept c : ReasoningService.retrievals) {
-        //     System.out.println(c);
-        // }
-        
         if(solutionFound) {
-            System.out.println();
-            System.out.println("solutions:");
+            logger.info("\nsolutions:");
             for(Concept c : solutions) {
-                System.out.println(" " + c + " (length " + c.getLength() +", depth " + c.getDepth() + ")");
+                logger.info(" " + c + " (length " + c.getLength() +", depth " + c.getDepth() + ")");
             }
         }
 
-        System.out.println("horizontal expansion: " + minimumHorizontalExpansion + " to " + maximumHorizontalExpansion);
         System.out.println("size of candidate set: " + candidates.size());
         printStatistics(true);
@@ -724,16 +613,13 @@
             // searchTree += expandedNodeString + "\n";
             System.out.println(expandedNodeString);
             System.out.println("algorithm runtime " + Helper.prettyPrintNanoSeconds(algorithmRuntime));
-            String expansionString = "horizontal expansion: " + minimumHorizontalExpansion + " to " + maximumHorizontalExpansion;
-            // searchTree += expansionString + "\n";
-            System.out.println(expansionString);
 
             System.out.println("size of candidate set: " + candidates.size());
             // System.out.println("properness max recursion depth: " + maxRecDepth);
             // System.out.println("max. number of one-step refinements: " + maxNrOfRefinements);
             // System.out.println("max. number of children of a node: " + maxNrOfChildren);
         }
 
-        if(showBenchmarkInformation) {
+        if(computeBenchmarkInformation) {
 
             long reasoningTime = rs.getOverallReasoningTimeNs();
@@ -753,15 +639,11 @@
             double onnfTimePercentage = 100 * ConceptTransformation.onnfTimeNs/(double)algorithmRuntime;
             double shorteningTimePercentage = 100 * ConceptTransformation.shorteningTimeNs/(double)algorithmRuntime;
 
-            // nur temporär
-            double someTimePercentage = 100 * someTimeNs/(double)algorithmRuntime;
-            
             System.out.println("reasoning percentage: " + df.format(reasoningPercentage) + "%");
             System.out.println(" subsumption check time: " + df.format(subPercentage) + "%");
             System.out.println("proper calculation percentage (wo. reasoning): " + df.format(propPercentage) + "%");
             System.out.println(" deletion time percentage: " + df.format(deletionPercentage) + "%");
             System.out.println(" refinement calculation percentage: " + df.format(refinementPercentage) + "%");
-            System.out.println(" some time percentage: " + df.format(someTimePercentage) + "% " + Helper.prettyPrintNanoSeconds(someTimeNs) + " " + someCount + " times");
             System.out.println(" m calculation percentage: " + df.format(mComputationTimePercentage) + "%");
             System.out.println(" top calculation percentage: " + df.format(topComputationTimePercentage) + "%");
             System.out.println(" redundancy check percentage: " + df.format(redundancyCheckPercentage) + "%");
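The run loop and the solution printout now go through log4j (logger.debug for per-loop messages, logger.info for solutions) instead of System.out. Assuming log4j 1.2 is on the classpath, a caller could make the new debug output visible roughly as follows; this is an illustrative sketch, not code from this repository:

import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

// Minimal illustrative setup: route log output to the console and enable
// DEBUG for the learner class so that messages such as
// "--- loop n finished ---" become visible.
public class LoggingSetupSketch {
    public static void main(String[] args) {
        BasicConfigurator.configure();
        Logger.getLogger("org.dllearner.algorithms.refexamples.ExampleBasedROLearner")
                .setLevel(Level.DEBUG);
    }
}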
Copied: trunk/src/php-client-old (from rev 425, trunk/src/php-client)

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.