From: <jen...@us...> - 2010-07-14 08:58:29
|
Revision: 2197 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2197&view=rev Author: jenslehmann Date: 2010-07-14 08:58:23 +0000 (Wed, 14 Jul 2010) Log Message: ----------- - further work on unit tests for heuristics of class learning problem - max. execution time of computing a heuristic value can now be configured in class learning problem (prevents the algorithm from getting stuck when a heuristic value is very expensive to compute) Modified Paths: -------------- trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java Modified: trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java =================================================================== --- trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java 2010-07-13 17:02:04 UTC (rev 2196) +++ trunk/src/dl-learner/org/dllearner/core/configurators/ClassLearningProblemConfigurator.java 2010-07-14 08:58:23 UTC (rev 2197) @@ -108,6 +108,15 @@ public boolean getCheckConsistency() { return (Boolean) ComponentManager.getInstance().getConfigOptionValue(classLearningProblem, "checkConsistency") ; } +/** +* maxExecutionTimeInSeconds algorithm will stop after specified seconds. +* mandatory: false| reinit necessary: true +* default value: 10 +* @return int +**/ +public int getMaxExecutionTimeInSeconds() { +return (Integer) ComponentManager.getInstance().getConfigOptionValue(classLearningProblem, "maxExecutionTimeInSeconds") ; +} /** * @param classToDescribe class of which a description should be learned. 
@@ -162,6 +171,15 @@ ComponentManager.getInstance().applyConfigEntry(classLearningProblem, "checkConsistency", checkConsistency); reinitNecessary = true; } +/** +* @param maxExecutionTimeInSeconds algorithm will stop after specified seconds. +* mandatory: false| reinit necessary: true +* default value: 10 +**/ +public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds) { +ComponentManager.getInstance().applyConfigEntry(classLearningProblem, "maxExecutionTimeInSeconds", maxExecutionTimeInSeconds); +reinitNecessary = true; +} /** * true, if this component needs reinitializsation. Modified: trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2010-07-13 17:02:04 UTC (rev 2196) +++ trunk/src/dl-learner/org/dllearner/learningproblems/ClassLearningProblem.java 2010-07-14 08:58:23 UTC (rev 2197) @@ -1,5 +1,5 @@ /** - * Copyright (C) 2007-2009, Jens Lehmann + * Copyright (C) 2007-2010, Jens Lehmann * * This file is part of DL-Learner. 
* @@ -34,6 +34,7 @@ import org.dllearner.core.ReasonerComponent; import org.dllearner.core.configurators.ClassLearningProblemConfigurator; import org.dllearner.core.options.BooleanConfigOption; +import org.dllearner.core.options.CommonConfigOptions; import org.dllearner.core.options.ConfigOption; import org.dllearner.core.options.DoubleConfigOption; import org.dllearner.core.options.StringConfigOption; @@ -45,6 +46,7 @@ import org.dllearner.core.owl.NamedClass; import org.dllearner.core.owl.Negation; import org.dllearner.core.owl.SubClassAxiom; +import org.dllearner.learningproblems.Heuristics.HeuristicType; import org.dllearner.utilities.Helper; /** @@ -59,36 +61,28 @@ private static Logger logger = Logger.getLogger(ClassLearningProblem.class); private long nanoStartTime; private static int maxExecutionTimeInSeconds = 10; - - // TODO: naming needs to be cleaned up for consistency: - // coverage => recall - // protusion => precision private NamedClass classToDescribe; private List<Individual> classInstances; private TreeSet<Individual> classInstancesSet; private boolean equivalence = true; private ClassLearningProblemConfigurator configurator; - // approximation of accuracy +- 0.05 % + // approximation of accuracy private double approx = 0.05; private boolean useApproximations; -// private boolean useFMeasure; - // factor for higher weight on coverage (needed for subclass learning) + // factor for higher weight on recall (needed for subclass learning) private double coverageFactor; // instances of super classes excluding instances of the class itself private List<Individual> superClassInstances; // instances of super classes including instances of the class itself private List<Individual> classAndSuperClassInstances; - // specific variables for generalised F-measure -// private Set<Individual> dcPos; => not need, is the same as classInstances private TreeSet<Individual> negatedClassInstances; - private enum HeuristicType { PRED_ACC, OWN, JACCARD, FMEASURE, 
GEN_FMEASURE }; - private HeuristicType heuristic = HeuristicType.OWN; + private HeuristicType heuristic = HeuristicType.AMEASURE; @Override public ClassLearningProblemConfigurator getConfigurator(){ @@ -116,7 +110,8 @@ accMethod.setAllowedValues(new String[] {"standard", "fmeasure", "pred_acc", "generalised_fmeasure", "jaccard"}); options.add(accMethod); BooleanConfigOption consistency = new BooleanConfigOption("checkConsistency", "Specify whether to check consistency for solution candidates. This is convenient for user interfaces, but can be performance intensive.", true); - options.add(consistency); + options.add(consistency); + options.add(CommonConfigOptions.maxExecutionTimeInSeconds(10)); return options; } @@ -131,7 +126,7 @@ String accM = configurator.getAccuracyMethod(); if(accM.equals("standard")) { - heuristic = HeuristicType.OWN; + heuristic = HeuristicType.AMEASURE; } else if(accM.equals("fmeasure")) { heuristic = HeuristicType.FMEASURE; } else if(accM.equals("generalised_fmeasure")) { @@ -142,7 +137,7 @@ heuristic = HeuristicType.PRED_ACC; } - if(useApproximations && !(heuristic.equals(HeuristicType.OWN) || heuristic.equals(HeuristicType.FMEASURE))) { + if(useApproximations && !(heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE))) { throw new ComponentInitException("Approximations only supported for F-Measure or Standard-Measure. It is unsupported for \"" + accM + ".\""); } @@ -161,6 +156,7 @@ classInstancesSet = new TreeSet<Individual>(classInstances); equivalence = (configurator.getType().equals("equivalence")); + maxExecutionTimeInSeconds = configurator.getMaxExecutionTimeInSeconds(); if(equivalence) { coverageFactor = 1; @@ -199,25 +195,13 @@ // System.out.println(classInstances.size() + " " + superClassInstances.size()); } - - /** - * Computes the fraction of the instances of the class to learn, which - * is covered by the given description. - * @param description The description for which to compute coverage. 
- * @return The class coverage (between 0 and 1). - */ -// public double getCoverage(Description description) { -// int instancesCovered = 0; -// for(Individual instance : classInstances) { -// if(reasoner.hasType(description, instance)) { -// instancesCovered++; -// } -// } -// return instancesCovered/(double)classInstances.size(); -// } - + @Override public ClassScore computeScore(Description description) { + + // TODO: reuse code to ensure that we never return inconsistent results + // between getAccuracy, getAccuracyOrTooWeak and computeScore + // overhang Set<Individual> additionalInstances = new TreeSet<Individual>(); for(Individual ind : superClassInstances) { @@ -234,16 +218,16 @@ } } - double coverage = coveredInstances.size()/(double)classInstances.size(); - double protusion = (additionalInstances.size() + coveredInstances.size() == 0) ? 0 : coveredInstances.size()/(double)(coveredInstances.size()+additionalInstances.size()); + double recall = coveredInstances.size()/(double)classInstances.size(); + double precision = (additionalInstances.size() + coveredInstances.size() == 0) ? 0 : coveredInstances.size()/(double)(coveredInstances.size()+additionalInstances.size()); // for each description with less than 100% coverage, we check whether it is // leads to an inconsistent knowledge base double acc = 0; if(heuristic.equals(HeuristicType.FMEASURE)) { - acc = getFMeasure(coverage, protusion); - } else if(heuristic.equals(HeuristicType.OWN)) { - acc = getAccuracy(coverage, protusion); + acc = getFMeasure(recall, precision); + } else if(heuristic.equals(HeuristicType.AMEASURE)) { + acc = getAccuracy(recall, precision); } else { // TODO: some superfluous instance checks are required to compute accuracy => // move accuracy computation here if possible @@ -264,10 +248,10 @@ boolean isConsistent = followsFromKB || isConsistent(description); // double acc = useFMeasure ? 
getFMeasure(coverage, protusion) : getAccuracy(coverage, protusion); - return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), coverage, additionalInstances, protusion, acc, isConsistent, followsFromKB); + return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc, isConsistent, followsFromKB); } else { - return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), coverage, additionalInstances, protusion, acc); + return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc); } } @@ -280,27 +264,8 @@ */ @Override public double getAccuracy(Description description) { - - // overhang - int additionalInstances = 0; - for(Individual ind : superClassInstances) { - if(reasoner.hasType(description, ind)) { - additionalInstances++; - } - } - - // coverage - int coveredInstances = 0; - for(Individual ind : classInstances) { - if(reasoner.hasType(description, ind)) { - coveredInstances++; - } - } - - double coverage = coveredInstances/(double)classInstances.size(); - double protusion = additionalInstances == 0 ? 
0 : coveredInstances/(double)(coveredInstances+additionalInstances); - - return getAccuracy(coverage, protusion); + // a noise value of 1.0 means that we never return too weak (-1.0) + return getAccuracyOrTooWeak(description, 1.0); } @Override @@ -501,14 +466,10 @@ } } - // TODO: easier computation |R(A) \cap R(C)| / |R(A) \cup R(C)| - - // for Jaccard: covered instances is the intersection of the sets - // R(A) and R(C); Set<Individual> union = Helper.union(classInstancesSet, additionalInstancesSet); - return (1 - (union.size() - coveredInstancesSet.size()) / (double) union.size()); + return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size()); - } else if (heuristic.equals(HeuristicType.OWN) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) { + } else if (heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) { // computing R(C) restricted to relevant instances int additionalInstances = 0; @@ -542,7 +503,7 @@ double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances); - if(heuristic.equals(HeuristicType.OWN)) { + if(heuristic.equals(HeuristicType.AMEASURE)) { // best reachable concept has same recall and precision 1: // 1/t+1 * (t*r + 1) if((coverageFactor*recall+1)/(double)(coverageFactor+1) <(1-noise)) { Modified: trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java =================================================================== --- trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java 2010-07-13 17:02:04 UTC (rev 2196) +++ trunk/src/dl-learner/org/dllearner/learningproblems/Heuristics.java 2010-07-14 08:58:23 UTC (rev 2197) @@ -29,6 +29,8 @@ */ public class Heuristics { + public static enum HeuristicType { PRED_ACC, AMEASURE, JACCARD, FMEASURE, GEN_FMEASURE }; + /** * Computes F1-Score. * @param recall Recall. 
@@ -164,4 +166,10 @@ return (noise * nrOfPositiveExamples) < nrOfNegClassifiedPositives; } + // TODO: F-Measure mit bereits gemessenem |R(A) \cap R(C)| und |R(C)\R(A)| soll approximiert werden + public double getFMeasureApproximation() { + // TOOD: return mean and interval length? + return 0; + } + } Modified: trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java =================================================================== --- trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java 2010-07-13 17:02:04 UTC (rev 2196) +++ trunk/src/dl-learner/org/dllearner/test/junit/HeuristicTests.java 2010-07-14 08:58:23 UTC (rev 2197) @@ -19,10 +19,23 @@ */ package org.dllearner.test.junit; +import static org.junit.Assert.*; + +import java.net.MalformedURLException; +import java.net.URL; + +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.ComponentManager; +import org.dllearner.core.KnowledgeSource; +import org.dllearner.core.ReasonerComponent; import org.dllearner.core.owl.ClassAssertionAxiom; +import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Individual; import org.dllearner.core.owl.KB; import org.dllearner.core.owl.NamedClass; +import org.dllearner.kb.KBFile; +import org.dllearner.learningproblems.ClassLearningProblem; +import org.dllearner.reasoning.OWLAPIReasoner; import org.junit.Test; /** @@ -33,22 +46,80 @@ */ public class HeuristicTests { + // when comparing heuristic values, this is the maximum allowed difference between actual and returned value + // (there can always be precision errors, so cannot assume that actual and returned values are exactly equal) + private static double delta = 0.000001; + @Test - public void classLearningTests() { + public void classLearningTests() throws ComponentInitException, MalformedURLException { // create artificial ontology KB kb = new KB(); + String ns = "http://dl-learner.org/junit/"; NamedClass[] nc = new NamedClass[5]; for(int i=0; i<5; i++) { - nc[i] 
= new NamedClass("A" + i); + nc[i] = new NamedClass(ns + "A" + i); } - Individual[] ind = new Individual[5]; + Individual[] ind = new Individual[100]; for(int i=0; i<100; i++) { - ind[i] = new Individual("a" + i); + ind[i] = new Individual(ns + "i" + i); } + // A1 has instances i0 to i19 for(int i=0; i<20; i++) { kb.addAxiom(new ClassAssertionAxiom(nc[0],ind[i])); } + + // A2 has instances i10 to i29 + for(int i=10; i<30; i++) { + kb.addAxiom(new ClassAssertionAxiom(nc[1],ind[i])); + } + + ComponentManager cm = ComponentManager.getInstance(); + KnowledgeSource ks = new KBFile(kb); + ReasonerComponent reasoner = cm.reasoner(OWLAPIReasoner.class, ks); + ClassLearningProblem problem = cm.learningProblem(ClassLearningProblem.class, reasoner); + ks.init(); + reasoner.init(); + + // evaluate A2 wrt. A1 using Jaccard + HeuristicTests.configureClassLP(problem, nc[0], "jaccard"); + // the value should be 10 (i10-i19) divided by 30 (i0-i29) + assertEqualsClassLP(problem, nc[1], 1/(double)3); + } + // the class learning problem provides several ways to get the accuracy of a description, this method + // tests all of those + private static void assertEqualsClassLP(ClassLearningProblem problem, Description description, double accuracy) { + assertEquals(accuracy, problem.getAccuracy(description), delta); + assertEquals(accuracy, problem.getAccuracyOrTooWeak(description, 1.0), delta); + assertEquals(accuracy, problem.computeScore(description).getAccuracy(), delta); + assertEquals(accuracy, problem.evaluate(description).getAccuracy(), delta); + } + + // convencience method to set the learning problem to a desired configuration (approximations disabled) + private static void configureClassLP(ClassLearningProblem problem, NamedClass classToDescribe, String accuracyMethod) throws ComponentInitException { + try { + problem.getConfigurator().setClassToDescribe(new URL(classToDescribe.getName())); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + 
problem.getConfigurator().setAccuracyMethod(accuracyMethod); + problem.getConfigurator().setUseApproximations(false); + problem.init(); + } + + // convencience method to set the learning problem to a desired configuration + private static void configureClassLP(ClassLearningProblem problem, NamedClass classToDescribe, String accuracyMethod, boolean useApproximations, double approxAccuracy) throws ComponentInitException { + try { + problem.getConfigurator().setClassToDescribe(new URL(classToDescribe.getName())); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + problem.getConfigurator().setAccuracyMethod(accuracyMethod); + problem.getConfigurator().setUseApproximations(useApproximations); + problem.getConfigurator().setApproxAccuracy(approxAccuracy); + problem.init(); + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |