From: <lor...@us...> - 2014-05-02 19:18:37
|
Revision: 4255 http://sourceforge.net/p/dl-learner/code/4255 Author: lorenz_b Date: 2014-05-02 19:18:34 +0000 (Fri, 02 May 2014) Log Message: ----------- Continued disjunctive version of QTL. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/EvaluatedQueryTree.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGGMultithreaded.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/rendering/ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/rendering/Edge.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/rendering/Vertex.java Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2.java 2014-05-02 19:17:05 UTC (rev 4254) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2.java 2014-05-02 19:18:34 UTC (rev 4255) @@ -3,9 +3,13 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.PriorityQueue; import java.util.Queue; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; @@ -27,11 +31,16 @@ import org.dllearner.core.owl.Individual; import org.dllearner.kb.OWLFile; import org.dllearner.learningproblems.AxiomScore; -import org.dllearner.learningproblems.Heuristics; import org.dllearner.learningproblems.PosNegLP; +import org.dllearner.learningproblems.QueryTreeScore; import org.dllearner.utilities.owl.DLLearnerDescriptionConvertVisitor; +import org.semanticweb.owlapi.io.ToStringRenderer; +import org.semanticweb.owlapi.util.SimpleShortFormProvider; import org.springframework.beans.factory.annotation.Autowired; +import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl; + +import com.google.common.collect.Sets; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.jamonapi.Monitor; @@ -50,8 +59,10 @@ private double currentlyBestScore = 0d; - private List<QueryTree<String>> posExamples; - private List<QueryTree<String>> negExamples; + private List<QueryTree<String>> currentPosExampleTrees; + private List<QueryTree<String>> currentNegExampleTrees; + + private Map<QueryTree<String>, Individual> tree2Indivual; private double coverageWeight = 1; private double specifityWeight = 0; @@ -67,6 +78,9 @@ private volatile boolean stop; private boolean isRunning; + private Monitor subMon; + private Monitor lggMon; + public QTL2() {} public QTL2(PosNegLP learningProblem, AbstractReasonerComponent reasoner) throws LearningProblemUnsupportedException{ @@ -87,17 +101,31 @@ public void init() throws ComponentInitException { logger.info("Initializing..."); treeCache = new QueryTreeCache(model); + tree2Indivual = new HashMap<QueryTree<String>, Individual>(lp.getPositiveExamples().size()+lp.getNegativeExamples().size()); - posExamples = new ArrayList<QueryTree<String>>(); - negExamples = new ArrayList<QueryTree<String>>(); + currentPosExampleTrees = new ArrayList<QueryTree<String>>(lp.getPositiveExamples().size()); + currentNegExampleTrees = new ArrayList<QueryTree<String>>(lp.getNegativeExamples().size()); //get the query trees + QueryTree<String> queryTree; for (Individual ind : lp.getPositiveExamples()) { - posExamples.add(treeCache.getQueryTree(ind.getName())); + queryTree = treeCache.getQueryTree(ind.getName()); + tree2Indivual.put(queryTree, ind); + currentPosExampleTrees.add(queryTree); } for (Individual ind : lp.getNegativeExamples()) { - negExamples.add(treeCache.getQueryTree(ind.getName())); + queryTree = treeCache.getQueryTree(ind.getName()); + tree2Indivual.put(queryTree, ind); + currentNegExampleTrees.add(treeCache.getQueryTree(ind.getName())); } + + //some logging + subMon = MonitorFactory.getTimeMonitor("subsumption-mon"); + lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); + + //console rendering of class expressions + ToStringRenderer.getInstance().setRenderer(new ManchesterOWLSyntaxOWLObjectRendererImpl()); + ToStringRenderer.getInstance().setShortFormProvider(new SimpleShortFormProvider()); } /* (non-Javadoc) @@ -111,11 +139,8 @@ long startTime = System.currentTimeMillis(); currentlyBestScore = 0d; - Monitor subMon = MonitorFactory.getTimeMonitor("subsumption-mon"); - Monitor lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); + initTodoList(currentPosExampleTrees, currentNegExampleTrees); - init(posExamples, negExamples); - EvaluatedQueryTree<String> currentElement; do{ logger.trace("TODO list size: " + todoList.size()); @@ -134,7 +159,7 @@ //evaluate the LGG - EvaluatedQueryTree<String> solution = evaluate(lgg); + EvaluatedQueryTree<String> solution = evaluate(lgg, true); if(solution.getScore() >= currentlyBestScore){ //add to todo list, if not already contained in todo list or solution list @@ -226,35 +251,48 @@ stop = true; } - private EvaluatedQueryTree<String> evaluate(QueryTree<String> tree){ + private EvaluatedQueryTree<String> evaluate(QueryTree<String> tree, boolean useSpecifity){ //1. get a score for the coverage = recall oriented //compute positive examples which are not covered by LGG - Collection<QueryTree<String>> uncoveredPositiveExamples = getUncoveredTrees(tree, posExamples); + Collection<QueryTree<String>> uncoveredPositiveExampleTrees = getUncoveredTrees(tree, currentPosExampleTrees); + Set<Individual> uncoveredPosExamples = new HashSet<Individual>(); + for (QueryTree<String> queryTree : uncoveredPositiveExampleTrees) { + uncoveredPosExamples.add(tree2Indivual.get(queryTree)); + } //compute negative examples which are covered by LGG - Collection<QueryTree<String>> coveredNegativeExamples = getCoveredTrees(tree, negExamples); + Collection<QueryTree<String>> coveredNegativeExampleTrees = getCoveredTrees(tree, currentNegExampleTrees); + Set<Individual> coveredNegExamples = new HashSet<Individual>(); + for (QueryTree<String> queryTree : coveredNegativeExampleTrees) { + coveredNegExamples.add(tree2Indivual.get(queryTree)); + } //compute score - int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); - double recall = coveredPositiveExamples / (double)posExamples.size(); - double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) + int coveredPositiveExamples = currentPosExampleTrees.size() - uncoveredPositiveExampleTrees.size(); + double recall = coveredPositiveExamples / (double)currentPosExampleTrees.size(); + double precision = (coveredNegativeExampleTrees.size() + coveredPositiveExamples == 0) ? 0 - : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); + : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExampleTrees.size()); double coverageScore = recall;//Heuristics.getFScore(recall, precision); //2. get a score for the specifity of the query, i.e. how many edges/nodes = precision oriented - int numberOfSpecificNodes = 0; + int nrOfSpecificNodes = 0; for (QueryTree<String> childNode : tree.getChildrenClosure()) { if(!childNode.getUserObject().equals("?")){ - numberOfSpecificNodes++; + nrOfSpecificNodes++; } } - double specifityScore = Math.log(numberOfSpecificNodes); + double specifityScore = Math.log(nrOfSpecificNodes); //3.compute the total score double score = coverageWeight * coverageScore + specifityWeight * specifityScore; - EvaluatedQueryTree<String> evaluatedTree = new EvaluatedQueryTree<String>(tree, uncoveredPositiveExamples, coveredNegativeExamples, score); + QueryTreeScore queryTreeScore = new QueryTreeScore(score, coverageScore, + uncoveredPosExamples, Sets.difference(lp.getPositiveExamples(), uncoveredPosExamples), + coveredNegExamples, Sets.difference(lp.getNegativeExamples(), coveredNegExamples), + specifityScore, nrOfSpecificNodes); + EvaluatedQueryTree<String> evaluatedTree = new EvaluatedQueryTree<String>(tree, uncoveredPositiveExampleTrees, coveredNegativeExampleTrees, queryTreeScore); + return evaluatedTree; } @@ -297,7 +335,7 @@ * Firstly, distinct trees are computed and afterwards, for each tree a score is computed. * @param trees */ - private void init(List<QueryTree<String>> posExamples, List<QueryTree<String>> negExamples){ + private void initTodoList(List<QueryTree<String>> posExamples, List<QueryTree<String>> negExamples){ todoList = new PriorityQueue<EvaluatedQueryTree<String>>(); solutions = new TreeSet<EvaluatedQueryTree<String>>(); // EvaluatedQueryTree<String> dummy = new EvaluatedQueryTree<String>(new QueryTreeImpl<String>((N)"TOP"), trees, 0d); @@ -319,19 +357,8 @@ } } for (QueryTree<String> queryTree : distinctTrees) {//System.out.println(queryTree.getStringRepresentation()); - //compute positive examples which are not covered by LGG - Collection<QueryTree<String>> uncoveredPositiveExamples = getUncoveredTrees(queryTree, posExamples); - //compute negative examples which are covered by LGG - Collection<QueryTree<String>> coveredNegativeExamples = getCoveredTrees(queryTree, negExamples); - //compute score - int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); - double recall = coveredPositiveExamples / (double)posExamples.size(); - double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) - ? 0 - : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); - - double score = Heuristics.getFScore(recall, precision); - todoList.add(new EvaluatedQueryTree<String>(queryTree, uncoveredPositiveExamples, coveredNegativeExamples, score)); + EvaluatedQueryTree<String> evaluatedQueryTree = evaluate(queryTree, false); + todoList.add(evaluatedQueryTree); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java 2014-05-02 19:17:05 UTC (rev 4254) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java 2014-05-02 19:18:34 UTC (rev 4255) @@ -3,10 +3,15 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.PriorityQueue; import java.util.Queue; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; @@ -26,10 +31,11 @@ import org.dllearner.core.LearningProblemUnsupportedException; import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.Union; import org.dllearner.kb.OWLFile; -import org.dllearner.learningproblems.AxiomScore; import org.dllearner.learningproblems.Heuristics; import org.dllearner.learningproblems.PosNegLP; +import org.dllearner.learningproblems.QueryTreeScore; import org.dllearner.utilities.owl.DLLearnerDescriptionConvertVisitor; import org.dllearner.utilities.owl.OWLAPIConverter; import org.semanticweb.owlapi.io.ToStringRenderer; @@ -38,6 +44,7 @@ import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl; +import com.google.common.collect.Sets; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.jamonapi.Monitor; @@ -52,15 +59,19 @@ private LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); private Queue<EvaluatedQueryTree<String>> todoList; - private SortedSet<EvaluatedQueryTree<String>> solutions; + private SortedSet<EvaluatedQueryTree<String>> currentPartialSolutions; private double currentlyBestScore = 0d; - private List<QueryTree<String>> posExamples; - private List<QueryTree<String>> negExamples; + private List<QueryTree<String>> currentPosExampleTrees; + private List<QueryTree<String>> currentNegExampleTrees; + private Set<Individual> currentPosExamples; + private Set<Individual> currentNegExamples; + + private Map<QueryTree<String>, Individual> tree2Individual; - private double coverageWeight = 0.6; - private double specifityWeight = 0.4; + private double coverageWeight = 0.8; + private double specifityWeight = 0.2; private QueryTreeCache treeCache; @@ -75,13 +86,14 @@ private Monitor subMon; private Monitor lggMon; + + private List<EvaluatedQueryTree<String>> partialSolutions; public QTL2Disjunctive() {} public QTL2Disjunctive(PosNegLP learningProblem, AbstractReasonerComponent reasoner) throws LearningProblemUnsupportedException{ this.lp = learningProblem; this.reasoner = reasoner; - } public QTL2Disjunctive(PosNegLP lp, Model model) { @@ -90,7 +102,7 @@ } public EvaluatedQueryTree<String> getBestSolution(){ - return solutions.first(); + return currentPartialSolutions.first(); } /* (non-Javadoc) @@ -100,17 +112,33 @@ public void init() throws ComponentInitException { logger.info("Initializing..."); treeCache = new QueryTreeCache(model); + tree2Individual = new HashMap<QueryTree<String>, Individual>(lp.getPositiveExamples().size()+lp.getNegativeExamples().size()); - posExamples = new ArrayList<QueryTree<String>>(); - negExamples = new ArrayList<QueryTree<String>>(); + currentPosExampleTrees = new ArrayList<QueryTree<String>>(lp.getPositiveExamples().size()); + currentNegExampleTrees = new ArrayList<QueryTree<String>>(lp.getNegativeExamples().size()); + currentPosExamples = new TreeSet<Individual>(lp.getPositiveExamples()); + currentNegExamples = new TreeSet<Individual>(lp.getNegativeExamples()); //get the query trees + QueryTree<String> queryTree; for (Individual ind : lp.getPositiveExamples()) { - posExamples.add(treeCache.getQueryTree(ind.getName())); + queryTree = treeCache.getQueryTree(ind.getName()); + tree2Individual.put(queryTree, ind); + currentPosExampleTrees.add(queryTree); } for (Individual ind : lp.getNegativeExamples()) { - negExamples.add(treeCache.getQueryTree(ind.getName())); + queryTree = treeCache.getQueryTree(ind.getName()); + tree2Individual.put(queryTree, ind); + currentNegExampleTrees.add(queryTree); } + + //some logging + subMon = MonitorFactory.getTimeMonitor("subsumption-mon"); + lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); + + //console rendering of class expressions + ToStringRenderer.getInstance().setRenderer(new ManchesterOWLSyntaxOWLObjectRendererImpl()); + ToStringRenderer.getInstance().setShortFormProvider(new SimpleShortFormProvider()); } /* (non-Javadoc) @@ -119,49 +147,79 @@ @Override public void start() { logger.info("Running..."); - stop = false; - isRunning = true; long startTime = System.currentTimeMillis(); - currentlyBestScore = 0d; - subMon = MonitorFactory.getTimeMonitor("subsumption-mon"); - lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); + reset(); - - //outer loop: compute LGG, pick best solution and remove all covered positive and negative examples - List<EvaluatedQueryTree<String>> unionSolutions = new ArrayList<EvaluatedQueryTree<String>>(); + int i = 1; do { + logger.info(i++ + ". iteration..."); + logger.info("#Remaining pos. examples:" + currentPosExampleTrees.size()); + logger.info("#Remaining neg. examples:" + currentNegExampleTrees.size()); + //compute LGG computeLGG(); - //pick best solution computed so far - EvaluatedQueryTree<String> bestSolution = solutions.first(); - unionSolutions.add(bestSolution); - logger.info("#Uncovered pos. examples:" + bestSolution.getFalseNegatives().size()); + //pick best (partial) solution computed so far + EvaluatedQueryTree<String> bestPartialSolution = currentPartialSolutions.first(); + partialSolutions.add(bestPartialSolution); //remove all covered examples QueryTree<String> tree; - for (Iterator<QueryTree<String>> iterator = posExamples.iterator(); iterator.hasNext();) { + for (Iterator<QueryTree<String>> iterator = currentPosExampleTrees.iterator(); iterator.hasNext();) { tree = iterator.next(); - if(tree.isSubsumedBy(bestSolution.getTree())){ + if(tree.isSubsumedBy(bestPartialSolution.getTree())){ iterator.remove(); + currentPosExamples.remove(tree2Individual.get(tree)); } } - for (Iterator<QueryTree<String>> iterator = negExamples.iterator(); iterator.hasNext();) { + for (Iterator<QueryTree<String>> iterator = currentNegExampleTrees.iterator(); iterator.hasNext();) { tree = iterator.next(); - if(tree.isSubsumedBy(bestSolution.getTree())){ + if(tree.isSubsumedBy(bestPartialSolution.getTree())){ iterator.remove(); + currentNegExamples.remove(tree2Individual.get(tree)); } } - } while (!(stop || posExamples.isEmpty())); + } while (!(stop || currentPosExampleTrees.isEmpty())); + isRunning = false; + long endTime = System.currentTimeMillis(); + logger.info("Finished in " + (endTime-startTime) + "ms."); + + EvaluatedDescription combinedSolution = buildCombinedSolution(); + System.out.println(OWLAPIConverter.getOWLAPIDescription(combinedSolution.getDescription())); + } + private EvaluatedDescription buildCombinedSolution(){ + List<Description> disjuncts = new ArrayList<Description>(); + Description partialDescription; + for (EvaluatedQueryTree<String> partialSolution : partialSolutions) { + partialDescription = DLLearnerDescriptionConvertVisitor.getDLLearnerDescription( + partialSolution.getTree().asOWLClassExpression(LiteralNodeConversionStrategy.FACET_RESTRICTION)); + disjuncts.add(partialDescription); + } + Description unionDescription = new Union(disjuncts); + + return new EvaluatedDescription(unionDescription, null); + } + + private void reset(){ + partialSolutions = new ArrayList<EvaluatedQueryTree<String>>(); + + stop = false; + isRunning = true; + + subMon.reset(); + lggMon.reset(); + } + private void computeLGG(){ currentlyBestScore = 0d; - initTodoList(posExamples, negExamples); + initTodoList(currentPosExampleTrees, currentNegExampleTrees); + long startTime = System.currentTimeMillis(); EvaluatedQueryTree<String> currentElement; do{ @@ -178,28 +236,27 @@ lggMon.stop(); //evaluate the LGG - EvaluatedQueryTree<String> solution = evaluate(lgg); + EvaluatedQueryTree<String> solution = evaluate(lgg, true); if(solution.getScore() >= currentlyBestScore){ //add to todo list, if not already contained in todo list or solution list todo(solution); if(solution.getScore() > currentlyBestScore){ - logger.info("Got better solution:" + currentlyBestScore); + logger.info("Got better solution:" + solution.getTreeScore()); } currentlyBestScore = solution.getScore(); } } - solutions.add(currentElement); + currentPartialSolutions.add(currentElement); // todoList.remove(currentElement); } while(!terminationCriteriaSatisfied()); long endTime = System.currentTimeMillis(); logger.info("Finished in " + (endTime-startTime) + "ms."); EvaluatedDescription bestSolution = getCurrentlyBestEvaluatedDescription(); - ToStringRenderer.getInstance().setRenderer(new ManchesterOWLSyntaxOWLObjectRendererImpl()); - ToStringRenderer.getInstance().setShortFormProvider(new SimpleShortFormProvider()); - logger.info("Best solution:\n" + OWLAPIConverter.getOWLAPIDescription(bestSolution.getDescription()) + "\n(" + bestSolution.getAccuracy() + ")"); + logger.info("Best solution:\n" + OWLAPIConverter.getOWLAPIDescription(bestSolution.getDescription()) + "\n(" + bestSolution.getScore() + ")"); + logger.trace("LGG time: " + lggMon.getTotal() + "ms"); logger.trace("Avg. LGG time: " + lggMon.getAvg() + "ms"); logger.trace("#LGG computations: " + lggMon.getHits()); @@ -229,10 +286,8 @@ */ @Override public EvaluatedDescription getCurrentlyBestEvaluatedDescription() { - EvaluatedQueryTree<String> bestSolution = solutions.first(); - Description description = DLLearnerDescriptionConvertVisitor.getDLLearnerDescription( - bestSolution.getTree().asOWLClassExpression(LiteralNodeConversionStrategy.FACET_RESTRICTION)); - return new EvaluatedDescription(description, new AxiomScore(bestSolution.getScore())); + EvaluatedQueryTree<String> bestSolution = currentPartialSolutions.first(); + return bestSolution.asEvaluatedDescription(); } /* (non-Javadoc) @@ -270,35 +325,55 @@ return treeCache; } - private EvaluatedQueryTree<String> evaluate(QueryTree<String> tree){ + private EvaluatedQueryTree<String> evaluate(QueryTree<String> tree, boolean useSpecifity){ //1. get a score for the coverage = recall oriented //compute positive examples which are not covered by LGG - Collection<QueryTree<String>> uncoveredPositiveExamples = getUncoveredTrees(tree, posExamples); + Set<QueryTree<String>> uncoveredPositiveExampleTrees = getUncoveredTrees(tree, currentPosExampleTrees); + Set<Individual> uncoveredPosExamples = new HashSet<Individual>(); + for (QueryTree<String> queryTree : uncoveredPositiveExampleTrees) { + uncoveredPosExamples.add(tree2Individual.get(queryTree)); + } //compute negative examples which are covered by LGG - Collection<QueryTree<String>> coveredNegativeExamples = getCoveredTrees(tree, negExamples); + Collection<QueryTree<String>> coveredNegativeExampleTrees = getCoveredTrees(tree, currentNegExampleTrees); + Set<Individual> coveredNegExamples = new HashSet<Individual>(); + for (QueryTree<String> queryTree : coveredNegativeExampleTrees) { + coveredNegExamples.add(tree2Individual.get(queryTree)); + } //compute score - int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); - double recall = coveredPositiveExamples / (double)posExamples.size(); - double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) + int coveredPositiveExamples = currentPosExampleTrees.size() - uncoveredPositiveExampleTrees.size(); + double recall = coveredPositiveExamples / (double)currentPosExampleTrees.size(); + double precision = (coveredNegativeExampleTrees.size() + coveredPositiveExamples == 0) ? 0 - : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); + : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExampleTrees.size()); - double coverageScore = recall;//Heuristics.getFScore(recall, precision); + double coverageScore = Heuristics.getFScore(recall, precision); //2. get a score for the specifity of the query, i.e. how many edges/nodes = precision oriented - int numberOfSpecificNodes = 0; + int nrOfSpecificNodes = 0; for (QueryTree<String> childNode : tree.getChildrenClosure()) { if(!childNode.getUserObject().equals("?")){ - numberOfSpecificNodes++; + nrOfSpecificNodes++; } } - double specifityScore = Math.log(numberOfSpecificNodes); + double specifityScore = 0d; + if(useSpecifity){ + specifityScore = Math.log(nrOfSpecificNodes); + } //3.compute the total score double score = coverageWeight * coverageScore + specifityWeight * specifityScore; - EvaluatedQueryTree<String> evaluatedTree = new EvaluatedQueryTree<String>(tree, uncoveredPositiveExamples, coveredNegativeExamples, score); + QueryTreeScore queryTreeScore = new QueryTreeScore(score, coverageScore, + Sets.difference(currentPosExamples, uncoveredPosExamples), uncoveredPosExamples, + coveredNegExamples, Sets.difference(currentNegExamples, coveredNegExamples), + specifityScore, nrOfSpecificNodes); +// QueryTreeScore queryTreeScore = new QueryTreeScore(score, coverageScore, +// null,null,null,null, +// specifityScore, nrOfSpecificNodes); + + EvaluatedQueryTree<String> evaluatedTree = new EvaluatedQueryTree<String>(tree, uncoveredPositiveExampleTrees, coveredNegativeExampleTrees, queryTreeScore); + return evaluatedTree; } @@ -308,8 +383,8 @@ * @param allTrees * @return */ - private Collection<QueryTree<String>> getCoveredTrees(QueryTree<String> tree, List<QueryTree<String>> trees){ - Collection<QueryTree<String>> coveredTrees = new ArrayList<QueryTree<String>>(); + private List<QueryTree<String>> getCoveredTrees(QueryTree<String> tree, List<QueryTree<String>> trees){ + List<QueryTree<String>> coveredTrees = new ArrayList<QueryTree<String>>(); for (QueryTree<String> queryTree : trees) { boolean subsumed = queryTree.isSubsumedBy(tree); if(subsumed){ @@ -325,8 +400,8 @@ * @param allTrees * @return */ - private Collection<QueryTree<String>> getUncoveredTrees(QueryTree<String> tree, List<QueryTree<String>> allTrees){ - Collection<QueryTree<String>> uncoveredTrees = new ArrayList<QueryTree<String>>(); + private Set<QueryTree<String>> getUncoveredTrees(QueryTree<String> tree, List<QueryTree<String>> allTrees){ + Set<QueryTree<String>> uncoveredTrees = new LinkedHashSet<QueryTree<String>>(); for (QueryTree<String> queryTree : allTrees) { boolean subsumed = queryTree.isSubsumedBy(tree); if(!subsumed){ @@ -343,7 +418,7 @@ */ private void initTodoList(List<QueryTree<String>> posExamples, List<QueryTree<String>> negExamples){ todoList = new PriorityQueue<EvaluatedQueryTree<String>>(); - solutions = new TreeSet<EvaluatedQueryTree<String>>(); + currentPartialSolutions = new TreeSet<EvaluatedQueryTree<String>>(); // EvaluatedQueryTree<String> dummy = new EvaluatedQueryTree<String>(new QueryTreeImpl<String>((N)"TOP"), trees, 0d); // todoList.add(dummy); //compute distinct trees @@ -363,19 +438,8 @@ } } for (QueryTree<String> queryTree : distinctTrees) {//System.out.println(queryTree.getStringRepresentation()); - //compute positive examples which are not covered by LGG - Collection<QueryTree<String>> uncoveredPositiveExamples = getUncoveredTrees(queryTree, posExamples); - //compute negative examples which are covered by LGG - Collection<QueryTree<String>> coveredNegativeExamples = getCoveredTrees(queryTree, negExamples); - //compute score - int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); - double recall = coveredPositiveExamples / (double)posExamples.size(); - double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) - ? 0 - : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); - - double score = Heuristics.getFScore(recall, precision); - todoList.add(new EvaluatedQueryTree<String>(queryTree, uncoveredPositiveExamples, coveredNegativeExamples, score)); + EvaluatedQueryTree<String> evaluatedQueryTree = evaluate(queryTree, false); + todoList.add(evaluatedQueryTree); } } @@ -384,7 +448,7 @@ } private boolean terminationCriteriaSatisfied(){ - return stop || todoList.isEmpty() || posExamples.isEmpty(); + return stop || todoList.isEmpty() || currentPosExampleTrees.isEmpty(); } /** @@ -399,14 +463,11 @@ } } //check if not already contained in solutions - for (EvaluatedQueryTree<String> evTree : solutions) { + for (EvaluatedQueryTree<String> evTree : currentPartialSolutions) { if(sameTrees(solution.getTree(), evTree.getTree())){ return; } } todoList.add(solution); } - - - } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java 2014-05-02 19:17:05 UTC (rev 4254) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java 2014-05-02 19:18:34 UTC (rev 4255) @@ -210,6 +210,8 @@ addChild(subTree, tree.getEdge(child)); } setIsResourceNode(tree.isResourceNode()); + setIsLiteralNode(tree.isLiteralNode()); + addLiterals(tree.getLiterals()); } public boolean sameType(QueryTree<N> tree){ Added: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/rendering/Edge.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/rendering/Edge.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/rendering/Edge.java 2014-05-02 19:18:34 UTC (rev 4255) @@ -0,0 +1,47 @@ +package org.dllearner.algorithms.qtl.datastructures.rendering; + +public class Edge { + int id; + String label; + + public Edge(int id, String label) { + this.id = id; + this.label = label; + } + + /** + * @return the id + */ + public int getId() { + return id; + } + + /** + * @return the label + */ + public String getLabel() { + return label; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + id; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Edge other = (Edge) obj; + if (id != other.id) + return false; + return true; + } +} \ No newline at end of file Added: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/rendering/Vertex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/rendering/Vertex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/rendering/Vertex.java 2014-05-02 19:18:34 UTC (rev 4255) @@ -0,0 +1,47 @@ +package org.dllearner.algorithms.qtl.datastructures.rendering; + +public class Vertex { + int id; + String label; + + public Vertex(int id, String label) { + this.id = id; + this.label = label; + } + + /** + * @return the id + */ + public int getId() { + return id; + } + + /** + * @return the label + */ + public String getLabel() { + return label; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + id; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Vertex other = (Vertex) obj; + if (id != other.id) + return false; + return true; + } +} \ No newline at end of file Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/EvaluatedQueryTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/EvaluatedQueryTree.java 2014-05-02 19:17:05 UTC (rev 4254) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/EvaluatedQueryTree.java 2014-05-02 19:18:34 UTC (rev 4255) @@ -3,17 +3,21 @@ import java.util.Collection; import org.dllearner.algorithms.qtl.datastructures.QueryTree; -import org.dllearner.learningproblems.ScoreTwoValued; +import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeConversionStrategy; +import org.dllearner.core.EvaluatedDescription; +import org.dllearner.learningproblems.QueryTreeScore; +import org.dllearner.utilities.owl.DLLearnerDescriptionConvertVisitor; public class EvaluatedQueryTree<N> implements Comparable<EvaluatedQueryTree<N>>{ private QueryTree<N> tree; private Collection<QueryTree<N>> falseNegatives; private Collection<QueryTree<N>> falsePositives; - private double score; + private QueryTreeScore score; // private ScoreTwoValued score; - public EvaluatedQueryTree(QueryTree<N> tree, Collection<QueryTree<N>> falseNegatives, Collection<QueryTree<N>> falsePositives, double score) { + public EvaluatedQueryTree(QueryTree<N> tree, Collection<QueryTree<N>> falseNegatives, + Collection<QueryTree<N>> falsePositives, QueryTreeScore score) { this.tree = tree; this.falseNegatives = falseNegatives; this.falsePositives = falsePositives; @@ -44,12 +48,16 @@ } public double getScore() { + return score.getScore(); + } + + public QueryTreeScore getTreeScore() { return score; } @Override public int compareTo(EvaluatedQueryTree<N> other) { - double diff = score - other.getScore(); + double diff = getScore() - other.getScore(); if(diff == 0){ return -1; } else if(diff > 0){ @@ -59,6 +67,11 @@ } } + public EvaluatedDescription asEvaluatedDescription(){ + return new EvaluatedDescription(DLLearnerDescriptionConvertVisitor.getDLLearnerDescription( + getTree().asOWLClassExpression(LiteralNodeConversionStrategy.FACET_RESTRICTION)), score); + } + /* (non-Javadoc) * @see java.lang.Object#toString() */ Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java 2014-05-02 19:17:05 UTC (rev 4254) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java 2014-05-02 19:18:34 UTC (rev 4255) @@ -168,7 +168,7 @@ // } // } // } - if(!lgg.sameType(tree2) || !lgg.getUserObject().equals(tree2.getUserObject())){ + if(!(lgg.sameType(tree2) || lgg.getUserObject().equals(tree2.getUserObject()))){ lgg.setUserObject((N)"?"); lgg.setIsLiteralNode(false); lgg.setIsResourceNode(false); @@ -178,7 +178,7 @@ RDFDatatype d1 = tree1.getDatatype(); RDFDatatype d2 = tree2.getDatatype(); // if(d1 != null && d2 != null && d1 == d2){ - if(d1 == d2){ + if(d1.equals(d2)){ ((QueryTreeImpl<N>)lgg).addLiterals(((QueryTreeImpl<N>)tree1).getLiterals()); ((QueryTreeImpl<N>)lgg).addLiterals(((QueryTreeImpl<N>)tree2).getLiterals()); } @@ -194,6 +194,10 @@ addedChildren = new HashSet<QueryTreeImpl<N>>(); for(QueryTree<N> child1 : tree1.getChildren(edge)){ for(QueryTree<N> child2 : tree2.getChildren(edge)){ +// if(edge.equals("http://dl-learner.org/carcinogenesis#amesTestPositive")){ +// System.out.println(child1); +// System.out.println(child1.isLiteralNode());System.out.println(child2.isLiteralNode() + "\n#######"); +// } lggChild = (QueryTreeImpl<N>) computeLGG(child1, child2, learnFilters); boolean add = true; for(QueryTreeImpl<N> addedChild : addedChildren){ Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2014-05-02 19:17:05 UTC (rev 4254) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2014-05-02 19:18:34 UTC (rev 4255) @@ -1,228 +0,0 @@ -package org.dllearner.algorithms.qtl.operations.lgg; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.PriorityQueue; -import java.util.Queue; -import java.util.SortedSet; -import java.util.TreeSet; - -import org.apache.log4j.Logger; -import org.dllearner.algorithms.qtl.datastructures.QueryTree; -import org.dllearner.learningproblems.Heuristics; - -import com.jamonapi.Monitor; -import com.jamonapi.MonitorFactory; - -public class NoiseSensitiveLGG<N> { - - - private static final Logger logger = Logger.getLogger(NoiseSensitiveLGG.class.getName()); - - private LGGGenerator<N> lggGenerator = new LGGGeneratorImpl<N>(); - - private Queue<EvaluatedQueryTree<N>> todoList; - private SortedSet<EvaluatedQueryTree<N>> solutions; - - private double currentlyBestScore = 0d; - - private List<QueryTree<N>> posExamples; - - private List<QueryTree<N>> negExamples; - - private double coverageWeight = 0.6; - private double specifityWeight = 0.4; - - public NoiseSensitiveLGG() { - } - - public List<EvaluatedQueryTree<N>> computeLGG(List<QueryTree<N>> posExampleTrees){ - return computeLGG(posExampleTrees, Collections.<QueryTree<N>>emptyList()); - } - - public List<EvaluatedQueryTree<N>> computeLGG(List<QueryTree<N>> posExamples, List<QueryTree<N>> negExamples){ - this.posExamples = posExamples; - this.negExamples = negExamples; - - currentlyBestScore = 0d; - - Monitor subMon = MonitorFactory.getTimeMonitor("subsumption-mon"); - Monitor lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); - init(posExamples, negExamples); - EvaluatedQueryTree<N> currentElement; - do{ - logger.trace("TODO list size: " + todoList.size()); - //pick best element from todo list - currentElement = todoList.poll(); - //generate the LGG between the chosen tree and each uncovered positive example - for (QueryTree<N> example : currentElement.getFalseNegatives()) { - QueryTree<N> tree = currentElement.getTree(); - //compute the LGG - lggMon.start(); - QueryTree<N> lgg = lggGenerator.getLGG(tree, example); - lggMon.stop(); - - //evaluate the LGG - EvaluatedQueryTree<N> solution = evaluate(lgg); - - if(solution.getScore() >= currentlyBestScore){ - //add to todo list, if not already contained in todo list or solution list - todo(solution); - currentlyBestScore = solution.getScore(); - } - - } - solutions.add(currentElement); -// todoList.remove(currentElement); - } while(!terminationCriteriaSatisfied()); - logger.trace("LGG time: " + lggMon.getTotal() + "ms"); - logger.trace("Avg. LGG time: " + lggMon.getAvg() + "ms"); - logger.trace("#LGG computations: " + lggMon.getHits()); - logger.trace("Subsumption test time: " + subMon.getTotal() + "ms"); - logger.trace("Avg. subsumption test time: " + subMon.getAvg() + "ms"); - logger.trace("#Subsumption tests: " + subMon.getHits()); - return new ArrayList<EvaluatedQueryTree<N>>(solutions); - } - - private EvaluatedQueryTree<N> evaluate(QueryTree<N> lgg){ - //1. get a score for the coverage = recall oriented - //compute positive examples which are not covered by LGG - Collection<QueryTree<N>> uncoveredPositiveExamples = getUncoveredTrees(lgg, posExamples); - //compute negative examples which are covered by LGG - Collection<QueryTree<N>> coveredNegativeExamples = getCoveredTrees(lgg, negExamples); - //compute score - int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); - double recall = coveredPositiveExamples / (double)posExamples.size(); - double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) - ? 0 - : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); - - double coverageScore = recall;//Heuristics.getFScore(recall, precision); - - //2. get a score for the specifity of the query, i.e. how many edges/nodes = precision oriented - int numberOfSpecificNodes = 0; - for (QueryTree<N> childNode : lgg.getChildrenClosure()) { - if(!childNode.getUserObject().equals("?")){ - numberOfSpecificNodes++; - } - } - double specifityScore = Math.log(numberOfSpecificNodes); - - //3.compute the total score - double score = coverageWeight * coverageScore + specifityWeight * specifityScore; - - EvaluatedQueryTree<N> solution = new EvaluatedQueryTree<N>(lgg, uncoveredPositiveExamples, coveredNegativeExamples, score); - - return solution; - } - - /** - * Return all trees from the given list {@code allTrees} which are not already subsumed by {@code tree}. - * @param tree - * @param allTrees - * @return - */ - private Collection<QueryTree<N>> getUncoveredTrees(QueryTree<N> tree, List<QueryTree<N>> allTrees){ - Collection<QueryTree<N>> uncoveredTrees = new ArrayList<QueryTree<N>>(); - for (QueryTree<N> queryTree : allTrees) { - boolean subsumed = queryTree.isSubsumedBy(tree); - if(!subsumed){ - uncoveredTrees.add(queryTree); - } - } - return uncoveredTrees; - } - - /** - * Return all trees from the given list {@code allTrees} which are not already subsumed by {@code tree}. - * @param tree - * @param allTrees - * @return - */ - private Collection<QueryTree<N>> getCoveredTrees(QueryTree<N> tree, List<QueryTree<N>> trees){ - Collection<QueryTree<N>> coveredTrees = new ArrayList<QueryTree<N>>(); - for (QueryTree<N> queryTree : trees) { - boolean subsumed = queryTree.isSubsumedBy(tree); - if(subsumed){ - coveredTrees.add(queryTree); - } - } - return coveredTrees; - } - - /** - * Initializes the todo list with all distinct trees contained in the given list {@code trees}. - * Firstly, distinct trees are computed and afterwards, for each tree a score is computed. - * @param trees - */ - private void init(List<QueryTree<N>> posExamples, List<QueryTree<N>> negExamples){ - todoList = new PriorityQueue<EvaluatedQueryTree<N>>(); - solutions = new TreeSet<EvaluatedQueryTree<N>>(); -// EvaluatedQueryTree<N> dummy = new EvaluatedQueryTree<N>(new QueryTreeImpl<N>((N)"TOP"), trees, 0d); -// todoList.add(dummy); - //compute distinct trees - Collection<QueryTree<N>> distinctTrees = new ArrayList<QueryTree<N>>(); - for (QueryTree<N> queryTree : posExamples) { - boolean distinct = true; - for (QueryTree<N> otherTree : distinctTrees) { - if(!queryTree.equals(otherTree)){ - if(queryTree.isSameTreeAs(otherTree)){ - distinct = false; - break; - } - } - } - if(distinct){ - distinctTrees.add(queryTree); - } - } - for (QueryTree<N> queryTree : distinctTrees) {//System.out.println(queryTree.getStringRepresentation()); - //compute positive examples which are not covered by LGG - Collection<QueryTree<N>> uncoveredPositiveExamples = getUncoveredTrees(queryTree, posExamples); - //compute negative examples which are covered by LGG - Collection<QueryTree<N>> coveredNegativeExamples = getCoveredTrees(queryTree, negExamples); - //compute score - int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); - double recall = coveredPositiveExamples / (double)posExamples.size(); - double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) - ? 0 - : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); - - double score = Heuristics.getFScore(recall, precision); - todoList.add(new EvaluatedQueryTree<N>(queryTree, uncoveredPositiveExamples, coveredNegativeExamples, score)); - } - } - - /** - * Add tree to todo list if not already contained in that list or the solutions. - * @param solution - */ - private void todo(EvaluatedQueryTree<N> solution){ - //check if not already contained in todo list - for (EvaluatedQueryTree<N> evTree : todoList) { - if(sameTrees(solution.getTree(), evTree.getTree())){ - return; - } - } - //check if not already contained in solutions - for (EvaluatedQueryTree<N> evTree : solutions) { - if(sameTrees(solution.getTree(), evTree.getTree())){ - return; - } - } - todoList.add(solution); - } - - private boolean sameTrees(QueryTree<N> tree1, QueryTree<N> tree2){ - return tree1.isSubsumedBy(tree2) && tree2.isSubsumedBy(tree1); - } - - private boolean terminationCriteriaSatisfied(){ - return todoList.isEmpty(); - } - - - -} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGGMultithreaded.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGGMultithreaded.java 2014-05-02 19:17:05 UTC (rev 4254) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGGMultithreaded.java 2014-05-02 19:18:34 UTC (rev 4255) @@ -20,6 +20,7 @@ import com.google.common.collect.Lists; import com.hp.hpl.jena.rdf.model.Model; +@Deprecated public class NoiseSensitiveLGGMultithreaded<N> { private LGGGenerator<N> lggGenerator = new LGGGeneratorImpl<N>(); @@ -80,8 +81,8 @@ //compute score double score = Heuristics.getConfidenceInterval95WaldAverage(trees.size(), trees.size() - uncoveredExamples.size()); //add to todo list, if not already contained in todo list or solution list - EvaluatedQueryTree<N> solution = new EvaluatedQueryTree<N>(lgg, uncoveredExamples, null, score); - todo(solution); +// EvaluatedQueryTree<N> solution = new EvaluatedQueryTree<N>(lgg, uncoveredExamples, null, score); +// todo(solution); } solutions.add(evaluatedQueryTree); } @@ -110,7 +111,7 @@ Collection<QueryTree<N>> uncoveredExamples = new ArrayList<QueryTree<N>>(distinctTrees); uncoveredExamples.remove(queryTree); double score = (trees.size() - uncoveredExamples.size()) / (double)trees.size(); - todoList.add(new EvaluatedQueryTree<N>(queryTree, uncoveredExamples, null, score)); +// todoList.add(new EvaluatedQueryTree<N>(queryTree, uncoveredExamples, null, score)); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |