From: <lor...@us...> - 2014-04-30 11:43:26
|
Revision: 4252 http://sourceforge.net/p/dl-learner/code/4252 Author: lorenz_b Date: 2014-04-30 11:43:20 +0000 (Wed, 30 Apr 2014) Log Message: ----------- Added new version of QTL. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/cache/QueryTreeCache.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/NBR.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/PostLGG.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/EvaluatedQueryTree.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL.java 2014-04-23 10:47:59 UTC (rev 4251) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL.java 2014-04-30 11:43:20 UTC (rev 4252) @@ -19,6 +19,7 @@ */ package org.dllearner.algorithms.qtl; +import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -31,7 +32,15 @@ import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.concurrent.TimeUnit; +import org.aksw.jena_sparql_api.cache.core.QueryExecutionFactoryCacheEx; +import org.aksw.jena_sparql_api.cache.extra.CacheCoreEx; +import org.aksw.jena_sparql_api.cache.extra.CacheCoreH2; +import org.aksw.jena_sparql_api.cache.extra.CacheEx; +import org.aksw.jena_sparql_api.cache.extra.CacheExImpl; +import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp; +import org.aksw.jena_sparql_api.model.QueryExecutionFactoryModel; import org.apache.commons.collections15.ListUtils; import org.apache.log4j.Logger; import org.dllearner.algorithms.qtl.cache.QueryTreeCache; @@ -46,34 +55,36 @@ import org.dllearner.algorithms.qtl.operations.lgg.LGGGenerator; import org.dllearner.algorithms.qtl.operations.lgg.LGGGeneratorImpl; import org.dllearner.algorithms.qtl.util.SPARQLEndpointEx; -import org.dllearner.core.AbstractComponent; +import org.dllearner.core.AbstractCELA; import org.dllearner.core.AbstractLearningProblem; import org.dllearner.core.ComponentAnn; -import org.dllearner.core.ComponentManager; +import org.dllearner.core.EvaluatedDescription; import org.dllearner.core.LearningProblem; import org.dllearner.core.LearningProblemUnsupportedException; import org.dllearner.core.SparqlQueryLearningAlgorithm; import org.dllearner.core.options.CommonConfigOptions; import org.dllearner.core.options.ConfigOption; import org.dllearner.core.options.IntegerConfigOption; +import org.dllearner.core.owl.Description; import org.dllearner.core.owl.Individual; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.CachingConciseBoundedDescriptionGenerator; import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator; import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl; -import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.learningproblems.PosNegLP; import org.dllearner.learningproblems.PosOnlyLP; import org.dllearner.utilities.Helper; +import org.dllearner.utilities.owl.DLLearnerDescriptionConvertVisitor; +import org.dllearner.utilities.owl.OWLAPIDescriptionConvertVisitor; +import org.semanticweb.owlapi.owllink.parser.OWLlinkDescriptionElementHandler; import org.springframework.beans.factory.annotation.Autowired; -import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.google.common.collect.Sets; +import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.query.ResultSetRewindable; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.StmtIterator; @@ -89,7 +100,7 @@ * */ @ComponentAnn(name="query tree learner", shortName="qtl", version=0.8) -public class QTL extends AbstractComponent implements SparqlQueryLearningAlgorithm { +public class QTL extends AbstractCELA implements SparqlQueryLearningAlgorithm { private static final Logger logger = Logger.getLogger(QTL.class); @@ -99,7 +110,8 @@ private SparqlEndpoint endpoint; private Model model; - private ExtractionDBCache cache; + private org.aksw.jena_sparql_api.core.QueryExecutionFactory qef; + private String cacheDirectory; private QueryTreeCache treeCache; @@ -123,6 +135,7 @@ private SortedSet<String> lggInstances; private Set<String> objectNamespacesToIgnore = new HashSet<String>(); + private Set<String> allowedNamespaces = new HashSet<String>(); private Map<String, String> prefixes = new HashMap<String, String>(); private boolean enableNumericLiteralFilters = false; @@ -137,52 +150,44 @@ } public QTL(AbstractLearningProblem learningProblem, SparqlEndpointKS endpointKS) throws LearningProblemUnsupportedException{ - if(!(learningProblem instanceof PosOnlyLP || learningProblem instanceof PosNegLP)){ - throw new LearningProblemUnsupportedException(learningProblem.getClass(), getClass()); - } - this.learningProblem = learningProblem; - this.endpointKS = endpointKS; - -// this.configurator = new QTLConfigurator(this); + this(learningProblem, endpointKS, null); } - public QTL(AbstractLearningProblem learningProblem, SparqlEndpointKS endpointKS, ExtractionDBCache cache) throws LearningProblemUnsupportedException{ + public QTL(AbstractLearningProblem learningProblem, SparqlEndpointKS endpointKS, String cacheDirectory) throws LearningProblemUnsupportedException{ if(!(learningProblem instanceof PosOnlyLP || learningProblem instanceof PosNegLP)){ throw new LearningProblemUnsupportedException(learningProblem.getClass(), getClass()); } this.learningProblem = learningProblem; this.endpointKS = endpointKS; - this.cache = cache; - -// this.configurator = new QTLConfigurator(this); + this.cacheDirectory = cacheDirectory; } - public QTL(SPARQLEndpointEx endpoint, ExtractionDBCache cache) { + public QTL(SPARQLEndpointEx endpoint, String cacheDirectory) { this.endpoint = endpoint; - this.cache = cache; + this.cacheDirectory = cacheDirectory; treeCache = new QueryTreeCache(); - cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(endpoint, cache)); + cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(endpoint, cacheDirectory)); cbdGenerator.setRecursionDepth(maxQueryTreeDepth); lggGenerator = new LGGGeneratorImpl<String>(); - nbr = new NBR<String>(endpoint, cache); + nbr = new NBR<String>(endpoint, cacheDirectory); nbr.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds); posExampleTrees = new ArrayList<QueryTree<String>>(); negExampleTrees = new ArrayList<QueryTree<String>>(); } - public QTL(SparqlEndpointKS endpointKS, ExtractionDBCache cache) { + public QTL(SparqlEndpointKS endpointKS, String cacheDirectory) { this.endpointKS = endpointKS; - this.cache = cache; + this.cacheDirectory = cacheDirectory; treeCache = new QueryTreeCache(); - cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(endpoint, cache)); + cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(endpoint, cacheDirectory)); cbdGenerator.setRecursionDepth(maxQueryTreeDepth); lggGenerator = new LGGGeneratorImpl<String>(); - nbr = new NBR<String>(endpoint, cache); + nbr = new NBR<String>(endpoint, cacheDirectory); nbr.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds); posExampleTrees = new ArrayList<QueryTree<String>>(); @@ -369,20 +374,15 @@ private SortedSet<String> getResources(QueryTree<String> tree){ SortedSet<String> resources = new TreeSet<String>(); String query = getDistinctSPARQLQuery(tree); - ResultSet rs; - if(endpoint != null){ - rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - } else { - rs = QueryExecutionFactory.create(query, model).execSelect(); - } + QueryExecution qe = qef.createQueryExecution(query); + ResultSet rs = qe.execSelect(); - String uri; QuerySolution qs; while(rs.hasNext()){ qs = rs.next(); - uri = qs.getResource("x0").getURI(); - resources.add(uri); + resources.add(qs.getResource("x0").getURI()); } + qe.close(); return resources; } @@ -392,12 +392,30 @@ return query; } +// @Override +// public void start(){ +// generatePositiveExampleTrees(); +// +// lgg = lggGenerator.getLGG(posExampleTrees); +// +// if(queryTreeFilter != null){ +// lgg = queryTreeFilter.getFilteredQueryTree(lgg); +// } +// if(logger.isDebugEnabled()){ +// logger.debug("LGG: \n" + lgg.getStringRepresentation()); +// } +// if(logger.isInfoEnabled()){ +// logger.info("Generated SPARQL query:\n" + lgg.toSPARQLQueryString(true, enableNumericLiteralFilters, prefixes)); +// } +// } + @Override public void start(){ + //build the query trees for the positive examples generatePositiveExampleTrees(); + //compute the LGG lgg = lggGenerator.getLGG(posExampleTrees); - if(queryTreeFilter != null){ lgg = queryTreeFilter.getFilteredQueryTree(lgg); } @@ -405,8 +423,36 @@ logger.debug("LGG: \n" + lgg.getStringRepresentation()); } if(logger.isInfoEnabled()){ - logger.info(lgg.toSPARQLQueryString(true, enableNumericLiteralFilters, prefixes)); + logger.info("Generated SPARQL query:\n" + lgg.toSPARQLQueryString(true, enableNumericLiteralFilters, prefixes)); } + + //build the query trees for the negative examples + if(!negExamples.isEmpty()){ + generateNegativeExampleTrees(); + + try { + //check if the LGG covers a negative example + int index = coversNegativeQueryTree(lgg); + if(index != -1){ + throw new NegativeTreeCoverageExecption(negExamples.get(index)); + } + + lggInstances = getResources(lgg); + nbr.setLGGInstances(lggInstances); + + String question; + if(negExamples.isEmpty()){ + question = nbr.getQuestion(lgg, negExampleTrees, getKnownResources()); + } else { + question = nbr.getQuestion(lgg, negExampleTrees, getKnownResources()); + } + logger.info("Question:\n" + question); + } catch (NegativeTreeCoverageExecption e) { + e.printStackTrace(); + } catch (TimeOutException e) { + e.printStackTrace(); + } + } } public void setEnableNumericLiteralFilters(boolean enableNumericLiteralFilters) { @@ -428,19 +474,43 @@ } public void init() { + if(endpointKS.isRemote()){ + SparqlEndpoint endpoint = endpointKS.getEndpoint(); + qef = new QueryExecutionFactoryHttp(endpoint.getURL().toString(), endpoint.getDefaultGraphURIs()); + if(cacheDirectory != null){ + try { + long timeToLive = TimeUnit.DAYS.toMillis(30); + CacheCoreEx cacheBackend = CacheCoreH2.create(cacheDirectory, timeToLive, true); + CacheEx cacheFrontend = new CacheExImpl(cacheBackend); + qef = new QueryExecutionFactoryCacheEx(qef, cacheFrontend); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } catch (SQLException e) { + e.printStackTrace(); + } + } +// qef = new QueryExecutionFactoryPaginated(qef, 10000); + + } else { + qef = new QueryExecutionFactoryModel(((LocalModelBasedSparqlEndpointKS)endpointKS).getModel()); + } + + if(learningProblem instanceof PosOnlyLP){ this.posExamples = convert(((PosOnlyLP)learningProblem).getPositiveExamples()); + this.negExamples = new ArrayList<String>(); } else if(learningProblem instanceof PosNegLP){ this.posExamples = convert(((PosNegLP)learningProblem).getPositiveExamples()); this.negExamples = convert(((PosNegLP)learningProblem).getNegativeExamples()); } treeCache = new QueryTreeCache(); + treeCache.addAllowedNamespaces(allowedNamespaces); if(endpointKS instanceof LocalModelBasedSparqlEndpointKS){ cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(((LocalModelBasedSparqlEndpointKS) endpointKS).getModel())); } else { endpoint = endpointKS.getEndpoint(); - cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(endpoint, cache)); + cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(endpoint, endpointKS.getCache())); } cbdGenerator.setRecursionDepth(maxQueryTreeDepth); @@ -464,6 +534,58 @@ return lgg; } + @Autowired + public void setLearningProblem(LearningProblem learningProblem) { + this.learningProblem = learningProblem; + } + + public SparqlEndpointKS getEndpointKS() { + return endpointKS; + } + + @Autowired + public void setEndpointKS(SparqlEndpointKS endpointKS) { + this.endpointKS = endpointKS; + } + + /* (non-Javadoc) + * @see org.dllearner.core.StoppableLearningAlgorithm#stop() + */ + @Override + public void stop() { + } + + /* (non-Javadoc) + * @see org.dllearner.core.StoppableLearningAlgorithm#isRunning() + */ + @Override + public boolean isRunning() { + return false; + } + + /* (non-Javadoc) + * @see org.dllearner.core.AbstractCELA#getCurrentlyBestDescription() + */ + @Override + public Description getCurrentlyBestDescription() { + return (lgg == null) ? null : DLLearnerDescriptionConvertVisitor.getDLLearnerDescription(lgg.asOWLClassExpression()); + } + + /* (non-Javadoc) + * @see org.dllearner.core.AbstractCELA#getCurrentlyBestEvaluatedDescription() + */ + @Override + public EvaluatedDescription getCurrentlyBestEvaluatedDescription() { + return null; + } + + /** + * @param allowedNamespaces the allowedNamespaces to set + */ + public void setAllowedNamespaces(Set<String> allowedNamespaces) { + this.allowedNamespaces = allowedNamespaces; + } + public static void main(String[] args) throws Exception { Set<String> positiveExamples = new HashSet<String>(); positiveExamples.add("http://dbpedia.org/resource/Liverpool_F.C."); @@ -473,31 +595,14 @@ ks.init(); PosOnlyLP lp = new PosOnlyLP(); lp.setPositiveExamples(Helper.getIndividualSet(positiveExamples)); - QTL qtl = new QTL(lp, ks); + QTL qtl = new QTL(lp, ks, "cache"); + qtl.setAllowedNamespaces(Sets.newHashSet("http://dbpedia.org/ontology/", "http://dbpedia.org/resource/")); qtl.addQueryTreeFilter(new QuestionBasedQueryTreeFilter(Arrays.asList("soccer club", "Premier League"))); qtl.init(); qtl.start(); String query = qtl.getBestSPARQLQuery(); System.out.println(query); + System.out.println(qtl.getCurrentlyBestDescription()); } - - public LearningProblem getLearningProblem() { - return learningProblem; - } - - @Autowired - public void setLearningProblem(LearningProblem learningProblem) { - this.learningProblem = learningProblem; - } - - public SparqlEndpointKS getEndpointKS() { - return endpointKS; - } - - @Autowired - public void setEndpointKS(SparqlEndpointKS endpointKS) { - this.endpointKS = endpointKS; - } - } Added: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2.java 2014-04-30 11:43:20 UTC (rev 4252) @@ -0,0 +1,368 @@ +package org.dllearner.algorithms.qtl; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Logger; +import org.dllearner.algorithms.qtl.cache.QueryTreeCache; +import org.dllearner.algorithms.qtl.datastructures.QueryTree; +import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeConversionStrategy; +import org.dllearner.algorithms.qtl.operations.lgg.EvaluatedQueryTree; +import org.dllearner.algorithms.qtl.operations.lgg.LGGGenerator; +import org.dllearner.algorithms.qtl.operations.lgg.LGGGeneratorImpl; +import org.dllearner.core.AbstractCELA; +import org.dllearner.core.AbstractReasonerComponent; +import org.dllearner.core.ComponentAnn; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.EvaluatedDescription; +import org.dllearner.core.KnowledgeSource; +import org.dllearner.core.LearningProblemUnsupportedException; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Individual; +import org.dllearner.kb.OWLFile; +import org.dllearner.learningproblems.AxiomScore; +import org.dllearner.learningproblems.Heuristics; +import org.dllearner.learningproblems.PosNegLP; +import org.dllearner.utilities.owl.DLLearnerDescriptionConvertVisitor; +import org.springframework.beans.factory.annotation.Autowired; + +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + +@ComponentAnn(name="query tree learner with noise", shortName="qtl2", version=0.8) +public class QTL2 extends AbstractCELA { + + + private static final Logger logger = Logger.getLogger(QTL2.class.getName()); + + private LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); + + private Queue<EvaluatedQueryTree<String>> todoList; + private SortedSet<EvaluatedQueryTree<String>> solutions; + + private double currentlyBestScore = 0d; + + private List<QueryTree<String>> posExamples; + private List<QueryTree<String>> negExamples; + + private double coverageWeight = 1; + private double specifityWeight = 0; + + private QueryTreeCache treeCache; + + private PosNegLP lp; + + private Model model; + + private AbstractReasonerComponent reasoner; + + private volatile boolean stop; + private boolean isRunning; + + public QTL2() {} + + public QTL2(PosNegLP learningProblem, AbstractReasonerComponent reasoner) throws LearningProblemUnsupportedException{ + this.lp = learningProblem; + this.reasoner = reasoner; + + } + + public QTL2(PosNegLP lp, Model model) { + this.lp = lp; + this.model = model; + } + + /* (non-Javadoc) + * @see org.dllearner.core.Component#init() + */ + @Override + public void init() throws ComponentInitException { + logger.info("Initializing..."); + treeCache = new QueryTreeCache(model); + + posExamples = new ArrayList<QueryTree<String>>(); + negExamples = new ArrayList<QueryTree<String>>(); + + //get the query trees + for (Individual ind : lp.getPositiveExamples()) { + posExamples.add(treeCache.getQueryTree(ind.getName())); + } + for (Individual ind : lp.getNegativeExamples()) { + negExamples.add(treeCache.getQueryTree(ind.getName())); + } + } + + /* (non-Javadoc) + * @see org.dllearner.core.LearningAlgorithm#start() + */ + @Override + public void start() { + logger.info("Running..."); + stop = false; + isRunning = true; + long startTime = System.currentTimeMillis(); + currentlyBestScore = 0d; + + Monitor subMon = MonitorFactory.getTimeMonitor("subsumption-mon"); + Monitor lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); + + init(posExamples, negExamples); + + EvaluatedQueryTree<String> currentElement; + do{ + logger.trace("TODO list size: " + todoList.size()); + //pick best element from todo list + currentElement = todoList.poll(); + //generate the LGG between the chosen tree and each uncovered positive example + for (QueryTree<String> example : currentElement.getFalseNegatives()) { + QueryTree<String> tree = currentElement.getTree(); + //compute the LGG + lggMon.start(); + QueryTree<String> lgg = lggGenerator.getLGG(tree, example); + lggMon.stop(); +// tree.dump();System.out.println("++++++++++++++++++++++++++++++++"); +// example.dump();System.out.println("############################"); +// lgg.dump();System.out.println("******************************"); + + + //evaluate the LGG + EvaluatedQueryTree<String> solution = evaluate(lgg); + + if(solution.getScore() >= currentlyBestScore){ + //add to todo list, if not already contained in todo list or solution list + todo(solution); + currentlyBestScore = solution.getScore(); + } + + } + solutions.add(currentElement); +// todoList.remove(currentElement); + } while(!terminationCriteriaSatisfied()); + long endTime = System.currentTimeMillis(); + logger.info("Finished in " + (endTime-startTime) + "ms."); + logger.info("Best solution:\n" + getCurrentlyBestEvaluatedDescription()); + + logger.trace("LGG time: " + lggMon.getTotal() + "ms"); + logger.trace("Avg. LGG time: " + lggMon.getAvg() + "ms"); + logger.trace("#LGG computations: " + lggMon.getHits()); + logger.trace("Subsumption test time: " + subMon.getTotal() + "ms"); + logger.trace("Avg. subsumption test time: " + subMon.getAvg() + "ms"); + logger.trace("#Subsumption tests: " + subMon.getHits()); + } + + public EvaluatedQueryTree<String> getBestSolution(){ + return solutions.first(); + } + + /* (non-Javadoc) + * @see org.dllearner.core.AbstractCELA#getCurrentlyBestDescription() + */ + @Override + public Description getCurrentlyBestDescription() { + return getCurrentlyBestEvaluatedDescription().getDescription(); + } + + /* (non-Javadoc) + * @see org.dllearner.core.AbstractCELA#getCurrentlyBestEvaluatedDescription() + */ + @Override + public EvaluatedDescription getCurrentlyBestEvaluatedDescription() { + EvaluatedQueryTree<String> bestSolution = solutions.first(); + Description description = DLLearnerDescriptionConvertVisitor.getDLLearnerDescription( + bestSolution.getTree().asOWLClassExpression(LiteralNodeConversionStrategy.FACET_RESTRICTION)); + return new EvaluatedDescription(description, new AxiomScore(bestSolution.getScore())); + } + + /** + * @return the treeCache + */ + public QueryTreeCache getTreeCache() { + return treeCache; + } + + /* (non-Javadoc) + * @see org.dllearner.core.StoppableLearningAlgorithm#isRunning() + */ + @Override + public boolean isRunning() { + return isRunning; + } + + @Autowired + public void setLearningProblem(PosNegLP learningProblem) { + this.lp = learningProblem; + } + + @Autowired + public void setReasoner(AbstractReasonerComponent reasoner){ + this.reasoner = reasoner; + model = ModelFactory.createDefaultModel(); + for (KnowledgeSource ks : reasoner.getSources()) { + if(ks instanceof OWLFile){ + try { + model.read(((OWLFile) ks).getURL().openStream(), null); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + } + + + + /* (non-Javadoc) + * @see org.dllearner.core.StoppableLearningAlgorithm#stop() + */ + @Override + public void stop() { + stop = true; + } + + private EvaluatedQueryTree<String> evaluate(QueryTree<String> tree){ + //1. get a score for the coverage = recall oriented + //compute positive examples which are not covered by LGG + Collection<QueryTree<String>> uncoveredPositiveExamples = getUncoveredTrees(tree, posExamples); + //compute negative examples which are covered by LGG + Collection<QueryTree<String>> coveredNegativeExamples = getCoveredTrees(tree, negExamples); + //compute score + int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); + double recall = coveredPositiveExamples / (double)posExamples.size(); + double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) + ? 0 + : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); + + double coverageScore = recall;//Heuristics.getFScore(recall, precision); + + //2. get a score for the specifity of the query, i.e. how many edges/nodes = precision oriented + int numberOfSpecificNodes = 0; + for (QueryTree<String> childNode : tree.getChildrenClosure()) { + if(!childNode.getUserObject().equals("?")){ + numberOfSpecificNodes++; + } + } + double specifityScore = Math.log(numberOfSpecificNodes); + + //3.compute the total score + double score = coverageWeight * coverageScore + specifityWeight * specifityScore; + + EvaluatedQueryTree<String> evaluatedTree = new EvaluatedQueryTree<String>(tree, uncoveredPositiveExamples, coveredNegativeExamples, score); + + return evaluatedTree; + } + + /** + * Return all trees from the given list {@code allTrees} which are not already subsumed by {@code tree}. + * @param tree + * @param allTrees + * @return + */ + private Collection<QueryTree<String>> getCoveredTrees(QueryTree<String> tree, List<QueryTree<String>> trees){ + Collection<QueryTree<String>> coveredTrees = new ArrayList<QueryTree<String>>(); + for (QueryTree<String> queryTree : trees) { + boolean subsumed = queryTree.isSubsumedBy(tree); + if(subsumed){ + coveredTrees.add(queryTree); + } + } + return coveredTrees; + } + + /** + * Return all trees from the given list {@code allTrees} which are not already subsumed by {@code tree}. + * @param tree + * @param allTrees + * @return + */ + private Collection<QueryTree<String>> getUncoveredTrees(QueryTree<String> tree, List<QueryTree<String>> allTrees){ + Collection<QueryTree<String>> uncoveredTrees = new ArrayList<QueryTree<String>>(); + for (QueryTree<String> queryTree : allTrees) { + boolean subsumed = queryTree.isSubsumedBy(tree); + if(!subsumed){ + uncoveredTrees.add(queryTree); + } + } + return uncoveredTrees; + } + + /** + * Initializes the todo list with all distinct trees contained in the given list {@code trees}. + * Firstly, distinct trees are computed and afterwards, for each tree a score is computed. + * @param trees + */ + private void init(List<QueryTree<String>> posExamples, List<QueryTree<String>> negExamples){ + todoList = new PriorityQueue<EvaluatedQueryTree<String>>(); + solutions = new TreeSet<EvaluatedQueryTree<String>>(); +// EvaluatedQueryTree<String> dummy = new EvaluatedQueryTree<String>(new QueryTreeImpl<String>((N)"TOP"), trees, 0d); +// todoList.add(dummy); + //compute distinct trees + Collection<QueryTree<String>> distinctTrees = new ArrayList<QueryTree<String>>(); + for (QueryTree<String> queryTree : posExamples) { + boolean distinct = true; + for (QueryTree<String> otherTree : distinctTrees) { + if(!queryTree.equals(otherTree)){ + if(queryTree.isSameTreeAs(otherTree)){ + distinct = false; + break; + } + } + } + if(distinct){ + distinctTrees.add(queryTree); + } + } + for (QueryTree<String> queryTree : distinctTrees) {//System.out.println(queryTree.getStringRepresentation()); + //compute positive examples which are not covered by LGG + Collection<QueryTree<String>> uncoveredPositiveExamples = getUncoveredTrees(queryTree, posExamples); + //compute negative examples which are covered by LGG + Collection<QueryTree<String>> coveredNegativeExamples = getCoveredTrees(queryTree, negExamples); + //compute score + int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); + double recall = coveredPositiveExamples / (double)posExamples.size(); + double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) + ? 0 + : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); + + double score = Heuristics.getFScore(recall, precision); + todoList.add(new EvaluatedQueryTree<String>(queryTree, uncoveredPositiveExamples, coveredNegativeExamples, score)); + } + } + + private boolean sameTrees(QueryTree<String> tree1, QueryTree<String> tree2){ + return tree1.isSubsumedBy(tree2) && tree2.isSubsumedBy(tree1); + } + + private boolean terminationCriteriaSatisfied(){ + return stop || todoList.isEmpty(); + } + + /** + * Add tree to todo list if not already contained in that list or the solutions. + * @param solution + */ + private void todo(EvaluatedQueryTree<String> solution){ + //check if not already contained in todo list + for (EvaluatedQueryTree<String> evTree : todoList) { + if(sameTrees(solution.getTree(), evTree.getTree())){ + return; + } + } + //check if not already contained in solutions + for (EvaluatedQueryTree<String> evTree : solutions) { + if(sameTrees(solution.getTree(), evTree.getTree())){ + return; + } + } + todoList.add(solution); + } + + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/QTL2Disjunctive.java 2014-04-30 11:43:20 UTC (rev 4252) @@ -0,0 +1,412 @@ +package org.dllearner.algorithms.qtl; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Logger; +import org.dllearner.algorithms.qtl.cache.QueryTreeCache; +import org.dllearner.algorithms.qtl.datastructures.QueryTree; +import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeConversionStrategy; +import org.dllearner.algorithms.qtl.operations.lgg.EvaluatedQueryTree; +import org.dllearner.algorithms.qtl.operations.lgg.LGGGenerator; +import org.dllearner.algorithms.qtl.operations.lgg.LGGGeneratorImpl; +import org.dllearner.core.AbstractCELA; +import org.dllearner.core.AbstractReasonerComponent; +import org.dllearner.core.ComponentAnn; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.EvaluatedDescription; +import org.dllearner.core.KnowledgeSource; +import org.dllearner.core.LearningProblemUnsupportedException; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Individual; +import org.dllearner.kb.OWLFile; +import org.dllearner.learningproblems.AxiomScore; +import org.dllearner.learningproblems.Heuristics; +import org.dllearner.learningproblems.PosNegLP; +import org.dllearner.utilities.owl.DLLearnerDescriptionConvertVisitor; +import org.dllearner.utilities.owl.OWLAPIConverter; +import org.semanticweb.owlapi.io.ToStringRenderer; +import org.semanticweb.owlapi.util.SimpleShortFormProvider; +import org.springframework.beans.factory.annotation.Autowired; + +import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl; + +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + +@ComponentAnn(name="query tree learner with noise (disjunctive)", shortName="qtl2dis", version=0.8) +public class QTL2Disjunctive extends AbstractCELA { + + + private static final Logger logger = Logger.getLogger(QTL2Disjunctive.class.getName()); + + private LGGGenerator<String> lggGenerator = new LGGGeneratorImpl<String>(); + + private Queue<EvaluatedQueryTree<String>> todoList; + private SortedSet<EvaluatedQueryTree<String>> solutions; + + private double currentlyBestScore = 0d; + + private List<QueryTree<String>> posExamples; + private List<QueryTree<String>> negExamples; + + private double coverageWeight = 0.6; + private double specifityWeight = 0.4; + + private QueryTreeCache treeCache; + + private PosNegLP lp; + + private Model model; + + private AbstractReasonerComponent reasoner; + + private volatile boolean stop; + private boolean isRunning; + + private Monitor subMon; + private Monitor lggMon; + + public QTL2Disjunctive() {} + + public QTL2Disjunctive(PosNegLP learningProblem, AbstractReasonerComponent reasoner) throws LearningProblemUnsupportedException{ + this.lp = learningProblem; + this.reasoner = reasoner; + + } + + public QTL2Disjunctive(PosNegLP lp, Model model) { + this.lp = lp; + this.model = model; + } + + public EvaluatedQueryTree<String> getBestSolution(){ + return solutions.first(); + } + + /* (non-Javadoc) + * @see org.dllearner.core.Component#init() + */ + @Override + public void init() throws ComponentInitException { + logger.info("Initializing..."); + treeCache = new QueryTreeCache(model); + + posExamples = new ArrayList<QueryTree<String>>(); + negExamples = new ArrayList<QueryTree<String>>(); + + //get the query trees + for (Individual ind : lp.getPositiveExamples()) { + posExamples.add(treeCache.getQueryTree(ind.getName())); + } + for (Individual ind : lp.getNegativeExamples()) { + negExamples.add(treeCache.getQueryTree(ind.getName())); + } + } + + /* (non-Javadoc) + * @see org.dllearner.core.LearningAlgorithm#start() + */ + @Override + public void start() { + logger.info("Running..."); + stop = false; + isRunning = true; + long startTime = System.currentTimeMillis(); + currentlyBestScore = 0d; + + subMon = MonitorFactory.getTimeMonitor("subsumption-mon"); + lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); + + + //outer loop: compute LGG, pick best solution and remove all covered positive and negative examples + List<EvaluatedQueryTree<String>> unionSolutions = new ArrayList<EvaluatedQueryTree<String>>(); + do { + //compute LGG + computeLGG(); + + //pick best solution computed so far + EvaluatedQueryTree<String> bestSolution = solutions.first(); + unionSolutions.add(bestSolution); + logger.info("#Uncovered pos. examples:" + bestSolution.getFalseNegatives().size()); + + //remove all covered examples + QueryTree<String> tree; + for (Iterator<QueryTree<String>> iterator = posExamples.iterator(); iterator.hasNext();) { + tree = iterator.next(); + if(tree.isSubsumedBy(bestSolution.getTree())){ + iterator.remove(); + } + } + for (Iterator<QueryTree<String>> iterator = negExamples.iterator(); iterator.hasNext();) { + tree = iterator.next(); + if(tree.isSubsumedBy(bestSolution.getTree())){ + iterator.remove(); + } + } + } while (!(stop || posExamples.isEmpty())); + + + } + + private void computeLGG(){ + currentlyBestScore = 0d; + initTodoList(posExamples, negExamples); + + long startTime = System.currentTimeMillis(); + EvaluatedQueryTree<String> currentElement; + do{ + logger.trace("TODO list size: " + todoList.size()); + //pick best element from todo list + currentElement = todoList.poll(); + //generate the LGG between the chosen tree and each uncovered positive example + for (QueryTree<String> example : currentElement.getFalseNegatives()) { + QueryTree<String> tree = currentElement.getTree(); + + //compute the LGG + lggMon.start(); + QueryTree<String> lgg = lggGenerator.getLGG(tree, example); + lggMon.stop(); + + //evaluate the LGG + EvaluatedQueryTree<String> solution = evaluate(lgg); + + if(solution.getScore() >= currentlyBestScore){ + //add to todo list, if not already contained in todo list or solution list + todo(solution); + if(solution.getScore() > currentlyBestScore){ + logger.info("Got better solution:" + currentlyBestScore); + } + currentlyBestScore = solution.getScore(); + } + + } + solutions.add(currentElement); +// todoList.remove(currentElement); + } while(!terminationCriteriaSatisfied()); + long endTime = System.currentTimeMillis(); + logger.info("Finished in " + (endTime-startTime) + "ms."); + EvaluatedDescription bestSolution = getCurrentlyBestEvaluatedDescription(); + ToStringRenderer.getInstance().setRenderer(new ManchesterOWLSyntaxOWLObjectRendererImpl()); + ToStringRenderer.getInstance().setShortFormProvider(new SimpleShortFormProvider()); + logger.info("Best solution:\n" + OWLAPIConverter.getOWLAPIDescription(bestSolution.getDescription()) + "\n(" + bestSolution.getAccuracy() + ")"); + + logger.trace("LGG time: " + lggMon.getTotal() + "ms"); + logger.trace("Avg. LGG time: " + lggMon.getAvg() + "ms"); + logger.trace("#LGG computations: " + lggMon.getHits()); + logger.trace("Subsumption test time: " + subMon.getTotal() + "ms"); + logger.trace("Avg. subsumption test time: " + subMon.getAvg() + "ms"); + logger.trace("#Subsumption tests: " + subMon.getHits()); + } + + /* (non-Javadoc) + * @see org.dllearner.core.StoppableLearningAlgorithm#stop() + */ + @Override + public void stop() { + stop = true; + } + + /* (non-Javadoc) + * @see org.dllearner.core.AbstractCELA#getCurrentlyBestDescription() + */ + @Override + public Description getCurrentlyBestDescription() { + return getCurrentlyBestEvaluatedDescription().getDescription(); + } + + /* (non-Javadoc) + * @see org.dllearner.core.AbstractCELA#getCurrentlyBestEvaluatedDescription() + */ + @Override + public EvaluatedDescription getCurrentlyBestEvaluatedDescription() { + EvaluatedQueryTree<String> bestSolution = solutions.first(); + Description description = DLLearnerDescriptionConvertVisitor.getDLLearnerDescription( + bestSolution.getTree().asOWLClassExpression(LiteralNodeConversionStrategy.FACET_RESTRICTION)); + return new EvaluatedDescription(description, new AxiomScore(bestSolution.getScore())); + } + + /* (non-Javadoc) + * @see org.dllearner.core.StoppableLearningAlgorithm#isRunning() + */ + @Override + public boolean isRunning() { + return isRunning; + } + + @Autowired + public void setLearningProblem(PosNegLP learningProblem) { + this.lp = learningProblem; + } + + @Autowired + public void setReasoner(AbstractReasonerComponent reasoner){ + this.reasoner = reasoner; + model = ModelFactory.createDefaultModel(); + for (KnowledgeSource ks : reasoner.getSources()) { + if(ks instanceof OWLFile){ + try { + model.read(((OWLFile) ks).getURL().openStream(), null); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + } + + /** + * @return the treeCache + */ + public QueryTreeCache getTreeCache() { + return treeCache; + } + + private EvaluatedQueryTree<String> evaluate(QueryTree<String> tree){ + //1. get a score for the coverage = recall oriented + //compute positive examples which are not covered by LGG + Collection<QueryTree<String>> uncoveredPositiveExamples = getUncoveredTrees(tree, posExamples); + //compute negative examples which are covered by LGG + Collection<QueryTree<String>> coveredNegativeExamples = getCoveredTrees(tree, negExamples); + //compute score + int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); + double recall = coveredPositiveExamples / (double)posExamples.size(); + double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) + ? 0 + : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); + + double coverageScore = recall;//Heuristics.getFScore(recall, precision); + + //2. get a score for the specifity of the query, i.e. how many edges/nodes = precision oriented + int numberOfSpecificNodes = 0; + for (QueryTree<String> childNode : tree.getChildrenClosure()) { + if(!childNode.getUserObject().equals("?")){ + numberOfSpecificNodes++; + } + } + double specifityScore = Math.log(numberOfSpecificNodes); + + //3.compute the total score + double score = coverageWeight * coverageScore + specifityWeight * specifityScore; + + EvaluatedQueryTree<String> evaluatedTree = new EvaluatedQueryTree<String>(tree, uncoveredPositiveExamples, coveredNegativeExamples, score); + + return evaluatedTree; + } + + /** + * Return all trees from the given list {@code allTrees} which are not already subsumed by {@code tree}. + * @param tree + * @param allTrees + * @return + */ + private Collection<QueryTree<String>> getCoveredTrees(QueryTree<String> tree, List<QueryTree<String>> trees){ + Collection<QueryTree<String>> coveredTrees = new ArrayList<QueryTree<String>>(); + for (QueryTree<String> queryTree : trees) { + boolean subsumed = queryTree.isSubsumedBy(tree); + if(subsumed){ + coveredTrees.add(queryTree); + } + } + return coveredTrees; + } + + /** + * Return all trees from the given list {@code allTrees} which are not already subsumed by {@code tree}. + * @param tree + * @param allTrees + * @return + */ + private Collection<QueryTree<String>> getUncoveredTrees(QueryTree<String> tree, List<QueryTree<String>> allTrees){ + Collection<QueryTree<String>> uncoveredTrees = new ArrayList<QueryTree<String>>(); + for (QueryTree<String> queryTree : allTrees) { + boolean subsumed = queryTree.isSubsumedBy(tree); + if(!subsumed){ + uncoveredTrees.add(queryTree); + } + } + return uncoveredTrees; + } + + /** + * Initializes the todo list with all distinct trees contained in the given list {@code trees}. + * Firstly, distinct trees are computed and afterwards, for each tree a score is computed. + * @param trees + */ + private void initTodoList(List<QueryTree<String>> posExamples, List<QueryTree<String>> negExamples){ + todoList = new PriorityQueue<EvaluatedQueryTree<String>>(); + solutions = new TreeSet<EvaluatedQueryTree<String>>(); +// EvaluatedQueryTree<String> dummy = new EvaluatedQueryTree<String>(new QueryTreeImpl<String>((N)"TOP"), trees, 0d); +// todoList.add(dummy); + //compute distinct trees + Collection<QueryTree<String>> distinctTrees = new ArrayList<QueryTree<String>>(); + for (QueryTree<String> queryTree : posExamples) { + boolean distinct = true; + for (QueryTree<String> otherTree : distinctTrees) { + if(!queryTree.equals(otherTree)){ + if(queryTree.isSameTreeAs(otherTree)){ + distinct = false; + break; + } + } + } + if(distinct){ + distinctTrees.add(queryTree); + } + } + for (QueryTree<String> queryTree : distinctTrees) {//System.out.println(queryTree.getStringRepresentation()); + //compute positive examples which are not covered by LGG + Collection<QueryTree<String>> uncoveredPositiveExamples = getUncoveredTrees(queryTree, posExamples); + //compute negative examples which are covered by LGG + Collection<QueryTree<String>> coveredNegativeExamples = getCoveredTrees(queryTree, negExamples); + //compute score + int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); + double recall = coveredPositiveExamples / (double)posExamples.size(); + double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) + ? 0 + : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); + + double score = Heuristics.getFScore(recall, precision); + todoList.add(new EvaluatedQueryTree<String>(queryTree, uncoveredPositiveExamples, coveredNegativeExamples, score)); + } + } + + private boolean sameTrees(QueryTree<String> tree1, QueryTree<String> tree2){ + return tree1.isSubsumedBy(tree2) && tree2.isSubsumedBy(tree1); + } + + private boolean terminationCriteriaSatisfied(){ + return stop || todoList.isEmpty() || posExamples.isEmpty(); + } + + /** + * Add tree to todo list if not already contained in that list or the solutions. + * @param solution + */ + private void todo(EvaluatedQueryTree<String> solution){ + //check if not already contained in todo list + for (EvaluatedQueryTree<String> evTree : todoList) { + if(sameTrees(solution.getTree(), evTree.getTree())){ + return; + } + } + //check if not already contained in solutions + for (EvaluatedQueryTree<String> evTree : solutions) { + if(sameTrees(solution.getTree(), evTree.getTree())){ + return; + } + } + todoList.add(solution); + } + + + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/cache/QueryTreeCache.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/cache/QueryTreeCache.java 2014-04-23 10:47:59 UTC (rev 4251) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/cache/QueryTreeCache.java 2014-04-30 11:43:20 UTC (rev 4252) @@ -2,6 +2,7 @@ import java.util.HashMap; import java.util.Map; +import java.util.Set; import org.dllearner.algorithms.qtl.QueryTreeFactory; import org.dllearner.algorithms.qtl.datastructures.QueryTree; @@ -17,12 +18,27 @@ private Map<Model, QueryTree<String>> cache; private QueryTreeFactory<String> factory; + private Model model; public QueryTreeCache(){ cache = new HashMap<Model, QueryTree<String>>(); factory = new QueryTreeFactoryImpl(); } + public QueryTreeCache(Model model){ + this.model = model; + cache = new HashMap<Model, QueryTree<String>>(); + factory = new QueryTreeFactoryImpl(); + } + + public QueryTree<String> getQueryTree(String root){ + QueryTree<String> tree = cache.get(model); + if(tree == null){ + tree = factory.getQueryTree(root, model); + } + return tree; + } + public QueryTree<String> getQueryTree(String root, Model model){ QueryTree<String> tree = cache.get(model); if(tree == null){ @@ -62,4 +78,8 @@ public void dispose(){ cache = null; } + + public void addAllowedNamespaces(Set<String> allowedNamespaces) { + factory.addAllowedNamespaces(allowedNamespaces); + } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java 2014-04-23 10:47:59 UTC (rev 4251) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java 2014-04-30 11:43:20 UTC (rev 4252) @@ -26,6 +26,8 @@ import java.util.Set; import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl; +import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeConversionStrategy; +import org.dllearner.algorithms.qtl.datastructures.impl.QueryTreeImpl.LiteralNodeSubsumptionStrategy; import org.semanticweb.owlapi.model.OWLClassExpression; import com.hp.hpl.jena.datatypes.RDFDatatype; @@ -148,5 +150,29 @@ RDFDatatype getDatatype(); Set<Literal> getLiterals(); + + /** + * @param edge + */ + void removeChildren(Object edge); + + /** + * @param stopIfChildIsResourceNode + * @return + */ + String getStringRepresentation(boolean stopIfChildIsResourceNode); + + /** + * @param literalNodeConversionStrategy + * @return + */ + OWLClassExpression asOWLClassExpression(LiteralNodeConversionStrategy literalNodeConversionStrategy); + + /** + * @param tree + * @param s + * @return + */ + boolean isSubsumedBy(QueryTree<N> tree, LiteralNodeSubsumptionStrategy s); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java 2014-04-23 10:47:59 UTC (rev 4251) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java 2014-04-30 11:43:20 UTC (rev 4252) @@ -19,8 +19,12 @@ */ package org.dllearner.algorithms.qtl.datastructures.impl; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; +import java.util.Calendar; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -36,20 +40,35 @@ import java.util.regex.Pattern; import javax.xml.bind.DatatypeConverter; +import javax.xml.transform.TransformerConfigurationException; import org.dllearner.algorithms.qtl.datastructures.NodeRenderer; import org.dllearner.algorithms.qtl.datastructures.QueryTree; +import org.dllearner.algorithms.qtl.datastructures.rendering.Edge; +import org.dllearner.algorithms.qtl.datastructures.rendering.Vertex; import org.dllearner.algorithms.qtl.filters.Filters; +import org.dllearner.utilities.PrefixCCMap; +import org.jgrapht.DirectedGraph; +import org.jgrapht.ext.EdgeNameProvider; +import org.jgrapht.ext.GraphMLExporter; +import org.jgrapht.ext.VertexNameProvider; +import org.jgrapht.graph.DefaultDirectedGraph; import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.model.OWLClassExpression; import org.semanticweb.owlapi.model.OWLDataFactory; import org.semanticweb.owlapi.model.OWLDataProperty; +import org.semanticweb.owlapi.model.OWLDataRange; +import org.semanticweb.owlapi.model.OWLDatatype; +import org.semanticweb.owlapi.model.OWLFacetRestriction; import org.semanticweb.owlapi.model.OWLLiteral; import org.semanticweb.owlapi.model.OWLObjectProperty; import org.semanticweb.owlapi.vocab.OWL2Datatype; +import org.semanticweb.owlapi.vocab.OWLFacet; +import org.xml.sax.SAXException; import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; +import com.google.common.collect.Sets; import com.hp.hpl.jena.datatypes.BaseDatatype; import com.hp.hpl.jena.datatypes.RDFDatatype; import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; @@ -59,9 +78,14 @@ import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.sparql.core.TriplePath; import com.hp.hpl.jena.sparql.syntax.ElementGroup; +import com.hp.hpl.jena.sparql.syntax.ElementPathBlock; import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock; +import com.hp.hpl.jena.sparql.syntax.ElementVisitorBase; +import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; /** * @@ -70,10 +94,32 @@ */ public class QueryTreeImpl<N> implements QueryTree<N>{ - enum NodeType{ + public enum NodeType{ RESOURCE, LITERAL, BLANK, VARIABLE; } + public enum LiteralNodeSubsumptionStrategy { + DATATYPE, + INTERVAL, + ENUMERATION, + OFF + } + + public enum LiteralNodeConversionStrategy{ + /** + * Literals in form of an enumeration, e.g. {3, 4, 10} + */ + DATA_ONE_OF, + /** + * Literals as an interval on the datatype, e.g. [>= 5 <=10] + */ + FACET_RESTRICTION, + /** + * Literals as datatype, e.g. xsd:integer + */ + SOME_VALUES_FROM + } + private N userObject; private QueryTreeImpl<N> parent; @@ -97,7 +143,6 @@ private Set<Literal> literals = new HashSet<Literal>(); - public QueryTreeImpl(N userObject) { this.userObject = userObject; children = new ArrayList<QueryTreeImpl<N>>(); @@ -116,9 +161,40 @@ label += "Values: " + object.getLiterals(); } } + label += object.isResourceNode() + "," + object.isLiteralNode(); + return label; + } + }; + + } + + public QueryTreeImpl(N userObject, NodeType nodeType) { + this.userObject = userObject; + children = new ArrayList<QueryTreeImpl<N>>(); + child2EdgeMap = new HashMap<QueryTree<N>, Object>(); + edge2ChildrenMap = new HashMap<String, List<QueryTree<N>>>(); + toStringRenderer = new NodeRenderer<N>() { + public String render(QueryTree<N> object) { + String label = object.toString() + "(" + object.getId() + ")"; + if(object.isLiteralNode()){ +// if(object.getLiterals().size() == 1){ +// label += object.getLiterals().iterator().next(); +// } else if(object.getLiterals().size() > 1){ +// label += "Values: " + object.getLiterals(); +// } + if(!object.getLiterals().isEmpty()){ + label += "Values: " + object.getLiterals(); + } + } + label += object.isResourceNode() + "," + object.isLiteralNode(); return label; } }; + if(nodeType == NodeType.RESOURCE){ + isResourceNode = true; + } else if(nodeType == NodeType.LITERAL){ + isResourceNode = true; + } } public QueryTreeImpl(QueryTree<N> tree){ @@ -130,8 +206,10 @@ subTree.setId(child.getId()); subTree.setIsLiteralNode(child.isLiteralNode()); subTree.setIsResourceNode(child.isResourceNode()); + subTree.addLiterals(child.getLiterals()); addChild(subTree, tree.getEdge(child)); } + setIsResourceNode(tree.isResourceNode()); } public boolean sameType(QueryTree<N> tree){ @@ -279,6 +357,29 @@ child.parent = null; } } + + /** + * Removes all children connected by the given edge. + */ + public void removeChildren(Object edge) { + List<QueryTree<N>> children = edge2ChildrenMap.remove(edge); + if(children != null){ + this.children.removeAll(children); + } +// List<QueryTree<N>> list = edge2ChildrenMap.get("http://dl-learner.org/carcinogenesis#hasAtom"); +// List<QueryTree<N>> newList = new ArrayList<QueryTree<N>>(); +// for (int i = 0; i < Math.min(list.size(), 11); i++) { +// QueryTreeImpl<N> keep = (QueryTreeImpl<N>) list.get(i); +// newList.add(keep); +// } +// list.clear(); +// list.addAll(newList); +// this.children.clear(); +// this.children = new ArrayList<QueryTreeImpl<N>>(); +// this.children.addAll((Collection<? extends QueryTreeImpl<N>>) list); +// edge2ChildrenMap.clear(); +// edge2ChildrenMap.put("http://dl-learner.org/carcinogenesis#hasAtom", list); + } public Object getEdge(QueryTree<N> child) { @@ -342,13 +443,15 @@ @Override public boolean isSubsumedBy(QueryTree<N> tree) { -// if(!sameType(tree)){ -// return false; -// } +// System.out.println("++++++++++++++++++++++++++++"); +// System.out.println(tree + "-" + this.userObject); +// System.out.println(tree.isResourceNode() + "," + tree.isLiteralNode() + "---" + this.isResourceNode + "," + this.isLiteralNode); +// if(tree.getParent() != null && getParent() != null) +// System.out.println(tree.getParent().getEdge(tree) + "#" + getParent().getEdge(this)); +// System.out.println(tree.getUserObject().equals("?") || tree.getUserObject().equals(this.userObject)); if(!(tree.getUserObject().equals("?") || tree.getUserObject().equals(this.userObject))){ return false; } - Object edge; for(QueryTree<N> child : tree.getChildren()){ boolean isSubsumed = false; @@ -364,8 +467,112 @@ } } return true; +// return isSubsumedBy(tree, LiteralNodeSubsumptionStrategy.OFF); } + public boolean isSubsumedBy(QueryTree<N> tree, LiteralNodeSubsumptionStrategy strategy) { +// System.out.println("++++++++++++++++++++++++++++"); +// System.out.println(tree + "-" + this.userObject); +// System.out.println(tree.isResourceNode() + "," + tree.isLiteralNode() + "---" + this.isResourceNode + "," + this.isLiteralNode); +// if(tree.getParent() != null && getParent() != null) +// System.out.println(tree.getParent().getEdge(tree) + "#" + getParent().getEdge(this)); +// System.out.println(tree.getUserObject().equals("?") || tree.getUserObject().equals(this.userObject)); + + if(tree.isResourceNode() && this.isResourceNode){ + if(!(tree.getUserObject().equals("?") || tree.getUserObject().equals(this.userObject))){ + return false; + } + } else if(tree.isLiteralNode() && this.isLiteralNode){ + if(!tree.getUserObject().equals(this.userObject)){ + if(strategy == LiteralNodeSubsumptionStrategy.OFF){ + return tree.getUserObject().equals("?") || tree.getUserObject().equals(this.userObject); + } else { + return subsumes(tree.getLiterals(), this.getLiterals(), strategy); + } + } + } else if(!tree.isVarNode() && this.isVarNode()){ + return false; + } else if(tree.isResourceNode() && this.isLiteralNode || tree.isLiteralNode() && this.isResourceNode){//node type mismatch + return false; + } + Object edge; + for(QueryTree<N> child : tree.getChildren()){ + boolean isSubsumed = false; + edge = tree.getEdge(child); + for(QueryTree<N> child2 : this.getChildren(edge)){ + if(child2.isSubsumedBy(child, strategy)){ + isSubsumed = true; + break; + } + } + if(!isSubsumed){ + return false; + } + } + return true; + } + + private boolean subsumes(Set<Literal> subsumer, Set<Literal> subsumee, LiteralNodeSubsumptionStrategy strategy){ + if(strategy == LiteralNodeSubsumptionStrategy.DATATYPE){ + + } else if(strategy == LiteralNodeSubsumptionStrategy.ENUMERATION){ + return subsumer.containsAll(subsumee); + } else if(strategy == LiteralNodeSubsumptionStrategy.INTERVAL){ + //check if both datatypes are the same + RDFDatatype subsumerDatatype = getDatatype(subsumer); + RDFDatatype subsumeeDatatype = getDatatype(subsumee); + if(!subsumerDatatype.equals(subsumeeDatatype)){ + return false; + } + + //avoid boolean datatypes for interval check as there are only 2 possible values + if(subsumerDatatype.equals(XSDDatatype.XSDboolean)){ + return true; + } + + //check if subsumee interval is contained in subsumer interval + Literal subsumerMin = getMin(subsumer); + Literal subsumerMax = getMax(subsumer); + + Literal subsumeeMin = getMin(subsumee); + Literal subsumeeMax = getMax(subsumee); + + boolean leftMoreGeneral = isLessOrEqual(subsumerMin, subsumeeMin); + boolean rightMoreGeneral = isGreaterOrEqual(subsumerMax, subsumeeMax); + + if(!(leftMoreGeneral && rightMoreGeneral)){ +// System.out.println("[" + subsumeeMin + "," + subsumeeMax + "] not in interval " + "[" + subsumerMin + "," + subsumerMax + "]"); + return false; + } + } + return true; + } + + /** + * Returns the datatype of the literals. Throws exception if there are multiple datatypes. + * @param literals + */ + private RDFDatatype getDatatype(Set<Literal> literals){ + RDFDatatype datatype = literals.iterator().next().getDatatype(); + return datatype; + } + + /** + * Checks if all literals have the same datatype. + * @param literals + * @return + */ + private boolean sameDatatype(Set<Literal> literals){ + Iterator<Literal> iterator = literals.iterator(); + RDFDatatype datatype = iterator.next().getDatatype(); + while(iterator.hasNext()){ + if(!iterator.next().getDatatype().equals(datatype)){ + return false; + } + } + return true; + } + @Override public boolean isSubsumedBy(QueryTree<N> tree, boolean stopAfterError) { if(!(tree.getUserObject().equals("?") || tree.getUserObject().equals(this.userObject))){ @@ -515,6 +722,15 @@ } public String getStringRepresentation(){ + return getStringRepresentation(false); + } + + /** + * Prints the query tree and shows children of resources only if enabled. + * @param stopWhenLeafNode + * @return + */ + public String getStringRepresentation(boolean stopIfChildIsResourceNode){ int depth = getPathToRoot().size(); StringBuilder sb = new StringBuilder(); if(isRoot()){ @@ -524,17 +740,19 @@ ren = ren.replace("\n", "\n" + sb); sb.append(ren); sb.append("\n"); - for (QueryTree<N> child : getChildren()) { - for (int i = 0; i < depth; i++) { - sb.append("\t"); + if(isRoot() || !isResourceNode || (isResourceNo... [truncated message content] |