From: <lor...@us...> - 2013-07-29 08:12:38
|
Revision: 4022 http://sourceforge.net/p/dl-learner/code/4022 Author: lorenz_b Date: 2013-07-29 08:12:30 +0000 (Mon, 29 Jul 2013) Log Message: ----------- Cont. ISLE Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -67,7 +67,7 @@ } catch (IOException e) { e.printStackTrace(); } - return null; + return documents; } /* (non-Javadoc) Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -3,9 +3,14 @@ */ package org.dllearner.algorithms.isle.index; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; import java.util.Set; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; /** * @author Lorenz Buehmann @@ -13,13 +18,32 @@ */ public class SimpleSemanticIndex implements SemanticIndex{ + private SyntacticIndex syntacticIndex; + private RDFSLabelEntityTextRetriever labelRetriever; + /** + * + */ + public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { + this.syntacticIndex = syntacticIndex; + labelRetriever = new RDFSLabelEntityTextRetriever(ontology); + } + + /* (non-Javadoc) * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) */ @Override public Set<String> getDocuments(Entity entity) { - return null; + Set<String> documents = new HashSet<String>(); + Map<String, Double> relevantText = labelRetriever.getRelevantText(entity); + + for (Entry<String, Double> entry : relevantText.entrySet()) { + String label = entry.getKey(); + documents.addAll(syntacticIndex.getDocuments(label)); + } + + return documents; } /* (non-Javadoc) @@ -27,7 +51,7 @@ */ @Override public int count(Entity entity) { - return 0; + return getDocuments(entity).size(); } /* (non-Javadoc) @@ -35,7 +59,7 @@ */ @Override public int getSize() { - return 0; + return syntacticIndex.getSize(); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -8,6 +8,7 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.log4j.Logger; import org.dllearner.algorithms.qtl.datastructures.QueryTree; import org.dllearner.learningproblems.Heuristics; @@ -16,6 +17,9 @@ public class NoiseSensitiveLGG<N> { + + private static final Logger logger = Logger.getLogger(NoiseSensitiveLGG.class.getName()); + private LGGGenerator<N> lggGenerator = new LGGGeneratorImpl<N>(); private Queue<EvaluatedQueryTree<N>> todoList; @@ -29,7 +33,8 @@ Monitor lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); init(trees); EvaluatedQueryTree<N> currentElement; - do{System.out.println("TODO list size: " + todoList.size()); + do{ + logger.trace("TODO list size: " + todoList.size()); //pick best element from todo list currentElement = todoList.poll(); for (QueryTree<N> example : currentElement.getUncoveredExamples()) { @@ -39,33 +44,36 @@ QueryTree<N> lgg = lggGenerator.getLGG(tree, example); lggMon.stop(); //compute examples which are not covered by LGG - Collection<QueryTree<N>> uncoveredExamples = new ArrayList<QueryTree<N>>(); - for (QueryTree<N> queryTree : trees) { - subMon.start(); - boolean subsumed = queryTree.isSubsumedBy(lgg); - subMon.stop(); - if(!subsumed){ - uncoveredExamples.add(queryTree); - } - } + Collection<QueryTree<N>> uncoveredExamples = getUncoveredTrees(lgg, trees); //compute score double score = Heuristics.getConfidenceInterval95WaldAverage(trees.size(), trees.size() - uncoveredExamples.size()); //add to todo list, if not already contained in todo list or solution list EvaluatedQueryTree<N> solution = new EvaluatedQueryTree<N>(lgg, uncoveredExamples, score); todo(solution); } - System.out.println("LGG time: " + lggMon.getTotal() + "ms"); - System.out.println("Avg. LGG time: " + lggMon.getAvg() + "ms"); - System.out.println("#LGG computations: " + lggMon.getHits()); - System.out.println("Subsumption test time: " + subMon.getTotal() + "ms"); - System.out.println("Avg. subsumption test time: " + subMon.getAvg() + "ms"); - System.out.println("#Subsumption tests: " + subMon.getHits()); solutions.add(currentElement); // todoList.remove(currentElement); } while(!terminationCriteriaSatisfied()); + logger.trace("LGG time: " + lggMon.getTotal() + "ms"); + logger.trace("Avg. LGG time: " + lggMon.getAvg() + "ms"); + logger.trace("#LGG computations: " + lggMon.getHits()); + logger.trace("Subsumption test time: " + subMon.getTotal() + "ms"); + logger.trace("Avg. subsumption test time: " + subMon.getAvg() + "ms"); + logger.trace("#Subsumption tests: " + subMon.getHits()); return new ArrayList<EvaluatedQueryTree<N>>(solutions); } + private Collection<QueryTree<N>> getUncoveredTrees(QueryTree<N> tree, List<QueryTree<N>> allTrees){ + Collection<QueryTree<N>> uncoveredTrees = new ArrayList<QueryTree<N>>(); + for (QueryTree<N> queryTree : allTrees) { + boolean subsumed = queryTree.isSubsumedBy(tree); + if(!subsumed){ + uncoveredTrees.add(queryTree); + } + } + return uncoveredTrees; + } + private void init(List<QueryTree<N>> trees){ todoList = new PriorityQueue<EvaluatedQueryTree<N>>(); solutions = new TreeSet<EvaluatedQueryTree<N>>(); @@ -76,9 +84,11 @@ for (QueryTree<N> queryTree : trees) {//System.out.println(queryTree.getStringRepresentation()); boolean distinct = true; for (QueryTree<N> otherTree : distinctTrees) { - if(queryTree.isSubsumedBy(otherTree)){ - distinct = false; - break; + if(!queryTree.equals(otherTree)){ + if(queryTree.isSameTreeAs(otherTree)){ + distinct = false; + break; + } } } if(distinct){ @@ -86,9 +96,8 @@ } } for (QueryTree<N> queryTree : distinctTrees) { - Collection<QueryTree<N>> uncoveredExamples = new ArrayList<QueryTree<N>>(distinctTrees); - uncoveredExamples.remove(queryTree); - double score = (trees.size() - uncoveredExamples.size()) / (double)trees.size(); + Collection<QueryTree<N>> uncoveredExamples = getUncoveredTrees(queryTree, trees); + double score = Heuristics.getConfidenceInterval95WaldAverage(trees.size(), trees.size() - uncoveredExamples.size()); todoList.add(new EvaluatedQueryTree<N>(queryTree, uncoveredExamples, score)); } } Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -268,7 +268,7 @@ private Model execModel(Model model) { HttpQuery httpQuery = makeHttpQuery() ; - httpQuery.setAccept(WebContent.contentTypeNTriplesAlt) ; + httpQuery.setAccept(WebContent.contentTypeTurtleAlt1) ; InputStream in = httpQuery.exec() ; //Don't assume the endpoint actually gives back the content type we asked for @@ -284,7 +284,7 @@ //Try to select language appropriately here based on the model content type Lang lang = WebContent.contentTypeToLang(actualContentType); if (! RDFLanguages.isTriples(lang)) throw new QueryException("Endpoint returned Content Type: " + actualContentType + " which is not a valid RDF Graph syntax"); - model.read(in, null, Lang.NTRIPLES.getName()) ; + model.read(in, null, Lang.TURTLE.getName()) ; return model ; } Modified: trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/main/java/org/dllearner/utilities/owl/OWLClassExpressionToSPARQLConverter.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -82,6 +82,8 @@ private Set<? extends OWLEntity> variableEntities = new HashSet<OWLEntity>(); private VariablesMapping mapping; + private boolean ignoreGenericTypeStatements = true; + private OWLClassExpression expr; public OWLClassExpressionToSPARQLConverter(VariablesMapping mapping) { this.mapping = mapping; @@ -96,6 +98,7 @@ } public String convert(String rootVariable, OWLClassExpression expr){ + this.expr = expr; reset(); variables.push(rootVariable); expr.accept(this); @@ -261,7 +264,9 @@ @Override public void visit(OWLClass ce) { - sparql += triple(variables.peek(), "a", render(ce)); + if(ce.equals(expr) || (ignoreGenericTypeStatements && !ce.isOWLThing())){ + sparql += triple(variables.peek(), "a", render(ce)); + } } @Override @@ -577,7 +582,9 @@ @Override public void visit(OWLDatatype node) { - sparql += "FILTER(DATATYPE(" + variables.peek() + "=<" + node.getIRI().toString() + ">))"; + if(ignoreGenericTypeStatements && !node.isRDFPlainLiteral() && !node.isTopDatatype()){ + sparql += "FILTER(DATATYPE(" + variables.peek() + "=<" + node.getIRI().toString() + ">))"; + } } @Override Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-07-17 11:44:41 UTC (rev 4021) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-07-29 08:12:30 UTC (rev 4022) @@ -6,6 +6,15 @@ import java.io.File; import java.util.Map; +import org.dllearner.algorithms.isle.index.OWLOntologyLuceneSyntacticIndexCreator; +import org.dllearner.algorithms.isle.index.SemanticIndex; +import org.dllearner.algorithms.isle.index.SimpleSemanticIndex; +import org.dllearner.algorithms.isle.index.SyntacticIndex; +import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric; +import org.dllearner.algorithms.isle.metrics.RelevanceMetric; +import org.dllearner.algorithms.isle.metrics.RelevanceUtils; +import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; import org.dllearner.core.AbstractReasonerComponent; import org.dllearner.core.KnowledgeSource; import org.dllearner.core.owl.Entity; @@ -16,9 +25,12 @@ import org.junit.Before; import org.junit.Test; import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.OWLDataFactory; import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.model.OWLOntologyManager; +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + import com.google.common.base.Joiner; /** @@ -29,10 +41,10 @@ private OWLOntologyManager manager; private OWLOntology ontology; + private OWLDataFactory df = new OWLDataFactoryImpl(); private NamedClass cls; private EntityTextRetriever textRetriever; - private LuceneSearcher searcher; - private Relevance relevance; + private RelevanceMetric relevance; private String searchField = "label"; /** @@ -43,9 +55,9 @@ ontology = manager.loadOntologyFromOntologyDocument(new File("../examples/isle/father_labeled.owl")); cls = new NamedClass("http://example.com/father#father"); textRetriever = new RDFSLabelEntityTextRetriever(ontology); - OWLOntologyLuceneIndex index = new OWLOntologyLuceneIndex(ontology, searchField); - searcher = new LuceneSearcher(index.getDirectory(), searchField); - relevance = new PMILuceneBasedRelevance(ontology, searcher, textRetriever); + SyntacticIndex syntacticIndex = new OWLOntologyLuceneSyntacticIndexCreator(ontology, df.getRDFSLabel(), searchField).buildIndex(); + SemanticIndex semanticIndex = new SimpleSemanticIndex(ontology, syntacticIndex); + relevance = new PMIRelevanceMetric(semanticIndex); } /** @@ -66,7 +78,7 @@ @Test public void testEntityRelevance() throws Exception { System.out.println("Relevant entities for entity " + cls + ":"); - Map<Entity, Double> entityRelevance = relevance.getEntityRelevance(cls); + Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance); System.out.println(Joiner.on("\n").join(entityRelevance.entrySet())); } @@ -80,7 +92,7 @@ lp.setClassToDescribe(cls); lp.init(); - Map<Entity, Double> entityRelevance = relevance.getEntityRelevance(cls); + Map<Entity, Double> entityRelevance = RelevanceUtils.getRelevantEntities(cls, ontology, relevance); NLPHeuristic heuristic = new NLPHeuristic(entityRelevance); ISLE isle = new ISLE(lp, reasoner); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |