From: <lor...@us...> - 2014-01-08 19:37:26
Revision: 4210 http://sourceforge.net/p/dl-learner/code/4210 Author: lorenz_b Date: 2014-01-08 19:37:22 +0000 (Wed, 08 Jan 2014) Log Message: ----------- Added ISLE test cases. Extended QTL2 DBpedia experiment. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner2.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java trunk/components-core/src/main/java/org/dllearner/reasoning/PelletReasoner.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/Experiment.java trunk/components-core/src/test/java/org/dllearner/algorithms/qtl/QALDExperiment.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTokenizer.java trunk/components-core/src/main/java/org/dllearner/utilities/TriplePatternExtractor.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaPlainExperiment.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaSemanticIndexExperiment.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaSyntacticIndexBasedExperiment.java Removed Paths: ------------- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTokenizer.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTokenizer.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTokenizer.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -0,0 +1,31 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.util.HashMap; +import java.util.List; + +import org.dllearner.algorithms.isle.index.Token; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * @author Lorenz Buehmann + * + */ +public class EntityTokenizer extends HashMap<Entity, List<Token>>{ + + + + + /* (non-Javadoc) + * @see java.util.HashMap#get(java.lang.Object) + */ + @Override + public List<Token> get(Object key) { + return super.get(key); + } + + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SolrSyntacticIndex.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -4,6 +4,7 @@ package org.dllearner.algorithms.isle.index.syntactic; import java.util.Collections; +import 
java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -41,6 +42,8 @@ long totalNumberOfDocuments = -1; + Map<Entity, Long> cache = new HashMap<>(); + public SolrSyntacticIndex(OWLOntology ontology, String solrServerURL, String searchField) { this.searchField = searchField; solr = new HttpSolrServer(solrServerURL); @@ -102,6 +105,9 @@ */ @Override public long getNumberOfDocumentsFor(Entity entity) { + if(cache.containsKey(entity)){ + return cache.get(entity); + } Map<List<Token>, Double> relevantText = textRetriever.getRelevantText(entity); String queryString = "("; @@ -123,6 +129,7 @@ try { QueryResponse response = solr.query(query); SolrDocumentList list = response.getResults(); + cache.put(entity, list.getNumFound()); return list.getNumFound(); } catch (SolrServerException e) { e.printStackTrace(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -3,21 +3,28 @@ */ package org.dllearner.algorithms.isle.wsd; -import com.google.common.base.Joiner; -import com.google.common.collect.Sets; +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + import org.dllearner.algorithms.isle.StructuralEntityContext; import org.dllearner.algorithms.isle.VSMCosineDocumentSimilarity; import org.dllearner.algorithms.isle.index.Annotation; import org.dllearner.algorithms.isle.index.EntityScorePair; import org.dllearner.algorithms.isle.index.SemanticAnnotation; +import org.dllearner.algorithms.isle.index.Token; +import org.dllearner.algorithms.isle.textretrieval.AnnotationEntityTextRetriever; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; -import java.io.IOException; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import com.google.common.base.Joiner; +import com.google.common.collect.Sets; /** * @author Lorenz Buehmann @@ -26,6 +33,7 @@ public class StructureBasedWordSenseDisambiguation extends WordSenseDisambiguation{ private ContextExtractor contextExtractor; + private AnnotationEntityTextRetriever textRetriever; /** * @param ontology @@ -33,6 +41,8 @@ public StructureBasedWordSenseDisambiguation(ContextExtractor contextExtractor, OWLOntology ontology) { super(ontology); this.contextExtractor = contextExtractor; + + textRetriever = new RDFSLabelEntityTextRetriever(ontology); } /* (non-Javadoc) @@ -40,6 +50,41 @@ */ @Override public SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities) { + //filter out candidates for which the head noun does not match with the annotated token + for (Iterator<EntityScorePair> iterator = candidateEntities.iterator(); iterator.hasNext();) { + EntityScorePair entityPair = iterator.next(); + Entity entity = entityPair.getEntity(); + + Map<List<Token>, Double> relevantText = textRetriever.getRelevantText(entity); + + boolean 
matched = false; + + for (Entry<List<Token>, Double> entry : relevantText.entrySet()) { + List<Token> tokens = entry.getKey(); + + + for (Token token : tokens) { + if(token.isHead()){ + for (Token annotatedToken : annotation.getTokens()) { + if(token.getRawForm().equals(annotatedToken.getRawForm())){ + matched = true; + } + } + } + } + + } + + if(!matched){ + iterator.remove(); + } + } + + System.out.println(annotation); + for (EntityScorePair entityScorePair : candidateEntities) { + System.out.println(entityScorePair); + } + if(!candidateEntities.isEmpty()){ //get the context of the annotated token List<String> tokenContext = contextExtractor.extractContext(annotation); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner2.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner2.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/ObjectPropertyDomainAxiomLearner2.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -232,7 +232,7 @@ private void runSPARQL1_0_Mode() { workingModel = ModelFactory.createDefaultModel(); - int limit = 1000; + int limit = 10000; int offset = 0; String baseQuery = "CONSTRUCT {?s a ?type.} WHERE {?s <%s> ?o. ?s a ?type.} LIMIT %d OFFSET %d"; String query = String.format(baseQuery, propertyToDescribe.getName(), limit, offset); @@ -246,7 +246,7 @@ int all = 1; while (rs.hasNext()) { qs = rs.next(); - all = qs.getLiteral("all").getInt(); + all = qs.getLiteral("all").getInt();System.out.println(all); } // get class and number of instances @@ -318,21 +318,38 @@ ObjectPropertyDomainAxiomLearner2 l = new ObjectPropertyDomainAxiomLearner2(ks); l.setReasoner(reasoner); - for (ObjectProperty p : reasoner.getOWLObjectProperties("http://dbpedia.org/ontology/")) { - System.out.println(p); - l.setPropertyToDescribe(p); - l.setMaxExecutionTimeInSeconds(10); - l.addFilterNamespace("http://dbpedia.org/ontology/"); -// l.setReturnOnlyNewAxioms(true); - l.init(); -// l.start(); - l.run(); - List<EvaluatedAxiom> axioms = l.getCurrentlyBestEvaluatedAxioms(10, 0.5); -// System.out.println(axioms); - System.out.println(l.getBestEvaluatedAxiom()); - } + l.setPropertyToDescribe(new ObjectProperty("http://dbpedia.org/ontology/birthPlace")); + l.setMaxExecutionTimeInSeconds(20); + l.addFilterNamespace("http://dbpedia.org/ontology/"); + l.init(); + l.start(); +// l.run(); + System.out.println(l.getBestEvaluatedAxiom()); + ObjectPropertyDomainAxiomLearner l2 = new ObjectPropertyDomainAxiomLearner(ks); + l2.setReasoner(reasoner); + l2.setPropertyToDescribe(new ObjectProperty("http://dbpedia.org/ontology/birthPlace")); + l2.setMaxExecutionTimeInSeconds(10); + l2.addFilterNamespace("http://dbpedia.org/ontology/"); + l2.init(); + l2.start(); + System.out.println(l2.getCurrentlyBestEvaluatedAxioms(0.2)); + System.out.println(l2.getBestEvaluatedAxiom()); +// for (ObjectProperty p : reasoner.getOWLObjectProperties("http://dbpedia.org/ontology/")) { +// System.out.println(p); +// l.setPropertyToDescribe(p); +// l.setMaxExecutionTimeInSeconds(10); +// l.addFilterNamespace("http://dbpedia.org/ontology/"); +//// l.setReturnOnlyNewAxioms(true); +// l.init(); +//// l.start(); +// l.run(); +// List<EvaluatedAxiom> axioms = l.getCurrentlyBestEvaluatedAxioms(10, 0.5); +//// System.out.println(axioms); +// System.out.println(l.getBestEvaluatedAxiom()); +// } + // 
for(EvaluatedAxiom axiom : axioms){ // printSubset(l.getPositiveExamples(axiom), 10); // printSubset(l.getNegativeExamples(axiom), 10); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/QueryTree.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -147,6 +147,6 @@ RDFDatatype getDatatype(); - List<Literal> getLiterals(); + Set<Literal> getLiterals(); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/datastructures/impl/QueryTreeImpl.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -95,7 +95,7 @@ private boolean isResourceNode = false; private boolean isBlankNode = false; - private List<Literal> literals = new ArrayList<Literal>(); + private Set<Literal> literals = new HashSet<Literal>(); public QueryTreeImpl(N userObject) { @@ -107,6 +107,11 @@ public String render(QueryTree<N> object) { String label = object.toString() + "(" + object.getId() + ")"; if(object.isLiteralNode()){ +// if(object.getLiterals().size() == 1){ +// label += object.getLiterals().iterator().next(); +// } else if(object.getLiterals().size() > 1){ +// label += "Values: " + object.getLiterals(); +// } if(!object.getLiterals().isEmpty()){ label += "Values: " + object.getLiterals(); } @@ -809,7 +814,7 @@ } } - private String getFilter(String varName, List<Literal> literals){ + private String getFilter(String varName, Set<Literal> literals){ String filter = "FILTER("; Literal min = getMin(literals); @@ -824,7 +829,7 @@ return filter; } - private Literal getMin(List<Literal> literals){ + private Literal getMin(Set<Literal> literals){ Iterator<Literal> iter = literals.iterator(); Literal min = iter.next(); Literal l; @@ -841,7 +846,7 @@ return min; } - private Literal getMax(List<Literal> literals){ + private Literal getMax(Set<Literal> literals){ Iterator<Literal> iter = literals.iterator(); Literal max = iter.next(); Literal l; @@ -928,7 +933,7 @@ literals.add(l); } - public List<Literal> getLiterals() { + public Set<Literal> getLiterals() { return literals; } @@ -939,7 +944,7 @@ public RDFDatatype getDatatype(){ if(isLiteralNode){ if(!literals.isEmpty()){ - return literals.get(0).getDatatype(); + return literals.iterator().next().getDatatype(); } else { return null; } @@ -972,7 +977,7 @@ if(child.isLiteralNode()){ OWLDataProperty p = df.getOWLDataProperty(IRI.create((String) tree.getEdge(child))); if(childLabel.equals("?")){ - List<Literal> literals = child.getLiterals(); + Set<Literal> literals = child.getLiterals(); Literal lit = literals.iterator().next(); RDFDatatype datatype = lit.getDatatype(); String datatypeURI; @@ -983,7 +988,7 @@ } classExpressions.add(df.getOWLDataSomeValuesFrom(p, df.getOWLDatatype(IRI.create(datatypeURI)))); } else { - List<Literal> literals = child.getLiterals(); + Set<Literal> literals = child.getLiterals(); Literal lit = literals.iterator().next(); RDFDatatype datatype = lit.getDatatype(); OWLLiteral owlLiteral; Modified: 
trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/impl/QueryTreeFactoryImpl.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -221,7 +221,12 @@ } }; } - Iterator<Statement> it = model.listStatements(s, null, (RDFNode)null).filterKeep(nsFilter); + Iterator<Statement> it; + if(nsFilter != null){ + it = model.listStatements(s, null, (RDFNode)null).filterKeep(nsFilter); + } else { + it = model.listStatements(s, null, (RDFNode)null); + } Statement st; SortedSet<Statement> statements; Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/LGGGeneratorImpl.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -131,11 +131,11 @@ logger.debug(tree2.getStringRepresentation()); } - QueryTree<N> lgg; //firstly, we check if both root nodes are resource nodes and have the same URI, i.e. the trees describe the same resource //if YES all child nodes should be also the same and we can just return one of the two tree as LGG - if(tree1.isResourceNode() && tree2.isResourceNode() && tree1.getUserObject().equals(tree2.getUserObject())){ + if((tree1.isResourceNode() && tree2.isResourceNode() || tree1.isLiteralNode() && tree2.isLiteralNode()) + && tree1.getUserObject().equals(tree2.getUserObject())){ if(logger.isDebugEnabled()){ logger.debug("Early termination. 
Tree 1(" + tree1 + ") and tree 2(" + tree2 + ") describe the same resource."); } @@ -171,7 +171,7 @@ lgg.setUserObject((N)"?"); lgg.setIsLiteralNode(false); lgg.setIsResourceNode(false); - } + } if(tree1.isLiteralNode() && tree2.isLiteralNode()){ RDFDatatype d1 = tree1.getDatatype(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/qtl/operations/lgg/NoiseSensitiveLGG.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -27,6 +27,10 @@ private SortedSet<EvaluatedQueryTree<N>> solutions; private double currentlyBestScore = 0d; + + private List<QueryTree<N>> posExamples; + + private List<QueryTree<N>> negExamples; public NoiseSensitiveLGG() { } @@ -36,7 +40,11 @@ } public List<EvaluatedQueryTree<N>> computeLGG(List<QueryTree<N>> posExamples, List<QueryTree<N>> negExamples){ + this.posExamples = posExamples; + this.negExamples = negExamples; + currentlyBestScore = 0d; + Monitor subMon = MonitorFactory.getTimeMonitor("subsumption-mon"); Monitor lggMon = MonitorFactory.getTimeMonitor("lgg-mon"); init(posExamples, negExamples); @@ -51,23 +59,14 @@ lggMon.start(); QueryTree<N> lgg = lggGenerator.getLGG(tree, example); lggMon.stop(); - //compute positive examples which are not covered by LGG - Collection<QueryTree<N>> uncoveredPositiveExamples = getUncoveredTrees(lgg, posExamples); - //compute negative examples which are covered by LGG - Collection<QueryTree<N>> coveredNegativeExamples = getCoveredTrees(lgg, negExamples); - //compute score - int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); - double recall = coveredPositiveExamples / (double)posExamples.size(); - double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) - ? 0 - : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); - double score = Heuristics.getFScore(recall, precision); - if(score > currentlyBestScore){ + //evaluate the LGG + EvaluatedQueryTree<N> solution = evaluate(lgg); + + if(solution.getScore() > currentlyBestScore){ //add to todo list, if not already contained in todo list or solution list - EvaluatedQueryTree<N> solution = new EvaluatedQueryTree<N>(lgg, uncoveredPositiveExamples, coveredNegativeExamples, score); todo(solution); - currentlyBestScore = score; + currentlyBestScore = solution.getScore(); } } @@ -83,6 +82,25 @@ return new ArrayList<EvaluatedQueryTree<N>>(solutions); } + private EvaluatedQueryTree<N> evaluate(QueryTree<N> lgg){ + //compute positive examples which are not covered by LGG + Collection<QueryTree<N>> uncoveredPositiveExamples = getUncoveredTrees(lgg, posExamples); + //compute negative examples which are covered by LGG + Collection<QueryTree<N>> coveredNegativeExamples = getCoveredTrees(lgg, negExamples); + //compute score + int coveredPositiveExamples = posExamples.size() - uncoveredPositiveExamples.size(); + double recall = coveredPositiveExamples / (double)posExamples.size(); + double precision = (coveredNegativeExamples.size() + coveredPositiveExamples == 0) + ? 
0 + : coveredPositiveExamples / (double)(coveredPositiveExamples + coveredNegativeExamples.size()); + + double score = Heuristics.getFScore(recall, precision); + + EvaluatedQueryTree<N> solution = new EvaluatedQueryTree<N>(lgg, uncoveredPositiveExamples, coveredNegativeExamples, score); + + return solution; + } + /** * Return all trees from the given list {@code allTrees} which are not already subsumed by {@code tree}. * @param tree Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -268,7 +268,7 @@ private Model execModel(Model model) { HttpQuery httpQuery = makeHttpQuery() ; - httpQuery.setAccept(WebContent.contentTypeTurtleAlt1) ; + httpQuery.setAccept(WebContent.contentTypeRDFXML) ; InputStream in = httpQuery.exec() ; //Don't assume the endpoint actually gives back the content type we asked for @@ -284,7 +284,7 @@ //Try to select language appropriately here based on the model content type Lang lang = WebContent.contentTypeToLang(actualContentType); if (! RDFLanguages.isTriples(lang)) throw new QueryException("Endpoint returned Content Type: " + actualContentType + " which is not a valid RDF Graph syntax"); - model.read(in, null, Lang.TURTLE.getName()) ; + model.read(in, null, Lang.RDFXML.getName()) ; return model ; } Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/PelletReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/PelletReasoner.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/main/java/org/dllearner/reasoning/PelletReasoner.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -1448,7 +1448,7 @@ concepts.add(new Thing()); } else if(concept.isOWLNothing()) { concepts.add(new Nothing()); - } else { + } else if(!concept.isBuiltIn() && !concept.getIRI().isReservedVocabulary()){ concepts.add(new NamedClass(concept.toStringID())); } } Added: trunk/components-core/src/main/java/org/dllearner/utilities/TriplePatternExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/utilities/TriplePatternExtractor.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/utilities/TriplePatternExtractor.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -0,0 +1,249 @@ +package org.dllearner.utilities; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.collections15.ListUtils; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.sparql.core.TriplePath; +import com.hp.hpl.jena.sparql.core.Var; +import com.hp.hpl.jena.sparql.syntax.Element; +import com.hp.hpl.jena.sparql.syntax.ElementFilter; +import com.hp.hpl.jena.sparql.syntax.ElementGroup; +import com.hp.hpl.jena.sparql.syntax.ElementOptional; +import com.hp.hpl.jena.sparql.syntax.ElementPathBlock; +import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock; +import com.hp.hpl.jena.sparql.syntax.ElementUnion; +import com.hp.hpl.jena.sparql.syntax.ElementVisitorBase; +import 
com.hp.hpl.jena.sparql.util.VarUtils; + +public class TriplePatternExtractor extends ElementVisitorBase { + + private Set<Triple> triplePattern; + + private Set<Triple> candidates; + + private boolean inOptionalClause = false; + + private int unionCount = 0; + private int optionalCount = 0; + private int filterCount = 0; + + /** + * Returns all triple patterns in given SPARQL query that have the given node in subject position, i.e. the outgoing + * triple patterns. + * @param query The SPARQL query. + * @param node + * @return + */ + public Set<Triple> extractOutgoingTriplePatterns(Query query, Node node){ + Set<Triple> triplePatterns = extractTriplePattern(query, false); + //remove triple patterns not containing triple patterns with given node in subject position + for (Iterator<Triple> iterator = triplePatterns.iterator(); iterator.hasNext();) { + Triple triple = iterator.next(); + if(!triple.subjectMatches(node)){ + iterator.remove(); + } + } + return triplePatterns; + } + + /** + * Returns all triple patterns in given SPARQL query that have the given node in object position, i.e. the ingoing + * triple patterns. + * @param query The SPARQL query. + * @param node + * @return + */ + public Set<Triple> extractIngoingTriplePatterns(Query query, Node node){ + Set<Triple> triplePatterns = extractTriplePattern(query, false); + //remove triple patterns not containing triple patterns with given node in object position + for (Iterator<Triple> iterator = triplePatterns.iterator(); iterator.hasNext();) { + Triple triple = iterator.next(); + if(!triple.objectMatches(node)){ + iterator.remove(); + } + } + return triplePatterns; + } + + /** + * Returns all triple patterns in given SPARQL query that have the given node either in subject or in object position, i.e. + * the ingoing and outgoing triple patterns. + * @param query The SPARQL query. + * @param node + * @return + */ + public Set<Triple> extractTriplePatterns(Query query, Node node){ + Set<Triple> triplePatterns = new HashSet<Triple>(); + triplePatterns.addAll(extractIngoingTriplePatterns(query, node)); + triplePatterns.addAll(extractOutgoingTriplePatterns(query, node)); + return triplePatterns; + } + + /** + * Returns triple patterns for each projection variable v such that v is either in subject or object position. + * @param query The SPARQL query. + * @param node + * @return + */ + public Map<Var,Set<Triple>> extractTriplePatternsForProjectionVars(Query query){ + Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<Var,Set<Triple>>(); + for (Var var : query.getProjectVars()) { + Set<Triple> triplePatterns = new HashSet<Triple>(); + triplePatterns.addAll(extractIngoingTriplePatterns(query, var)); + triplePatterns.addAll(extractOutgoingTriplePatterns(query, var)); + var2TriplePatterns.put(var, triplePatterns); + } + return var2TriplePatterns; + } + + /** + * Returns triple patterns for each projection variable v such that v is in subject position. + * @param query The SPARQL query. + * @param node + * @return + */ + public Map<Var,Set<Triple>> extractOutgoingTriplePatternsForProjectionVars(Query query){ + Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<Var,Set<Triple>>(); + for (Var var : query.getProjectVars()) { + Set<Triple> triplePatterns = new HashSet<Triple>(); + triplePatterns.addAll(extractOutgoingTriplePatterns(query, var)); + var2TriplePatterns.put(var, triplePatterns); + } + return var2TriplePatterns; + } + + /** + * Returns triple patterns for each projection variable v such that v is in object position. 
+ * @param query The SPARQL query. + * @param node + * @return + */ + public Map<Var,Set<Triple>> extractIngoingTriplePatternsForProjectionVars(Query query){ + Map<Var,Set<Triple>> var2TriplePatterns = new HashMap<Var,Set<Triple>>(); + for (Var var : query.getProjectVars()) { + Set<Triple> triplePatterns = new HashSet<Triple>(); + triplePatterns.addAll(extractIngoingTriplePatterns(query, var)); + var2TriplePatterns.put(var, triplePatterns); + } + return var2TriplePatterns; + } + + public Set<Triple> extractTriplePattern(Query query){ + return extractTriplePattern(query, false); + } + + public Set<Triple> extractTriplePattern(Query query, boolean ignoreOptionals){ + triplePattern = new HashSet<Triple>(); + candidates = new HashSet<Triple>(); + + query.getQueryPattern().visit(this); + + //postprocessing: triplepattern in OPTIONAL clause + if(!ignoreOptionals){ + if(query.isSelectType()){ + for(Triple t : candidates){ + if(!ListUtils.intersection(new ArrayList<Var>(VarUtils.getVars(t)), query.getProjectVars()).isEmpty()){ + triplePattern.add(t); + } + } + } + } + + return triplePattern; + } + + public Set<Triple> extractTriplePattern(ElementGroup group){ + return extractTriplePattern(group, false); + } + + public Set<Triple> extractTriplePattern(ElementGroup group, boolean ignoreOptionals){ + triplePattern = new HashSet<Triple>(); + candidates = new HashSet<Triple>(); + + group.visit(this); + + //postprocessing: triplepattern in OPTIONAL clause + if(!ignoreOptionals){ + for(Triple t : candidates){ + triplePattern.add(t); + } + } + + return triplePattern; + } + + @Override + public void visit(ElementGroup el) { + for (Iterator<Element> iterator = el.getElements().iterator(); iterator.hasNext();) { + Element e = iterator.next(); + e.visit(this); + } + } + + @Override + public void visit(ElementOptional el) { + optionalCount++; + inOptionalClause = true; + el.getOptionalElement().visit(this); + inOptionalClause = false; + } + + @Override + public void visit(ElementTriplesBlock el) { + for (Iterator<Triple> iterator = el.patternElts(); iterator.hasNext();) { + Triple t = iterator.next(); + if(inOptionalClause){ + candidates.add(t); + } else { + triplePattern.add(t); + } + } + } + + @Override + public void visit(ElementPathBlock el) { + for (Iterator<TriplePath> iterator = el.patternElts(); iterator.hasNext();) { + TriplePath tp = iterator.next(); + if(inOptionalClause){ + candidates.add(tp.asTriple()); + } else { + triplePattern.add(tp.asTriple()); + } + } + } + + @Override + public void visit(ElementUnion el) { + unionCount++; + for (Iterator<Element> iterator = el.getElements().iterator(); iterator.hasNext();) { + Element e = iterator.next(); + e.visit(this); + } + } + + @Override + public void visit(ElementFilter el) { + filterCount++; + } + + public int getUnionCount() { + return unionCount; + } + + public int getOptionalCount() { + return optionalCount; + } + + public int getFilterCount() { + return filterCount; + } +} Deleted: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java 2014-01-02 13:07:18 UTC (rev 4209) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -1,209 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle; - -import java.io.BufferedInputStream; -import java.io.ByteArrayInputStream; -import 
java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - -import org.apache.commons.compress.compressors.CompressorException; -import org.apache.commons.compress.compressors.CompressorInputStream; -import org.apache.commons.compress.compressors.CompressorStreamFactory; -import org.dllearner.algorithms.isle.index.Index; -import org.dllearner.algorithms.isle.index.syntactic.SolrSyntacticIndex; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.utilities.owl.OWLEntityTypeAdder; -import org.semanticweb.owlapi.apibinding.OWLManager; -import org.semanticweb.owlapi.model.AxiomType; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyCreationException; -import org.semanticweb.owlapi.model.OWLOntologyManager; - -import com.google.common.collect.Sets; -import com.hp.hpl.jena.rdf.model.Literal; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.Property; -import com.hp.hpl.jena.rdf.model.RDFNode; -import com.hp.hpl.jena.rdf.model.Statement; -import com.hp.hpl.jena.rdf.model.StmtIterator; -import com.hp.hpl.jena.vocabulary.OWL; -import com.hp.hpl.jena.vocabulary.RDF; -import com.hp.hpl.jena.vocabulary.RDFS; -import com.hp.hpl.jena.vocabulary.XSD; - -/** - * @author Lorenz Buehmann - * - */ -public class DBpediaExperiment extends Experiment{ - - final SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); - final int maxNrOfInstancesPerClass = 10; - static final String solrServerURL = "http://solr.aksw.org/en_dbpedia_resources/"; - static final String searchField = "comment"; - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.Experiment#getIndex() - */ - @Override - protected Index getIndex() { - return new SolrSyntacticIndex(ontology, solrServerURL, searchField); - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.Experiment#getOntology() - */ - @Override - protected OWLOntology getOntology() { - //load the DBpedia schema - OWLOntology schema = null; - try { - URL url = new URL("http://downloads.dbpedia.org/3.9/dbpedia_3.9.owl.bz2"); - InputStream is = new BufferedInputStream(url.openStream()); - CompressorInputStream in = new CompressorStreamFactory().createCompressorInputStream("bzip2", is); - schema = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(in); - } catch (MalformedURLException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } catch (CompressorException e) { - e.printStackTrace(); - } catch (OWLOntologyCreationException e) { - e.printStackTrace(); - } - //load some sample data for the machine learning part - Model sample = KnowledgebaseSampleGenerator.createKnowledgebaseSample( - endpoint, - "http://dbpedia.org/ontology/", - Sets.newHashSet(new NamedClass("http://dbpedia.org/ontology/Person")), - maxNrOfInstancesPerClass); - cleanUpModel(sample); - filter(sample, "http://dbpedia.org/ontology/"); - OWLEntityTypeAdder.addEntityTypes(sample); -// StmtIterator iterator = sample.listStatements(); -// while(iterator.hasNext()){ -// System.out.println(iterator.next()); -// } - - try { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - sample.write(baos, "TURTLE", null); - OWLOntologyManager man = OWLManager.createOWLOntologyManager(); - OWLOntology 
ontology = man.loadOntologyFromOntologyDocument(new ByteArrayInputStream(baos.toByteArray())); - man.addAxioms(ontology, schema.getAxioms()); - man.removeAxioms(ontology, ontology.getAxioms(AxiomType.FUNCTIONAL_DATA_PROPERTY)); - man.removeAxioms(ontology, ontology.getAxioms(AxiomType.FUNCTIONAL_OBJECT_PROPERTY)); - man.removeAxioms(ontology, ontology.getAxioms(AxiomType.DATA_PROPERTY_RANGE)); - return ontology; - } catch (Exception e) { - e.printStackTrace(); - } - - return null; - } - - /** - * Filter triples which are not relevant based on the given knowledge base - * namespace. - * - * @param model - * @param namespace - */ - private void filter(Model model, String namespace) { - List<Statement> statementsToRemove = new ArrayList<Statement>(); - for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) { - Statement st = iter.next(); - Property predicate = st.getPredicate(); - if (predicate.equals(RDF.type)) { - if (!st.getObject().asResource().getURI().startsWith(namespace)) { - statementsToRemove.add(st); - } else if (st.getObject().equals(OWL.FunctionalProperty.asNode())) { - statementsToRemove.add(st); - } else if (st.getObject().isLiteral() && st.getObject().asLiteral().getDatatypeURI().equals(XSD.gYear.getURI())) { - statementsToRemove.add(st); - } - } else if (!predicate.equals(RDFS.subClassOf) && !predicate.equals(OWL.sameAs) && !predicate.asResource().getURI().startsWith(namespace)) { - statementsToRemove.add(st); - } - } - model.remove(statementsToRemove); - } - - private static void cleanUpModel(Model model) { - // filter out triples with String literals, as therein often occur - // some syntax errors and they are not relevant for learning - List<Statement> statementsToRemove = new ArrayList<Statement>(); - for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) { - Statement st = iter.next(); - RDFNode object = st.getObject(); - if (object.isLiteral()) { - // statementsToRemove.add(st); - Literal lit = object.asLiteral(); - if (lit.getDatatype() == null || lit.getDatatype().equals(XSD.xstring)) { - st.changeObject("shortened", "en"); - } else if (lit.getDatatype().getURI().equals(XSD.gYear.getURI())) { - statementsToRemove.add(st); - // System.err.println("REMOVE " + st); - } else if (lit.getDatatype().getURI().equals(XSD.gYearMonth.getURI())) { - statementsToRemove.add(st); -// System.err.println("REMOVE " + st); - } - } - //remove statements like <x a owl:Class> - if (st.getPredicate().equals(RDF.type)) { - if (object.equals(RDFS.Class.asNode()) || object.equals(OWL.Class.asNode()) || object.equals(RDFS.Literal.asNode()) - || object.equals(RDFS.Resource)) { - statementsToRemove.add(st); - } - } - - //remove unwanted properties - String dbo = "http://dbpedia.org/ontology/"; - Set<String> blackList = Sets.newHashSet(dbo + "wikiPageDisambiguates",dbo + "wikiPageExternalLink", - dbo + "wikiPageID", dbo + "wikiPageInterLanguageLink", dbo + "wikiPageRedirects", dbo + "wikiPageRevisionID", - dbo + "wikiPageWikiLink"); - for(String bl: blackList){ - if (st.getPredicate().getURI().equals(bl)) { - statementsToRemove.add(st); - } - } - } - - model.remove(statementsToRemove); - } - - - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.Experiment#getDocuments() - */ - @Override - protected Set<String> getDocuments() { - Set<String> documents = new HashSet<String>(); - - documents.addAll(DBpediaCorpusGenerator.getDBpediaCorpusSample( - "http://dbpedia.org/ontology/abstract", - Sets.newHashSet(new 
NamedClass("http://dbpedia.org/ontology/Person")), - maxNrOfInstancesPerClass)); - - documents.clear(); - documents.add("Thomas Cruise Mapother IV, widely known as Tom Cruise, is an American film player and producer. He has been nominated for three Academy Awards and has won three Golden Globe Awards. He started his career at age 19 in the 1981 film Taps. His first leading role was in Risky Business, released in August 1983. Cruise became a full-fledged movie star after starring in Top Gun (1986). He is well known for his role as secret agent Ethan Hunt in the Mission: Impossible film series between 1996 and 2011. Cruise has starred in many Hollywood blockbusters, including Rain Man (1988), A Few Good Men (1992), Jerry Maguire (1996), Vanilla Sky (2001), Minority Report (2002), The Last Samurai (2003), Collateral (2004), War of the Worlds (2005), Tropic Thunder (2008) and Jack Reacher (2012). As of 2012, Cruise is Hollywood's highest-paid actor. Cruise is known for his Scientologist faith and for his support of the Church of Scientology."); - - return documents; - } - - public static void main(String[] args) throws Exception { - new DBpediaExperiment().run(new NamedClass("http://dbpedia.org/ontology/Person")); - } -} Added: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaPlainExperiment.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaPlainExperiment.java (rev 0) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaPlainExperiment.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -0,0 +1,200 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.commons.compress.compressors.CompressorException; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.algorithms.isle.index.syntactic.SolrSyntacticIndex; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.utilities.owl.OWLEntityTypeAdder; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.AxiomType; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; + +import com.google.common.collect.Sets; +import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; +import com.hp.hpl.jena.vocabulary.XSD; + +/** + * @author Lorenz Buehmann + * + */ +public class DBpediaPlainExperiment extends Experiment{ + + final SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + final int maxNrOfInstancesPerClass = 10; + static final String 
solrServerURL = "http://solr.aksw.org/en_dbpedia_resources/"; + static final String searchField = "comment"; + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.Experiment#getIndex() + */ + @Override + protected Index getIndex() { + return null; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.Experiment#getOntology() + */ + @Override + protected OWLOntology getOntology() { + //load the DBpedia schema + OWLOntology schema = null; + try { + URL url = new URL("http://downloads.dbpedia.org/3.9/dbpedia_3.9.owl.bz2"); + InputStream is = new BufferedInputStream(url.openStream()); + CompressorInputStream in = new CompressorStreamFactory().createCompressorInputStream("bzip2", is); + schema = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(in); + } catch (MalformedURLException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } catch (CompressorException e) { + e.printStackTrace(); + } catch (OWLOntologyCreationException e) { + e.printStackTrace(); + } + //load some sample data for the machine learning part + Model sample = KnowledgebaseSampleGenerator.createKnowledgebaseSample( + endpoint, + "http://dbpedia.org/ontology/", + Sets.newHashSet(classToDescribe), + maxNrOfInstancesPerClass); + cleanUpModel(sample); + filter(sample, "http://dbpedia.org/ontology/"); + OWLEntityTypeAdder.addEntityTypes(sample); +// StmtIterator iterator = sample.listStatements(); +// while(iterator.hasNext()){ +// System.out.println(iterator.next()); +// } + + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + sample.write(baos, "TURTLE", null); + OWLOntologyManager man = OWLManager.createOWLOntologyManager(); + OWLOntology ontology = man.loadOntologyFromOntologyDocument(new ByteArrayInputStream(baos.toByteArray())); + man.addAxioms(ontology, schema.getAxioms()); + man.removeAxioms(ontology, ontology.getAxioms(AxiomType.FUNCTIONAL_DATA_PROPERTY)); + man.removeAxioms(ontology, ontology.getAxioms(AxiomType.FUNCTIONAL_OBJECT_PROPERTY)); + man.removeAxioms(ontology, ontology.getAxioms(AxiomType.DATA_PROPERTY_RANGE)); + return ontology; + } catch (Exception e) { + e.printStackTrace(); + } + + return null; + } + + /** + * Filter triples which are not relevant based on the given knowledge base + * namespace. 
+ * + * @param model + * @param namespace + */ + private void filter(Model model, String namespace) { + List<Statement> statementsToRemove = new ArrayList<Statement>(); + for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) { + Statement st = iter.next(); + Property predicate = st.getPredicate(); + if (predicate.equals(RDF.type)) { + if (!st.getObject().asResource().getURI().startsWith(namespace)) { + statementsToRemove.add(st); + } else if (st.getObject().equals(OWL.FunctionalProperty.asNode())) { + statementsToRemove.add(st); + } else if (st.getObject().isLiteral() && st.getObject().asLiteral().getDatatypeURI().equals(XSD.gYear.getURI())) { + statementsToRemove.add(st); + } + } else if (!predicate.equals(RDFS.subClassOf) && !predicate.equals(OWL.sameAs) && !predicate.asResource().getURI().startsWith(namespace)) { + statementsToRemove.add(st); + } + } + model.remove(statementsToRemove); + } + + private static void cleanUpModel(Model model) { + // filter out triples with String literals, as therein often occur + // some syntax errors and they are not relevant for learning + List<Statement> statementsToRemove = new ArrayList<Statement>(); + for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) { + Statement st = iter.next(); + RDFNode object = st.getObject(); + if (object.isLiteral()) { + // statementsToRemove.add(st); + Literal lit = object.asLiteral(); + if (lit.getDatatype() == null || lit.getDatatype().equals(XSD.xstring)) { + st.changeObject("shortened", "en"); + } else if (lit.getDatatype().getURI().equals(XSD.gYear.getURI())) { + statementsToRemove.add(st); + // System.err.println("REMOVE " + st); + } else if (lit.getDatatype().getURI().equals(XSD.gYearMonth.getURI())) { + statementsToRemove.add(st); +// System.err.println("REMOVE " + st); + } + } + //remove statements like <x a owl:Class> + if (st.getPredicate().equals(RDF.type)) { + if (object.equals(RDFS.Class.asNode()) || object.equals(OWL.Class.asNode()) || object.equals(RDFS.Literal.asNode()) + || object.equals(RDFS.Resource)) { + statementsToRemove.add(st); + } + } + + //remove unwanted properties + String dbo = "http://dbpedia.org/ontology/"; + Set<String> blackList = Sets.newHashSet(dbo + "wikiPageDisambiguates",dbo + "wikiPageExternalLink", + dbo + "wikiPageID", dbo + "wikiPageInterLanguageLink", dbo + "wikiPageRedirects", dbo + "wikiPageRevisionID", + dbo + "wikiPageWikiLink"); + for(String bl: blackList){ + if (st.getPredicate().getURI().equals(bl)) { + statementsToRemove.add(st); + } + } + } + + model.remove(statementsToRemove); + } + + + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.Experiment#getDocuments() + */ + @Override + protected Set<String> getDocuments() { + Set<String> documents = new HashSet<String>(); + return documents; + } + + public static void main(String[] args) throws Exception { + new DBpediaPlainExperiment().run(new NamedClass("http://dbpedia.org/ontology/SpaceShuttle")); + } +} Added: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaSemanticIndexExperiment.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaSemanticIndexExperiment.java (rev 0) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaSemanticIndexExperiment.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -0,0 +1,202 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.io.BufferedInputStream; 
+import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.commons.compress.compressors.CompressorException; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.algorithms.isle.index.syntactic.SolrSyntacticIndex; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.utilities.owl.OWLEntityTypeAdder; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.AxiomType; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; + +import com.google.common.collect.Sets; +import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; +import com.hp.hpl.jena.vocabulary.XSD; + +/** + * @author Lorenz Buehmann + * + */ +public class DBpediaSemanticIndexExperiment extends Experiment{ + + final SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + final int maxNrOfInstancesPerClass = 10; + static final String solrServerURL = "http://solr.aksw.org/en_dbpedia_resources/"; + static final String searchField = "comment"; + + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.Experiment#getOntology() + */ + @Override + protected OWLOntology getOntology() { + //load the DBpedia schema + OWLOntology schema = null; + try { + URL url = new URL("http://downloads.dbpedia.org/3.9/dbpedia_3.9.owl.bz2"); + InputStream is = new BufferedInputStream(url.openStream()); + CompressorInputStream in = new CompressorStreamFactory().createCompressorInputStream("bzip2", is); + schema = OWLManager.createOWLOntologyManager().loadOntologyFromOntologyDocument(in); + } catch (MalformedURLException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } catch (CompressorException e) { + e.printStackTrace(); + } catch (OWLOntologyCreationException e) { + e.printStackTrace(); + } + //load some sample data for the machine learning part + Model sample = KnowledgebaseSampleGenerator.createKnowledgebaseSample( + endpoint, + "http://dbpedia.org/ontology/", + Sets.newHashSet(new NamedClass("http://dbpedia.org/ontology/Person")), + maxNrOfInstancesPerClass); + cleanUpModel(sample); + filter(sample, "http://dbpedia.org/ontology/"); + OWLEntityTypeAdder.addEntityTypes(sample); +// StmtIterator iterator = sample.listStatements(); +// while(iterator.hasNext()){ +// System.out.println(iterator.next()); +// } + + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + sample.write(baos, "TURTLE", null); + OWLOntologyManager man = OWLManager.createOWLOntologyManager(); + OWLOntology ontology = man.loadOntologyFromOntologyDocument(new ByteArrayInputStream(baos.toByteArray())); + man.addAxioms(ontology, schema.getAxioms()); + 
man.removeAxioms(ontology, ontology.getAxioms(AxiomType.FUNCTIONAL_DATA_PROPERTY)); + man.removeAxioms(ontology, ontology.getAxioms(AxiomType.FUNCTIONAL_OBJECT_PROPERTY)); + man.removeAxioms(ontology, ontology.getAxioms(AxiomType.DATA_PROPERTY_RANGE)); + return ontology; + } catch (Exception e) { + e.printStackTrace(); + } + + return null; + } + + /** + * Filter triples which are not relevant based on the given knowledge base + * namespace. + * + * @param model + * @param namespace + */ + private void filter(Model model, String namespace) { + List<Statement> statementsToRemove = new ArrayList<Statement>(); + for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) { + Statement st = iter.next(); + Property predicate = st.getPredicate(); + if (predicate.equals(RDF.type)) { + if (!st.getObject().asResource().getURI().startsWith(namespace)) { + statementsToRemove.add(st); + } else if (st.getObject().equals(OWL.FunctionalProperty.asNode())) { + statementsToRemove.add(st); + } else if (st.getObject().isLiteral() && st.getObject().asLiteral().getDatatypeURI().equals(XSD.gYear.getURI())) { + statementsToRemove.add(st); + } + } else if (!predicate.equals(RDFS.subClassOf) && !predicate.equals(OWL.sameAs) && !predicate.asResource().getURI().startsWith(namespace)) { + statementsToRemove.add(st); + } + } + model.remove(statementsToRemove); + } + + private static void cleanUpModel(Model model) { + // filter out triples with String literals, as therein often occur + // some syntax errors and they are not relevant for learning + List<Statement> statementsToRemove = new ArrayList<Statement>(); + for (Iterator<Statement> iter = model.listStatements().toList().iterator(); iter.hasNext();) { + Statement st = iter.next(); + RDFNode object = st.getObject(); + if (object.isLiteral()) { + // statementsToRemove.add(st); + Literal lit = object.asLiteral(); + if (lit.getDatatype() == null || lit.getDatatype().equals(XSD.xstring)) { + st.changeObject("shortened", "en"); + } else if (lit.getDatatype().getURI().equals(XSD.gYear.getURI())) { + statementsToRemove.add(st); + // System.err.println("REMOVE " + st); + } else if (lit.getDatatype().getURI().equals(XSD.gYearMonth.getURI())) { + statementsToRemove.add(st); +// System.err.println("REMOVE " + st); + } + } + //remove statements like <x a owl:Class> + if (st.getPredicate().equals(RDF.type)) { + if (object.equals(RDFS.Class.asNode()) || object.equals(OWL.Class.asNode()) || object.equals(RDFS.Literal.asNode()) + || object.equals(RDFS.Resource)) { + statementsToRemove.add(st); + } + } + + //remove unwanted properties + String dbo = "http://dbpedia.org/ontology/"; + Set<String> blackList = Sets.newHashSet(dbo + "wikiPageDisambiguates",dbo + "wikiPageExternalLink", + dbo + "wikiPageID", dbo + "wikiPageInterLanguageLink", dbo + "wikiPageRedirects", dbo + "wikiPageRevisionID", + dbo + "wikiPageWikiLink"); + for(String bl: blackList){ + if (st.getPredicate().getURI().equals(bl)) { + statementsToRemove.add(st); + } + } + } + + model.remove(statementsToRemove); + } + + + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.Experiment#getDocuments() + */ + @Override + protected Set<String> getDocuments() { + Set<String> documents = new HashSet<String>(); + + documents.addAll(DBpediaCorpusGenerator.getDBpediaCorpusSample( + "http://dbpedia.org/ontology/abstract", + Sets.newHashSet(new NamedClass("http://dbpedia.org/ontology/Person")), + maxNrOfInstancesPerClass)); + + documents.clear(); + documents.add("Thomas Cruise Mapother IV, widely 
known as Tom Cruise, is an American film player and producer. He has been nominated for three Academy Awards and has won three Golden Globe Awards. He started his career at age 19 in the 1981 film Taps. His first leading role was in Risky Business, released in August 1983. Cruise became a full-fledged movie star after starring in Top Gun (1986). He is well known for his role as secret agent Ethan Hunt in the Mission: Impossible film series between 1996 and 2011. Cruise has starred in many Hollywood blockbusters, including Rain Man (1988), A Few Good Men (1992), Jerry Maguire (1996), Vanilla Sky (2001), Minority Report (2002), The Last Samurai (2003), Collateral (2004), War of the Worlds (2005), Tropic Thunder (2008) and Jack Reacher (2012). As of 2012, Cruise is Hollywood's highest-paid actor. Cruise is known for his Scientologist faith and for his support of the Church of Scientology."); + + return documents; + } + + public static void main(String[] args) throws Exception { + new DBpediaSemanticIndexExperiment().run(new NamedClass("http://dbpedia.org/ontology/Person")); + } +} Added: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaSyntacticIndexBasedExperiment.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaSyntacticIndexBasedExperiment.java (rev 0) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaSyntacticIndexBasedExperiment.java 2014-01-08 19:37:22 UTC (rev 4210) @@ -0,0 +1,209 @@ +/** + * + */ +package org.dllearner.algorithms.isle; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.commons.compress.compressors.CompressorException; +import org.apache.commons.compress.compressors.CompressorInputStream; +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.dllearner.algorithms.isle.index.Index; +import org.dllearner.algorithms.isle.index.syntactic.SolrSyntacticIndex; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.utilities.owl.OWLEntityTypeAdder; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.AxiomType; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyCreationException; +import org.semanticweb.owlapi.model.OWLOntologyManager; + +import com.google.common.collect.Sets; +import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; +import com.hp.hpl.jena.vocabulary.XSD; + +/** + * @author Lorenz Buehmann + * + */ +public class DBpediaSyntacticIndexBasedExperiment extends Experiment{ + + final SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpedia(); + final int maxNrOfInstancesPerClass = 10; + static final String solrServerURL = "http://solr.aksw.org/en_dbpedia_resources/"; + static final String 
searchField = "comment"; + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.Experiment#getIndex() + */ + @Override + protected Index getIndex() { + return new SolrSyntacticIndex(ontology, solrServerURL, searchField); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.Experiment#getOntology() + */ + @Override + protected OWLOntology getOntology() { + //load the DBpedia schema + OWLOntology schema = null; + try { + URL url = new URL("http://downloads.dbpedia.org/3.9/dbpedia_3.9.owl.bz2"); + InputStream is = new BufferedInputStream(url.openStream()); + CompressorInputStream in = new Comp... [truncated message content] |
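
For readers skimming the diff: the SolrSyntacticIndex change above is a plain memoization of getNumberOfDocumentsFor, so repeated frequency lookups for the same entity skip the Solr round trip. A minimal, self-contained sketch of the same pattern, assuming nothing beyond the JDK (CountCache is an illustrative name, not part of the DL-Learner API):

import java.util.HashMap;
import java.util.Map;

/**
 * Sketch of the memoization pattern added to
 * SolrSyntacticIndex#getNumberOfDocumentsFor.
 */
public abstract class CountCache<E> {

    private final Map<E, Long> cache = new HashMap<E, Long>();

    /** The expensive lookup, e.g. one Solr query per entity. */
    protected abstract long compute(E entity);

    /** Check the cache first, otherwise compute and store, as in the patch. */
    public long get(E entity) {
        Long cached = cache.get(entity);
        if (cached == null) {
            cached = compute(entity);
            cache.put(entity, cached);
        }
        return cached;
    }
}

Note that the patch stores the count immediately before returning it, so a SolrServerException leaves the cache untouched and the query is simply retried on the next call.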
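
The StructureBasedWordSenseDisambiguation hunk implements the filter described in its own comment: a candidate entity survives only if one of its lexicalizations contains a head token whose raw form equals the raw form of one of the annotated tokens. A reduced sketch of that filter with the Entity/Token machinery collapsed to strings (HeadNounFilter and its parameter names are illustrative):

import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

public class HeadNounFilter {

    /**
     * Removes every candidate whose head-token raw forms share no element
     * with the raw forms of the annotated tokens.
     */
    public static <C> void filter(Set<C> candidates,
                                  Map<C, Set<String>> headFormsPerCandidate,
                                  Set<String> annotatedRawForms) {
        for (Iterator<C> it = candidates.iterator(); it.hasNext();) {
            C candidate = it.next();
            Set<String> headForms = headFormsPerCandidate.get(candidate);
            boolean matched = headForms != null
                    && !Collections.disjoint(headForms, annotatedRawForms);
            if (!matched) {
                // same Iterator#remove pattern as the patched disambiguate(...)
                it.remove();
            }
        }
    }
}

In the patch itself the head forms come from RDFSLabelEntityTextRetriever.getRelevantText(entity) combined with Token.isHead() and Token.getRawForm().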
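
The new TriplePatternExtractor is a Jena ElementVisitorBase that walks a query's pattern, collecting triple patterns and keeping those inside OPTIONAL clauses only when they share a projection variable with the SELECT. A usage sketch against the API as added in this revision (the demo class and query string are illustrative; Jena 2.x package names as in the commit):

import java.util.Set;

import org.dllearner.utilities.TriplePatternExtractor;

import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.sparql.core.Var;

public class TriplePatternExtractorDemo {

    public static void main(String[] args) {
        Query query = QueryFactory.create(
            "SELECT ?s WHERE { ?s <http://dbpedia.org/ontology/birthPlace> ?o . "
            + "OPTIONAL { ?s <http://www.w3.org/2000/01/rdf-schema#label> ?l . } }");

        TriplePatternExtractor extractor = new TriplePatternExtractor();

        // all triple patterns; the OPTIONAL pattern is kept because ?s is projected
        Set<Triple> all = extractor.extractTriplePattern(query);
        System.out.println(all);

        // only the patterns with ?s in subject position (Var is a Jena Node)
        Set<Triple> outgoing = extractor.extractOutgoingTriplePatterns(query, Var.alloc("s"));
        System.out.println(outgoing);
    }
}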