From: <lor...@us...> - 2013-01-25 12:02:22
|
Revision: 3891 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3891&view=rev Author: lorenz_b Date: 2013-01-25 12:02:10 +0000 (Fri, 25 Jan 2013) Log Message: ----------- Started script for instance based matching of 2 SPARQL knowledge bases. Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/kb/sparql/SPARQLTasks.java trunk/components-core/src/main/java/org/dllearner/learningproblems/PosOnlyLP.java trunk/scripts/pom.xml trunk/scripts/src/main/java/org/dllearner/scripts/matching/GeneralMatcher.java trunk/test/qtl/dbpedia_simple.conf Added Paths: ----------- trunk/scripts/src/main/java/org/dllearner/scripts/OntologyMatching.java Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/SPARQLTasks.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/SPARQLTasks.java 2013-01-23 13:30:14 UTC (rev 3890) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/SPARQLTasks.java 2013-01-25 12:02:10 UTC (rev 3891) @@ -701,7 +701,7 @@ public Set<NamedClass> getAllClasses() { Set<NamedClass> classes = new TreeSet<NamedClass>(); - String query = "PREFIX owl: <http://www.w3.org/2002/07/owl#> SELECT ?c WHERE {?c a owl:Class} LIMIT 1000"; + String query = "SELECT ?c WHERE {?c a <http://www.w3.org/2002/07/owl#Class>} LIMIT 1000"; /* * String query = "PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> " + "SELECT ?c WHERE {{?c a owl:Class} UNION {?c rdfs:subClassOf ?d} UNION {?d rdfs:subClassOf ?c}} LIMIT 1000"; Modified: trunk/components-core/src/main/java/org/dllearner/learningproblems/PosOnlyLP.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/learningproblems/PosOnlyLP.java 2013-01-23 13:30:14 UTC (rev 3890) +++ trunk/components-core/src/main/java/org/dllearner/learningproblems/PosOnlyLP.java 2013-01-25 12:02:10 UTC (rev 3891) @@ -64,6 +64,11 @@ public PosOnlyLP(AbstractReasonerComponent reasoningService) { super(reasoningService); } + + public PosOnlyLP(AbstractReasonerComponent reasoningService, SortedSet<Individual> positiveExamples) { + super(reasoningService); + this.positiveExamples = positiveExamples; + } /* * (non-Javadoc) Modified: trunk/scripts/pom.xml =================================================================== --- trunk/scripts/pom.xml 2013-01-23 13:30:14 UTC (rev 3890) +++ trunk/scripts/pom.xml 2013-01-25 12:02:10 UTC (rev 3891) @@ -25,6 +25,10 @@ <artifactId>jena</artifactId> <groupId>com.hp.hpl.jena</groupId> </exclusion> + <exclusion> + <artifactId>owlapi</artifactId> + <groupId>net.sourceforge.owlapi</groupId> + </exclusion> </exclusions> </dependency> <dependency> @@ -40,6 +44,12 @@ <dependency> <groupId>org.dllearner</groupId> <artifactId>interfaces</artifactId> + <exclusions> + <exclusion> + <artifactId>owlapi</artifactId> + <groupId>net.sourceforge.owlapi</groupId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>net.sourceforge.secondstring</groupId> @@ -49,12 +59,12 @@ <groupId>postgresql</groupId> <artifactId>postgresql</artifactId> </dependency> - <dependency> - <groupId>com.dumontierlab</groupId> - <artifactId>pdb2rdf-parser</artifactId> - </dependency> <dependency> <groupId>com.dumontierlab</groupId> + <artifactId>pdb2rdf-parser</artifactId> + </dependency> + <dependency> + <groupId>com.dumontierlab</groupId> <artifactId>pdb2rdf-cli</artifactId> <exclusions> <exclusion> @@ -84,8 +94,8 @@ <groupId>org.deri.any23</groupId> </exclusion> <exclusion> - <artifactId>jena-core</artifactId> - <groupId>org.apache.jena</groupId> + <artifactId>jena-core</artifactId> + <groupId>org.apache.jena</groupId> </exclusion> </exclusions> </dependency> @@ -103,6 +113,30 @@ <artifactId>weka</artifactId> <version>3.6.5</version> </dependency> + <dependency> + <groupId>net.sourceforge.owlapi</groupId> + <artifactId>owlapi-distribution</artifactId> + </dependency> + <dependency> + <groupId>net.sourceforge.owlapi</groupId> + <artifactId>owlapi-reasoner</artifactId> + <exclusions> + <exclusion> + <artifactId>owlapi-api</artifactId> + <groupId>net.sourceforge.owlapi</groupId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>net.sourceforge.owlapi</groupId> + <artifactId>owlapi-util</artifactId> + <exclusions> + <exclusion> + <artifactId>owlapi-api</artifactId> + <groupId>net.sourceforge.owlapi</groupId> + </exclusion> + </exclusions> + </dependency> </dependencies> Added: trunk/scripts/src/main/java/org/dllearner/scripts/OntologyMatching.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/OntologyMatching.java (rev 0) +++ trunk/scripts/src/main/java/org/dllearner/scripts/OntologyMatching.java 2013-01-25 12:02:10 UTC (rev 3891) @@ -0,0 +1,291 @@ +package org.dllearner.scripts; + +import java.net.URL; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.log4j.Logger; +import org.dllearner.algorithms.celoe.CELOE; +import org.dllearner.core.AbstractLearningProblem; +import org.dllearner.core.AbstractReasonerComponent; +import org.dllearner.core.ComponentInitException; +import org.dllearner.core.owl.Individual; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SPARQLTasks; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlKnowledgeSource; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.learningproblems.PosNegLPStandard; +import org.dllearner.learningproblems.PosOnlyLP; +import org.dllearner.reasoning.FastInstanceChecker; +import org.dllearner.reasoning.SPARQLReasoner; +import org.dllearner.utilities.datastructures.Datastructures; +import org.dllearner.utilities.datastructures.SortedSetTuple; +import org.dllearner.utilities.examples.AutomaticNegativeExampleFinderSPARQL2; + +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.hp.hpl.jena.vocabulary.OWL; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + +public class OntologyMatching { + + + private static final Logger logger = Logger.getLogger(OntologyMatching.class.getName()); + + private final ObjectProperty sameAs = new ObjectProperty(OWL.sameAs.getURI()); + private final Monitor mon; + + //KB1 + private KnowledgeBase kb1; + //KB2 + private KnowledgeBase kb2; + + public OntologyMatching(KnowledgeBase kb1, KnowledgeBase kb2) { + this.kb1 = kb1; + this.kb2 = kb2; + + mon = MonitorFactory.getTimeMonitor("time"); + } + + public OntologyMatching(SparqlEndpoint endpoint1, SparqlEndpoint endpoint2) { + this(new KnowledgeBase(endpoint1), new KnowledgeBase(endpoint2)); + } + + public void start(){ + computeMatching(kb1, kb2); + computeMatching(kb2, kb1); + } + + private void computeMatching(KnowledgeBase source, KnowledgeBase target) { + // get all classes in SOURCE + Set<NamedClass> sourceClasses = getClasses(source); + + // for each class of KB1 + for (NamedClass nc : sourceClasses) { + logger.info(nc); + // get all via owl:sameAs related individuals + SortedSet<Individual> individuals = getRelatedIndividualsNamespaceAware(source, nc, target.getNamespace()); + logger.info(individuals); + //learn concept in KB2 based on the examples + if(individuals.size() >= 3){ + learnClassExpression(target, individuals); + } + } + } + + private void learnClassExpression(KnowledgeBase kb, SortedSet<Individual> posExamples){ + learnClassExpression(kb, posExamples, false); + } + + private void learnClassExpression(KnowledgeBase kb, SortedSet<Individual> positiveExamples, boolean posNeg){ + try { + SortedSet<Individual> negativeExamples = new TreeSet<Individual>(); + if(posNeg){ + //find negative examples + mon.start(); + AutomaticNegativeExampleFinderSPARQL2 finder = new AutomaticNegativeExampleFinderSPARQL2(kb.getEndpoint()); + //TODO find negative examples + mon.stop(); + logger.info("Found " + negativeExamples.size() + " negative examples in " + mon.getLastValue() + "ms."); + } + + SortedSetTuple<Individual> examples = new SortedSetTuple<Individual>(positiveExamples, negativeExamples); + + SparqlKnowledgeSource ks = new SparqlKnowledgeSource(); + ks.setInstances(Datastructures.individualSetToStringSet(examples.getCompleteSet())); + ks.setUrl(kb.getEndpoint().getURL()); + ks.setDefaultGraphURIs(new TreeSet<String>(kb.getEndpoint().getDefaultGraphURIs())); + ks.setUseLits(false); + ks.setUseCacheDatabase(true); + ks.setCacheDir("cache"); + ks.setRecursionDepth(2); + ks.setCloseAfterRecursion(true); + ks.setDissolveBlankNodes(false); + ks.setSaveExtractedFragment(false); + ks.init(); + + AbstractReasonerComponent rc = new FastInstanceChecker(ks); + rc.init(); + + AbstractLearningProblem lp; + if(posNeg){ + lp = new PosNegLPStandard(rc, positiveExamples, negativeExamples); + } else { + lp = new PosOnlyLP(rc, positiveExamples); + + } + lp.init(); + + CELOE la = new CELOE(lp, rc); + la.setMaxExecutionTimeInSeconds(10); + la.setNoisePercentage(25); + la.init(); + la.start(); + + logger.info(la.getCurrentlyBestEvaluatedDescription()); + } catch (ComponentInitException e) { + e.printStackTrace(); + } + } + + private Set<NamedClass> getClasses(KnowledgeBase kb){ + Set<NamedClass> classes = kb.getSparqlHelper().getAllClasses(); + //fallback: check for ?s a ?type where ?type is not asserted to owl:Class + if(classes.isEmpty()){ + String query = "SELECT ?type WHERE {?s a ?type.}"; + ResultSet rs = executeSelect(kb, query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + if(qs.get("type").isURIResource()){ + classes.add(new NamedClass(qs.get("type").asResource().getURI())); + } + } + } + return classes; + } + + private SortedSet<Individual> getRelatedIndividualsNaive(KnowledgeBase kb, NamedClass nc){ + SortedSet<Individual> relatedIndividuals = new TreeSet<Individual>(); + //get all individuals in given class nc + Set<Individual> individuals = kb.getReasoner().getIndividuals(nc); + //for each individual in class nc + for(Individual ind : individuals){ + //get all individuals related via owl:sameAs + Set<Individual> sameIndividuals = kb.getReasoner().getRelatedIndividuals(ind, sameAs); + relatedIndividuals.addAll(sameIndividuals); + } + return relatedIndividuals; + } + + private SortedSet<Individual> getRelatedIndividuals(KnowledgeBase kb, NamedClass nc){ + SortedSet<Individual> relatedIndividuals = new TreeSet<Individual>(); + //get all individuals o which are connected to individuals s belonging to class nc + String query = String.format("SELECT ?o WHERE {?s a <%s>. ?s <http://www.w3.org/2002/07/owl#sameAs> ?o.}", nc.getName()); + ResultSet rs = executeSelect(kb, query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + RDFNode object = qs.get("o"); + if(object.isURIResource()){ + relatedIndividuals.add(new Individual(object.asResource().getURI())); + } + } + return relatedIndividuals; + } + + private SortedSet<Individual> getRelatedIndividualsNamespaceAware(KnowledgeBase kb, NamedClass nc, String targetNamespace){ + SortedSet<Individual> relatedIndividuals = new TreeSet<Individual>(); + //get all individuals o which are connected to individuals s belonging to class nc + String query = String.format("SELECT ?o WHERE {?s a <%s>. ?s <http://www.w3.org/2002/07/owl#sameAs> ?o. FILTER(REGEX(STR(?o),'%s'))}", nc.getName(), targetNamespace); + ResultSet rs = executeSelect(kb, query); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + RDFNode object = qs.get("o"); + if(object.isURIResource()){ + relatedIndividuals.add(new Individual(object.asResource().getURI())); + } + } + return relatedIndividuals; + } + + protected ResultSet executeSelect(KnowledgeBase kb, String query){ + return executeSelect(kb, QueryFactory.create(query, Syntax.syntaxARQ)); + } + + protected ResultSet executeSelect(KnowledgeBase kb, Query query){ + ExtractionDBCache cache = kb.getCache(); + SparqlEndpoint endpoint = kb.getEndpoint(); + ResultSet rs = null; + if(cache != null){ + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query.toString())); + } else { + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); + for(String uri : endpoint.getDefaultGraphURIs()){ + qe.addDefaultGraph(uri); + } + rs = qe.execSelect(); + } + return rs; + } + + public static class KnowledgeBase{ + private SparqlEndpoint endpoint; + private SPARQLReasoner reasoner; + private SPARQLTasks sparqlHelper; + private String namespace; + private ExtractionDBCache cache; + + public KnowledgeBase(SparqlEndpoint endpoint, ExtractionDBCache cache, String namespace) { + this.endpoint = endpoint; + this.namespace = namespace; + this.cache = cache; + + this.reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + this.sparqlHelper = new SPARQLTasks(endpoint); + } + + public KnowledgeBase(SparqlEndpoint endpoint) { + this.endpoint = endpoint; + + this.reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); + this.sparqlHelper = new SPARQLTasks(endpoint); + } + + public SparqlEndpoint getEndpoint() { + return endpoint; + } + + public SPARQLReasoner getReasoner() { + return reasoner; + } + + public SPARQLTasks getSparqlHelper() { + return sparqlHelper; + } + + public String getNamespace() { + return namespace; + } + + public ExtractionDBCache getCache() { + return cache; + } + + + + } + + /** + * @param args + */ + public static void main(String[] args) throws Exception{ + // KB2 + SparqlEndpoint endpoint1 = SparqlEndpoint.getEndpointDBpedia(); + ExtractionDBCache cache1 = new ExtractionDBCache("cache"); + String namespace1 = "http://dbpedia.org/resource/"; + KnowledgeBase kb1 = new KnowledgeBase(endpoint1, cache1, namespace1); + // KB2 + SparqlEndpoint endpoint2 = new SparqlEndpoint(new URL("http://wifo5-03.informatik.uni-mannheim.de/factbook/sparql")); + ExtractionDBCache cache2 = new ExtractionDBCache("cache"); + String namespace2 = "http://www4.wiwiss.fu-berlin.de/factbook/resource/"; + KnowledgeBase kb2 = new KnowledgeBase(endpoint2, cache2, namespace2); + + OntologyMatching matcher = new OntologyMatching(kb1, kb2); + matcher.start(); + + } + +} Modified: trunk/scripts/src/main/java/org/dllearner/scripts/matching/GeneralMatcher.java =================================================================== --- trunk/scripts/src/main/java/org/dllearner/scripts/matching/GeneralMatcher.java 2013-01-23 13:30:14 UTC (rev 3890) +++ trunk/scripts/src/main/java/org/dllearner/scripts/matching/GeneralMatcher.java 2013-01-25 12:02:10 UTC (rev 3891) @@ -24,7 +24,7 @@ import java.util.LinkedList; import java.util.Set; -import org.dllearner.algorithm.qtl.QTL; +import org.dllearner.algorithms.qtl.QTL; import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblemUnsupportedException; import org.dllearner.kb.SparqlEndpointKS; Modified: trunk/test/qtl/dbpedia_simple.conf =================================================================== --- trunk/test/qtl/dbpedia_simple.conf 2013-01-23 13:30:14 UTC (rev 3890) +++ trunk/test/qtl/dbpedia_simple.conf 2013-01-25 12:02:10 UTC (rev 3891) @@ -2,7 +2,7 @@ prefixes = [ ("dbr","http://dbpedia.org/resource/") ] ks.type = "SPARQL" -ks.url = "http://live.dbpedia.org/sparql" +ks.url = "http://dbpedia.org/sparql" ks.defaultGraphURIs = { "http://dbpedia.org" } // learning problem @@ -10,3 +10,4 @@ lp.positiveExamples = { "dbr:Bob_Dylan", "dbr:The_Beatles", "dbr:Aerosmith" } alg.type = "qtl" +alg.maxQueryTreeDepth = 1 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |