From: <lor...@us...> - 2013-07-01 19:05:53
|
Revision: 4013 http://sourceforge.net/p/dl-learner/code/4013 Author: lorenz_b Date: 2013-07-01 19:05:50 +0000 (Mon, 01 Jul 2013) Log Message: ----------- Minor changes in SPARQL reasoner. Updated LUCENE libs for ISLE. Modified Paths: -------------- trunk/components-core/pom.xml trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/OWLAxiomPatternFinder.java trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java trunk/components-core/src/main/java/org/dllearner/kb/repository/oxford/OxfordRepository.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java Modified: trunk/components-core/pom.xml =================================================================== --- trunk/components-core/pom.xml 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/pom.xml 2013-07-01 19:05:50 UTC (rev 4013) @@ -215,6 +215,14 @@ <artifactId>lucene-core</artifactId> </dependency> <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-analyzers-common</artifactId> + </dependency> + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-queryparser</artifactId> + </dependency> + <dependency> <groupId>commons-lang</groupId> <artifactId>commons-lang</artifactId> </dependency> @@ -272,7 +280,7 @@ <dependency> <groupId>org.aksw.jena-sparql-api</groupId> <artifactId>jena-sparql-api-core</artifactId> - <version>2.10.0-4-SNAPSHOT</version> + <version>2.10.0-5-SNAPSHOT</version> </dependency> </dependencies> 
<dependencyManagement> Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityTextRetriever.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -43,6 +43,6 @@ * @param entity The entity to handle. * @return A weighted set of strings. For a value x, we need to have 0 <= x <= 1. */ - public Map<String, Integer> getRelevantText(Entity entity); + public Map<String, Double> getRelevantText(Entity entity); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneIndexer.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -25,8 +25,10 @@ import java.io.IOException; import java.util.Date; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; @@ -51,12 +53,12 @@ Date start = new Date(); try { - IndexWriter writer = new IndexWriter( FSDirectory.open( INDEX ), - new StandardAnalyzer( Version.LUCENE_CURRENT ), true, IndexWriter.MaxFieldLength.LIMITED ); + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter( FSDirectory.open( INDEX ), indexWriterConfig); System.out.println( "Creating index ..." 
); index( writer, docDir ); System.out.println( "Optimizing index ..." ); - writer.optimize(); writer.close(); Date end = new Date(); System.out.println( end.getTime() - start.getTime() + " total milliseconds" ); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/LuceneSearcher.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -32,13 +32,14 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Searcher; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; @@ -48,7 +49,7 @@ private String FIELD = "contents"; private IndexReader m_reader = null; - private Searcher m_searcher = null; + private IndexSearcher m_searcher = null; private Analyzer m_analyzer = null; private QueryParser m_parser = null; @@ -70,10 +71,10 @@ @SuppressWarnings("deprecation") public LuceneSearcher() throws Exception { - m_reader = IndexReader.open( FSDirectory.open( new File( INDEX ) ), true ); + m_reader = DirectoryReader.open( FSDirectory.open( new File( INDEX ) )); m_searcher = new IndexSearcher( m_reader ); - m_analyzer = new StandardAnalyzer( Version.LUCENE_CURRENT ); - m_parser = new QueryParser( 
Version.LUCENE_CURRENT, FIELD, m_analyzer ); + m_analyzer = new StandardAnalyzer( Version.LUCENE_43); + m_parser = new QueryParser( Version.LUCENE_43, FIELD, m_analyzer ); } public void close() throws Exception { @@ -139,13 +140,13 @@ return true; } @Override - public void setNextReader( IndexReader reader, int docBase ) throws IOException { - this.docBase = docBase; - } - @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + this.docBase = context.docBase; + } }; m_searcher.search( query, collector ); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/OWLAxiomPatternFinder.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/OWLAxiomPatternFinder.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/pattern/OWLAxiomPatternFinder.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -364,7 +364,7 @@ System.out.print(i++ + ": "); URI uri = entry.getPhysicalURI(); // if(uri.toString().startsWith("http://rest.bioontology.org/bioportal/ontologies/download/42764")){ - if (!ontologyProcessed(uri)) { + if (!ontologyProcessed(uri)) {//if(entry.getOntologyShortName().equals("00698"))continue; System.out.print("Loading \"" + entry.getOntologyShortName() + "\" from "+ uri); try { manager = OWLManager.createOWLOntologyManager(); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/properties/DisjointObjectPropertyAxiomLearner.java 2013-07-01 
19:05:50 UTC (rev 4013) @@ -116,7 +116,7 @@ //compute the overlap if exist Map<ObjectProperty, Integer> property2Overlap = new HashMap<ObjectProperty, Integer>(); String query = String.format("SELECT ?p (COUNT(*) AS ?cnt) WHERE {?s <%s> ?o. ?s ?p ?o.} GROUP BY ?p", propertyToDescribe.getName()); - System.out.println(query);ResultSet rs = executeSelectQuery(query); + ResultSet rs = executeSelectQuery(query); QuerySolution qs; while(rs.hasNext()){ qs = rs.next(); Modified: trunk/components-core/src/main/java/org/dllearner/kb/repository/oxford/OxfordRepository.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/repository/oxford/OxfordRepository.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/kb/repository/oxford/OxfordRepository.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -90,7 +90,7 @@ private URI physicalURI; public RepositoryEntry(URI ontologyIRI) { - this.ontologyURI = ontologyIRI;System.out.println(ontologyIRI); + this.ontologyURI = ontologyIRI; OntologyIRIShortFormProvider sfp = new OntologyIRIShortFormProvider(); shortName = sfp.getShortForm(IRI.create(ontologyIRI)); physicalURI = ontologyIRI; Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/QueryEngineHTTP.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -268,7 +268,7 @@ private Model execModel(Model model) { HttpQuery httpQuery = makeHttpQuery() ; - httpQuery.setAccept(WebContent.contentTypeNTriplesAlt) ; + httpQuery.setAccept(WebContent.contentTypeTurtleAlt1) ; InputStream in = httpQuery.exec() ; //Don't assume the endpoint actually gives back the content type we asked for @@ -284,7 +284,7 @@ //Try to 
select language appropriately here based on the model content type Lang lang = WebContent.contentTypeToLang(actualContentType); if (! RDFLanguages.isTriples(lang)) throw new QueryException("Endpoint returned Content Type: " + actualContentType + " which is not a valid RDF Graph syntax"); - model.read(in, null, "N-TRIPLES") ; + model.read(in, null, "TURTLE") ; return model ; } Modified: trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java 2013-07-01 18:51:19 UTC (rev 4012) +++ trunk/components-core/src/main/java/org/dllearner/kb/sparql/simple/SparqlSimpleExtractor.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -11,7 +11,6 @@ import org.dllearner.core.AbstractKnowledgeSource; import org.dllearner.core.ComponentAnn; import org.dllearner.core.ComponentInitException; -import org.dllearner.core.KnowledgeSource; import org.dllearner.core.OntologyFormat; import org.dllearner.core.OntologyFormatUnsupportedException; import org.dllearner.core.config.ConfigOption; @@ -36,7 +35,6 @@ import com.jamonapi.MonitorFactory; @ComponentAnn(name = "efficient SPARQL fragment extractor", shortName = "sparqls", version = 0.1) - public class SparqlSimpleExtractor extends AbstractKnowledgeSource implements OWLOntologyKnowledgeSource{ @@ -334,5 +332,10 @@ JenaToOwlapiConverter converter = new JenaToOwlapiConverter(); return converter.convert(this.model,manager); } + + public static String getName(){ + return "efficient SPARQL fragment extractor"; + } + } Modified: trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2013-07-01 18:51:19 UTC (rev 4012) +++ 
trunk/components-core/src/main/java/org/dllearner/reasoning/SPARQLReasoner.java 2013-07-01 19:05:50 UTC (rev 4013) @@ -22,10 +22,12 @@ import java.net.URL; import java.sql.SQLException; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.SortedSet; import java.util.TreeMap; @@ -73,6 +75,8 @@ import org.slf4j.LoggerFactory; import com.clarkparsia.owlapiv3.XSD; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; import com.hp.hpl.jena.ontology.OntClass; import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.query.QueryExecution; @@ -80,6 +84,7 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.OWL2; @@ -134,7 +139,7 @@ e.printStackTrace(); } } - qef = new QueryExecutionFactoryPaginated(qef, 10000); +// qef = new QueryExecutionFactoryPaginated(qef, 10000); } else { qef = new QueryExecutionFactoryModel(((LocalModelBasedSparqlEndpointKS)ks).getModel()); @@ -518,6 +523,100 @@ } return types; } + + public Set<Property> getProperties(boolean inferType, String namespace) { + Set<Property> properties = new HashSet<Property>(); + String query = "SELECT DISTINCT ?p ?type WHERE {?s ?p ?o." + + (namespace != null ? 
("FILTER(REGEX(?p,'^" + namespace + "'))") : "") + + "OPTIONAL{?p a ?type.}}"; + ResultSet rs = executeSelectQuery(query); + Multimap<String, String> uri2Types = HashMultimap.create(); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + String uri = qs.getResource("p").getURI(); + String type = ""; + if(qs.getResource("type") != null){ + type = qs.getResource("type").getURI(); + } + uri2Types.put(uri, type); + } + for (Entry<String, Collection<String>> entry : uri2Types.asMap().entrySet()) { + String uri = entry.getKey(); + Collection<String> types = entry.getValue(); + if(types.contains(OWL.ObjectProperty.getURI()) && !types.contains(OWL.DatatypeProperty.getURI())){ + properties.add(new ObjectProperty(uri)); + } else if(!types.contains(OWL.ObjectProperty.getURI()) && types.contains(OWL.DatatypeProperty.getURI())){ + properties.add(new DatatypeProperty(uri)); + } else { + //infer the type by values + query = "SELECT ?o WHERE {?s <" + uri + "> ?o. } LIMIT 100"; + rs = executeSelectQuery(query); + boolean op = true; + boolean dp = true; + RDFNode node; + while(rs.hasNext()){ + node = rs.next().get("o"); + op = node.isResource(); + dp = node.isLiteral(); + } + if(op && !dp){ + properties.add(new ObjectProperty(uri)); + } else if(!op && dp){ + properties.add(new DatatypeProperty(uri)); + } else { + //not possible to decide + } + } + } + return properties; + } + + public Set<Property> getProperties(boolean inferType) { + Set<Property> properties = new TreeSet<Property>(); + String query = "SELECT DISTINCT ?p ?type WHERE {?s ?p ?o. 
OPTIONAL{?p a ?type.}}"; + ResultSet rs = executeSelectQuery(query); + Multimap<String, String> uri2Types = HashMultimap.create(); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + String uri = qs.getResource("p").getURI(); + String type = ""; + if(qs.getResource("type") != null){ + type = qs.getResource("type").getURI(); + } + uri2Types.put(uri, type); + } + for (Entry<String, Collection<String>> entry : uri2Types.asMap().entrySet()) { + String uri = entry.getKey(); + Collection<String> types = entry.getValue(); + if(types.contains(OWL.ObjectProperty.getURI()) && !types.contains(OWL.DatatypeProperty.getURI())){ + properties.add(new ObjectProperty(uri)); + } else if(!types.contains(OWL.ObjectProperty.getURI()) && types.contains(OWL.DatatypeProperty.getURI())){ + properties.add(new DatatypeProperty(uri)); + } else { + //infer the type by values + query = "SELECT ?o WHERE {?s <" + uri + "> ?o. } LIMIT 100"; + rs = executeSelectQuery(query); + boolean op = true; + boolean dp = true; + RDFNode node; + while(rs.hasNext()){ + node = rs.next().get("o"); + op = node.isResource(); + dp = node.isLiteral(); + } + if(op && !dp){ + properties.add(new ObjectProperty(uri)); + } else if(!op && dp){ + properties.add(new DatatypeProperty(uri)); + } else { + //not possible to decide + } + } + } + return properties; + } public Set<NamedClass> getOWLClasses() { Set<NamedClass> types = new HashSet<NamedClass>(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |