From: <dfl...@us...> - 2013-08-19 09:54:10
|
Revision: 4026 http://sourceforge.net/p/dl-learner/code/4026 Author: dfleischhacker Date: 2013-08-19 09:54:07 +0000 (Mon, 19 Aug 2013) Log Message: ----------- TR API: More restructuring of class structure Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexFactory.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java Removed Paths: ------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,99 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index; - -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.queryparser.classic.QueryParser; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TotalHitCountCollector; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Version; - -import java.io.File; -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - -/** - * @author Lorenz Buehmann - * - */ -public class LuceneSyntacticIndex implements SyntacticIndex { - - private IndexSearcher searcher; - private QueryParser parser; - private IndexReader indexReader; - private String searchField; - - public LuceneSyntacticIndex(IndexReader indexReader, String searchField) throws Exception { - this.indexReader 
= indexReader; - this.searchField = searchField; - searcher = new IndexSearcher(indexReader); - StandardAnalyzer analyzer = new StandardAnalyzer( Version.LUCENE_43); - parser = new QueryParser( Version.LUCENE_43, searchField, analyzer ); - } - - public LuceneSyntacticIndex(Directory directory, String searchField) throws Exception { - this(DirectoryReader.open(directory), searchField); - } - - public LuceneSyntacticIndex(String indexDirectory, String searchField) throws Exception { - this(DirectoryReader.open(FSDirectory.open(new File(indexDirectory))), searchField); - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SyntacticIndex#getDocuments(java.lang.String) - */ - @Override - public Set<org.dllearner.algorithms.isle.index.Document> getDocuments(String searchString) { - Set<org.dllearner.algorithms.isle.index.Document> documents = new HashSet<org.dllearner.algorithms.isle.index.Document>(); - try { - Query query = parser.parse(searchString); - ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs; - for (int i = 0; i < result.length; i++) { - Document doc = searcher.doc(result[i].doc); - documents.add(new TextDocument(doc.get(searchField))); - } - } catch (ParseException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - return documents; - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SyntacticIndex#getSize() - */ - @Override - public int getSize() { - return indexReader.numDocs(); - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SyntacticIndex#count(java.lang.String) - */ - @Override - public int count(String searchString) { - try { - Query query = parser.parse(searchString); - TotalHitCountCollector results = new TotalHitCountCollector(); - searcher.search(query, results); - return results.getTotalHits(); - } catch (ParseException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - return -1; - } - -} Deleted: 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,94 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.Version; -import org.semanticweb.owlapi.model.*; -import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; -import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; - -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - -/** - * Creates a Lucene Index for the labels if classes and properties. 
- * @author Lorenz Buehmann - * - */ -public class OWLOntologyLuceneSyntacticIndexCreator { - - private Directory directory = new RAMDirectory(); - private OWLOntology ontology; - private Set<OWLEntity> schemaEntities; - - private OWLDataFactory df = new OWLDataFactoryImpl(); - private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); - private String language = "en"; - private String searchField; - - public OWLOntologyLuceneSyntacticIndexCreator(OWLOntology ontology, OWLAnnotationProperty annotationProperty, String searchField) throws IOException { - this.ontology = ontology; - this.annotationProperty = annotationProperty; - this.searchField = searchField; - - schemaEntities = new HashSet<OWLEntity>(); - schemaEntities.addAll(ontology.getClassesInSignature()); - schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); - schemaEntities.addAll(ontology.getDataPropertiesInSignature()); - } - - public SyntacticIndex buildIndex() throws Exception{ - Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); - IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); - IndexWriter writer = new IndexWriter(directory, indexWriterConfig); - System.out.println( "Creating index ..." 
); - - Set<org.apache.lucene.document.Document> luceneDocuments = new HashSet<org.apache.lucene.document.Document>(); - FieldType stringType = new FieldType(StringField.TYPE_STORED); - stringType.setStoreTermVectors(false); - FieldType textType = new FieldType(TextField.TYPE_STORED); - textType.setStoreTermVectors(false); - - for (OWLEntity entity : schemaEntities) { - String label = null; - Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (val.hasLang(language)) { - label = val.getLiteral(); - } - } - } - - if(label != null){ - org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document(); - luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); - luceneDocument.add(new Field(searchField, label, textType)); - luceneDocuments.add(luceneDocument); - } - - } - writer.addDocuments(luceneDocuments); - - System.out.println("Done."); - writer.close(); - - return new LuceneSyntacticIndex(directory, searchField); - } - - - -} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,10 @@ +package org.dllearner.algorithms.isle.index; + +/** + * + */ +public interface SemanticAnnotator { + /** + * Processes the given document and return + */ +} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,37 +0,0 @@ -package org.dllearner.algorithms.isle.index; - -import org.dllearner.core.owl.Entity; - -import java.util.Set; - -/** - * Interface for an index which is able to resolve a given entity's URI to the set of documents containing - * this entity, i.e., documents which contain words disambiguated to the given entity. - * - * @author Lorenz Buehmann - * @author Daniel Fleischhacker - */ -public interface SemanticIndex { - /** - * Returns the set of documents which reference the given entity using one of its surface forms. - * - * @param entity entity to retrieve documents - * @return documents referencing given entity - */ - public Set<Document> getDocuments(Entity entity); - - /** - * Returns the number of documents for the given entity. - * - * @param entity entity to return number of referencing documents for - * @return number of documents for the given entity in this index - */ - public int count(Entity entity); - - /** - * Returns the total number of documents contained in the index. - * - * @return the total number of documents contained in the index - */ - public int getSize(); -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndexCreator.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,22 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index; - -/** - * This gets a syntactic index and returns a semantic index by applying WSD etc. 
- * @author Lorenz Buehmann - * - */ -public class SemanticIndexCreator { - - private SyntacticIndex syntacticIndex; - - public SemanticIndexCreator(SyntacticIndex syntacticIndex) { - this.syntacticIndex = syntacticIndex; - } - - public SemanticIndex createSemanticIndex(){ - return null; - } -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleSemanticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,67 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index; - -import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; -import org.dllearner.core.owl.Entity; -import org.semanticweb.owlapi.model.OWLOntology; - -import java.util.HashSet; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -/** - * @author Lorenz Buehmann - * - */ -public class SimpleSemanticIndex implements SemanticIndex { - - private SyntacticIndex syntacticIndex; - private RDFSLabelEntityTextRetriever labelRetriever; - - /** - * - */ - public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { - this.syntacticIndex = syntacticIndex; - labelRetriever = new RDFSLabelEntityTextRetriever(ontology); - } - - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) - */ - @Override - public Set<Document> getDocuments(Entity entity) { - Set<Document> documents = new HashSet<Document>(); - Map<String, Double> relevantText = labelRetriever.getRelevantText(entity); - - for (Entry<String, Double> entry : relevantText.entrySet()) { - String label = entry.getKey(); - documents.addAll(syntacticIndex.getDocuments(label)); - } - - return 
documents; - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SemanticIndex#count(java.lang.String) - */ - @Override - public int count(Entity entity) { - return getDocuments(entity).size(); - } - - /* (non-Javadoc) - * @see org.dllearner.algorithms.isle.SemanticIndex#getSize() - */ - @Override - public int getSize() { - return syntacticIndex.getSize(); - } - - - -} Deleted: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -1,41 +0,0 @@ -/** - * - */ -package org.dllearner.algorithms.isle.index; - -import java.util.Set; - -/** - * Interface for a syntactic index, e.g., a basic string-based inverted index. - * - * @author Lorenz Buehmann - * @author Daniel Fleischhacker - */ -public interface SyntacticIndex { - - /** - * Returns a set of documents based on how the underlying index is processing the given - * search string. - * - * @param searchString query specifying the documents to retrieve - * @return set of documents retrieved based on the given query string - */ - Set<Document> getDocuments(String searchString); - - /** - * Returns the number of documents based on how the underlying index is processing the - * given search string. - * - * @param searchString query specifying the documents to include in the number of documents - * @return number of documents retrieved based on the given query string - */ - int count(String searchString); - - /** - * Returns the total number of documents contained in the index. 
- * - * @return the total number of documents contained in the index - */ - int getSize(); - -} Copied: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java (from rev 4025, trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticIndex.java) =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,38 @@ +package org.dllearner.algorithms.isle.index.semantic; + +import org.dllearner.algorithms.isle.index.Document; +import org.dllearner.core.owl.Entity; + +import java.util.Set; + +/** + * Interface for an index which is able to resolve a given entity's URI to the set of documents containing + * this entity, i.e., documents which contain words disambiguated to the given entity. + * + * @author Lorenz Buehmann + * @author Daniel Fleischhacker + */ +public interface SemanticIndex { + /** + * Returns the set of documents which reference the given entity using one of its surface forms. + * + * @param entity entity to retrieve documents + * @return documents referencing given entity + */ + public Set<Document> getDocuments(Entity entity); + + /** + * Returns the number of documents for the given entity. + * + * @param entity entity to return number of referencing documents for + * @return number of documents for the given entity in this index + */ + public int count(Entity entity); + + /** + * Returns the total number of documents contained in the index. 
+ * + * @return the total number of documents contained in the index + */ + public int getSize(); +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexFactory.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexFactory.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexFactory.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,18 @@ +package org.dllearner.algorithms.isle.index.semantic; + +import java.io.File; + +/** + * Provides methods for creating semantic indexes. + * + * @author Daniel Fleischhacker + */ +public interface SemanticIndexFactory { + /** + * Returns a newly created semantic index for the collection of files contained in the given {@code directory}. + * + * @param inputDirectory directory containing files to create index from + * @return semantic index for the files in the given input directory + */ + public SemanticIndex createIndex(File inputDirectory); +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,73 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.semantic.simple; + +import org.dllearner.algorithms.isle.index.Document; +import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; +import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever; +import org.dllearner.core.owl.Entity; +import 
org.semanticweb.owlapi.model.OWLOntology; + +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +/** + * A semantic index which returns all documents which contain at least one of the labels assigned to a specific + * entity in a provided ontology. + * + * @author Lorenz Buehmann + */ +public class SimpleSemanticIndex implements SemanticIndex { + private SyntacticIndex syntacticIndex; + private RDFSLabelEntityTextRetriever labelRetriever; + + /** + * Initializes the semantic index to use {@code ontology} for finding all labels of an entity and + * {@code syntacticIndex} to query for documents containing these labels. + * + * @param ontology ontology to retrieve entity labels from + * @param syntacticIndex index to query for documents containing the labels + */ + public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { + this.syntacticIndex = syntacticIndex; + labelRetriever = new RDFSLabelEntityTextRetriever(ontology); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#getDocuments(org.dllearner.core.owl.Entity) + */ + @Override + public Set<Document> getDocuments(Entity entity) { + Set<Document> documents = new HashSet<Document>(); + Map<String, Double> relevantText = labelRetriever.getRelevantText(entity); + + for (Entry<String, Double> entry : relevantText.entrySet()) { + String label = entry.getKey(); + documents.addAll(syntacticIndex.getDocuments(label)); + } + + return documents; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#count(java.lang.String) + */ + @Override + public int count(Entity entity) { + return getDocuments(entity).size(); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SemanticIndex#getSize() + */ + @Override + public int getSize() { + return syntacticIndex.getSize(); + } + + +} Added: 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndexFactory.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,40 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.semantic.simple; + +import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndexFactory; +import org.semanticweb.owlapi.model.OWLOntology; + +import java.io.File; + +/** + * This gets a syntactic index and returns a semantic index by applying WSD etc. + * + * @author Lorenz Buehmann + * @author Daniel Fleischhacker + */ +public class SimpleSemanticIndexFactory implements SemanticIndexFactory { + private OWLOntology ontology; + private SyntacticIndex syntacticIndex; + + /** + * Initializes a semantic index factory for creating simple semantic indexes. Simple semantic indexes use + * the labels assigned to an entity in {@code ontology} as its surface forms and return all documents + * from the given syntactic index which contain at least one of these surface forms. 
+ * + * @param syntacticIndex the syntactic index in which occurrences of the labels are searched + * @param ontology the ontology to retrieve the entities' labels from + */ + public SimpleSemanticIndexFactory(SyntacticIndex syntacticIndex, OWLOntology ontology) { + this.syntacticIndex = syntacticIndex; + this.ontology = ontology; + } + + @Override + public SemanticIndex createIndex(File inputDirectory) { + return new SimpleSemanticIndex(ontology, syntacticIndex); + } +} Copied: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java (from rev 4025, trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LuceneSyntacticIndex.java) =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/LuceneSyntacticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,100 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.syntactic; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TotalHitCountCollector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Version; +import org.dllearner.algorithms.isle.index.TextDocument; + +import java.io.File; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +/** + * @author Lorenz Buehmann + * + */ +public class 
LuceneSyntacticIndex implements SyntacticIndex { + + private IndexSearcher searcher; + private QueryParser parser; + private IndexReader indexReader; + private String searchField; + + public LuceneSyntacticIndex(IndexReader indexReader, String searchField) throws Exception { + this.indexReader = indexReader; + this.searchField = searchField; + searcher = new IndexSearcher(indexReader); + StandardAnalyzer analyzer = new StandardAnalyzer( Version.LUCENE_43); + parser = new QueryParser( Version.LUCENE_43, searchField, analyzer ); + } + + public LuceneSyntacticIndex(Directory directory, String searchField) throws Exception { + this(DirectoryReader.open(directory), searchField); + } + + public LuceneSyntacticIndex(String indexDirectory, String searchField) throws Exception { + this(DirectoryReader.open(FSDirectory.open(new File(indexDirectory))), searchField); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#getDocuments(java.lang.String) + */ + @Override + public Set<org.dllearner.algorithms.isle.index.Document> getDocuments(String searchString) { + Set<org.dllearner.algorithms.isle.index.Document> documents = new HashSet<org.dllearner.algorithms.isle.index.Document>(); + try { + Query query = parser.parse(searchString); + ScoreDoc[] result = searcher.search(query, getSize()).scoreDocs; + for (int i = 0; i < result.length; i++) { + Document doc = searcher.doc(result[i].doc); + documents.add(new TextDocument(doc.get(searchField))); + } + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return documents; + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#getSize() + */ + @Override + public int getSize() { + return indexReader.numDocs(); + } + + /* (non-Javadoc) + * @see org.dllearner.algorithms.isle.SyntacticIndex#count(java.lang.String) + */ + @Override + public int count(String searchString) { + try { + Query query = parser.parse(searchString); + 
TotalHitCountCollector results = new TotalHitCountCollector(); + searcher.search(query, results); + return results.getTotalHits(); + } catch (ParseException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + return -1; + } + +} Copied: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java (from rev 4025, trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/OWLOntologyLuceneSyntacticIndexCreator.java) =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/OWLOntologyLuceneSyntacticIndexCreator.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,94 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.syntactic; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Version; +import org.semanticweb.owlapi.model.*; +import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; +import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +/** + * Creates a Lucene Index for the labels of classes and properties. 
+ * @author Lorenz Buehmann + * + */ +public class OWLOntologyLuceneSyntacticIndexCreator { + + private Directory directory = new RAMDirectory(); + private OWLOntology ontology; + private Set<OWLEntity> schemaEntities; + + private OWLDataFactory df = new OWLDataFactoryImpl(); + private OWLAnnotationProperty annotationProperty = df.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI()); + private String language = "en"; + private String searchField; + + public OWLOntologyLuceneSyntacticIndexCreator(OWLOntology ontology, OWLAnnotationProperty annotationProperty, String searchField) throws IOException { + this.ontology = ontology; + this.annotationProperty = annotationProperty; + this.searchField = searchField; + + schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + } + + public SyntacticIndex buildIndex() throws Exception{ + Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); + IndexWriter writer = new IndexWriter(directory, indexWriterConfig); + System.out.println( "Creating index ..." 
); + + Set<org.apache.lucene.document.Document> luceneDocuments = new HashSet<org.apache.lucene.document.Document>(); + FieldType stringType = new FieldType(StringField.TYPE_STORED); + stringType.setStoreTermVectors(false); + FieldType textType = new FieldType(TextField.TYPE_STORED); + textType.setStoreTermVectors(false); + + for (OWLEntity entity : schemaEntities) { + String label = null; + Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (val.hasLang(language)) { + label = val.getLiteral(); + } + } + } + + if(label != null){ + org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document(); + luceneDocument.add(new Field("uri", entity.toStringID(), stringType)); + luceneDocument.add(new Field(searchField, label, textType)); + luceneDocuments.add(luceneDocument); + } + + } + writer.addDocuments(luceneDocuments); + + System.out.println("Done."); + writer.close(); + + return new LuceneSyntacticIndex(directory, searchField); + } + + + +} Copied: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java (from rev 4025, trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SyntacticIndex.java) =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/syntactic/SyntacticIndex.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -0,0 +1,43 @@ +/** + * + */ +package org.dllearner.algorithms.isle.index.syntactic; + +import org.dllearner.algorithms.isle.index.Document; + +import java.util.Set; + +/** + * Interface for a syntactic index, e.g., a basic string-based inverted index. 
+ * + * @author Lorenz Buehmann + * @author Daniel Fleischhacker + */ +public interface SyntacticIndex { + + /** + * Returns a set of documents based on how the underlying index is processing the given + * search string. + * + * @param searchString query specifying the documents to retrieve + * @return set of documents retrieved based on the given query string + */ + Set<Document> getDocuments(String searchString); + + /** + * Returns the number of documents based on how the underlying index is processing the + * given search string. + * + * @param searchString query specifying the documents to include in the number of documents + * @return number of documents retrieved based on the given query string + */ + int count(String searchString); + + /** + * Returns the total number of documents contained in the index. + * + * @return the total number of documents contained in the index + */ + int getSize(); + +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/AbstractRelevanceMetric.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -3,12 +3,12 @@ */ package org.dllearner.algorithms.isle.metrics; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; +import org.semanticweb.owlapi.model.OWLEntity; + import java.util.HashMap; import java.util.Map; -import org.dllearner.algorithms.isle.index.SemanticIndex; -import org.semanticweb.owlapi.model.OWLEntity; - /** * @author Lorenz Buehmann * Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/metrics/PMIRelevanceMetric.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -3,12 +3,12 @@ */ package org.dllearner.algorithms.isle.metrics; -import java.util.Set; - -import org.dllearner.algorithms.isle.index.SemanticIndex; +import com.google.common.collect.Sets; +import org.dllearner.algorithms.isle.index.Document; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; import org.dllearner.core.owl.Entity; -import com.google.common.collect.Sets; +import java.util.Set; /** * @author Lorenz Buehmann @@ -22,9 +22,9 @@ @Override public double getRelevance(Entity entityA, Entity entityB){ - Set<String> documentsA = index.getDocuments(entityA); - Set<String> documentsB = index.getDocuments(entityB); - Set<String> documentsAB = Sets.intersection(documentsA, documentsB); + Set<Document> documentsA = index.getDocuments(entityA); + Set<Document> documentsB = index.getDocuments(entityB); + Set<Document> documentsAB = Sets.intersection(documentsA, documentsB); int nrOfDocuments = index.getSize(); double dPClass = nrOfDocuments == 0 ? 
0 : ((double) documentsA.size() / (double) nrOfDocuments); Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-08-19 09:53:14 UTC (rev 4025) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETest.java 2013-08-19 09:54:07 UTC (rev 4026) @@ -3,13 +3,11 @@ */ package org.dllearner.algorithms.isle; -import java.io.File; -import java.util.Map; - -import org.dllearner.algorithms.isle.index.OWLOntologyLuceneSyntacticIndexCreator; -import org.dllearner.algorithms.isle.index.SemanticIndex; -import org.dllearner.algorithms.isle.index.SimpleSemanticIndex; -import org.dllearner.algorithms.isle.index.SyntacticIndex; +import com.google.common.base.Joiner; +import org.dllearner.algorithms.isle.index.semantic.SemanticIndex; +import org.dllearner.algorithms.isle.index.semantic.simple.SimpleSemanticIndex; +import org.dllearner.algorithms.isle.index.syntactic.OWLOntologyLuceneSyntacticIndexCreator; +import org.dllearner.algorithms.isle.index.syntactic.SyntacticIndex; import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric; import org.dllearner.algorithms.isle.metrics.RelevanceMetric; import org.dllearner.algorithms.isle.metrics.RelevanceUtils; @@ -28,10 +26,10 @@ import org.semanticweb.owlapi.model.OWLDataFactory; import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.model.OWLOntologyManager; - import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; -import com.google.common.base.Joiner; +import java.io.File; +import java.util.Map; /** * @author Lorenz Buehmann This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |