From: <dfl...@us...> - 2013-09-05 07:46:37
|
Revision: 4081 http://sourceforge.net/p/dl-learner/code/4081 Author: dfleischhacker Date: 2013-09-05 07:46:34 +0000 (Thu, 05 Sep 2013) Log Message: ----------- Pass ontology into SimpleSemanticIndex Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 07:22:38 UTC (rev 4080) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndex.java 2013-09-05 07:46:34 UTC (rev 4081) @@ -28,103 +28,101 @@ * @author Daniel Fleischhacker */ public abstract class SemanticIndex { - - - private static final Logger logger = Logger.getLogger(SemanticIndex.class.getName()); - - private SemanticAnnotator semanticAnnotator; - private SyntacticIndex syntacticIndex; - private Map<Entity, Set<AnnotatedDocument>> index; - private OWLOntology ontology; - - public SemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, WordSenseDisambiguation wordSenseDisambiguation, - EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) { - this.ontology = ontology; - this.syntacticIndex = syntacticIndex; - semanticAnnotator = new SemanticAnnotator(wordSenseDisambiguation, entityCandidateGenerator, linguisticAnnotator); - } - - public SemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, SemanticAnnotator semanticAnnotator) { - this.semanticAnnotator = semanticAnnotator; - } - - public SemanticIndex() { -} - - /** - * @param semanticAnnotator the semanticAnnotator to set - */ - public void setSemanticAnnotator(SemanticAnnotator semanticAnnotator) { - this.semanticAnnotator = semanticAnnotator; - } - - /** - * Precompute the whole index, i.e. iterate over all entities and compute all annotated documents. - */ - public void buildIndex(Set<TextDocument> documents){ - logger.info("Creating semantic index..."); - index = new HashMap<Entity, Set<AnnotatedDocument>>(); - for (TextDocument document : documents) { - logger.info("Processing document:\n" + document); - AnnotatedDocument annotatedDocument = semanticAnnotator.processDocument(document); - for (Entity entity : annotatedDocument.getContainedEntities()) { - Set<AnnotatedDocument> existingAnnotatedDocuments = index.get(entity); - if(existingAnnotatedDocuments == null){ - existingAnnotatedDocuments = new HashSet<AnnotatedDocument>(); - index.put(entity, existingAnnotatedDocuments); - } - existingAnnotatedDocuments.add(annotatedDocument); - } - } - logger.info("...done."); - } - - public void buildIndex(OWLAnnotationProperty annotationProperty, String language){ - Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>(); - schemaEntities.addAll(ontology.getClassesInSignature()); - schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); - schemaEntities.addAll(ontology.getDataPropertiesInSignature()); - Set<TextDocument> documents = new HashSet<TextDocument>(); - for (OWLEntity entity : schemaEntities) { - String label = null; - Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); - for (OWLAnnotation annotation : annotations) { - if (annotation.getValue() instanceof OWLLiteral) { - OWLLiteral val = (OWLLiteral) annotation.getValue(); - if (language != null) { - if(val.hasLang(language)){ - label = val.getLiteral(); - } - - } else { - label = val.getLiteral(); - } - } - } - if(label != null){ - documents.add(new TextDocument(label)); - } - } - buildIndex(documents); - } - + + + private static final Logger logger = Logger.getLogger(SemanticIndex.class.getName()); + + private SemanticAnnotator semanticAnnotator; + private SyntacticIndex syntacticIndex; + private Map<Entity, Set<AnnotatedDocument>> index; + private OWLOntology ontology; + + public SemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex, WordSenseDisambiguation wordSenseDisambiguation, + EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) { + this.ontology = ontology; + this.syntacticIndex = syntacticIndex; + semanticAnnotator = new SemanticAnnotator(wordSenseDisambiguation, entityCandidateGenerator, linguisticAnnotator); + } + + public SemanticIndex(OWLOntology ontology) { + this.ontology = ontology; + } + /** + * @param semanticAnnotator the semanticAnnotator to set + */ + public void setSemanticAnnotator(SemanticAnnotator semanticAnnotator) { + this.semanticAnnotator = semanticAnnotator; + } + + /** + * Precompute the whole index, i.e. iterate over all entities and compute all annotated documents. + */ + public void buildIndex(Set<TextDocument> documents) { + logger.info("Creating semantic index..."); + index = new HashMap<Entity, Set<AnnotatedDocument>>(); + for (TextDocument document : documents) { + logger.info("Processing document:\n" + document); + AnnotatedDocument annotatedDocument = semanticAnnotator.processDocument(document); + for (Entity entity : annotatedDocument.getContainedEntities()) { + Set<AnnotatedDocument> existingAnnotatedDocuments = index.get(entity); + if (existingAnnotatedDocuments == null) { + existingAnnotatedDocuments = new HashSet<AnnotatedDocument>(); + index.put(entity, existingAnnotatedDocuments); + } + existingAnnotatedDocuments.add(annotatedDocument); + } + } + logger.info("...done."); + } + + public void buildIndex(OWLAnnotationProperty annotationProperty, String language) { + Set<OWLEntity> schemaEntities = new HashSet<OWLEntity>(); + schemaEntities.addAll(ontology.getClassesInSignature()); + schemaEntities.addAll(ontology.getObjectPropertiesInSignature()); + schemaEntities.addAll(ontology.getDataPropertiesInSignature()); + Set<TextDocument> documents = new HashSet<TextDocument>(); + for (OWLEntity entity : schemaEntities) { + String label = null; + Set<OWLAnnotation> annotations = entity.getAnnotations(ontology, annotationProperty); + for (OWLAnnotation annotation : annotations) { + if (annotation.getValue() instanceof OWLLiteral) { + OWLLiteral val = (OWLLiteral) annotation.getValue(); + if (language != null) { + if (val.hasLang(language)) { + label = val.getLiteral(); + } + + } + else { + label = val.getLiteral(); + } + } + } + if (label != null) { + documents.add(new TextDocument(label)); + } + } + buildIndex(documents); + } + + /** * Returns the set of annotated documents which reference the given entity using one of its surface forms. * * @param entity entity to retrieve documents * @return documents referencing given entity */ - public Set<AnnotatedDocument> getDocuments(Entity entity){ - if(index == null){ - System.err.println("You have to prebuild the index before you can use this method."); - System.exit(1); - } - - Set<AnnotatedDocument> annotatedDocuments = index.get(entity); - if(annotatedDocuments == null) { - annotatedDocuments = new HashSet<AnnotatedDocument>(); - } - return annotatedDocuments; + public Set<AnnotatedDocument> getDocuments(Entity entity) { + if (index == null) { + System.err.println("You have to prebuild the index before you can use this method."); + System.exit(1); + } + + Set<AnnotatedDocument> annotatedDocuments = index.get(entity); + if (annotatedDocuments == null) { + annotatedDocuments = new HashSet<AnnotatedDocument>(); + } + return annotatedDocuments; } /** @@ -133,8 +131,8 @@ * @param entity entity to return number of referencing documents for * @return number of documents for the given entity in this index */ - public int count(Entity entity){ - return index.get(entity).size(); + public int count(Entity entity) { + return index.get(entity).size(); } /** @@ -142,7 +140,7 @@ * * @return the total number of documents contained in the index */ - public int getSize(){ - return index.size(); + public int getSize() { + return index.size(); } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-05 07:22:38 UTC (rev 4080) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/simple/SimpleSemanticIndex.java 2013-09-05 07:46:34 UTC (rev 4081) @@ -29,8 +29,9 @@ * @param syntacticIndex index to query for documents containing the labels */ public SimpleSemanticIndex(OWLOntology ontology, SyntacticIndex syntacticIndex) { - SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology); - setSemanticAnnotator(new SemanticAnnotator( + super(ontology); + SimpleEntityCandidatesTrie trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology); + setSemanticAnnotator(new SemanticAnnotator( new SimpleWordSenseDisambiguation(ontology), new TrieEntityCandidateGenerator(ontology, trie), new TrieLinguisticAnnotator(trie))); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |