From: <and...@us...> - 2013-09-04 14:40:03
|
Revision: 4064 http://sourceforge.net/p/dl-learner/code/4064 Author: andremelo Date: 2013-09-04 14:39:59 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Adding EntityCandidatesTries and the implementations of EntityCandidateGenerator and LinguisticAnnotator based on it Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java 2013-09-04 14:39:59 UTC (rev 4064) @@ -0,0 +1,41 @@ +package org.dllearner.algorithms.isle.index; + +import java.util.Map.Entry; +import java.util.Set; + +import org.dllearner.core.owl.Entity; + +public interface EntityCandidatesTrie { + + /** + * Adds an entry to the trie. 
If the string already exists, adds the entity to its set of candidates + * @param s + * @param e + */ + public void addEntry(String s, Entity e); + + + /** + * Gets set of candidate entities for an exact given String + * @param s + * @return + */ + public Set<Entity> getCandidateEntities(String s); + + + /** + * Gets longest matching string and its candidate entities + * @param s + * @return + */ + public Entry<String,Set<Entity>> getLongestMatchWithCandidates(String s); + + /** + * Gets the longest matching string + * @param s + * @return + */ + public String getLongestMatch(String s); + + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-09-04 14:39:59 UTC (rev 4064) @@ -0,0 +1,27 @@ +package org.dllearner.algorithms.isle.index; + +import java.util.Set; + +import org.dllearner.algorithms.isle.EntityCandidateGenerator; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +/** + * Generates candidates using an entity candidates prefix trie + * @author Andre Melo + * + */ +public class TrieEntityCandidateGenerator extends EntityCandidateGenerator{ + + EntityCandidatesTrie candidatesTrie; + + public TrieEntityCandidateGenerator(OWLOntology ontology, EntityCandidatesTrie candidatesTrie) { + super(ontology); + this.candidatesTrie = candidatesTrie; + } + + public Set<Entity> getCandidates(Annotation annotation) { + return candidatesTrie.getCandidateEntities(annotation.getToken()); + } + +} Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java =================================================================== --- 
trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java 2013-09-04 14:39:59 UTC (rev 4064) @@ -0,0 +1,41 @@ +package org.dllearner.algorithms.isle.index; + +import java.util.HashSet; +import java.util.Set; + +/** + * Annotates a document using a prefix trie + * @author Andre Melo + * + */ +public class TrieLinguisticAnnotator implements LinguisticAnnotator { + + EntityCandidatesTrie candidatesTrie; + + public TrieLinguisticAnnotator(EntityCandidatesTrie candidatesTrie) { + this.candidatesTrie = candidatesTrie; + } + + /** + * Generates annotations based on the trie's longest matching strings + * @param document the document whose raw content is scanned + * @return the annotations, one per longest match found in the content + * (each records the match's start offset and length) + */ + @Override + public Set<Annotation> annotate(Document document) { + String content = document.getRawContent(); + Set<Annotation> annotations = new HashSet<Annotation>(); + for (int i=0; i<content.length(); i++) { + String unparsed = content.substring(i); + String match = candidatesTrie.getLongestMatch(unparsed); + if (match!=null && !match.isEmpty()) { + Annotation annotation = new Annotation(document, i, match.length()); + annotations.add(annotation); + i += match.length()-1; + } + } + return annotations; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |