From: <dfl...@us...> - 2013-12-10 15:41:39
|
Revision: 4208 http://sourceforge.net/p/dl-learner/code/4208 Author: dfleischhacker Date: 2013-12-10 15:41:36 +0000 (Tue, 10 Dec 2013) Log Message: ----------- Adapt WSD interfaces to scored candidates Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -3,13 +3,14 @@ */ package org.dllearner.algorithms.isle; -import java.util.HashMap; -import java.util.Set; - import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.EntityScorePair; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; +import java.util.HashMap; +import java.util.Set; + /** * @author Lorenz Buehmann * @@ -22,8 +23,8 @@ this.ontology = ontology; } - public abstract Set<Entity> getCandidates(Annotation annotation); + public abstract Set<EntityScorePair> getCandidates(Annotation annotation); - public abstract HashMap<Annotation,Set<Entity>> getCandidatesMap(Set<Annotation> annotations); + public abstract HashMap<Annotation,Set<EntityScorePair>> getCandidatesMap(Set<Annotation> annotations); } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -17,10 +17,9 @@ /** * Gets set of candidate entities for a list of tokens - * @param s * @return */ - public Set<Entity> getCandidateEntities(List<Token> tokens); + public Set<EntityScorePair> getCandidateEntities(List<Token> tokens); /** @@ -28,14 +27,12 @@ * ontology string when the parameter string has been added to the trie after generation by using * WordNet or other additional methods. * - * @param s the string to search in the trie * @return string generating the path of the longest match in the trie */ public List<Token> getGeneratingStringForLongestMatch(List<Token> tokens); /** * Gets the longest matching string - * @param s * @return */ public List<Token> getLongestMatchingText(List<Token> tokens); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -1,13 +1,12 @@ package org.dllearner.algorithms.isle.index; +import org.dllearner.algorithms.isle.EntityCandidateGenerator; +import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation; + import java.util.HashMap; import java.util.HashSet; import java.util.Set; -import org.dllearner.algorithms.isle.EntityCandidateGenerator; -import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation; -import org.dllearner.core.owl.Entity; - /** * Provides methods to annotate documents. * @@ -23,7 +22,6 @@ /** * Initialize this semantic annotator to use the entities from the provided ontology. * - * @param ontology the ontology to use entities from */ public SemanticAnnotator(WordSenseDisambiguation wordSenseDisambiguation, EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) { @@ -41,9 +39,9 @@ public AnnotatedDocument processDocument(TextDocument document){ Set<Annotation> annotations = linguisticAnnotator.annotate(document); Set<SemanticAnnotation> semanticAnnotations = new HashSet<SemanticAnnotation>(); - HashMap<Annotation,Set<Entity>> candidatesMap = entityCandidateGenerator.getCandidatesMap(annotations); + HashMap<Annotation, Set<EntityScorePair>> candidatesMap = entityCandidateGenerator.getCandidatesMap(annotations); for (Annotation annotation : candidatesMap.keySet()) { - Set<Entity> candidateEntities = candidatesMap.get(annotation); + Set<EntityScorePair> candidateEntities = candidatesMap.get(annotation); if (candidateEntities == null || candidateEntities.size() == 0) { continue; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -3,16 +3,16 @@ */ package org.dllearner.algorithms.isle.index; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Set; - import org.dllearner.algorithms.isle.EntityCandidateGenerator; import org.dllearner.core.owl.Entity; import org.dllearner.utilities.owl.OWLAPIConverter; import org.semanticweb.owlapi.model.OWLEntity; import org.semanticweb.owlapi.model.OWLOntology; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + /** * @author Lorenz Buehmann * @@ -36,13 +36,17 @@ * @see org.dllearner.algorithms.isle.EntityCandidateGenerator#getCandidates(org.dllearner.algorithms.isle.index.Annotation) */ @Override - public Set<Entity> getCandidates(Annotation annotation) { - return allEntities; - } + public Set<EntityScorePair> getCandidates(Annotation annotation) { + HashSet<EntityScorePair> result = new HashSet<>(); + for (Entity e : allEntities) { + result.add(new EntityScorePair(e, 1.0)); + } + return result; + } @Override - public HashMap<Annotation, Set<Entity>> getCandidatesMap(Set<Annotation> annotations) { - HashMap<Annotation, Set<Entity>> result = new HashMap<Annotation, Set<Entity>>(); + public HashMap<Annotation, Set<EntityScorePair>> getCandidatesMap(Set<Annotation> annotations) { + HashMap<Annotation, Set<EntityScorePair>> result = new HashMap<Annotation, Set<EntityScorePair>>(); for (Annotation annotation: annotations) result.put(annotation, getCandidates(annotation)); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -1,160 +1,156 @@ -package org.dllearner.algorithms.isle.index; - -import net.didion.jwnl.data.POS; -import org.dllearner.algorithms.isle.WordNet; -import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; -import org.dllearner.core.owl.Entity; -import org.semanticweb.owlapi.model.OWLOntology; - -import java.util.*; -import java.util.Map.Entry; - -public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie { - TokenTree tree; - EntityTextRetriever entityTextRetriever; - -// /** -// * Initialize the trie with strings from the provided ontology using a no-op name generator, i.e., only the -// * actual ontology strings are added and no expansion is done. -// * -// * @param entityTextRetriever the text retriever to use -// * @param ontology the ontology to get strings from -// */ -// public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { -// this(entityTextRetriever, ontology, new DummyNameGenerator()); -// } - - /** - * Initialize the trie with strings from the provided ontology and use the given entity name generator - * for generating alternative words. - * - * @param entityTextRetriever the text retriever to use - * @param ontology the ontology to get strings from - */ - public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { - this.entityTextRetriever = entityTextRetriever; - buildTrie(ontology); - } - - public void buildTrie(OWLOntology ontology) { - this.tree = new TokenTree(); - Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology); - - - for (Entry<Entity, Set<List<Token>>> entry : entity2TokenSet.entrySet()) { - Entity entity = entry.getKey(); - Set<List<Token>> tokenSet = entry.getValue(); - for (List<Token> tokens : tokenSet) { - addAlternativeFormsFromWordNet(tokens); - addEntry(tokens, entity); - addSubsequences(entity, tokens); - } - } - } - - /** - * Adds the subsequences of a test - * @param entity - * @param tokens - */ - private void addSubsequences(Entity entity, List<Token> tokens) { - tree.add(tokens, entity); - for (int size = 1; size < tokens.size(); size++) { - for (int start = 0; start < tokens.size() - size + 1; start++) { - ArrayList<Token> subsequence = new ArrayList<>(); - for (int i = 0; i < size; i++) { - subsequence.add(tokens.get(start + i)); - } - addEntry(subsequence, entity); - } - } - } - - private void addAlternativeFormsFromWordNet(List<Token> tokens) { - for (Token t : tokens) { - POS wordnetPos = null; - String posTag = t.getPOSTag(); - if (posTag.startsWith("N")) {//nouns - wordnetPos = POS.NOUN; - } - else if (posTag.startsWith("V")) {//verbs - wordnetPos = POS.VERB; - } - else if (posTag.startsWith("J")) {//adjectives - wordnetPos = POS.ADJECTIVE; - } - else if (posTag.startsWith("R")) {//adverbs - wordnetPos = POS.ADVERB; - } - if (wordnetPos == null) { - continue; - } - //String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos); - Set<WordNet.LemmaScorePair> alternativeFormPairs = LinguisticUtil.getInstance() - .getScoredHyponyms(t.getRawForm(), wordnetPos); - - for (WordNet.LemmaScorePair synonym : alternativeFormPairs) { - // ignore all multi word synonyms - if (synonym.getLemma().contains("_")) { - continue; - } - //t.addAlternativeForm(LinguisticUtil.getInstance().getNormalizedForm(synonym)); - t.addAlternativeForm(synonym.getLemma(), synonym.getScore()); - } - } - } - - @Override - public void addEntry(List<Token> s, Entity e) { - tree.add(s, e); - } - - public void addEntry(List<Token> s, Entity e, List<Token> originalTokens) { - tree.add(s, e, originalTokens); - } - - @Override - public Set<Entity> getCandidateEntities(List<Token> tokens) { - Set<Entity> res = tree.getAllEntities(tokens); - System.out.println("Unscored: " + res); - Set<EntityScorePair> scored = tree.getAllEntitiesScored(tokens); - System.out.println("Scored: " + scored); - - return res; - } - - @Override - public List<Token> getGeneratingStringForLongestMatch(List<Token> tokens) { - return tree.getOriginalTokensForLongestMatch(tokens); - } - - @Override - public List<Token> getLongestMatchingText(List<Token> tokens) { - return tree.getLongestMatch(tokens); - } - - public String toString() { - return tree.toString(); - } - - public static void main(String[] args) { - String[] tokens = "this is a long and very complex text".split(" "); - - List<String>[] wordnetTokens = (ArrayList<String>[]) new ArrayList[tokens.length]; - - // generate list of lemmatized wordnet synonyms for each token - for (int i = 0; i < tokens.length; i++) { - wordnetTokens[i] = new ArrayList<String>(); - wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(tokens[i])); - for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(tokens[i], 5)) { - System.out.println("Adding: " + LinguisticUtil.getInstance().getNormalizedForm(w)); - wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).replaceAll("_", " ")); - } - } - } - - public void printTrie() { - System.out.println(this.toString()); - - } -} +package org.dllearner.algorithms.isle.index; + +import net.didion.jwnl.data.POS; +import org.dllearner.algorithms.isle.WordNet; +import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever; +import org.dllearner.core.owl.Entity; +import org.semanticweb.owlapi.model.OWLOntology; + +import java.util.*; +import java.util.Map.Entry; + +public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie { + TokenTree tree; + EntityTextRetriever entityTextRetriever; + +// /** +// * Initialize the trie with strings from the provided ontology using a no-op name generator, i.e., only the +// * actual ontology strings are added and no expansion is done. +// * +// * @param entityTextRetriever the text retriever to use +// * @param ontology the ontology to get strings from +// */ +// public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { +// this(entityTextRetriever, ontology, new DummyNameGenerator()); +// } + + /** + * Initialize the trie with strings from the provided ontology and use the given entity name generator + * for generating alternative words. + * + * @param entityTextRetriever the text retriever to use + * @param ontology the ontology to get strings from + */ + public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) { + this.entityTextRetriever = entityTextRetriever; + buildTrie(ontology); + } + + public void buildTrie(OWLOntology ontology) { + this.tree = new TokenTree(); + Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology); + + + for (Entry<Entity, Set<List<Token>>> entry : entity2TokenSet.entrySet()) { + Entity entity = entry.getKey(); + Set<List<Token>> tokenSet = entry.getValue(); + for (List<Token> tokens : tokenSet) { + addAlternativeFormsFromWordNet(tokens); + addEntry(tokens, entity); + addSubsequences(entity, tokens); + } + } + } + + /** + * Adds the subsequences of a test + * @param entity + * @param tokens + */ + private void addSubsequences(Entity entity, List<Token> tokens) { + tree.add(tokens, entity); + for (int size = 1; size < tokens.size(); size++) { + for (int start = 0; start < tokens.size() - size + 1; start++) { + ArrayList<Token> subsequence = new ArrayList<>(); + for (int i = 0; i < size; i++) { + subsequence.add(tokens.get(start + i)); + } + addEntry(subsequence, entity); + } + } + } + + private void addAlternativeFormsFromWordNet(List<Token> tokens) { + for (Token t : tokens) { + POS wordnetPos = null; + String posTag = t.getPOSTag(); + if (posTag.startsWith("N")) {//nouns + wordnetPos = POS.NOUN; + } + else if (posTag.startsWith("V")) {//verbs + wordnetPos = POS.VERB; + } + else if (posTag.startsWith("J")) {//adjectives + wordnetPos = POS.ADJECTIVE; + } + else if (posTag.startsWith("R")) {//adverbs + wordnetPos = POS.ADVERB; + } + if (wordnetPos == null) { + continue; + } + //String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos); + Set<WordNet.LemmaScorePair> alternativeFormPairs = LinguisticUtil.getInstance() + .getScoredHyponyms(t.getRawForm(), wordnetPos); + + for (WordNet.LemmaScorePair synonym : alternativeFormPairs) { + // ignore all multi word synonyms + if (synonym.getLemma().contains("_")) { + continue; + } + //t.addAlternativeForm(LinguisticUtil.getInstance().getNormalizedForm(synonym)); + t.addAlternativeForm(synonym.getLemma(), synonym.getScore()); + } + } + } + + @Override + public void addEntry(List<Token> s, Entity e) { + tree.add(s, e); + } + + public void addEntry(List<Token> s, Entity e, List<Token> originalTokens) { + tree.add(s, e, originalTokens); + } + + @Override + public Set<EntityScorePair> getCandidateEntities(List<Token> tokens) { + Set<EntityScorePair> res = tree.getAllEntitiesScored(tokens); + return res; + } + + @Override + public List<Token> getGeneratingStringForLongestMatch(List<Token> tokens) { + return tree.getOriginalTokensForLongestMatch(tokens); + } + + @Override + public List<Token> getLongestMatchingText(List<Token> tokens) { + return tree.getLongestMatch(tokens); + } + + public String toString() { + return tree.toString(); + } + + public static void main(String[] args) { + String[] tokens = "this is a long and very complex text".split(" "); + + List<String>[] wordnetTokens = (ArrayList<String>[]) new ArrayList[tokens.length]; + + // generate list of lemmatized wordnet synonyms for each token + for (int i = 0; i < tokens.length; i++) { + wordnetTokens[i] = new ArrayList<String>(); + wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(tokens[i])); + for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(tokens[i], 5)) { + System.out.println("Adding: " + LinguisticUtil.getInstance().getNormalizedForm(w)); + wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).replaceAll("_", " ")); + } + } + } + + public void printTrie() { + System.out.println(this.toString()); + + } +} Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -3,7 +3,6 @@ import com.google.common.collect.Lists; import org.dllearner.algorithms.isle.EntityCandidateGenerator; import org.dllearner.algorithms.isle.StopWordFilter; -import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; import java.util.ArrayList; @@ -27,8 +26,8 @@ this.candidatesTrie = candidatesTrie; } - public Set<Entity> getCandidates(Annotation annotation) { - Set<Entity> candidateEntities = candidatesTrie.getCandidateEntities(annotation.getTokens()); + public Set<EntityScorePair> getCandidates(Annotation annotation) { + Set<EntityScorePair> candidateEntities = candidatesTrie.getCandidateEntities(annotation.getTokens()); System.out.println(annotation + " --> " + candidateEntities); return candidateEntities; } @@ -39,7 +38,7 @@ * @param window : maximum distance between the annotations * @return */ - public void postProcess(HashMap<Annotation,Set<Entity>> candidatesMap, int window, StopWordFilter stopWordFilter) { + public void postProcess(HashMap<Annotation,Set<EntityScorePair>> candidatesMap, int window, StopWordFilter stopWordFilter) { Set<Annotation> annotations = candidatesMap.keySet(); List<Annotation> sortedAnnotations = new ArrayList<Annotation>(annotations); //TODO refactoring @@ -119,8 +118,8 @@ } @Override - public HashMap<Annotation, Set<Entity>> getCandidatesMap(Set<Annotation> annotations) { - HashMap<Annotation, Set<Entity>> candidatesMap = new HashMap<Annotation, Set<Entity>>(); + public HashMap<Annotation, Set<EntityScorePair>> getCandidatesMap(Set<Annotation> annotations) { + HashMap<Annotation, Set<EntityScorePair>> candidatesMap = new HashMap<>(); for (Annotation annotation: annotations) candidatesMap.put(annotation, getCandidates(annotation)); Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -18,14 +18,15 @@ */ package org.dllearner.algorithms.isle.wsd; -import java.util.Random; -import java.util.Set; - import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.EntityScorePair; import org.dllearner.algorithms.isle.index.SemanticAnnotation; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; +import java.util.Random; +import java.util.Set; + /** * Disambiguation by randomly selecting one of the candidates (baseline method). * @@ -43,17 +44,17 @@ @Override public SemanticAnnotation disambiguate(Annotation annotation, - Set<Entity> candidateEntities) { + Set<EntityScorePair> candidateEntities) { int pos = random.nextInt(candidateEntities.size()); int i = 0; - for(Entity e : candidateEntities) - { - if (i == pos) { - return new SemanticAnnotation(annotation, e); - } - i++; - } - return null; + for(EntityScorePair esp : candidateEntities) { + Entity e = esp.getEntity(); + if (i == pos) { + return new SemanticAnnotation(annotation, e); + } + i++; + } + return null; } } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -3,26 +3,20 @@ */ package org.dllearner.algorithms.isle.wsd; -import java.util.HashSet; -import java.util.Set; - import org.apache.log4j.Logger; import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.EntityScorePair; import org.dllearner.algorithms.isle.index.SemanticAnnotation; import org.dllearner.core.owl.Entity; import org.dllearner.utilities.owl.OWLAPIConverter; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; -import org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLDataFactory; -import org.semanticweb.owlapi.model.OWLEntity; -import org.semanticweb.owlapi.model.OWLLiteral; -import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.*; import org.semanticweb.owlapi.util.IRIShortFormProvider; import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; - import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl; +import java.util.HashSet; +import java.util.Set; + /** * @author Lorenz Buehmann * @@ -47,26 +41,27 @@ * @see org.dllearner.algorithms.isle.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set) */ @Override - public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) { + public SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities) { logger.debug("Linguistic annotations:\n" + annotation); logger.debug("Candidate entities:" + candidateEntities); String token = annotation.getString().trim(); //check if annotation token matches label of entity or the part behind #(resp. /) - for (Entity entity : candidateEntities) { - Set<String> labels = getLabels(entity); - for (String label : labels) { - if(label.equals(token)){ - logger.debug("Disambiguated entity: " + entity); - return new SemanticAnnotation(annotation, entity); - } - } - String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); - if(annotation.equals(shortForm)){ - logger.debug("Disambiguated entity: " + entity); - return new SemanticAnnotation(annotation, entity); - } - } - return null; + for (EntityScorePair entityScorePair : candidateEntities) { + Entity entity = entityScorePair.getEntity(); + Set<String> labels = getLabels(entity); + for (String label : labels) { + if (label.equals(token)) { + logger.debug("Disambiguated entity: " + entity); + return new SemanticAnnotation(annotation, entity); + } + } + String shortForm = sfp.getShortForm(IRI.create(entity.getURI())); + if (annotation.equals(shortForm)) { + logger.debug("Disambiguated entity: " + entity); + return new SemanticAnnotation(annotation, entity); + } + } + return null; } private Set<String> getLabels(Entity entity){ Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -3,21 +3,21 @@ */ package org.dllearner.algorithms.isle.wsd; -import java.io.IOException; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - +import com.google.common.base.Joiner; +import com.google.common.collect.Sets; import org.dllearner.algorithms.isle.StructuralEntityContext; import org.dllearner.algorithms.isle.VSMCosineDocumentSimilarity; import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.EntityScorePair; import org.dllearner.algorithms.isle.index.SemanticAnnotation; import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; -import com.google.common.base.Joiner; -import com.google.common.collect.Sets; +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; /** * @author Lorenz Buehmann @@ -39,7 +39,7 @@ * @see org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set) */ @Override - public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) { + public SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities) { if(!candidateEntities.isEmpty()){ //get the context of the annotated token List<String> tokenContext = contextExtractor.extractContext(annotation); @@ -47,19 +47,20 @@ //compare this context with the context of each entity candidate double maxScore = Double.NEGATIVE_INFINITY; Entity bestEntity = null; - for (Entity entity : candidateEntities) { - //get the context of the entity by analyzing the structure of the ontology - Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity); - //compute the VSM Cosine Similarity - double score = computeScore(tokenContext, entityContext); - //set best entity - if(score > maxScore){ - maxScore = score; - bestEntity = entity; - } - } - - return new SemanticAnnotation(annotation, bestEntity); + for (EntityScorePair entityScorePair : candidateEntities) { + Entity entity = entityScorePair.getEntity(); + //get the context of the entity by analyzing the structure of the ontology + Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity); + //compute the VSM Cosine Similarity + double score = computeScore(tokenContext, entityContext); + //set best entity + if (score > maxScore) { + maxScore = score; + bestEntity = entity; + } + } + + return new SemanticAnnotation(annotation, bestEntity); } return null; } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java 2013-12-10 15:25:13 UTC (rev 4207) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java 2013-12-10 15:41:36 UTC (rev 4208) @@ -1,12 +1,12 @@ package org.dllearner.algorithms.isle.wsd; -import java.util.Set; - import org.dllearner.algorithms.isle.index.Annotation; +import org.dllearner.algorithms.isle.index.EntityScorePair; import org.dllearner.algorithms.isle.index.SemanticAnnotation; -import org.dllearner.core.owl.Entity; import org.semanticweb.owlapi.model.OWLOntology; +import java.util.Set; + /** * Abstract class for the word sense disambiguation component. * @@ -27,9 +27,10 @@ /** * Chooses the correct entity for the given annotation from a set of candidate entities. * + * * @param annotation the annotation to find entity for * @param candidateEntities the set of candidate entities * @return semantic annotation containing the given annotation and the chosen entity */ - public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities); + public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |