[DL-Learner SVN] SF.net SVN: dl-learner:[4208] trunk/components-core/src/main/java/org/dllearner/algorithms/isle

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 4208
          http://sourceforge.net/p/dl-learner/code/4208
Author:   dfleischhacker
Date:     2013-12-10 15:41:36 +0000 (Tue, 10 Dec 2013)
Log Message:
-----------
Adapt WSD interfaces to scored candidates

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java
===================================================================

--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/EntityCandidateGenerator.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -3,13 +3,14 @@
  */
 package org.dllearner.algorithms.isle;
 
-import java.util.HashMap;
-import java.util.Set;
-
 import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.EntityScorePair;
 import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
+import java.util.HashMap;
+import java.util.Set;
+
 /**
  * @author Lorenz Buehmann
  *
@@ -22,8 +23,8 @@
 		this.ontology = ontology;
 	}
 
-	public abstract Set<Entity> getCandidates(Annotation annotation);
+	public abstract Set<EntityScorePair> getCandidates(Annotation annotation);
 	
 
-	public abstract HashMap<Annotation,Set<Entity>> getCandidatesMap(Set<Annotation> annotations);
+	public abstract HashMap<Annotation,Set<EntityScorePair>> getCandidatesMap(Set<Annotation> annotations);
 }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/EntityCandidatesTrie.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -17,10 +17,9 @@
 	
 	/**
 	 * Gets set of candidate entities for a list of tokens
-	 * @param s
 	 * @return
 	 */
-	public Set<Entity> getCandidateEntities(List<Token> tokens);
+	public Set<EntityScorePair> getCandidateEntities(List<Token> tokens);
 
 
 	/**
@@ -28,14 +27,12 @@
      * ontology string when the parameter string has been added to the trie after generation by using
      * WordNet or other additional methods.
      *
-	 * @param s the string to search in the trie
 	 * @return string generating the path of the longest match in the trie
 	 */
 	public List<Token> getGeneratingStringForLongestMatch(List<Token> tokens);
 
     /**
      * Gets the longest matching string
-     * @param s
      * @return
      */
     public List<Token> getLongestMatchingText(List<Token> tokens);

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SemanticAnnotator.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -1,13 +1,12 @@
 package org.dllearner.algorithms.isle.index;
 
+import org.dllearner.algorithms.isle.EntityCandidateGenerator;
+import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation;
+
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Set;
 
-import org.dllearner.algorithms.isle.EntityCandidateGenerator;
-import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation;
-import org.dllearner.core.owl.Entity;
-
 /**
  * Provides methods to annotate documents.
  *
@@ -23,7 +22,6 @@
     /**
      * Initialize this semantic annotator to use the entities from the provided ontology.
      *
-     * @param ontology the ontology to use entities from
      */
     public SemanticAnnotator(WordSenseDisambiguation wordSenseDisambiguation, 
     		EntityCandidateGenerator entityCandidateGenerator, LinguisticAnnotator linguisticAnnotator) {
@@ -41,9 +39,9 @@
     public AnnotatedDocument processDocument(TextDocument document){
     	Set<Annotation> annotations = linguisticAnnotator.annotate(document);
     	Set<SemanticAnnotation> semanticAnnotations = new HashSet<SemanticAnnotation>();
-    	HashMap<Annotation,Set<Entity>> candidatesMap = entityCandidateGenerator.getCandidatesMap(annotations);
+    	HashMap<Annotation, Set<EntityScorePair>> candidatesMap = entityCandidateGenerator.getCandidatesMap(annotations);
     	for (Annotation annotation : candidatesMap.keySet()) {
-    		Set<Entity> candidateEntities = candidatesMap.get(annotation);
+    		Set<EntityScorePair> candidateEntities = candidatesMap.get(annotation);
             if (candidateEntities == null || candidateEntities.size() == 0) {
                 continue;
             }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidateGenerator.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -3,16 +3,16 @@
  */
 package org.dllearner.algorithms.isle.index;
 
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Set;
-
 import org.dllearner.algorithms.isle.EntityCandidateGenerator;
 import org.dllearner.core.owl.Entity;
 import org.dllearner.utilities.owl.OWLAPIConverter;
 import org.semanticweb.owlapi.model.OWLEntity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
 /**
  * @author Lorenz Buehmann
  *
@@ -36,13 +36,17 @@
 	 * @see org.dllearner.algorithms.isle.EntityCandidateGenerator#getCandidates(org.dllearner.algorithms.isle.index.Annotation)
 	 */
 	@Override
-	public Set<Entity> getCandidates(Annotation annotation) {
-		return allEntities;
-	}
+	public Set<EntityScorePair> getCandidates(Annotation annotation) {
+        HashSet<EntityScorePair> result = new HashSet<>();
+        for (Entity e : allEntities) {
+            result.add(new EntityScorePair(e, 1.0));
+        }
+        return result;
+    }
 
 	@Override
-	public HashMap<Annotation, Set<Entity>> getCandidatesMap(Set<Annotation> annotations) {
-		HashMap<Annotation, Set<Entity>> result = new HashMap<Annotation, Set<Entity>>();
+	public HashMap<Annotation, Set<EntityScorePair>> getCandidatesMap(Set<Annotation> annotations) {
+		HashMap<Annotation, Set<EntityScorePair>> result = new HashMap<Annotation, Set<EntityScorePair>>();
 		for (Annotation annotation: annotations) 
 			result.put(annotation, getCandidates(annotation));
 		

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -1,160 +1,156 @@
-package org.dllearner.algorithms.isle.index;
-
-import net.didion.jwnl.data.POS;
-import org.dllearner.algorithms.isle.WordNet;
-import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever;
-import org.dllearner.core.owl.Entity;
-import org.semanticweb.owlapi.model.OWLOntology;
-
-import java.util.*;
-import java.util.Map.Entry;
-
-public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie {
-    TokenTree tree;
-	EntityTextRetriever entityTextRetriever;
-
-//    /**
-//     * Initialize the trie with strings from the provided ontology using a no-op name generator, i.e., only the
-//     * actual ontology strings are added and no expansion is done.
-//     *
-//     * @param entityTextRetriever the text retriever to use
-//     * @param ontology the ontology to get strings from
-//     */
-//	public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) {
-//        this(entityTextRetriever, ontology, new DummyNameGenerator());
-//	}
-
-    /**
-     * Initialize the trie with strings from the provided ontology and use the given entity name generator
-     * for generating alternative words.
-     *
-     * @param entityTextRetriever the text retriever to use
-     * @param ontology the ontology to get strings from
-     */
-    public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) {
-        this.entityTextRetriever = entityTextRetriever;
-        buildTrie(ontology);
-    }
-	
-	public void buildTrie(OWLOntology ontology) {
-		this.tree = new TokenTree();
-		Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology);
-		
-		
-		for (Entry<Entity, Set<List<Token>>> entry : entity2TokenSet.entrySet()) {
-			Entity entity = entry.getKey();
-			Set<List<Token>> tokenSet = entry.getValue();
-			for (List<Token> tokens : tokenSet) {
-                addAlternativeFormsFromWordNet(tokens);
-				addEntry(tokens, entity);
-                addSubsequences(entity, tokens);
-			}
-		}
-	}
-	
-	/**
-	 * Adds the subsequences of a test
-	 * @param entity
-     * @param tokens
-	 */
-    private void addSubsequences(Entity entity, List<Token> tokens) {
-        tree.add(tokens, entity);
-        for (int size = 1; size < tokens.size(); size++) {
-            for (int start = 0; start < tokens.size() - size + 1; start++) {
-                ArrayList<Token> subsequence = new ArrayList<>();
-                for (int i = 0; i < size; i++) {
-                    subsequence.add(tokens.get(start + i));
-                }
-                addEntry(subsequence, entity);
-            }
-        }
-    }
-
-    private void addAlternativeFormsFromWordNet(List<Token> tokens) {
-        for (Token t : tokens) {
-            POS wordnetPos = null;
-            String posTag = t.getPOSTag();
-            if (posTag.startsWith("N")) {//nouns
-                wordnetPos = POS.NOUN;
-            }
-            else if (posTag.startsWith("V")) {//verbs
-                wordnetPos = POS.VERB;
-            }
-            else if (posTag.startsWith("J")) {//adjectives
-                wordnetPos = POS.ADJECTIVE;
-            }
-            else if (posTag.startsWith("R")) {//adverbs
-                wordnetPos = POS.ADVERB;
-            }
-            if (wordnetPos == null) {
-                continue;
-            }
-            //String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos);
-            Set<WordNet.LemmaScorePair> alternativeFormPairs = LinguisticUtil.getInstance()
-                    .getScoredHyponyms(t.getRawForm(), wordnetPos);
-
-            for (WordNet.LemmaScorePair synonym : alternativeFormPairs) {
-                // ignore all multi word synonyms
-                if (synonym.getLemma().contains("_")) {
-                    continue;
-                }
-                //t.addAlternativeForm(LinguisticUtil.getInstance().getNormalizedForm(synonym));
-                t.addAlternativeForm(synonym.getLemma(), synonym.getScore());
-            }
-        }
-    }
-
-    @Override
-	public void addEntry(List<Token> s, Entity e) {
-        tree.add(s, e);
-	}
-
-    public void addEntry(List<Token> s, Entity e, List<Token> originalTokens) {
-        tree.add(s, e, originalTokens);
-    }
-
-	@Override
-	public Set<Entity> getCandidateEntities(List<Token> tokens) {
-        Set<Entity> res = tree.getAllEntities(tokens);
-        System.out.println("Unscored: " + res);
-        Set<EntityScorePair> scored = tree.getAllEntitiesScored(tokens);
-        System.out.println("Scored: " + scored);
-
-        return res;
-    }
-
-	@Override
-	public List<Token> getGeneratingStringForLongestMatch(List<Token> tokens) {
-		return tree.getOriginalTokensForLongestMatch(tokens);
-	}
-
-    @Override
-    public List<Token> getLongestMatchingText(List<Token> tokens) {
-        return tree.getLongestMatch(tokens);
-    }
-	
-	public String toString() {
-		return tree.toString();
-	}
-
-    public static void main(String[] args) {
-        String[] tokens = "this is a long and very complex text".split(" ");
-
-        List<String>[] wordnetTokens = (ArrayList<String>[]) new ArrayList[tokens.length];
-
-        // generate list of lemmatized wordnet synonyms for each token
-        for (int i = 0; i < tokens.length; i++) {
-            wordnetTokens[i] = new ArrayList<String>();
-            wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(tokens[i]));
-            for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(tokens[i], 5)) {
-                System.out.println("Adding: " + LinguisticUtil.getInstance().getNormalizedForm(w));
-                wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).replaceAll("_", " "));
-            }
-        }
-    }
-
-    public void printTrie() {
-		System.out.println(this.toString());
-
-	}
-}
+package org.dllearner.algorithms.isle.index;
+
+import net.didion.jwnl.data.POS;
+import org.dllearner.algorithms.isle.WordNet;
+import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever;
+import org.dllearner.core.owl.Entity;
+import org.semanticweb.owlapi.model.OWLOntology;
+
+import java.util.*;
+import java.util.Map.Entry;
+
+public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie {
+    TokenTree tree;
+	EntityTextRetriever entityTextRetriever;
+
+//    /**
+//     * Initialize the trie with strings from the provided ontology using a no-op name generator, i.e., only the
+//     * actual ontology strings are added and no expansion is done.
+//     *
+//     * @param entityTextRetriever the text retriever to use
+//     * @param ontology the ontology to get strings from
+//     */
+//	public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) {
+//        this(entityTextRetriever, ontology, new DummyNameGenerator());
+//	}
+
+    /**
+     * Initialize the trie with strings from the provided ontology and use the given entity name generator
+     * for generating alternative words.
+     *
+     * @param entityTextRetriever the text retriever to use
+     * @param ontology the ontology to get strings from
+     */
+    public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) {
+        this.entityTextRetriever = entityTextRetriever;
+        buildTrie(ontology);
+    }
+	
+	public void buildTrie(OWLOntology ontology) {
+		this.tree = new TokenTree();
+		Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology);
+		
+		
+		for (Entry<Entity, Set<List<Token>>> entry : entity2TokenSet.entrySet()) {
+			Entity entity = entry.getKey();
+			Set<List<Token>> tokenSet = entry.getValue();
+			for (List<Token> tokens : tokenSet) {
+                addAlternativeFormsFromWordNet(tokens);
+				addEntry(tokens, entity);
+                addSubsequences(entity, tokens);
+			}
+		}
+	}
+	
+	/**
+	 * Adds the subsequences of a test
+	 * @param entity
+     * @param tokens
+	 */
+    private void addSubsequences(Entity entity, List<Token> tokens) {
+        tree.add(tokens, entity);
+        for (int size = 1; size < tokens.size(); size++) {
+            for (int start = 0; start < tokens.size() - size + 1; start++) {
+                ArrayList<Token> subsequence = new ArrayList<>();
+                for (int i = 0; i < size; i++) {
+                    subsequence.add(tokens.get(start + i));
+                }
+                addEntry(subsequence, entity);
+            }
+        }
+    }
+
+    private void addAlternativeFormsFromWordNet(List<Token> tokens) {
+        for (Token t : tokens) {
+            POS wordnetPos = null;
+            String posTag = t.getPOSTag();
+            if (posTag.startsWith("N")) {//nouns
+                wordnetPos = POS.NOUN;
+            }
+            else if (posTag.startsWith("V")) {//verbs
+                wordnetPos = POS.VERB;
+            }
+            else if (posTag.startsWith("J")) {//adjectives
+                wordnetPos = POS.ADJECTIVE;
+            }
+            else if (posTag.startsWith("R")) {//adverbs
+                wordnetPos = POS.ADVERB;
+            }
+            if (wordnetPos == null) {
+                continue;
+            }
+            //String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos);
+            Set<WordNet.LemmaScorePair> alternativeFormPairs = LinguisticUtil.getInstance()
+                    .getScoredHyponyms(t.getRawForm(), wordnetPos);
+
+            for (WordNet.LemmaScorePair synonym : alternativeFormPairs) {
+                // ignore all multi word synonyms
+                if (synonym.getLemma().contains("_")) {
+                    continue;
+                }
+                //t.addAlternativeForm(LinguisticUtil.getInstance().getNormalizedForm(synonym));
+                t.addAlternativeForm(synonym.getLemma(), synonym.getScore());
+            }
+        }
+    }
+
+    @Override
+	public void addEntry(List<Token> s, Entity e) {
+        tree.add(s, e);
+	}
+
+    public void addEntry(List<Token> s, Entity e, List<Token> originalTokens) {
+        tree.add(s, e, originalTokens);
+    }
+
+	@Override
+	public Set<EntityScorePair> getCandidateEntities(List<Token> tokens) {
+        Set<EntityScorePair> res = tree.getAllEntitiesScored(tokens);
+        return res;
+    }
+
+	@Override
+	public List<Token> getGeneratingStringForLongestMatch(List<Token> tokens) {
+		return tree.getOriginalTokensForLongestMatch(tokens);
+	}
+
+    @Override
+    public List<Token> getLongestMatchingText(List<Token> tokens) {
+        return tree.getLongestMatch(tokens);
+    }
+	
+	public String toString() {
+		return tree.toString();
+	}
+
+    public static void main(String[] args) {
+        String[] tokens = "this is a long and very complex text".split(" ");
+
+        List<String>[] wordnetTokens = (ArrayList<String>[]) new ArrayList[tokens.length];
+
+        // generate list of lemmatized wordnet synonyms for each token
+        for (int i = 0; i < tokens.length; i++) {
+            wordnetTokens[i] = new ArrayList<String>();
+            wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(tokens[i]));
+            for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(tokens[i], 5)) {
+                System.out.println("Adding: " + LinguisticUtil.getInstance().getNormalizedForm(w));
+                wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).replaceAll("_", " "));
+            }
+        }
+    }
+
+    public void printTrie() {
+		System.out.println(this.toString());
+
+	}
+}

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -3,7 +3,6 @@
 import com.google.common.collect.Lists;
 import org.dllearner.algorithms.isle.EntityCandidateGenerator;
 import org.dllearner.algorithms.isle.StopWordFilter;
-import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
 import java.util.ArrayList;
@@ -27,8 +26,8 @@
 		this.candidatesTrie = candidatesTrie;
 	}
 	
-	public Set<Entity> getCandidates(Annotation annotation) {
-        Set<Entity> candidateEntities = candidatesTrie.getCandidateEntities(annotation.getTokens());
+	public Set<EntityScorePair> getCandidates(Annotation annotation) {
+        Set<EntityScorePair> candidateEntities = candidatesTrie.getCandidateEntities(annotation.getTokens());
         System.out.println(annotation + " --> " + candidateEntities);
         return candidateEntities;
 	}
@@ -39,7 +38,7 @@
      * @param window : maximum distance between the annotations
      * @return
      */
-    public void postProcess(HashMap<Annotation,Set<Entity>> candidatesMap, int window, StopWordFilter stopWordFilter) {
+    public void postProcess(HashMap<Annotation,Set<EntityScorePair>> candidatesMap, int window, StopWordFilter stopWordFilter) {
     	Set<Annotation> annotations = candidatesMap.keySet();
     	List<Annotation> sortedAnnotations = new ArrayList<Annotation>(annotations);
     	//TODO refactoring
@@ -119,8 +118,8 @@
 	}
 
 	@Override
-	public HashMap<Annotation, Set<Entity>> getCandidatesMap(Set<Annotation> annotations) {
-		HashMap<Annotation, Set<Entity>> candidatesMap = new HashMap<Annotation, Set<Entity>>();
+	public HashMap<Annotation, Set<EntityScorePair>> getCandidatesMap(Set<Annotation> annotations) {
+		HashMap<Annotation, Set<EntityScorePair>> candidatesMap = new HashMap<>();
 		for (Annotation annotation: annotations) 
 			candidatesMap.put(annotation, getCandidates(annotation));
 		

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/RandomWordSenseDisambiguation.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -18,14 +18,15 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
-import java.util.Random;
-import java.util.Set;
-
 import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.EntityScorePair;
 import org.dllearner.algorithms.isle.index.SemanticAnnotation;
 import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
+import java.util.Random;
+import java.util.Set;
+
 /**
  * Disambiguation by randomly selecting one of the candidates (baseline method).
  * 
@@ -43,17 +44,17 @@
 
 	@Override
 	public SemanticAnnotation disambiguate(Annotation annotation,
-			Set<Entity> candidateEntities) {
+			Set<EntityScorePair> candidateEntities) {
 		int pos = random.nextInt(candidateEntities.size());
 		int i = 0;
-		for(Entity e : candidateEntities)
-		{
-		    if (i == pos) {
-		    	return new SemanticAnnotation(annotation, e);
-		    }
-		    i++;
-		}
-		return null;
+		for(EntityScorePair esp : candidateEntities) {
+            Entity e = esp.getEntity();
+            if (i == pos) {
+                return new SemanticAnnotation(annotation, e);
+            }
+            i++;
+        }
+        return null;
 	}
 
 }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/SimpleWordSenseDisambiguation.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -3,26 +3,20 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
-import java.util.HashSet;
-import java.util.Set;
-
 import org.apache.log4j.Logger;
 import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.EntityScorePair;
 import org.dllearner.algorithms.isle.index.SemanticAnnotation;
 import org.dllearner.core.owl.Entity;
 import org.dllearner.utilities.owl.OWLAPIConverter;
-import org.semanticweb.owlapi.model.IRI;
-import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom;
-import org.semanticweb.owlapi.model.OWLAnnotationProperty;
-import org.semanticweb.owlapi.model.OWLDataFactory;
-import org.semanticweb.owlapi.model.OWLEntity;
-import org.semanticweb.owlapi.model.OWLLiteral;
-import org.semanticweb.owlapi.model.OWLOntology;
+import org.semanticweb.owlapi.model.*;
 import org.semanticweb.owlapi.util.IRIShortFormProvider;
 import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider;
-
 import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
 
+import java.util.HashSet;
+import java.util.Set;
+
 /**
  * @author Lorenz Buehmann
  *
@@ -47,26 +41,27 @@
 	 * @see org.dllearner.algorithms.isle.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set)
 	 */
 	@Override
-	public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) {
+	public SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities) {
 		logger.debug("Linguistic annotations:\n" + annotation);
 		logger.debug("Candidate entities:" + candidateEntities);
 		String token = annotation.getString().trim();
 		//check if annotation token matches label of entity or the part behind #(resp. /)
-		for (Entity entity : candidateEntities) {
-			Set<String> labels = getLabels(entity);
-			for (String label : labels) {
-				if(label.equals(token)){
-					logger.debug("Disambiguated entity: " + entity);
-					return new SemanticAnnotation(annotation, entity);
-				}
-			}
-			String shortForm = sfp.getShortForm(IRI.create(entity.getURI()));
-			if(annotation.equals(shortForm)){
-				logger.debug("Disambiguated entity: " + entity);
-				return new SemanticAnnotation(annotation, entity);
-			}
-		}
-		return null;
+		for (EntityScorePair entityScorePair : candidateEntities) {
+            Entity entity = entityScorePair.getEntity();
+            Set<String> labels = getLabels(entity);
+            for (String label : labels) {
+                if (label.equals(token)) {
+                    logger.debug("Disambiguated entity: " + entity);
+                    return new SemanticAnnotation(annotation, entity);
+                }
+            }
+            String shortForm = sfp.getShortForm(IRI.create(entity.getURI()));
+            if (annotation.equals(shortForm)) {
+                logger.debug("Disambiguated entity: " + entity);
+                return new SemanticAnnotation(annotation, entity);
+            }
+        }
+        return null;
 	}
 	
 	private Set<String> getLabels(Entity entity){

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/StructureBasedWordSenseDisambiguation.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -3,21 +3,21 @@
  */
 package org.dllearner.algorithms.isle.wsd;
 
-import java.io.IOException;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
+import com.google.common.base.Joiner;
+import com.google.common.collect.Sets;
 import org.dllearner.algorithms.isle.StructuralEntityContext;
 import org.dllearner.algorithms.isle.VSMCosineDocumentSimilarity;
 import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.EntityScorePair;
 import org.dllearner.algorithms.isle.index.SemanticAnnotation;
 import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
-import com.google.common.base.Joiner;
-import com.google.common.collect.Sets;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 /**
  * @author Lorenz Buehmann
@@ -39,7 +39,7 @@
 	 * @see org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation#disambiguate(org.dllearner.algorithms.isle.index.Annotation, java.util.Set)
 	 */
 	@Override
-	public SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities) {
+	public SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities) {
 		if(!candidateEntities.isEmpty()){
 			//get the context of the annotated token
 			List<String> tokenContext = contextExtractor.extractContext(annotation);
@@ -47,19 +47,20 @@
 			//compare this context with the context of each entity candidate
 			double maxScore = Double.NEGATIVE_INFINITY;
 			Entity bestEntity = null;
-			for (Entity entity : candidateEntities) {
-				//get the context of the entity by analyzing the structure of the ontology
-				Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity);
-				//compute the VSM Cosine Similarity
-				double score = computeScore(tokenContext, entityContext);
-				//set best entity
-				if(score > maxScore){
-					maxScore = score;
-					bestEntity = entity;
-				}
-			}
-			
-			return new SemanticAnnotation(annotation, bestEntity);
+			for (EntityScorePair entityScorePair : candidateEntities) {
+                Entity entity = entityScorePair.getEntity();
+                //get the context of the entity by analyzing the structure of the ontology
+                Set<String> entityContext = StructuralEntityContext.getContextInNaturalLanguage(ontology, entity);
+                //compute the VSM Cosine Similarity
+                double score = computeScore(tokenContext, entityContext);
+                //set best entity
+                if (score > maxScore) {
+                    maxScore = score;
+                    bestEntity = entity;
+                }
+            }
+
+            return new SemanticAnnotation(annotation, bestEntity);
 		}
 		return null;
 	}

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java	2013-12-10 15:25:13 UTC (rev 4207)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/wsd/WordSenseDisambiguation.java	2013-12-10 15:41:36 UTC (rev 4208)
@@ -1,12 +1,12 @@
 package org.dllearner.algorithms.isle.wsd;
 
-import java.util.Set;
-
 import org.dllearner.algorithms.isle.index.Annotation;
+import org.dllearner.algorithms.isle.index.EntityScorePair;
 import org.dllearner.algorithms.isle.index.SemanticAnnotation;
-import org.dllearner.core.owl.Entity;
 import org.semanticweb.owlapi.model.OWLOntology;
 
+import java.util.Set;
+
 /**
  * Abstract class for the word sense disambiguation component.
  *
@@ -27,9 +27,10 @@
     /**
      * Chooses the correct entity for the given annotation from a set of candidate entities.
      *
+     *
      * @param annotation        the annotation to find entity for
      * @param candidateEntities the set of candidate entities
      * @return semantic annotation containing the given annotation and the chosen entity
      */
-    public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<Entity> candidateEntities);
+    public abstract SemanticAnnotation disambiguate(Annotation annotation, Set<EntityScorePair> candidateEntities);
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





[DL-Learner SVN] SF.net SVN: dl-learner:[4208] trunk/components-core/src/main/java/org/dllearner/algorithms/isle

[DL-Learner SVN] SF.net SVN: dl-learner:[4208] trunk/components-core/src/main/java/org/dllearner/algorithms/isle