[DL-Learner SVN] SF.net SVN: dl-learner:[4195] trunk/components-core/src

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 4195
          http://sourceforge.net/p/dl-learner/code/4195
Author:   dfleischhacker
Date:     2013-12-09 14:40:04 +0000 (Mon, 09 Dec 2013)
Log Message:
-----------
WordNet alternative forms

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexGenerator.java
    trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java
===================================================================

--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java	2013-12-09 14:36:38 UTC (rev 4194)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java	2013-12-09 14:40:04 UTC (rev 4195)
@@ -97,6 +97,20 @@
     }
 
     /**
+     * Returns an array of all synonyms for the given word, looked up using the given part of speech.
+     *
+     * @param word the word to retrieve synonyms for
+     * @param pos the part of speech that is passed on to the synonym lookup
+     * @return synonyms for the given word
+     */
+    public String[] getSynonymsForWord(String word, POS pos) {
+        ArrayList<String> synonyms = new ArrayList<String>();
+
+        synonyms.addAll(wn.getAllSynonyms(pos, word));
+        return synonyms.toArray(new String[synonyms.size()]);
+    }
+
+    /**
      * Returns an array of the lemmas of the top {@code n} synonyms for the given word. Only synonyms for the POS in
      * {@link #RELEVANT_POS} are returned.
      *

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-12-09 14:36:38 UTC (rev 4194)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java	2013-12-09 14:40:04 UTC (rev 4195)
@@ -1,9 +1,8 @@
 package org.dllearner.algorithms.isle.index;
 
-import org.apache.commons.lang.StringUtils;
+import net.didion.jwnl.data.POS;
 import org.dllearner.algorithms.isle.textretrieval.EntityTextRetriever;
 import org.dllearner.core.owl.Entity;
-import org.dllearner.utilities.datastructures.PrefixTrie;
 import org.semanticweb.owlapi.model.OWLOntology;
 
 import java.util.*;
@@ -11,7 +10,6 @@
 
 public class SimpleEntityCandidatesTrie implements EntityCandidatesTrie {
     TokenTree tree;
-	PrefixTrie<FullTokenEntitySetPair> trie;
 	EntityTextRetriever entityTextRetriever;
 
 //    /**
@@ -31,15 +29,13 @@
      *
      * @param entityTextRetriever the text retriever to use
      * @param ontology the ontology to get strings from
-     * @param nameGenerator the name generator to use for generating alternative words
      */
-    public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology,
-                                      NameGenerator nameGenerator) {
+    public SimpleEntityCandidatesTrie(EntityTextRetriever entityTextRetriever, OWLOntology ontology) {
         this.entityTextRetriever = entityTextRetriever;
-        buildTrie(ontology, nameGenerator);
+        buildTrie(ontology);
     }
 	
-	public void buildTrie(OWLOntology ontology, NameGenerator nameGenerator) {
+	public void buildTrie(OWLOntology ontology) {
 		this.tree = new TokenTree();
 		Map<Entity, Set<List<Token>>> entity2TokenSet = entityTextRetriever.getRelevantText(ontology);
 		
@@ -48,12 +44,9 @@
 			Entity entity = entry.getKey();
 			Set<List<Token>> tokenSet = entry.getValue();
 			for (List<Token> tokens : tokenSet) {
+                addAlternativeFormsFromWordNet(tokens);
 				addEntry(tokens, entity);
                 addSubsequences(entity, tokens);
-//                addSubsequencesWordNet(entity, text);
-//                for (String alternativeText : nameGenerator.getAlternativeText(text)) {
-//                    addEntry(alternativeText.toLowerCase(), entity, text);
-//                }
 			}
 		}
 	}
@@ -76,65 +69,33 @@
         }
     }
 
-//    private void addSubsequencesWordNet(Entity entity, String text) {
-//        if (text.contains(" ")) {
-//            String[] tokens = text.split(" ");
-//
-//            List<String>[] wordnetTokens = (ArrayList<String>[]) new ArrayList[tokens.length];
-//
-//            // generate list of lemmatized wordnet synonyms for each token
-//            for (int i = 0; i < tokens.length; i++) {
-//                wordnetTokens[i] = new ArrayList<String>();
-//                wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(tokens[i].toLowerCase()));
-//                for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(tokens[i], 5)) {
-//                    wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).toLowerCase());
-//                }
-//            }
-//
-//            // generate subsequences starting at the given start index of the given size
-//            Set<String[]> allPossibleSubsequences = getAllPossibleSubsequences(tokens, wordnetTokens);
-//
-//            for (String[] s : allPossibleSubsequences) {
-//                addEntry(s[0], entity, s[1]);
-//            }
-//        }
-//    }
+    private void addAlternativeFormsFromWordNet(List<Token> tokens) {
+        for (Token t : tokens) {
+            POS wordnetPos = null;
+            String posTag = t.getPOSTag();
+            if (posTag.startsWith("N")) {//nouns
+                wordnetPos = POS.NOUN;
+            }
+            else if (posTag.startsWith("V")) {//verbs
+                wordnetPos = POS.VERB;
+            }
+            else if (posTag.startsWith("J")) {//adjectives
+                wordnetPos = POS.ADJECTIVE;
+            }
+            else if (posTag.startsWith("R")) {//adverbs
+                wordnetPos = POS.ADVERB;
+            }
+            if (wordnetPos == null) {
+                continue;
+            }
+            String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos);
 
-    private static Set<String[]> getAllPossibleSubsequences(String[] originalTokens, List<String>[] wordnetTokens) {
-        ArrayList<String[]> res = new ArrayList<String[]>();
-
-        for (int size = 1; size < wordnetTokens.length + 1; size++) {
-            for (int start = 0; start < wordnetTokens.length - size + 1; start++) {
-                getPossibleSubsequencesRec(originalTokens, res, new ArrayList<String>(), new ArrayList<String>(),
-                        wordnetTokens, 0, size);
+            for (String synonym : synonyms) {
+                t.addAlternativeForm(LinguisticUtil.getInstance().getNormalizedForm(synonym));
             }
         }
-
-        return new HashSet<String[]>(res);
     }
 
-
-    private static void getPossibleSubsequencesRec(String[] originalTokens, List<String[]> allSubsequences,
-                                                   List<String> currentSubsequence,
-                                                   List<String> currentOriginalSubsequence,
-                                                   List<String>[] wordnetTokens,
-                                                   int curStart, int maxLength) {
-
-        if (currentSubsequence.size() == maxLength) {
-            allSubsequences.add(new String[]{StringUtils.join(currentSubsequence, " ").toLowerCase(), StringUtils
-                    .join(currentOriginalSubsequence, " ").toLowerCase()});
-            return;
-        }
-        for (String w : wordnetTokens[curStart]) {
-            ArrayList<String> tmpSequence = new ArrayList<String>(currentSubsequence);
-            ArrayList<String> tmpOriginalSequence = new ArrayList<String>(currentOriginalSubsequence);
-            tmpSequence.add(w);
-            tmpOriginalSequence.add(originalTokens[curStart]);
-            getPossibleSubsequencesRec(originalTokens, allSubsequences, tmpSequence, tmpOriginalSequence, wordnetTokens,
-                    curStart + 1, maxLength);
-        }
-    }
-
     @Override
 	public void addEntry(List<Token> s, Entity e) {
         tree.add(s, e);
@@ -177,111 +138,10 @@
                 wordnetTokens[i].add(LinguisticUtil.getInstance().getNormalizedForm(w).replaceAll("_", " "));
             }
         }
-
-        // generate subsequences starting at the given start index of the given size
-        Set<String[]> allPossibleSubsequences = getAllPossibleSubsequences(tokens, wordnetTokens);
-
-        for (String[] s : allPossibleSubsequences) {
-            System.out.println(String.format("%s - %s", s[0], s[1]));
-        }
     }
 
     public void printTrie() {
 		System.out.println(this.toString());
-		
+
 	}
-
-    public static interface NameGenerator {
-        /**
-         * Returns a list of possible alternative words for the given word
-         *
-         * @param text    the text to return alternative words for
-         * @return alternative words for given word
-         */
-        List<String> getAlternativeText(String text);
-    }
-
-    public static class DummyNameGenerator implements NameGenerator {
-        @Override
-        public List<String> getAlternativeText(String word) {
-            return Collections.singletonList(word);
-        }
-    }
-
-    /**
-     * Generates alternative texts by using WordNet synonyms.
-     */
-    public static class WordNetNameGenerator implements NameGenerator {
-        private int maxNumberOfSenses = 5;
-
-        /**
-         * Sets up the generator for returning the lemmas of the top {@code maxNumberOfSenses} senses.
-         * @param maxNumberOfSenses the maximum number of senses to aggregate word lemmas from
-         */
-        public WordNetNameGenerator(int maxNumberOfSenses) {
-            this.maxNumberOfSenses = maxNumberOfSenses;
-        }
-
-        @Override
-        public List<String> getAlternativeText(String word) {
-            return Arrays.asList(LinguisticUtil.getInstance().getTopSynonymsForWord(word, maxNumberOfSenses));
-        }
-    }
-
-    /**
-     * Generates alternative texts by using WordNet synonym and lemmatizing of the original words
-     */
-    public static class LemmatizingWordNetNameGenerator implements NameGenerator {
-        private int maxNumberOfSenses = 5;
-
-        /**
-         * Sets up the generator for returning the lemmas of the top {@code maxNumberOfSenses} senses.
-         * @param maxNumberOfSenses the maximum number of senses to aggregate word lemmas from
-         */
-        public LemmatizingWordNetNameGenerator(int maxNumberOfSenses) {
-            this.maxNumberOfSenses = maxNumberOfSenses;
-        }
-
-        @Override
-        public List<String> getAlternativeText(String word) {
-            ArrayList<String> res = new ArrayList<String>();
-            res.add(LinguisticUtil.getInstance().getNormalizedForm(word));
-
-            for (String w : LinguisticUtil.getInstance().getTopSynonymsForWord(word, maxNumberOfSenses)) {
-                res.add(LinguisticUtil.getInstance().getNormalizedForm(w.replaceAll("_", " ")));
-            }
-
-            return res;
-        }
-    }
-
-    /**
-     * Pair of the actual word and the word after processing.
-     */
-    public static class ActualModifiedWordPair {
-        private String actualString;
-        private String modifiedString;
-
-        public String getActualString() {
-            return actualString;
-        }
-
-        public void setActualString(String actualString) {
-            this.actualString = actualString;
-        }
-
-        public String getModifiedString() {
-            return modifiedString;
-        }
-
-        public void setModifiedString(String modifiedString) {
-            this.modifiedString = modifiedString;
-        }
-
-        public ActualModifiedWordPair(String actualString, String modifiedString) {
-
-            this.actualString = actualString;
-            this.modifiedString = modifiedString;
-        }
-    }
 }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java	2013-12-09 14:36:38 UTC (rev 4194)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/Token.java	2013-12-09 14:40:04 UTC (rev 4195)
@@ -3,13 +3,13 @@
  */
 package org.dllearner.algorithms.isle.index;
 
+import com.google.common.collect.ComparisonChain;
+
 import java.io.Serializable;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
 
-import com.google.common.collect.ComparisonChain;
-
 /**
  * @author Lorenz Buehmann
  *

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-09 14:36:38 UTC (rev 4194)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TokenTree.java	2013-12-09 14:40:04 UTC (rev 4195)
@@ -13,13 +13,13 @@
  * @author Daniel Fleischhacker
  */
 public class TokenTree {
-    private HashMap<Token, TokenTree> children;
+    private LinkedHashMap<Token, TokenTree> children;
     private Set<Entity> entities;
     private List<Token> originalTokens;
     private boolean ignoreStopWords = true;
 
     public TokenTree() {
-        this.children = new HashMap<>();
+        this.children = new LinkedHashMap<>();
         this.entities = new HashSet<>();
         this.originalTokens = new ArrayList<>();
     }
@@ -73,7 +73,7 @@
     }
 
     /**
-     * Returns the set of entities located by the given list of tokens.
+     * Returns the set of entities located by the given list of tokens. This method does not consider alternative forms.
      *
      * @param tokens tokens to locate the information to get
      * @return located set of entities or null if token sequence not contained in tree
@@ -101,7 +101,7 @@
         TokenTree curNode = this;
 
         for (Token t : tokens) {
-            TokenTree nextNode = curNode.children.get(t);
+            TokenTree nextNode = getNextTokenTree(curNode, t);
             if (nextNode == null) {
                 return fallbackTokenList;
             }
@@ -111,6 +111,19 @@
         return fallbackTokenList;
     }
 
+    private TokenTree getNextTokenTree(TokenTree current, Token t) {
+        TokenTree next = current.children.get(t); // try the direct map lookup for the token first
+        if (next != null) {
+            return next;
+        }
+        for (Map.Entry<Token, TokenTree> child : current.children.entrySet()) { // otherwise scan all children in iteration order
+            if (child.getKey().equalsWithAlternativeForms(t)) { // first child key accepting t via its alternative forms wins
+                return child.getValue();
+            }
+        }
+        return null; // neither the direct lookup nor any alternative form matched
+    }
+
     /**
      * Returns the set of entities assigned to the longest matching token subsequence of the given token sequence.
      * @param tokens    token sequence to search for longest match
@@ -121,7 +134,7 @@
         TokenTree curNode = this;
 
         for (Token t : tokens) {
-            TokenTree nextNode = curNode.children.get(t);
+            TokenTree nextNode = getNextTokenTree(curNode, t);
             if (nextNode == null) {
                 return fallback == null ? null : fallback.entities;
             }
@@ -142,7 +155,7 @@
         TokenTree curNode = this;
 
         for (Token t : tokens) {
-            TokenTree nextNode = curNode.children.get(t);
+            TokenTree nextNode = getNextTokenTree(curNode, t);
             if (nextNode == null) {
                 return fallback == null ? null : fallback.originalTokens;
             }

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexGenerator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexGenerator.java	2013-12-09 14:36:38 UTC (rev 4194)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/semantic/SemanticIndexGenerator.java	2013-12-09 14:40:04 UTC (rev 4195)
@@ -1,38 +1,22 @@
 package org.dllearner.algorithms.isle.index.semantic;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.util.HashSet;
-import java.util.Set;
-
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
 import org.apache.log4j.Logger;
 import org.dllearner.algorithms.isle.EntityCandidateGenerator;
 import org.dllearner.algorithms.isle.TextDocumentGenerator;
-import org.dllearner.algorithms.isle.index.AnnotatedDocument;
-import org.dllearner.algorithms.isle.index.LinguisticAnnotator;
-import org.dllearner.algorithms.isle.index.SemanticAnnotator;
-import org.dllearner.algorithms.isle.index.SimpleEntityCandidatesTrie;
-import org.dllearner.algorithms.isle.index.TextDocument;
-import org.dllearner.algorithms.isle.index.TrieEntityCandidateGenerator;
-import org.dllearner.algorithms.isle.index.TrieLinguisticAnnotator;
+import org.dllearner.algorithms.isle.index.*;
 import org.dllearner.algorithms.isle.textretrieval.RDFSLabelEntityTextRetriever;
 import org.dllearner.algorithms.isle.wsd.StructureBasedWordSenseDisambiguation;
 import org.dllearner.algorithms.isle.wsd.WindowBasedContextExtractor;
 import org.dllearner.algorithms.isle.wsd.WordSenseDisambiguation;
 import org.dllearner.core.owl.Entity;
-import org.semanticweb.owlapi.model.OWLAnnotation;
-import org.semanticweb.owlapi.model.OWLAnnotationProperty;
-import org.semanticweb.owlapi.model.OWLEntity;
-import org.semanticweb.owlapi.model.OWLLiteral;
-import org.semanticweb.owlapi.model.OWLOntology;
+import org.semanticweb.owlapi.model.*;
 
-import com.google.common.hash.HashCode;
-import com.google.common.hash.HashFunction;
-import com.google.common.hash.Hashing;
+import java.io.*;
+import java.util.HashSet;
+import java.util.Set;
 
 /**
  * Interface for an index which is able to resolve a given entity's URI to the set of documents containing
@@ -86,14 +70,8 @@
     
     public static SemanticIndex generateIndex(Set<String> documents, OWLOntology ontology, boolean useWordNormalization){
     	SimpleEntityCandidatesTrie trie;
-        if (useWordNormalization) {
-            trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology),
-                    ontology, new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5));
-        }
-        else {
-            trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology),
-                    ontology, new SimpleEntityCandidatesTrie.DummyNameGenerator());
-        }
+        trie = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology),
+                    ontology);
         trie.printTrie();
         
         TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(trie);
@@ -142,7 +120,10 @@
         logger.info("Creating semantic index...");
     	SemanticIndex index = new SemanticIndex();
         for (String document : documents) {
-        	TextDocument textDocument = TextDocumentGenerator.getInstance().generateDocument(document);
+            if (document.isEmpty()) {
+                continue;
+            }
+            TextDocument textDocument = TextDocumentGenerator.getInstance().generateDocument(document);
             logger.debug("Processing document:" + textDocument);
             AnnotatedDocument annotatedDocument = semanticAnnotator.processDocument(textDocument);
             for (Entity entity : annotatedDocument.getContainedEntities()) {

Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java
===================================================================
--- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java	2013-12-09 14:36:38 UTC (rev 4194)
+++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/ISLETestCorpus.java	2013-12-09 14:40:04 UTC (rev 4195)
@@ -3,26 +3,11 @@
  */
 package org.dllearner.algorithms.isle;
 
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.text.DecimalFormat;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
+import com.google.common.base.Charsets;
+import com.google.common.base.Joiner;
+import com.google.common.io.Files;
 import org.dllearner.algorithms.celoe.CELOE;
-import org.dllearner.algorithms.isle.index.AnnotatedDocument;
-import org.dllearner.algorithms.isle.index.EntityCandidatesTrie;
-import org.dllearner.algorithms.isle.index.LinguisticAnnotator;
-import org.dllearner.algorithms.isle.index.RemoteDataProvider;
-import org.dllearner.algorithms.isle.index.SemanticAnnotator;
-import org.dllearner.algorithms.isle.index.SimpleEntityCandidatesTrie;
-import org.dllearner.algorithms.isle.index.TextDocument;
-import org.dllearner.algorithms.isle.index.Token;
-import org.dllearner.algorithms.isle.index.TrieEntityCandidateGenerator;
-import org.dllearner.algorithms.isle.index.TrieLinguisticAnnotator;
+import org.dllearner.algorithms.isle.index.*;
 import org.dllearner.algorithms.isle.index.semantic.SemanticIndex;
 import org.dllearner.algorithms.isle.index.semantic.SemanticIndexGenerator;
 import org.dllearner.algorithms.isle.metrics.PMIRelevanceMetric;
@@ -43,17 +28,17 @@
 import org.junit.Before;
 import org.junit.Test;
 import org.semanticweb.owlapi.apibinding.OWLManager;
-import org.semanticweb.owlapi.model.IRI;
-import org.semanticweb.owlapi.model.OWLDataFactory;
-import org.semanticweb.owlapi.model.OWLEntity;
-import org.semanticweb.owlapi.model.OWLOntology;
-import org.semanticweb.owlapi.model.OWLOntologyManager;
-
+import org.semanticweb.owlapi.model.*;
 import uk.ac.manchester.cs.owl.owlapi.OWLDataFactoryImpl;
 
-import com.google.common.base.Charsets;
-import com.google.common.base.Joiner;
-import com.google.common.io.Files;
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.text.DecimalFormat;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 /**
  * Some tests for the ISLE algorithm.
@@ -184,8 +169,7 @@
 	
     @Test
     public void testEntityLinkingWithLemmatizing() throws Exception {
-        EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology,
-                new SimpleEntityCandidatesTrie.LemmatizingWordNetNameGenerator(5));
+        EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology);
         LinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(ect);
         WordSenseDisambiguation wsd = new SimpleWordSenseDisambiguation(ontology);
         EntityCandidateGenerator ecg = new TrieEntityCandidateGenerator(ontology, ect);
@@ -200,8 +184,7 @@
 
     @Test
     public void testEntityLinkingWithSimpleStringMatching() throws Exception {
-        EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology,
-                new SimpleEntityCandidatesTrie.DummyNameGenerator());
+        EntityCandidatesTrie ect = new SimpleEntityCandidatesTrie(new RDFSLabelEntityTextRetriever(ontology), ontology);
         TrieLinguisticAnnotator linguisticAnnotator = new TrieLinguisticAnnotator(ect);
         linguisticAnnotator.setNormalizeWords(false);
         WordSenseDisambiguation wsd = new SimpleWordSenseDisambiguation(ontology);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.