From: <dfl...@us...> - 2013-10-07 09:15:23
Revision: 4120
          http://sourceforge.net/p/dl-learner/code/4120
Author:   dfleischhacker
Date:     2013-10-07 09:15:20 +0000 (Mon, 07 Oct 2013)

Log Message:
-----------
Fix bug leading to out of bounds exception

Modified Paths:
--------------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java

Added Paths:
-----------
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/StanfordLemmatizer.java

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java    2013-10-07 07:38:17 UTC (rev 4119)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java    2013-10-07 09:15:20 UTC (rev 4120)
@@ -137,7 +137,7 @@
             else {
                 res.append(" ");
             }
-            res.append(lemmatizeSingleWord(word));
+            res.append(lemmatizeSingleWord(w));
         }
         catch (Exception e) {
             throw new RuntimeException(e);

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java    2013-10-07 07:38:17 UTC (rev 4119)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java    2013-10-07 09:15:20 UTC (rev 4120)
@@ -149,7 +149,8 @@
 
     @Override
     public Set<Entity> getCandidateEntities(String s) {
-        return trie.get(s);
+        Set<Entity> res = trie.get(s);
+        return res == null ? new HashSet<Entity>() : trie.get(s);
     }
 
     @Override
@@ -263,4 +264,34 @@
             return res;
         }
     }
+
+    /**
+     * Pair of the actual word and the word after processing.
+     */
+    public static class ActualModifiedWordPair {
+        private String actualString;
+        private String modifiedString;
+
+        public String getActualString() {
+            return actualString;
+        }
+
+        public void setActualString(String actualString) {
+            this.actualString = actualString;
+        }
+
+        public String getModifiedString() {
+            return modifiedString;
+        }
+
+        public void setModifiedString(String modifiedString) {
+            this.modifiedString = modifiedString;
+        }
+
+        public ActualModifiedWordPair(String actualString, String modifiedString) {
+
+            this.actualString = actualString;
+            this.modifiedString = modifiedString;
+        }
+    }
 }
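For context, a minimal sketch of what the getCandidateEntities() change buys callers (the lookup key and the candidatesTrie variable below are made up for illustration): before this revision a trie miss returned null, so iterating over the result threw a NullPointerException; now a miss yields an empty set and the loop body is simply skipped.

    // Sketch of a typical caller; "no such surface form" is a hypothetical key.
    for (Entity candidate : candidatesTrie.getCandidateEntities("no such surface form")) {
        // With the null guard in place, a miss means zero iterations instead of an NPE.
        System.out.println(candidate);
    }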
Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/StanfordLemmatizer.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/StanfordLemmatizer.java    (rev 0)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/StanfordLemmatizer.java    2013-10-07 09:15:20 UTC (rev 4120)
@@ -0,0 +1,54 @@
+package org.dllearner.algorithms.isle.index;
+
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.util.CoreMap;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ *
+ */
+class StanfordLemmatizer {
+
+    protected StanfordCoreNLP pipeline;
+
+    public StanfordLemmatizer() {
+        // Create StanfordCoreNLP object properties, with POS tagging
+        // (required for lemmatization), and lemmatization
+        Properties props;
+        props = new Properties();
+        props.put("annotators", "tokenize, ssplit, pos, lemma");
+
+        // StanfordCoreNLP loads a lot of models, so you probably
+        // only want to do this once per execution
+        this.pipeline = new StanfordCoreNLP(props);
+    }
+
+    public String lemmatize(String documentText)
+    {
+        List<String> lemmas = new LinkedList<String>();
+
+        // create an empty Annotation just with the given text
+        edu.stanford.nlp.pipeline.Annotation document = new edu.stanford.nlp.pipeline.Annotation(documentText);
+
+        // run all Annotators on this text
+        this.pipeline.annotate(document);
+
+        // Iterate over all of the sentences found
+        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
+        for(CoreMap sentence: sentences) {
+            // Iterate over all tokens in a sentence
+            for (CoreLabel token: sentence.get(CoreAnnotations.TokensAnnotation.class)) {
+                // Retrieve and add the lemma for each word into the
+                // list of lemmas
+                lemmas.add(token.get(CoreAnnotations.LemmaAnnotation.class));
+            }
+        }
+
+        return lemmas.get(0);
+    }
+}
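A minimal usage sketch for the new lemmatizer (assuming a caller in the same package, since the class is package-private; the sample word is made up). Note that lemmatize() runs the full tokenize/ssplit/pos/lemma pipeline but returns only the first token's lemma, so it is really meant for single-word input:

    // Constructing the lemmatizer loads the CoreNLP models, so reuse one instance.
    StanfordLemmatizer lemmatizer = new StanfordLemmatizer();
    String lemma = lemmatizer.lemmatize("running"); // expected to yield "run"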
Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java    2013-10-07 07:38:17 UTC (rev 4119)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieLinguisticAnnotator.java    2013-10-07 09:15:20 UTC (rev 4120)
@@ -37,6 +37,8 @@
             }
             String match = candidatesTrie.getLongestMatch(unparsed);
             if (match != null && !match.isEmpty()) {
+
+                //TODO: here we are losing the original offset and index...
                 Annotation annotation = new Annotation(document, i, match.length());
                 annotations.add(annotation);
                 i += match.length() - 1;
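A possible direction for the TODO above, purely as a sketch: if the trie handed the surface form back together with the processed form, for instance via the new ActualModifiedWordPair, the annotation could keep the original span. getLongestMatchWithOriginal() below is a hypothetical method, not part of this commit:

    // Hypothetical API: the pair carries the surface form as it occurs in the
    // document, so its length gives the original annotation span.
    SimpleEntityCandidatesTrie.ActualModifiedWordPair pair =
            candidatesTrie.getLongestMatchWithOriginal(unparsed);
    if (pair != null && !pair.getActualString().isEmpty()) {
        Annotation annotation = new Annotation(document, i, pair.getActualString().length());
        annotations.add(annotation);
        i += pair.getActualString().length() - 1;
    }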