[DL-Learner SVN] SF.net SVN: dl-learner:[4065] trunk/components-core

SourceForge Headquarters, 1320 Columbia Street, Suite 310, San Diego, CA 92101, +1 (858) 422-6466

Revision: 4065
          http://sourceforge.net/p/dl-learner/code/4065
Author:   dfleischhacker
Date:     2013-09-04 15:04:37 +0000 (Wed, 04 Sep 2013)
Log Message:
-----------
Add lemmatizing to linguistic utils

Modified Paths:
--------------
    trunk/components-core/pom.xml
    trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java

Modified: trunk/components-core/pom.xml
===================================================================

--- trunk/components-core/pom.xml	2013-09-04 14:39:59 UTC (rev 4064)
+++ trunk/components-core/pom.xml	2013-09-04 15:04:37 UTC (rev 4065)
@@ -195,7 +195,13 @@
 			<version>1.0</version>
 		</dependency>
 
+        <dependency>
+            <groupId>edu.northwestern.at</groupId>
+            <artifactId>morphadorner</artifactId>
+            <version>2009-04-30</version>
+        </dependency>
 
+
         <!-- This module is a library module, so it needs only to have the slf api dependency to enable logging -->
 		<dependency>
 			<groupId>org.slf4j</groupId>

Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java
===================================================================
--- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java	2013-09-04 14:39:59 UTC (rev 4064)
+++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java	2013-09-04 15:04:37 UTC (rev 4065)
@@ -1,9 +1,16 @@
 package org.dllearner.algorithms.isle.index;
 
+import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.DefaultLemmatizer;
+import edu.northwestern.at.utils.corpuslinguistics.lemmatizer.Lemmatizer;
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.*;
+import edu.stanford.nlp.util.CoreMap;
 import net.didion.jwnl.data.POS;
 import org.dllearner.algorithms.isle.WordNet;
 
 import java.util.ArrayList;
+import java.util.Properties;
 
 /**
  * Provides shortcuts to commonly used linguistic operations
@@ -12,7 +19,17 @@
 public class LinguisticUtil {
     private static final WordNet wn = new WordNet();
     private static POS[] RELEVANT_POS = new POS[]{POS.NOUN, POS.VERB};
+    private static Lemmatizer lemmatizer;
 
+    static {
+        try {
+            lemmatizer = new DefaultLemmatizer();
+        }
+        catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
     /**
      * Processes the given string and puts camelCased words into single words.
      * @param camelCase    the word containing camelcase to split
@@ -54,7 +71,13 @@
         return underScored.split("_");
     }
 
-    // get synonyms
+    /**
+     * Returns an array of all synonyms for the given word. Only synonyms for the POS in {@link #RELEVANT_POS} are
+     * returned.
+     *
+     * @param word the word to retrieve synonyms for
+     * @return synonyms for the given word
+     */
     public static String[] getSynonymsForWord(String word) {
         ArrayList<String> synonyms = new ArrayList<String>();
 
@@ -64,7 +87,28 @@
         return synonyms.toArray(new String[synonyms.size()]);
     }
 
+    /**
+     * Returns the normalized form of the given word. This method is only able to work with single words! If there is an
+     * error normalizing the given word, the word itself is returned.
+     *
+     * @param word the word to get normalized form for
+     * @return normalized form of the word or the word itself on an error
+     */
+    public static String getNormalizedForm(String word) {
+        try {
+            if (lemmatizer == null) {
+                return word;
+            }
+            return lemmatizer.lemmatize(word);
+        }
+        catch (Exception e) {
+            e.printStackTrace();
+        }
+        return word;
+    }
+
     public static void main(String[] args) {
+        System.out.println(getNormalizedForm("going"));
         for (String s : getWordsFromCamelCase("thisIsAClassWith1Name123")) {
             System.out.println(s);
             for (String w : getSynonymsForWord(s)) {

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.