From: <dfl...@us...> - 2013-09-04 14:28:16
|
Revision: 4061 http://sourceforge.net/p/dl-learner/code/4061 Author: dfleischhacker Date: 2013-09-04 14:28:12 +0000 (Wed, 04 Sep 2013) Log Message: ----------- Add class providing wrapper for common linguistic operations Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java Added Paths: ----------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-04 14:26:47 UTC (rev 4060) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-04 14:28:12 UTC (rev 4061) @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Set; import net.didion.jwnl.JWNL; import net.didion.jwnl.JWNLException; @@ -71,8 +72,31 @@ } return synonyms; } - - public List<String> getSisterTerms(POS pos, String s){ + + public List<String> getAllSynonyms(POS pos, String s) { + List<String> synonyms = new ArrayList<String>(); + try { + IndexWord iw = dict.getIndexWord(pos, s); + if (iw != null) { + Synset[] synsets = iw.getSenses(); + for (Synset synset : synsets) { + for (Word w : synset.getWords()) { + String lemma = w.getLemma(); + if (!lemma.equals(s) && !lemma.contains(" ")) { + synonyms.add(lemma); + } + } + } + } + } + catch (JWNLException e) { + e.printStackTrace(); + } + + return synonyms; + } + + public List<String> getSisterTerms(POS pos, String s){ List<String> sisterTerms = new ArrayList<String>(); try { Added: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java (rev 0) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-04 14:28:12 UTC (rev 4061) @@ -0,0 +1,78 @@ +package org.dllearner.algorithms.isle.index; + +import net.didion.jwnl.data.POS; +import org.dllearner.algorithms.isle.WordNet; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Provides shortcuts to + * @author Daniel Fleischhacker + */ +public class LinguisticUtil { + private static final WordNet wn = new WordNet(); + private static POS[] RELEVANT_POS = new POS[]{POS.NOUN, POS.VERB}; + + /** + * Processes the given string and puts camelCased words into single words. + * @param camelCase the word containing camelcase to split + * @return all words as camelcase contained in the given word + */ + public static String[] getWordsFromCamelCase(String camelCase) { + ArrayList<String> resultingWords = new ArrayList<String>(); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < camelCase.length(); i++) { + // we just ignore characters not matching the defined pattern + char curChar = camelCase.charAt(i); + if (!Character.isLetter(curChar)) { + continue; + } + if (Character.isUpperCase(curChar)) { // found a new upper case letter + resultingWords.add(sb.toString()); + sb = new StringBuilder(); + sb.append(Character.toLowerCase(curChar)); + } + else { // lower case letter + sb.append(curChar); + } + } + + if (sb.length() > 0) { + resultingWords.add(sb.toString()); + } + + return resultingWords.toArray(new String[resultingWords.size()]); + } + + /** + * Split word into words it contains divided by underscores. + * + * @param underScored word to split at underscores + * @return words contained in given word + */ + public static String[] getWordsFromUnderscored(String underScored) { + return underScored.split("_"); + } + + // get synonyms + public static String[] getSynonymsForWord(String word) { + ArrayList<String> synonyms = new ArrayList<String>(); + + for (POS pos : RELEVANT_POS) { + synonyms.addAll(wn.getAllSynonyms(pos, word)); + } + return synonyms.toArray(new String[synonyms.size()]); + } + + public static void main(String[] args) { + for (String s : getWordsFromCamelCase("thisIsAClassWith1Name123")) { + System.out.println(s); + for (String w : getSynonymsForWord(s)) { + System.out.println(" --> " + w); + } + } + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |