From: <dfl...@us...> - 2013-09-06 10:01:56
|
Revision: 4092 http://sourceforge.net/p/dl-learner/code/4092 Author: dfleischhacker Date: 2013-09-06 10:01:53 +0000 (Fri, 06 Sep 2013) Log Message: ----------- Add methods to get top n synonyms for words Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-05 13:59:47 UTC (rev 4091) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-09-06 10:01:53 UTC (rev 4092) @@ -1,78 +1,115 @@ package org.dllearner.algorithms.isle; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - import net.didion.jwnl.JWNL; import net.didion.jwnl.JWNLException; -import net.didion.jwnl.data.IndexWord; -import net.didion.jwnl.data.POS; -import net.didion.jwnl.data.PointerTarget; -import net.didion.jwnl.data.PointerUtils; -import net.didion.jwnl.data.Synset; -import net.didion.jwnl.data.Word; +import net.didion.jwnl.data.*; import net.didion.jwnl.data.list.PointerTargetNode; import net.didion.jwnl.data.list.PointerTargetNodeList; import net.didion.jwnl.dictionary.Dictionary; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + public class WordNet { - - public Dictionary dict; - - public WordNet() { - try { - JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream("wordnet_properties.xml")); - dict = Dictionary.getInstance(); - } catch (JWNLException e) { - e.printStackTrace(); - } - } - - public WordNet(String configPath) { - try { - JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream(configPath)); - dict = Dictionary.getInstance(); - } catch (JWNLException e) { - e.printStackTrace(); - } - } - - public WordNet(InputStream propertiesStream) { - try { - JWNL.initialize(propertiesStream); - dict = Dictionary.getInstance(); - } catch (JWNLException e) { - e.printStackTrace(); - } - } - - public List<String> getBestSynonyms(POS pos, String s) { - - List<String> synonyms = new ArrayList<String>(); - - try { - IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) + + public Dictionary dict; + + public WordNet() { + try { + JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream("wordnet_properties.xml")); + dict = Dictionary.getInstance(); + } + catch (JWNLException e) { + e.printStackTrace(); + } + } + + public WordNet(String configPath) { + try { + JWNL.initialize(this.getClass().getClassLoader().getResourceAsStream(configPath)); + dict = Dictionary.getInstance(); + } + catch (JWNLException e) { + e.printStackTrace(); + } + } + + public WordNet(InputStream propertiesStream) { + try { + JWNL.initialize(propertiesStream); + dict = Dictionary.getInstance(); + } + catch (JWNLException e) { + e.printStackTrace(); + } + } + + public static void main(String[] args) { + System.out.println(new WordNet().getBestSynonyms(POS.VERB, "learn")); + System.out.println(new WordNet().getSisterTerms(POS.NOUN, "actress")); + } + + public List<String> getBestSynonyms(POS pos, String s) { + + List<String> synonyms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) // IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); - if(iw != null){ - Synset[] synsets = iw.getSenses(); - Word[] words = synsets[0].getWords(); - for(Word w : words){ - String c = w.getLemma(); - if (!c.equals(s) && !c.contains(" ") && synonyms.size() < 4) { - synonyms.add(c); - } - } - } - - } catch (JWNLException e) { - e.printStackTrace(); - } - return synonyms; - } + if (iw != null) { + Synset[] synsets = iw.getSenses(); + Word[] words = synsets[0].getWords(); + for (Word w : words) { + String c = w.getLemma(); + if (!c.equals(s) && !c.contains(" ") && synonyms.size() < 4) { + synonyms.add(c); + } + } + } + } + catch (JWNLException e) { + e.printStackTrace(); + } + return synonyms; + } + + /** + * Returns the lemmas for the top {@code n} synsets of the given POS for the string {@code s}. + * + * @param pos the part of speech to retrieve synonyms for + * @param s the string to retrieve synonyms for + * @param n the number of synonyms to retrieve + * @return list of the lemmas of the top n synonyms of s + */ + public List<String> getTopSynonyms(POS pos, String s, int n) { + + List<String> synonyms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) +// IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); + if (iw != null) { + Synset[] synsets = iw.getSenses(); + for (int i = 0; i < n; i++) { + for (Word word : synsets[i].getWords()) { + String c = word.getLemma(); + if (!c.equals(s) && !c.contains(" ")) { + synonyms.add(c); + } + } + } + } + + } + catch (JWNLException e) { + e.printStackTrace(); + } + return synonyms; + } + public List<String> getAllSynonyms(POS pos, String s) { List<String> synonyms = new ArrayList<String>(); try { @@ -96,120 +133,124 @@ return synonyms; } - public List<String> getSisterTerms(POS pos, String s){ - List<String> sisterTerms = new ArrayList<String>(); - - try { - IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) + public List<String> getSisterTerms(POS pos, String s) { + List<String> sisterTerms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) // IndexWord iw = dict.getMorphologicalProcessor().lookupBaseForm(pos, s); - if(iw != null){ - Synset[] synsets = iw.getSenses(); - //System.out.println(synsets[0]); - PointerTarget[] pointerArr = synsets[0].getTargets(); - } - - } catch (JWNLException e) { - e.printStackTrace(); - } - return sisterTerms; - } - - public List<String> getAttributes(String s) { - - List<String> result = new ArrayList<String>(); - - try { - IndexWord iw = dict.getIndexWord(POS.ADJECTIVE, s); - if(iw != null){ - Synset[] synsets = iw.getSenses(); - Word[] words = synsets[0].getWords(); - for(Word w : words){ - String c = w.getLemma(); - if (!c.equals(s) && !c.contains(" ") && result.size() < 4) { - result.add(c); - } - } - } - - } catch (JWNLException e) { - e.printStackTrace(); - } - - return result; - } - - public static void main(String[] args) { - System.out.println(new WordNet().getBestSynonyms(POS.VERB, "learn")); - System.out.println(new WordNet().getSisterTerms(POS.NOUN, "actress")); - } - - /** - * Funktion returns a List of Hypo and Hypernyms of a given string - * @param s Word for which you want to get Hypo and Hypersyms - * @return List of Hypo and Hypernyms - * @throws JWNLException - */ - public List<String> getRelatedNouns(String s) { - List<String> result = new ArrayList<String>(); - IndexWord word = null; - Synset sense=null; - try{ - word=dict.getIndexWord(POS.NOUN,s); - if(word!=null){ - sense = word.getSense(1); - //Synset sense = word.getSense(1); - - PointerTargetNodeList relatedListHypernyms = null; - PointerTargetNodeList relatedListHyponyms = null; - try { - relatedListHypernyms = PointerUtils.getInstance().getDirectHypernyms(sense); - } catch (JWNLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - try { - relatedListHyponyms = PointerUtils.getInstance().getDirectHyponyms(sense); - } catch (JWNLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - Iterator i = relatedListHypernyms.iterator(); - while (i.hasNext()) { - PointerTargetNode related = (PointerTargetNode) i.next(); - Synset s1 = related.getSynset(); - String tmp=(s1.toString()).replace(s1.getGloss(), ""); - tmp=tmp.replace(" -- ()]",""); - tmp=tmp.replaceAll("[0-9]",""); - tmp=tmp.replace("[Synset: [Offset: ",""); - tmp=tmp.replace("] [POS: noun] Words: ",""); - //its possible, that there is more than one word in a line from wordnet - String[] array_tmp=tmp.split(","); - for(String z : array_tmp) result.add(z.replace(" ", "")); - } - - Iterator j = relatedListHyponyms.iterator(); - while (j.hasNext()) { - PointerTargetNode related = (PointerTargetNode) j.next(); - Synset s1 = related.getSynset(); - String tmp=(s1.toString()).replace(s1.getGloss(), ""); - tmp=tmp.replace(" -- ()]",""); - tmp=tmp.replaceAll("[0-9]",""); - tmp=tmp.replace("[Synset: [Offset: ",""); - tmp=tmp.replace("] [POS: noun] Words: ",""); - //its possible, that there is more than one word in a line from wordnet - String[] array_tmp=tmp.split(","); - for(String z : array_tmp) result.add(z.replace(" ", "")); - } - } - }catch (JWNLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - - - return result; - } - + if (iw != null) { + Synset[] synsets = iw.getSenses(); + //System.out.println(synsets[0]); + PointerTarget[] pointerArr = synsets[0].getTargets(); + } + + } + catch (JWNLException e) { + e.printStackTrace(); + } + return sisterTerms; + } + + public List<String> getAttributes(String s) { + + List<String> result = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(POS.ADJECTIVE, s); + if (iw != null) { + Synset[] synsets = iw.getSenses(); + Word[] words = synsets[0].getWords(); + for (Word w : words) { + String c = w.getLemma(); + if (!c.equals(s) && !c.contains(" ") && result.size() < 4) { + result.add(c); + } + } + } + + } + catch (JWNLException e) { + e.printStackTrace(); + } + + return result; + } + + /** + * Funktion returns a List of Hypo and Hypernyms of a given string + * + * @param s Word for which you want to get Hypo and Hypersyms + * @return List of Hypo and Hypernyms + * @throws JWNLException + */ + public List<String> getRelatedNouns(String s) { + List<String> result = new ArrayList<String>(); + IndexWord word = null; + Synset sense = null; + try { + word = dict.getIndexWord(POS.NOUN, s); + if (word != null) { + sense = word.getSense(1); + //Synset sense = word.getSense(1); + + PointerTargetNodeList relatedListHypernyms = null; + PointerTargetNodeList relatedListHyponyms = null; + try { + relatedListHypernyms = PointerUtils.getInstance().getDirectHypernyms(sense); + } + catch (JWNLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + try { + relatedListHyponyms = PointerUtils.getInstance().getDirectHyponyms(sense); + } + catch (JWNLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + Iterator i = relatedListHypernyms.iterator(); + while (i.hasNext()) { + PointerTargetNode related = (PointerTargetNode) i.next(); + Synset s1 = related.getSynset(); + String tmp = (s1.toString()).replace(s1.getGloss(), ""); + tmp = tmp.replace(" -- ()]", ""); + tmp = tmp.replaceAll("[0-9]", ""); + tmp = tmp.replace("[Synset: [Offset: ", ""); + tmp = tmp.replace("] [POS: noun] Words: ", ""); + //its possible, that there is more than one word in a line from wordnet + String[] array_tmp = tmp.split(","); + for (String z : array_tmp) { + result.add(z.replace(" ", "")); + } + } + + Iterator j = relatedListHyponyms.iterator(); + while (j.hasNext()) { + PointerTargetNode related = (PointerTargetNode) j.next(); + Synset s1 = related.getSynset(); + String tmp = (s1.toString()).replace(s1.getGloss(), ""); + tmp = tmp.replace(" -- ()]", ""); + tmp = tmp.replaceAll("[0-9]", ""); + tmp = tmp.replace("[Synset: [Offset: ", ""); + tmp = tmp.replace("] [POS: noun] Words: ", ""); + //its possible, that there is more than one word in a line from wordnet + String[] array_tmp = tmp.split(","); + for (String z : array_tmp) { + result.add(z.replace(" ", "")); + } + } + } + } + catch (JWNLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + + return result; + } + } Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-05 13:59:47 UTC (rev 4091) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-09-06 10:01:53 UTC (rev 4092) @@ -83,6 +83,23 @@ } /** + * Returns an array of the lemmas of the top {@code n} synonyms for the given word. Only synonyms for the POS in + * {@link #RELEVANT_POS} are returned. + * + * @param word the word to retrieve synonyms for + * @param n the number of senses to get lemmas for + * @return synonyms for the given word + */ + public static String[] getTopSynonymsForWord(String word, int n) { + ArrayList<String> synonyms = new ArrayList<String>(); + + for (POS pos : RELEVANT_POS) { + synonyms.addAll(wn.getTopSynonyms(pos, word, n)); + } + return synonyms.toArray(new String[synonyms.size()]); + } + + /** * Returns the normalized form of the given word. This method is only able to work with single words! If there is an * error normalizing the given word, the word itself is returned. * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |