From: <dfl...@us...> - 2013-12-10 12:52:55
|
Revision: 4202 http://sourceforge.net/p/dl-learner/code/4202 Author: dfleischhacker Date: 2013-12-10 12:52:52 +0000 (Tue, 10 Dec 2013) Log Message: ----------- Use hyponyms for creating the token tree Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-12-10 09:56:55 UTC (rev 4201) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/WordNet.java 2013-12-10 12:52:52 UTC (rev 4202) @@ -3,8 +3,7 @@ import net.didion.jwnl.JWNL; import net.didion.jwnl.JWNLException; import net.didion.jwnl.data.*; -import net.didion.jwnl.data.list.PointerTargetNode; -import net.didion.jwnl.data.list.PointerTargetNodeList; +import net.didion.jwnl.data.list.*; import net.didion.jwnl.dictionary.Dictionary; import java.io.InputStream; @@ -49,6 +48,13 @@ public static void main(String[] args) { System.out.println(new WordNet().getBestSynonyms(POS.VERB, "learn")); System.out.println(new WordNet().getSisterTerms(POS.NOUN, "actress")); + System.out.println("Hypernyms **************************"); + System.out.println(new WordNet().getHypernyms(POS.NOUN, "man")); + System.out.println("Hyponyms ****************************"); + System.out.println(new WordNet().getHyponyms(POS.NOUN, "god")); + System.out.println("Words for first synset **************************"); + System.out.println(new WordNet().getWordsForFirstSynset(POS.NOUN, "man")); + } public List<String> getBestSynonyms(POS pos, String s) { @@ -178,6 +184,103 @@ } /** + * Returns a list of lemmas for the most frequent synset of the given word. + * @param word word to get synonyms for + * @param pos POS of the word to look up + * @return list of lemmas of the most frequent synset + */ + public List<String> getWordsForFirstSynset(POS pos, String word) { + List<String> result = new ArrayList<>(); + IndexWord indexWord = null; + Synset sense = null; + + try { + indexWord = dict.getIndexWord(pos, word); + sense = indexWord.getSense(1); + for (Word w : sense.getWords()) { + result.add(w.getLemma()); + } + } + catch (JWNLException e) { + e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. + } + + return result; + } + + /** + * Returns a list of words being lemmas of a most frequent synset for the given word or one of its hypernyms. + */ + public List<String> getHypernyms(POS pos, String word) { + List<String> result = new ArrayList<>(); + + IndexWord indexWord; + Synset sense; + + try { + indexWord = dict.getIndexWord(pos, word); + if (indexWord == null) { + return result; + } + sense = indexWord.getSense(1); + for (Word w : sense.getWords()) { + result.add(w.getLemma()); + } + PointerTargetNodeList target = PointerUtils.getInstance().getDirectHypernyms(sense); + while (target != null && !target.isEmpty()) { + for (int i = 0; i < target.size(); i++) { + Synset s = ((PointerTargetNode) target.get(i)).getSynset(); + for (Word w : sense.getWords()) { + result.add(w.getLemma()); + } + } + target = PointerUtils.getInstance().getDirectHyponyms(((PointerTargetNode) target.get(0)).getSynset()); + System.out.println(target); + } + } + catch (JWNLException e) { + e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. + } + + return result; + } + + public List<String> getHyponyms(POS pos, String s) { + ArrayList<String> result = new ArrayList<>(); + try { + IndexWord word = dict.getIndexWord(pos, s); + if (word == null) { + System.err.println("Unable to find index word for " + s); + return result; + } + Synset sense = word.getSense(1); + getHyponymsRecursive(result, sense, 3); + } + catch (JWNLException e) { + e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. + } + return result; + } + + public void getHyponymsRecursive(List<String> lemmas, Synset sense, int depthToGo) { + for (Word w : sense.getWords()) { + lemmas.add(w.getLemma()); + } + if (depthToGo == 0) { + return; + } + try { + PointerTargetNodeList directHyponyms = PointerUtils.getInstance().getDirectHyponyms(sense); + for (Object directHyponym : directHyponyms) { + getHyponymsRecursive(lemmas, ((PointerTargetNode) directHyponym).getSynset(), depthToGo - 1); + } + } + catch (JWNLException e) { + e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. + } + } + + /** * Funktion returns a List of Hypo and Hypernyms of a given string * * @param s Word for which you want to get Hypo and Hypersyms Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-12-10 09:56:55 UTC (rev 4201) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/LinguisticUtil.java 2013-12-10 12:52:52 UTC (rev 4202) @@ -97,10 +97,25 @@ } /** - * Returns an array of all synonyms for the given word. Only synonyms for the POS in {@link #RELEVANT_POS} are - * returned. + * Iterates through the hypernym tree for the given word at the given POS and returns a list of all lemmas of the + * most frequent synsets visited during traversing the tree. + * @param word word to get hypernyms for + * @param pos POS to get hypernyms for + * @return list of all lemmas of all hypernyms for the given word + */ + public String[] getAllHyponymsForWord(String word, POS pos) { + ArrayList<String> hyponyms = new ArrayList<>(); + + hyponyms.addAll(wn.getHyponyms(pos, word)); + + return hyponyms.toArray(new String[hyponyms.size()]); + } + + /** + * Returns an array of all synonyms for the given word for the given POS. * * @param word the word to retrieve synonyms for + * @param pos POS to retrieve synonyms for * @return synonyms for the given word */ public String[] getSynonymsForWord(String word, POS pos) { Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-10 09:56:55 UTC (rev 4201) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/SimpleEntityCandidatesTrie.java 2013-12-10 12:52:52 UTC (rev 4202) @@ -88,14 +88,16 @@ if (wordnetPos == null) { continue; } - String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos); + //String[] synonyms = LinguisticUtil.getInstance().getSynonymsForWord(t.getRawForm(), wordnetPos); + String[] synonyms = LinguisticUtil.getInstance().getAllHyponymsForWord(t.getRawForm(), wordnetPos); for (String synonym : synonyms) { // ignore all multi word synonyms if (synonym.contains("_")) { continue; } - t.addAlternativeForm(LinguisticUtil.getInstance().getNormalizedForm(synonym)); + //t.addAlternativeForm(LinguisticUtil.getInstance().getNormalizedForm(synonym)); + t.addAlternativeForm(synonym); } } } @@ -111,7 +113,7 @@ @Override public Set<Entity> getCandidateEntities(List<Token> tokens) { - return tree.get(tokens); + return tree.getAllEntities(tokens); } @Override Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-12-10 09:56:55 UTC (rev 4201) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/isle/index/TrieEntityCandidateGenerator.java 2013-12-10 12:52:52 UTC (rev 4202) @@ -28,7 +28,9 @@ } public Set<Entity> getCandidates(Annotation annotation) { - return candidatesTrie.getCandidateEntities(annotation.getTokens()); + Set<Entity> candidateEntities = candidatesTrie.getCandidateEntities(annotation.getTokens()); + System.out.println(annotation + " --> " + candidateEntities); + return candidateEntities; } /** Modified: trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java 2013-12-10 09:56:55 UTC (rev 4201) +++ trunk/components-core/src/test/java/org/dllearner/algorithms/isle/DBpediaExperiment.java 2013-12-10 12:52:52 UTC (rev 4202) @@ -3,16 +3,8 @@ */ package org.dllearner.algorithms.isle; -import java.io.BufferedInputStream; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.HashSet; -import java.util.Set; - +import com.google.common.collect.Sets; +import com.hp.hpl.jena.rdf.model.Model; import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.compress.compressors.CompressorInputStream; import org.apache.commons.compress.compressors.CompressorStreamFactory; @@ -23,8 +15,11 @@ import org.semanticweb.owlapi.model.OWLOntologyCreationException; import org.semanticweb.owlapi.model.OWLOntologyManager; -import com.google.common.collect.Sets; -import com.hp.hpl.jena.rdf.model.Model; +import java.io.*; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.HashSet; +import java.util.Set; /** * @author Lorenz Buehmann @@ -92,7 +87,7 @@ maxNrOfInstancesPerClass)); documents.clear(); - documents.add("Thomas Cruise Mapother IV, widely known as Tom Cruise, is an American film actor and producer. He has been nominated for three Academy Awards and has won three Golden Globe Awards. He started his career at age 19 in the 1981 film Taps. His first leading role was in Risky Business, released in August 1983. Cruise became a full-fledged movie star after starring in Top Gun (1986). He is well known for his role as secret agent Ethan Hunt in the Mission: Impossible film series between 1996 and 2011. Cruise has starred in many Hollywood blockbusters, including Rain Man (1988), A Few Good Men (1992), Jerry Maguire (1996), Vanilla Sky (2001), Minority Report (2002), The Last Samurai (2003), Collateral (2004), War of the Worlds (2005), Tropic Thunder (2008) and Jack Reacher (2012). As of 2012, Cruise is Hollywood's highest-paid actor. Cruise is known for his Scientologist faith and for his support of the Church of Scientology."); + documents.add("Thomas Cruise Mapother IV, widely known as Tom Cruise, is an American film player and producer. He has been nominated for three Academy Awards and has won three Golden Globe Awards. He started his career at age 19 in the 1981 film Taps. His first leading role was in Risky Business, released in August 1983. Cruise became a full-fledged movie star after starring in Top Gun (1986). He is well known for his role as secret agent Ethan Hunt in the Mission: Impossible film series between 1996 and 2011. Cruise has starred in many Hollywood blockbusters, including Rain Man (1988), A Few Good Men (1992), Jerry Maguire (1996), Vanilla Sky (2001), Minority Report (2002), The Last Samurai (2003), Collateral (2004), War of the Worlds (2005), Tropic Thunder (2008) and Jack Reacher (2012). As of 2012, Cruise is Hollywood's highest-paid actor. Cruise is known for his Scientologist faith and for his support of the Church of Scientology."); return documents; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |