From: <lor...@us...> - 2011-08-02 12:42:22
Revision: 2975 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2975&view=rev Author: lorenz_b Date: 2011-08-02 12:42:15 +0000 (Tue, 02 Aug 2011) Log Message: ----------- Added method to get lexical answer type from template. Loading all resources with inputstreams. Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LTAG_Lexicon_Constructor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java trunk/components-ext/src/main/resources/tbsl/wordnet_properties.xml Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/pom.xml 2011-08-02 12:42:15 UTC (rev 2975) @@ -137,6 +137,11 @@ <artifactId>ini4j</artifactId> <version>0.5.2</version> </dependency> + <dependency> + <groupId>net.didion.jwnl</groupId> + <artifactId>jwnl</artifactId> + <version>1.4.1.RC2</version> + </dependency> </dependencies> <build> <plugins> Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-08-02 12:40:24 UTC (rev 2974) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -1,6 +1,7 @@ package org.dllearner.algorithm.tbsl.learning; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; @@ -105,7 +106,7 @@ } public SPARQLTemplateBasedLearner(String optionsFile) throws InvalidFileFormatException, FileNotFoundException, IOException{ - this(new Options(new FileReader(new File(optionsFile)))); + this(new Options(new FileInputStream(optionsFile))); } public SPARQLTemplateBasedLearner(Options options){ @@ -141,6 +142,7 @@ maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); + wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); System.setProperty("wordnet.database.dir", wordnetPath); } @@ -735,7 +737,8 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Give me all books written by authors influenced by Ernest Hemingway."; +// String question = "Give me all books written by authors influenced by Ernest Hemingway."; + String question = "Give me all cities in Canada."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), Collections.<String>singletonList(""), Collections.<String>emptyList()); @@ -743,6 +746,7 @@ learner.setQuestion(question); learner.learnSPARQLQueries(); System.out.println(learner.getBestSPARQLQuery()); + System.out.println(learner.getTemplates().iterator().next().getLexicalAnswerType()); } Modified: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LTAG_Lexicon_Constructor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LTAG_Lexicon_Constructor.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/LTAG_Lexicon_Constructor.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -1,8 +1,9 @@ package org.dllearner.algorithm.tbsl.ltag.parser; import java.io.BufferedReader; -import java.io.FileReader; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; @@ -27,13 +28,13 @@ */ public class LTAG_Lexicon_Constructor { - public LTAGLexicon construct(List<String> fileNames) { + public LTAGLexicon construct(List<InputStream> fileStreams) { LTAGLexicon G = new TAG(); - for (String fileName : fileNames) { + for (InputStream fileStream : fileStreams) { - addFileToGrammar(fileName, G); + addFileToGrammar(fileStream, G); } @@ -41,7 +42,7 @@ } - public void addFileToGrammar(String fileName, LTAGLexicon g) { + public void addFileToGrammar(InputStream fileStream, LTAGLexicon g) { ArrayList<Pair<String, TreeNode>> trees = new ArrayList<Pair<String, TreeNode>>(); ArrayList<List<String>> semantics = new ArrayList<List<String>>(); @@ -50,7 +51,7 @@ try { - BufferedReader in = new BufferedReader(new FileReader(fileName)); + BufferedReader in = new BufferedReader(new InputStreamReader(fileStream)); String zeile = null; int lineNo = 0; @@ -99,7 +100,7 @@ } catch (ParseException e) { System.err.println("ParseException in '" - + fileName.substring(fileName.lastIndexOf("/") + 1) + + fileStream + "' at Line " + lineNo + ": '" + items[1].trim() + "'."); continue; @@ -113,11 +114,7 @@ in.close(); } catch (IOException e) { - - System.err.println("IOException: File '" + fileName - + "' not found!"); - return; - + 
e.printStackTrace(); } g.addTrees(trees, semantics); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.StringReader; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -19,7 +20,8 @@ public StanfordPartOfSpeechTagger(){ try { // String modelPath = this.getClass().getClassLoader().getResource(MODEL).getPath(); - String modelPath = Thread.currentThread().getContextClassLoader().getResource(MODEL).getPath(); + String modelPath = getClass().getResource("/tbsl/models/bidirectional-distsim-wsj-0-18.tagger").getPath(); +// String modelPath = Thread.currentThread().getContextClassLoader().getResource(MODEL).getFile(); tagger = new MaxentTagger(modelPath); } catch (IOException e) { e.printStackTrace(); Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -0,0 +1,74 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.util.ArrayList; +import java.util.List; + +import net.didion.jwnl.JWNL; +import net.didion.jwnl.JWNLException; +import net.didion.jwnl.data.IndexWord; +import net.didion.jwnl.data.POS; +import net.didion.jwnl.data.Synset; +import net.didion.jwnl.data.Word; +import net.didion.jwnl.dictionary.Dictionary; + +public 
class WordNet { + + private Dictionary dict; + + public WordNet() { + try { + JWNL.initialize(WordNet.class.getClassLoader().getResourceAsStream("tbsl/wordnet_properties.xml")); + dict = Dictionary.getInstance(); + } catch (JWNLException e) { + e.printStackTrace(); + } + } + + public List<String> getBestSynonyms(POS pos, String s) { + + List<String> synonyms = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(pos, s);//dict.getMorphologicalProcessor().lookupBaseForm(pos, s) + if(iw != null){ + Synset[] synsets = iw.getSenses(); + Word[] words = synsets[0].getWords(); + for(Word w : words){ + String c = w.getLemma(); + if (!c.equals(s) && !c.contains(" ") && synonyms.size() < 4) { + synonyms.add(c); + } + } + } + + } catch (JWNLException e) { + e.printStackTrace(); + } + return synonyms; + } + + public List<String> getAttributes(String s) { + + List<String> result = new ArrayList<String>(); + + try { + IndexWord iw = dict.getIndexWord(POS.ADJECTIVE, s); + if(iw != null){ + Synset[] synsets = iw.getSenses(); + Word[] words = synsets[0].getWords(); + for(Word w : words){ + String c = w.getLemma(); + if (!c.equals(s) && !c.contains(" ") && result.size() < 4) { + result.add(c); + } + } + } + + } catch (JWNLException e) { + e.printStackTrace(); + } + + return result; + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/WordNet.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -473,6 +473,19 @@ return true; } - + /** + * Returns the variable in the SPARQL 
query, which determines the type of the answer + * by an rdf:type property. + * @return + */ + public String getAnswerTypeVariable(){ + SPARQL_Term selection = selTerms.iterator().next(); + for(SPARQL_Triple t : conditions){ + if(t.getVariable().equals(selection) && t.getProperty().getName().equals("type")){ + return t.getValue().getName(); + } + } + return null; + } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -38,4 +38,14 @@ return slots; } + public List<String> getLexicalAnswerType(){ + String variable = query.getAnswerTypeVariable(); + for(Slot slot : slots){ + if(slot.getAnchor().equals(variable)){ + return slot.getWords(); + } + } + return null; + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -1,7 +1,7 @@ package org.dllearner.algorithm.tbsl.templator; import java.io.BufferedReader; -import java.io.FileReader; +import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -370,7 +370,7 @@ BufferedReader in; try { - in = new BufferedReader(new FileReader("src/main/resources/tbsl/lexicon/adj_list.txt")); + in = new BufferedReader(new InputStreamReader(this.getClass().getClassLoader().getResourceAsStream("tbsl/lexicon/adj_list.txt"))); String line; while ((line 
= in.readLine()) != null ) { if (line.contains(adj)) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-08-02 12:40:24 UTC (rev 2974) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-08-02 12:42:15 UTC (rev 2975) @@ -1,5 +1,6 @@ package org.dllearner.algorithm.tbsl.templator; +import java.io.InputStream; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -33,9 +34,9 @@ boolean UNTAGGED_INPUT = true; public Templator() { - List<String> grammarFiles = new ArrayList<String>(); + List<InputStream> grammarFiles = new ArrayList<InputStream>(); for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResource(GRAMMAR_FILES[i]).getPath()); + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); } g = LTAG_Constructor.construct(grammarFiles); Added: trunk/components-ext/src/main/resources/tbsl/wordnet_properties.xml =================================================================== --- trunk/components-ext/src/main/resources/tbsl/wordnet_properties.xml (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/wordnet_properties.xml 2011-08-02 12:42:15 UTC (rev 2975) @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<jwnl_properties language="en"> + <version publisher="Princeton" number="3.0" language="en"/> + <dictionary class="net.didion.jwnl.dictionary.FileBackedDictionary"> + <param name="morphological_processor" value="net.didion.jwnl.dictionary.morph.DefaultMorphologicalProcessor"> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + <param value="net.didion.jwnl.dictionary.morph.DetachSuffixesOperation"> + <param name="noun" 
value="|s=|ses=s|xes=x|zes=z|ches=ch|shes=sh|men=man|ies=y|"/> + <param name="verb" value="|s=|ies=y|es=e|es=|ed=e|ed=|ing=e|ing=|"/> + <param name="adjective" value="|er=|est=|er=e|est=e|"/> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + </param> + </param> + <param value="net.didion.jwnl.dictionary.morph.TokenizerOperation"> + <param name="delimiters"> + <param value=" "/> + <param value="-"/> + </param> + <param name="token_operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + <param value="net.didion.jwnl.dictionary.morph.DetachSuffixesOperation"> + <param name="noun" value="|s=|ses=s|xes=x|zes=z|ches=ch|shes=sh|men=man|ies=y|"/> + <param name="verb" value="|s=|ies=y|es=e|es=|ed=e|ed=|ing=e|ing=|"/> + <param name="adjective" value="|er=|est=|er=e|est=e|"/> + <param name="operations"> + <param value="net.didion.jwnl.dictionary.morph.LookupIndexWordOperation"/> + <param value="net.didion.jwnl.dictionary.morph.LookupExceptionsOperation"/> + </param> + </param> + </param> + </param> + </param> + </param> + <param name="dictionary_element_factory" value="net.didion.jwnl.princeton.data.PrincetonWN17FileDictionaryElementFactory"/> + <param name="file_manager" value="net.didion.jwnl.dictionary.file_manager.FileManagerImpl"> + <param name="file_type" value="net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile"/> + <param name="dictionary_path" value="src/main/resources/tbsl/dict"/> + </param> + </dictionary> + <resource class="PrincetonResource"/> +</jwnl_properties> \ No newline at end of file Property changes on: trunk/components-ext/src/main/resources/tbsl/wordnet_properties.xml ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the 
SourceForge.net collaborative development platform, the world's largest Open Source development site.