From: <lor...@us...> - 2011-07-25 09:03:16
|
Revision: 2960 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2960&view=rev Author: lorenz_b Date: 2011-07-25 09:03:10 +0000 (Mon, 25 Jul 2011) Log Message: ----------- Changed resource loading to InputStreams if possible. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApacheTokenizer.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java trunk/components-ext/src/main/resources/tbsl/tbsl.properties Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java 2011-07-25 08:52:39 UTC (rev 2959) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApachePartOfSpeechTagger.java 2011-07-25 09:03:10 UTC (rev 2960) @@ -1,6 +1,5 @@ package org.dllearner.algorithm.tbsl.nlp; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -13,16 +12,15 @@ public class ApachePartOfSpeechTagger implements PartOfSpeechTagger{ private POSTaggerME tagger; - private static final String MODEL_PATH = "src/main/resources/tbsl/models/en-pos-maxent.bin"; + private static final String MODEL_PATH = "tbsl/models/en-pos-maxent.bin"; private Tokenizer tokenizer; public ApachePartOfSpeechTagger() { - InputStream modelIn = null; + InputStream modelIn = this.getClass().getClassLoader().getResourceAsStream(MODEL_PATH); POSModel model = null; try { - modelIn = new FileInputStream(MODEL_PATH); model = new POSModel(modelIn); } catch (IOException e) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApacheTokenizer.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApacheTokenizer.java 2011-07-25 08:52:39 UTC (rev 2959) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/ApacheTokenizer.java 2011-07-25 09:03:10 UTC (rev 2960) @@ -1,6 +1,5 @@ package org.dllearner.algorithm.tbsl.nlp; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; @@ -10,13 +9,12 @@ public class ApacheTokenizer implements Tokenizer{ private opennlp.tools.tokenize.Tokenizer tokenizer; - private static final String MODEL_FILE = "src/main/resources/tbsl/models/en-token.bin"; + private static final String MODEL_PATH = "tbsl/models/en-token.bin"; public ApacheTokenizer() { - InputStream modelIn = null; + InputStream modelIn = this.getClass().getClassLoader().getResourceAsStream(MODEL_PATH); TokenizerModel model = null; try { - modelIn = new FileInputStream(MODEL_FILE); model = new TokenizerModel(modelIn); } catch (IOException e) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java 2011-07-25 08:52:39 UTC (rev 2959) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipeNER.java 2011-07-25 09:03:10 UTC (rev 2960) @@ -1,7 +1,10 @@ package org.dllearner.algorithm.tbsl.nlp; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.ArrayList; import java.util.List; @@ -35,8 +38,17 @@ try { long startTime = System.currentTimeMillis(); logger.info("Initializing LingPipe NER..."); - String path = this.getClass().getClassLoader().getResource(DICTIONARY_PATH).getPath(); - Dictionary<String> dictionary = (Dictionary<String>) AbstractExternalizable.readObject(new File(path)); + InputStream is = this.getClass().getClassLoader().getResourceAsStream(DICTIONARY_PATH); + File f = File.createTempFile("dbpedia_lingpipe", ".dictionary"); + f.deleteOnExit(); + OutputStream out=new FileOutputStream(f); + byte buf[]=new byte[1024]; + int len; + while((len=is.read(buf))>0) + out.write(buf,0,len); + out.close(); + is.close(); + Dictionary<String> dictionary = (Dictionary<String>) AbstractExternalizable.readObject(f); ner = new ExactDictionaryChunker(dictionary, IndoEuropeanTokenizerFactory.INSTANCE, allMatches, caseSensitive); logger.info("Done in " + (System.currentTimeMillis()-startTime) + "ms."); } catch (IOException e) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java 2011-07-25 08:52:39 UTC (rev 2959) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/LingPipePartOfSpeechTagger.java 2011-07-25 09:03:10 UTC (rev 2960) @@ -1,8 +1,8 @@ package org.dllearner.algorithm.tbsl.nlp; -import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.ObjectInputStream; import java.util.ArrayList; import java.util.Arrays; @@ -18,7 +18,7 @@ public class LingPipePartOfSpeechTagger implements PartOfSpeechTagger{ - private static final String MODEL_PATH = "src/main/resources/tbsl/models/lingpipe/pos-en-general-brown.HiddenMarkovModel"; + private static final String MODEL_PATH = "tbsl/models/lingpipe/pos-en-general-brown.HiddenMarkovModel"; private static final int TOP_K = 5; @@ -26,7 +26,7 @@ public LingPipePartOfSpeechTagger() { try { - FileInputStream fileIn = new FileInputStream(MODEL_PATH); + InputStream fileIn = this.getClass().getClassLoader().getResourceAsStream(MODEL_PATH); ObjectInputStream objIn = new ObjectInputStream(fileIn); HiddenMarkovModel hmm = (HiddenMarkovModel) objIn.readObject(); Streams.closeQuietly(objIn); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-07-25 08:52:39 UTC (rev 2959) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/StanfordPartOfSpeechTagger.java 2011-07-25 09:03:10 UTC (rev 2960) @@ -18,7 +18,8 @@ public StanfordPartOfSpeechTagger(){ try { - String modelPath = this.getClass().getClassLoader().getResource(MODEL).getPath(); +// String modelPath = this.getClass().getClassLoader().getResource(MODEL).getPath(); + String modelPath = Thread.currentThread().getContextClassLoader().getResource(MODEL).getPath(); tagger = new MaxentTagger(modelPath); } catch (IOException e) { e.printStackTrace(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-07-25 08:52:39 UTC (rev 2959) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-07-25 09:03:10 UTC (rev 2960) @@ -19,7 +19,6 @@ public SlotBuilder() { wordnet = new WordNet(); - wordnet.init(); } /** Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-07-25 08:52:39 UTC (rev 2959) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-07-25 09:03:10 UTC (rev 2960) @@ -1,7 +1,6 @@ package org.dllearner.algorithm.tbsl.templator; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -12,8 +11,8 @@ import org.dllearner.algorithm.tbsl.ltag.parser.LTAG_Lexicon_Constructor; import org.dllearner.algorithm.tbsl.ltag.parser.Parser; import org.dllearner.algorithm.tbsl.ltag.parser.Preprocessor; +import org.dllearner.algorithm.tbsl.nlp.ApachePartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.sem.drs.DRS; import org.dllearner.algorithm.tbsl.sem.drs.UDRS; import org.dllearner.algorithm.tbsl.sem.dudes.data.Dude; @@ -23,7 +22,7 @@ public class Templator { - String[] GRAMMAR_FILES = {"src/main/resources/tbsl/lexicon/english.lex"}; + String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex"}; PartOfSpeechTagger tagger; LTAGLexicon g; @@ -34,10 +33,15 @@ boolean UNTAGGED_INPUT = true; public Templator() { + List<String> grammarFiles = new ArrayList<String>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResource(GRAMMAR_FILES[i]).getPath()); + } - g = LTAG_Constructor.construct(Arrays.asList(GRAMMAR_FILES)); + g = LTAG_Constructor.construct(grammarFiles); - tagger = new StanfordPartOfSpeechTagger(); +// tagger = new StanfordPartOfSpeechTagger(); + tagger = new ApachePartOfSpeechTagger(); p = new Parser(); p.SHOW_GRAMMAR = true; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java 2011-07-25 08:52:39 UTC (rev 2959) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/WordNet.java 2011-07-25 09:03:10 UTC (rev 2960) @@ -8,27 +8,12 @@ public class WordNet { - public String path = "tbsl/dict/"; - public WordNetDatabase database; + private WordNetDatabase database; - public WordNet(String s) { - path = s; - - } public WordNet() { - path = this.getClass().getClassLoader().getResource(path).getPath(); - } - - public void setWordNetPath(String s) { - path = s; - } - - public void init() { - System.setProperty("wordnet.database.dir",path); database = WordNetDatabase.getFileInstance(); } - public List<String> getBestSynonyms(String s) { List<String> synonyms = new ArrayList<String>(); Modified: trunk/components-ext/src/main/resources/tbsl/tbsl.properties =================================================================== --- trunk/components-ext/src/main/resources/tbsl/tbsl.properties 2011-07-25 08:52:39 UTC (rev 2959) +++ trunk/components-ext/src/main/resources/tbsl/tbsl.properties 2011-07-25 09:03:10 UTC (rev 2960) @@ -14,3 +14,5 @@ learning.maxTestedQueriesPerTemplate = 20 !similarity | lucene | none learning.ranking = similarity + +wordnet.dictionary = tbsl/dict/ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |