From: <chr...@us...> - 2011-05-13 09:59:33
|
Revision: 2806 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2806&view=rev Author: christinaunger Date: 2011-05-13 09:59:27 +0000 (Fri, 13 May 2011) Log Message: ----------- [tbsl] TreeTagger rearrangement Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java Removed Paths: ------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/POStagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TreeTagger.java Copied: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java (from rev 2805, trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TreeTagger.java) =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java 2011-05-13 09:59:27 UTC (rev 2806) @@ -0,0 +1,49 @@ +package org.dllearner.algorithm.tbsl.nlp; + +import java.io.IOException; +import java.util.List; +import java.util.Arrays; + +import org.annolab.tt4j.TokenHandler; +import org.annolab.tt4j.TreeTaggerException; +import org.annolab.tt4j.TreeTaggerWrapper; + +public class TreeTagger implements PartOfSpeechTagger { + + TreeTaggerWrapper<String> tt; + + public TreeTagger() throws IOException { + System.setProperty("treetagger.home","/home/christina/Software/TreeTagger"); + tt = new TreeTaggerWrapper<String>(); + tt.setModel("/home/christina/Software/TreeTagger/lib/english.par:iso8859-1"); + } + + public String tag(String s) { + + List<String> input = Arrays.asList(s.split(" ")); + try { + tt.setHandler(new TokenHandler<String>() { + public void token(String token, String pos, String lemma) { + System.out.println(token+"/"+pos+"/"+lemma); + } + }); + System.out.println("Tagged with TreeTagger:\n"); + tt.process(input); + System.out.println(tt.getStatus()); + } catch (IOException e) { + e.printStackTrace(); + } catch (TreeTaggerException e) { + e.printStackTrace(); + } + finally { + tt.destroy(); + } + return ""; + } + + @Override + public List<String> tagTopK(String sentence) { + // TODO Auto-generated method stub + return null; + } +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/TreeTagger.java ___________________________________________________________________ Added: svn:mime-type + text/plain Deleted: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/POStagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/POStagger.java 2011-05-13 09:56:20 UTC (rev 2805) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/POStagger.java 2011-05-13 09:59:27 UTC (rev 2806) @@ -1,54 +0,0 @@ -package org.dllearner.algorithm.tbsl.templator; - -import java.io.IOException; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; - -import edu.stanford.nlp.ling.HasWord; -import edu.stanford.nlp.ling.TaggedWord; -import edu.stanford.nlp.tagger.maxent.MaxentTagger; - -public class POStagger { - - String taggermodel; - MaxentTagger tagger; - - public POStagger(String s) throws IOException, ClassNotFoundException { - taggermodel = s; - tagger = new MaxentTagger(taggermodel); - } - public POStagger() throws IOException, ClassNotFoundException { - taggermodel = "src/main/resources/tbsl/models/bidirectional-distsim-wsj-0-18.tagger"; - //taggermodel = "src/main/resources/tbsl/models/left3words-wsj-0-18.tagger"; - tagger = new MaxentTagger(taggermodel); - } - - public void setPOStaggerModel(String s) throws IOException, ClassNotFoundException { - taggermodel = s; - tagger = new MaxentTagger(taggermodel); - } - - public String tag(String s) { - - String out = ""; - - ArrayList<TaggedWord> tagged = new ArrayList<TaggedWord>(); - - StringReader reader = new StringReader(s); - List<ArrayList<? extends HasWord>> text = tagger.tokenizeText(reader); - - if (text.size() == 1) { - tagged = tagger.processSentence(text.get(0)); - } - - - for (TaggedWord t : tagged) { - out += " " + t.toString(); - } - - return out.trim(); - - } - -} Deleted: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TreeTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TreeTagger.java 2011-05-13 09:56:20 UTC (rev 2805) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TreeTagger.java 2011-05-13 09:59:27 UTC (rev 2806) @@ -1,38 +0,0 @@ -package org.dllearner.algorithm.tbsl.templator; - -import java.io.IOException; -import java.util.List; -import java.util.Arrays; - -import org.annolab.tt4j.TokenHandler; -import org.annolab.tt4j.TreeTaggerException; -import org.annolab.tt4j.TreeTaggerWrapper; - -public class TreeTagger { - - TreeTaggerWrapper<String> tt; - - public TreeTagger() throws IOException { - System.setProperty("treetagger.home","/home/christina/Software/TreeTagger"); - tt = new TreeTaggerWrapper<String>(); - tt.setModel("/home/christina/Software/TreeTagger/lib/english.par:iso8859-1"); - } - - public void tagthis(String s) throws IOException, TreeTaggerException { - - List<String> input = Arrays.asList(s.split(" ")); - try { - tt.setHandler(new TokenHandler<String>() { - public void token(String token, String pos, String lemma) { - System.out.println(token+"/"+pos+"/"+lemma); - } - }); - System.out.println("Tagged with TreeTagger:\n"); - tt.process(input); - System.out.println(tt.getStatus()); - } - finally { - tt.destroy(); - } - } -} Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java 2011-05-13 09:56:20 UTC (rev 2805) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java 2011-05-13 09:59:27 UTC (rev 2806) @@ -7,7 +7,7 @@ import org.dllearner.algorithm.tbsl.nlp.ApachePartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.templator.TreeTagger; +import org.dllearner.algorithm.tbsl.nlp.TreeTagger; public class POStest { @@ -22,7 +22,7 @@ System.out.println(tagged + "\n"); TreeTagger tt = new TreeTagger(); - tt.tagthis(sentence); + tt.tag(sentence); tagger = new ApachePartOfSpeechTagger(); startTime = System.currentTimeMillis(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |