From: <chr...@us...> - 2011-05-13 09:38:56
|
Revision: 2804
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2804&view=rev
Author:   christinaunger
Date:     2011-05-13 09:38:49 +0000 (Fri, 13 May 2011)

Log Message:
-----------
[tbsl] removed some small bugs, added TreeTagger test, and removed parser redundancies

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/FootNode.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/SubstNode.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/TerminalNode.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/Tree.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/TreeNode.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java

Added Paths:
-----------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TreeTagger.java
    trunk/components-ext/src/main/resources/tbsl/lib/
    trunk/components-ext/src/main/resources/tbsl/lib/org.annolab.tt4j-1.0.14.jar

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-05-13 09:21:24 UTC (rev 2803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -156,6 +156,7 @@
     // add the quantifier at last
     DiscourseReferent ref = complex.getReferent();
     String sref = ref.getValue();
+    String fresh;
     if (!isSilent()) {
         System.out.print("|quantor:" + quant);
     }
@@ -181,13 +182,15 @@
     case SOME:
         // break;
     case THE_LEAST:
-        query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT));
-        query.addOrderBy(new SPARQL_Term(sref, SPARQL_OrderBy.ASC));
+        fresh = "c"+createFresh();
+        query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT,true, new SPARQL_Term(fresh)));
+        query.addOrderBy(new SPARQL_Term(fresh, SPARQL_OrderBy.ASC));
         query.setLimit(1);
         break;
     case THE_MOST:
-        query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT));
-        query.addOrderBy(new SPARQL_Term(sref, SPARQL_OrderBy.DESC));
+        fresh = "c"+createFresh();
+        query.addSelTerm(new SPARQL_Term(sref, SPARQL_Aggregate.COUNT,true, new SPARQL_Term(fresh)));
+        query.addOrderBy(new SPARQL_Term(fresh, SPARQL_OrderBy.DESC));
         query.setLimit(1);
         break;
     }

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/FootNode.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/FootNode.java 2011-05-13 09:21:24 UTC (rev 2803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/FootNode.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -174,6 +174,9 @@
     public String getAnchor() {
         return "";
     }
+    public TreeNode setAnchor(String a) {
+        return this;
+    }
     public Feature getFeature() {
         return null;
@@ -192,4 +195,8 @@
         }
     }
+    @Override
+    public void setAnchor(String old_anchor, String new_anchor) {
+    }
+
 }

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/SubstNode.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/SubstNode.java 2011-05-13 09:21:24 UTC (rev 2803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/SubstNode.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -163,6 +163,9 @@
     public String getAnchor() {
         return "";
     }
+    public TreeNode setAnchor(String a) {
+        return this;
+    }
     public String getIndex() {
         return index;
@@ -198,4 +201,8 @@
         }
     }
+    @Override
+    public void setAnchor(String old_anchor, String new_anchor) {
+    }
+
 }

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/TerminalNode.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/TerminalNode.java 2011-05-13 09:21:24 UTC (rev 2803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/TerminalNode.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -164,6 +164,12 @@
         }
         return getTerminal()+" ";
     }
+    public TreeNode setAnchor(String a) {
+        if (!getTerminal().equals("")) {
+            setTerminal(a);
+        }
+        return this;
+    }
     public void setTerminal(String terminal) {
         this.terminal = terminal;
@@ -190,5 +196,12 @@
             return this.getParent().isGovernedBy(cat);
         }
     }
+
+    @Override
+    public void setAnchor(String old_anchor, String new_anchor) {
+        if (terminal.equals(old_anchor)) {
+            terminal = new_anchor;
+        }
+    }
 }

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/Tree.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/Tree.java 2011-05-13 09:21:24 UTC (rev 2803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/Tree.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -240,6 +240,11 @@
         return output;
     }
+    public void setAnchor(String old_anchor,String new_anchor) {
+        for (TreeNode child : children) {
+            child.setAnchor(old_anchor,new_anchor);
+        }
+    }
     public String toString() {

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/TreeNode.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/TreeNode.java 2011-05-13 09:21:24 UTC (rev 2803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/data/TreeNode.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -40,6 +40,7 @@
     public void setAdjConstraint (boolean x);
     public String getAnchor();
+    public void setAnchor(String old_anchor,String new_anchor);
     public TreeNode clone();

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-05-13 09:21:24 UTC (rev 2803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -25,6 +25,9 @@
 class GrammarFilter {
     final static String[] NAMED_Strings = {"named", "called"};
+    // DISAM
+    static List<Integer> usedInts = new ArrayList<Integer>();
+    static ArrayList<String> doubles = new ArrayList<String>();
     static ParseGrammar filter(String taggedinput,LTAGLexicon grammar,List<Integer> temps) {
@@ -66,10 +69,29 @@
     if (candidates != null) {
         foundCandidates = true;
         coveredTokens.add(token);
+
+        // DISAM
+        String[] tokenParts = token.split(" ");
+        String[] newTokenParts = new String[tokenParts.length];
+        int fresh = createFresh();
+        for (int i = 0; i < tokenParts.length; i++) {
+            newTokenParts[i] = tokenParts[i] + fresh;
+        } //
+
         for (Pair<Integer,TreeNode> p : candidates) {
-            add(parseG, p.getSecond(), p.getFirst(), localID);
+
+            // DISAM
+            TreeNode new_p_second = p.getSecond();
+            if (doubles.contains(token)) {
+                for (int i = 0; i < tokenParts.length; i++) {
+                    new_p_second.setAnchor(tokenParts[i],newTokenParts[i]);
+                }
+            } //
+
+            add(parseG, new_p_second, p.getFirst(), localID);
             localID++;
         }
+        doubles.add(token); // DISAM
     }
     else if (named != null) {
@@ -127,10 +149,27 @@
         foundCandidates = true;
         coveredTokens.add(token);
+        // DISAM
+        String[] newTokenParts = new String[tokenParts.length];
+        int fresh = createFresh();
+        for (int i = 0; i < tokenParts.length; i++) {
+            newTokenParts[i] = tokenParts[i] + fresh;
+        } //
+
         for (Pair<Integer, TreeNode> p : grammar.getAnchorToTrees().get(anchor)) {
-            add(parseG, p.getSecond(), p.getFirst(),localID);
+
+            // DISAM
+            TreeNode new_p_second = p.getSecond();
+            if (doubles.contains(token)) {
+                for (int i = 0; i < tokenParts.length; i++) {
+                    new_p_second.setAnchor(tokenParts[i],newTokenParts[i]);
+                }
+            } //
+
+            add(parseG, new_p_second, p.getFirst(),localID);
             localID++;
         }
+        doubles.add(token); // DISAM
     }
     }
     }
@@ -181,9 +220,14 @@
     String[] newparts = newtaggedstring.trim().split(" ");
     for (String s : newparts) {
         if (s.contains("/")) {
-            buildSlotFor.add(new Pair<String,String>(s.trim().substring(0,s.indexOf("/")),s.trim().substring(s.indexOf("/")+1)));
+            String word = s.trim().substring(0,s.indexOf("/"));
+            if (doubles.contains(word)) {
+                word += createFresh();
+            }
+            buildSlotFor.add(new Pair<String,String>(word,s.trim().substring(s.indexOf("/")+1)));
+            doubles.add(word);
         } else {
-            System.out.println("Oh no, " + s + " has no POS tag!"); // DEBUG
+            System.out.println("Oh no, " + s + " has no POS tag!");
         }
     }
     System.out.println("build slot for: " + buildSlotFor + "\n");
@@ -284,9 +328,17 @@
             result.add(s.substring(0,s.indexOf("/")));
         }
-        System.out.println("Word list: " + result);
-
         return result;
     }
+
+    private static int createFresh() {
+
+        int fresh = 0;
+        for (int i = 0; usedInts.contains(i); i++) {
+            fresh = i+1 ;
+        }
+        usedInts.add(fresh);
+        return fresh;
+    }
 }

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2011-05-13 09:21:24 UTC (rev 2803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -42,14 +42,6 @@
      */
     public List<DerivationTree> parse(String taggeduserinput, LTAGLexicon grammar) {
-        String inputNoTags = "";
-        for (String s : taggeduserinput.split(" ")) {
-            inputNoTags += s.substring(0,s.indexOf("/")) + " ";
-        }
-
-        this.input = ("# ".concat(inputNoTags.trim())).split(" ");
-        int n = this.input.length;
-
         derivationTrees.clear();
         derivedTrees.clear();
         dudes.clear();
@@ -63,6 +55,15 @@
          */
         parseGrammar = GrammarFilter.filter(taggeduserinput,grammar,temporaryEntries);
+        String inputNoTags = "";
+        for (String s : taggeduserinput.split(" ")) {
+            inputNoTags += s.substring(0,s.indexOf("/")) + " ";
+        }
+
+        this.input = ("# ".concat(inputNoTags.trim())).split(" ");
+        int n = this.input.length;
+
+
         if (SHOW_GRAMMAR) {
             System.out.println(parseGrammar);
         }
@@ -79,6 +80,7 @@
             internalParse(parseGrammar.getDPInitTrees(), n);
         }
+        System.out.println("Constructed " + derivationTrees.size() + " derivation trees.");
         return derivationTrees;
     }

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-05-13 09:21:24 UTC (rev 2803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -2,21 +2,17 @@
 public class SPARQL_Term extends SPARQL_Value {
-    SPARQL_OrderBy orderBy;
-    SPARQL_Aggregate aggregate;
+    SPARQL_OrderBy orderBy = SPARQL_OrderBy.NONE;
+    SPARQL_Aggregate aggregate = SPARQL_Aggregate.NONE;
     SPARQL_Term as = null;
     public SPARQL_Term(String name) {
         super(name);
         this.name = name.replace("?","").replace("!","");
-        orderBy = SPARQL_OrderBy.NONE;
-        aggregate = SPARQL_Aggregate.NONE;
     }
     public SPARQL_Term(String name,boolean b) {
         super(name);
         this.name = name.replace("?","").replace("!","");
-        orderBy = SPARQL_OrderBy.NONE;
-        aggregate = SPARQL_Aggregate.NONE;
         setIsVariable(b);
     }
@@ -35,6 +31,12 @@
         super(name);
         this.orderBy = orderBy;
     }
+    public SPARQL_Term(String name, SPARQL_OrderBy orderBy,boolean b,SPARQL_Term t) {
+        super(name);
+        this.orderBy = orderBy;
+        setIsVariable(b);
+        as = t;
+    }
     @Override
     public boolean equals(Object obj) {
@@ -76,12 +78,14 @@
             }
         }
         if (orderBy != SPARQL_OrderBy.NONE) {
+            String n;
+            if (as != null) { n = as.name; } else { n = name; }
             if (orderBy == SPARQL_OrderBy.ASC)
-                return "ASC(?"+name.toLowerCase()+")";
+                return "ASC(?"+n.toLowerCase()+")";
             else
-                return "DESC(?"+name.toLowerCase()+")";
+                return "DESC(?"+n.toLowerCase()+")";
         }
-        if (isVariable()) {
+        if (isVariable() && !isString()) {
             return "?"+name.toLowerCase();
         }
         else {

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-13 09:21:24 UTC (rev 2803)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -48,6 +48,7 @@
     else if (pos.equals("NPREP")) {
         type = "PROPERTY";
     }
+
     List<String> words = new ArrayList<String>();
     words.add(token);
     if (!pos.equals("NNP") && !pos.equals("NNPS")) {

Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TreeTagger.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TreeTagger.java (rev 0)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TreeTagger.java 2011-05-13 09:38:49 UTC (rev 2804)
@@ -0,0 +1,37 @@
+package org.dllearner.algorithm.tbsl.templator;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Arrays;
+
+import org.annolab.tt4j.TokenHandler;
+import org.annolab.tt4j.TreeTaggerException;
+import org.annolab.tt4j.TreeTaggerWrapper;
+
+public class TreeTagger {
+
+    TreeTaggerWrapper<String> tt;
+
+    public TreeTagger() throws IOException {
+        System.setProperty("treetagger.home","/home/christina/Software/TreeTagger");
+        tt = new TreeTaggerWrapper<String>();
+        tt.setModel("/home/christina/Software/TreeTagger/lib/english.par:iso8859-1");
+    }
+
+    public void tagthis(String s) throws IOException, TreeTaggerException {
+
+        List<String> input = Arrays.asList(s.split(" "));
+        try {
+            tt.setHandler(new TokenHandler<String>() {
+                public void token(String token, String pos, String lemma) {
+                    System.out.println(token+"/"+pos+"/"+lemma);
+                }
+            });
+            tt.process(input);
+            System.out.println(tt.getStatus());
+        }
+        finally {
+            tt.destroy();
+        }
+    }
+}

Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TreeTagger.java
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Added: trunk/components-ext/src/main/resources/tbsl/lib/org.annolab.tt4j-1.0.14.jar
===================================================================
(Binary files differ)

Property changes on: trunk/components-ext/src/main/resources/tbsl/lib/org.annolab.tt4j-1.0.14.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|