From: <chr...@us...> - 2011-05-09 13:37:41
|
Revision: 2787 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2787&view=rev Author: christinaunger Date: 2011-05-09 13:37:35 +0000 (Mon, 09 May 2011) Log Message: ----------- Update SPARQL Template Generation Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParserTokenManager.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAG_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/ParseException.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/SimpleCharStream.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/Token.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/TokenMgrError.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -27,10 +27,10 @@ final static String[] NAMED_Strings = {"named", "called"}; static ParseGrammar filter(String taggedinput,LTAGLexicon grammar,List<Integer> temps) { - + SlotBuilder slotbuilder = new SlotBuilder(); - List<String> input = getWordList(taggedinput); + List<String> input = getWordList(taggedinput.trim()); input.add(0,"#"); // This is important. Don't mess with the parser! ParseGrammar parseG = new ParseGrammar(input.size()); @@ -75,7 +75,7 @@ for (Pair<String,String> p : named) { try { - TreeNode tree = c.construct(p.getFirst()); + TreeNode tree = c.construct(p.getFirst().replaceAll("_"," ")); int gid = grammar.addTree(grammar.size(), new Pair<String,TreeNode>(token,tree), Collections.singletonList(p.getSecond())); add(parseG, tree, gid-1, localID); @@ -176,7 +176,7 @@ newtaggedstring += part + " "; } } - newtaggedstring = newtaggedstring.trim(); + // build token-POStag-pairs String[] newparts = newtaggedstring.trim().split(" "); for (String s : newparts) { @@ -185,8 +185,7 @@ } else { System.out.println("Oh no, " + s + " has no POS tag!"); // DEBUG } - } - buildSlotFor = Preprocessor.condenseNominalPhrases(buildSlotFor); + } System.out.println("build slot for: " + buildSlotFor + "\n"); List<String[]> entries = slotbuilder.build(taggedinput,buildSlotFor); @@ -282,9 +281,11 @@ List<String> result = new ArrayList<String>(); for (String s : string.split(" ")) { - result.add(s.substring(0,s.indexOf("/"))); + result.add(s.substring(0,s.indexOf("/"))); } + System.out.println("Word list: " + result); + return result; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -132,37 +132,32 @@ return condensedstring; } - public static List<Pair<String,String>> condenseNominalPhrases(List<Pair<String,String>> tokenPOSpairs) { + public static String condenseNominals(String s) { - List<Pair<String,String>> out = new ArrayList<Pair<String,String>>(); - - String flat = ""; - for (Pair<String,String> p : tokenPOSpairs) { - flat += " " + p.fst.trim() + "/" + p.snd.trim(); - } - flat = flat.trim(); + String flat = s; Matcher m; - Pattern nnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+))/NNP[S]?"); - Pattern nnPattern = Pattern.compile("\\s?((\\w+)/NN[S]?\\s(\\w+))/NN[S]?"); + Pattern nnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+))/NNP[S]?(\\W|$)"); + Pattern nnPattern = Pattern.compile("\\s?((\\w+)/NN[S]?\\s(\\w+))/NN[S]?(\\W|$)"); + Pattern nnnnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?)\\s(\\w+)/NN[S]?(\\W|$)"); m = nnpPattern.matcher(flat); while (m.find()) { flat = flat.replaceFirst(m.group(1),m.group(2) + "_" + m.group(3)); } + m = nnpPattern.matcher(flat); + while (m.find()) { + flat = flat.replaceFirst(m.group(1),m.group(2) + "_" + m.group(3)); + } m = nnPattern.matcher(flat); while (m.find()) { flat = flat.replaceFirst(m.group(1),m.group(2) + "_" + m.group(3)); } - - System.out.println("NNP stuff: " + flat); - - String[] flatParts = flat.split(" "); - for (String part : flatParts) { - System.out.println(part); - out.add(new Pair<String,String>(part.substring(0,part.indexOf("/")).replaceAll("_"," "), part.substring(part.indexOf("/")+1))); + m = nnnnpPattern.matcher(flat); + while (m.find()) { + flat = flat.replaceFirst(m.group(1),m.group(2) + "/JJ"); } - - return out; + + return flat; } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParser.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParser.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -2,16 +2,11 @@ package org.dllearner.algorithm.tbsl.ltag.reader; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import org.dllearner.algorithm.tbsl.ltag.data.*; +import org.dllearner.algorithm.tbsl.ltag.agreement.*; -import org.dllearner.algorithm.tbsl.ltag.agreement.Feature; -import org.dllearner.algorithm.tbsl.ltag.data.Category; -import org.dllearner.algorithm.tbsl.ltag.data.FootNode; -import org.dllearner.algorithm.tbsl.ltag.data.SubstNode; -import org.dllearner.algorithm.tbsl.ltag.data.TerminalNode; -import org.dllearner.algorithm.tbsl.ltag.data.Tree; -import org.dllearner.algorithm.tbsl.ltag.data.TreeNode; - public class LTAGTreeParser implements LTAGTreeParserConstants { /** Main entry point. */ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParserTokenManager.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParserTokenManager.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAGTreeParserTokenManager.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -1,5 +1,10 @@ /* Generated By:JavaCC: Do not edit this line. LTAGTreeParserTokenManager.java */ package org.dllearner.algorithm.tbsl.ltag.reader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.dllearner.algorithm.tbsl.ltag.data.*; +import org.dllearner.algorithm.tbsl.ltag.agreement.*; /** Token Manager. */ public class LTAGTreeParserTokenManager implements LTAGTreeParserConstants @@ -100,21 +105,21 @@ switch(jjstateSet[--i]) { case 2: - if ((0x7fffffeL & l) != 0L) + if ((0x7fffffe80000000L & l) != 0L) { + if (kind > 11) + kind = 11; + jjCheckNAdd(0); + } + else if ((0x7fffffeL & l) != 0L) + { if (kind > 12) kind = 12; jjCheckNAdd(1); } - else if ((0x7fffffe00000000L & l) != 0L) - { - if (kind > 11) - kind = 11; - jjCheckNAdd(0); - } break; case 0: - if ((0x7fffffe00000000L & l) == 0L) + if ((0x7fffffe80000000L & l) == 0L) break; kind = 11; jjCheckNAdd(0); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAG_Parser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAG_Parser.jj 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/LTAG_Parser.jj 2011-05-09 13:37:35 UTC (rev 2787) @@ -21,13 +21,13 @@ PARSER_BEGIN(LTAGTreeParser) -package ltag.reader; +package org.dllearner.algorithm.tbsl.ltag.reader; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import ltag.data.*; -import ltag.agreement.*; +import org.dllearner.algorithm.tbsl.ltag.data.*; +import org.dllearner.algorithm.tbsl.ltag.agreement.*; public class LTAGTreeParser { @@ -202,7 +202,7 @@ } } -TOKEN: {<WORD: (["a"-"z"]|["0"-"9"]|["?"]|["-"]|["!"]|[","]|[";"]|["."]|[":"]|["/"])+>} +TOKEN: {<WORD: (["a"-"z"]|["0"-"9"]|["?"]|["-"]|["_"]|["!"]|[","]|[";"]|["."]|[":"]|["/"])+>} TOKEN: {<CATEGORY: (["A"-"Z"])+>} Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/ParseException.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/ParseException.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/ParseException.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -184,4 +184,4 @@ } } -/* JavaCC - OriginalChecksum=83ef9865be98df5303271061c019e2cd (do not edit this line) */ +/* JavaCC - OriginalChecksum=3c249cda5771570567da479d4816c9e8 (do not edit this line) */ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/SimpleCharStream.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/SimpleCharStream.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/SimpleCharStream.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -468,4 +468,4 @@ } } -/* JavaCC - OriginalChecksum=a11c4feda26a580a0ed87cfec6fef57f (do not edit this line) */ +/* JavaCC - OriginalChecksum=6f78e8f501021ab7dfa602bb3724055c (do not edit this line) */ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/Token.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/Token.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/Token.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -128,4 +128,4 @@ } } -/* JavaCC - OriginalChecksum=86ab0d8d1ea8fe24b02fb84bc0089ecf (do not edit this line) */ +/* JavaCC - OriginalChecksum=67903b6a4b68c2296c1fa28eb89303c2 (do not edit this line) */ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/TokenMgrError.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/TokenMgrError.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/reader/TokenMgrError.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -144,4 +144,4 @@ this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); } } -/* JavaCC - OriginalChecksum=a1db3550bf73c10ce8a15134b3c45c2a (do not edit this line) */ +/* JavaCC - OriginalChecksum=c1534bb74977aaf9daef2e021f7f41e9 (do not edit this line) */ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -108,10 +108,18 @@ String[] dpEntry2b = {token, "(DP DET[det] (NP " + treetoken + " DP[pobj]))", "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slotP + "," + "SLOT_of/PROPERTY/" + "]>"}; + String[] npEntry1 = {token, + "(NP " + treetoken + " DP[pobj])", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "]>"}; + String[] npEntry2 = {token, + "(NP " + treetoken + " DP[pobj])", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "," + "SLOT_of/PROPERTY/" + "]>"}; result.add(dpEntry1a); result.add(dpEntry1b); result.add(dpEntry2a); result.add(dpEntry2b); + result.add(npEntry1); + result.add(npEntry2); } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -59,11 +59,13 @@ s = Preprocessor.normalize(s); String tagged = tagger.tag(s); System.out.println("Tagged input: " + tagged); - tagged = Preprocessor.condense(tagged); - System.out.println("Preprocessed: " + tagged); - p.parse(tagged,g); + String newtagged = Preprocessor.condenseNominals(tagged); + newtagged = Preprocessor.condense(newtagged); + System.out.println("Preprocessed: " + newtagged); + p.parse(newtagged,g); + if (p.getDerivationTrees().isEmpty()) { p.clear(g,p.getTemps()); clearAgain = false; Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2011-05-09 13:37:35 UTC (rev 2787) @@ -20,8 +20,8 @@ // TO BE: YES/NO QUESTIONS - is || (S V:'is' DP[subject] DP[object]) || <x, l1, t, [ l1:[ | ], l2:[ | x=y ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> - is || (S V:'is' DP[subject] ADJ[comp]) || <x, l1, t, [ l1:[ | x=y ]], [ (l2,x,subject,<<e,t>,t>), (l3,y,comp,<e,t>) ], [ l2=l1, l3=l2 ],[]> + is || (S (VP V:'is' DP[subject] DP[object])) || <x, l1, t, [ l1:[ | ], l2:[ | x=y ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> + is || (S (VP V:'is' DP[subject] ADJ[comp])) || <x, l1, t, [ l1:[ | x=y ]], [ (l2,x,subject,<<e,t>,t>), (l3,y,comp,<e,t>) ], [ l2=l1, l3=l2 ],[]> was || (S V:'was' DP[subject] DP[object]) || <x, l1, t, [ l1:[ | ], l2:[ | x=y ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> was || (S V:'was' DP[subject] ADJ[comp]) || <x, l1, t, [ l1:[ | x=y ]], [ (l2,x,subject,<<e,t>,t>), (l3,y,comp,<e,t>) ], [ l2=l1, l3=l2 ],[]> are || (S V:'are' DP[subject] DP[object]) || <x, l1, t, [ l1:[ | ], l2:[ | x=y ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> @@ -86,12 +86,15 @@ more than || (DP DET:'more' DET:'than' NUM[num] NP[np]) || <x,l1,<<e,t>,t>,[ l1:[ c | count(y,c), greater(c,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> less than || (DP DET:'less' DET:'than' NUM[num] NP[np]) || <x,l1,<<e,t>,t>,[ l1:[ c | count(y,c), less(c,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> + // HOW + how || (DP DET:'how' ADJ[adj]) || <x,l1,<<e,t>,t>,[ l1:[?x|] ],[ (x,l2,adj,<e,t>) ],[l2=l1],[]> // EMPTY STUFF // ------------ also || (VP ADV:'also' VP*) || <x,l1,t,[ l1:[|] ],[],[],[]> + also || (DP ADV:'also' DP*) || <x,l1,<<e,t>,t>,[ l1:[|] ],[],[],[]> // WH WORDS Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -11,13 +11,13 @@ public static void main(String[] args) { // TODO Auto-generated method stub - String s = "how/WRB many/JJ software/NN companies/NN are/VBP located/VBN in/IN New/NNP York/NNP"; + String s = "New/NNP York/NNP City/NNP is/VBZ a/DT US/NNP state/NN"; - Pattern nprepPattern = Pattern.compile("\\s((\\w+)/NN[S]?\\s(\\w+))/NN[S]?"); + Pattern nprepPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?)\\s(\\w+)/NN[S]?(\\W|$)"); Matcher m = nprepPattern.matcher(s); while (m.find()) { System.out.println("Found!"); - s = s.replaceFirst(m.group(1),m.group(2) + "_" + m.group(3)); + s = s.replaceFirst(m.group(1),m.group(2) + "/JJ"); } System.out.println(s); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java 2011-05-09 04:35:54 UTC (rev 2786) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java 2011-05-09 13:37:35 UTC (rev 2787) @@ -54,7 +54,7 @@ * @param args */ public static void main(String[] args) { - File file = new File("src/main/resources/tbsl/evaluation/dbpedia-test-questions.xml"); + File file = new File("src/main/resources/tbsl/evaluation/dbpedia-train.xml"); List<String> questions = readQuestions(file); Templator templateGenerator = new Templator(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |