From: <chr...@us...> - 2011-05-03 15:27:13
|
Revision: 2781 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2781&view=rev Author: christinaunger Date: 2011-05-03 15:27:07 +0000 (Tue, 03 May 2011) Log Message: ----------- Update SPARQL Template Generation (small fixes) Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-05-03 14:57:22 UTC (rev 2780) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2011-05-03 15:27:07 UTC (rev 2781) @@ -25,7 +25,6 @@ class GrammarFilter { final static String[] NAMED_Strings = {"named", "called"}; - final static String NAME_PREDICATE = "SLOT.pred:title_name"; static ParseGrammar filter(String taggedinput,LTAGLexicon grammar,List<Integer> temps) { @@ -100,10 +99,10 @@ try { - TreeNode tree = c.construct("(DP NUM:'" + token + "' NP[noun])"); + TreeNode tree = c.construct("NUM:'" + token + "'"); int gid = grammar.addTree(grammar.size(), new Pair<String,TreeNode>(token,tree), - Collections.singletonList("<x,l1,<<e,t>,t>,[l1:[ x | count(x,c), equal(c," + token + ")]],[(l2,x,noun,<e,t>)],[l2=l1],[]>")); + Collections.singletonList("<x,l1,e,[l1:[ x | equal(x," + token + ")]],[],[],[]>")); add(parseG, tree, gid-1, localID); localID++; @@ -230,8 +229,9 @@ rawNames += "DP:'" + split[i] + "' "; } semName = semName.substring(1); - out.add(new Pair<String,String>("(NP NP* ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<e,t>,[ l1:[ | " + NAME_PREDICATE + "(x,'" + semName + "') ] ], [],[],[]>")); - out.add(new Pair<String,String>("(DP DP* ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<<e,t>,t>,[ l1:[ | " + NAME_PREDICATE + "(x,'" + semName + "') ] ], [],[],[]>")); + out.add(new Pair<String,String>("(NP NP* ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<e,t>,[ l1:[ | SLOT_title(x,'" + semName + "') ] ], [],[],[ SLOT_title/PROPERTY/title^name ]>")); + out.add(new Pair<String,String>("(DP DP* ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_title(x,'" + semName + "') ] ], [],[],[ SLOT_title/PROPERTY/title^name ]>")); + out.add(new Pair<String,String>("(ADJ ADJ:'"+ w +"' " + rawNames + ")", "<x,l1,<e,t>,[ l1:[ | SLOT_title(x,'" + semName + "') ] ], [],[],[ SLOT_title/PROPERTY/title^name ]>")); return out; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-03 14:57:22 UTC (rev 2780) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-03 15:27:07 UTC (rev 2781) @@ -56,6 +56,7 @@ Pattern passpartPattern = Pattern.compile("\\s((\\w+)/VBN.by/IN)"); Pattern vpassPattern = Pattern.compile("\\s(\\w+/VBD.(\\w+)/VBN)"); Pattern vpassinPattern = Pattern.compile("\\s((\\w+)/VPASS.\\w+/IN)"); + Pattern gerundinPattern = Pattern.compile("\\s((\\w+)/((VBG)|(VBN)).\\w+/IN)"); Pattern vprepPattern = Pattern.compile("\\s((\\w+)/V[A-Z]+\\s\\w+/IN)"); m = compAdjPattern.matcher(condensedstring); @@ -94,6 +95,10 @@ while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPASSIN"); } + m = gerundinPattern.matcher(condensedstring); + while (m.find()) { + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/GERUNDIN"); + } m = vprepPattern.matcher(condensedstring); while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPREP"); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-03 14:57:22 UTC (rev 2780) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-03 15:27:07 UTC (rev 2781) @@ -11,7 +11,7 @@ private WordNet wordnet; private String[] noun = {"NN","NNS","NNP","NNPS","NPREP"}; private String[] adjective = {"JJ","JJR","JJS","JJH"}; - private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","VPREP"}; + private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP"}; private String[] preps = {"IN"}; public SlotBuilder() { @@ -52,8 +52,9 @@ words.add(token); words.addAll(wordnet.getBestSynonyms(token)); + String tokenfluent = token.replaceAll(" ",""); String slotX = "x/" + type + "/"; - String slotP = "SLOT_" + token + "/" + type + "/"; + String slotP = "SLOT_" + tokenfluent + "/" + type + "/"; for (Iterator<String> i = words.iterator(); i.hasNext();) { String next = i.next().replaceAll(" ","_"); slotX += next; slotP += next; @@ -72,12 +73,12 @@ /* DP */ String[] dpEntry = {token, "(DP (NP " + treetoken + "))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + token + "(x) ] ],[],[],[" + slotP + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotP + "]>"}; result.add(dpEntry); /* NP */ String[] npEntry = {token, "(NP " + treetoken + ")", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(x) ] ],[],[],[" + slotP + "]>"}; + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotP + "]>"}; result.add(npEntry); } else if (pos.equals("NNP") || pos.equals("NNPS")) { @@ -94,17 +95,17 @@ else if (pos.equals("NPREP")) { /* DP */ String[] dpEntry1a = {token, - "(DP (NP " + treetoken + " P:'of' DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + token + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "]>"}; + "(DP (NP " + treetoken + " DP[pobj]))", + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "]>"}; String[] dpEntry1b = {token, - "(DP (NP " + treetoken + " P:'of' DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + token + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "," + "SLOT_of/PROPERTY/" + "]>"}; + "(DP (NP " + treetoken + " DP[pobj]))", + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slotP + "," + "SLOT_of/PROPERTY/" + "]>"}; String[] dpEntry2a = {token, - "(DP DET[det] (NP " + treetoken + " P:'of' DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + token + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slotP + "]>"}; + "(DP DET[det] (NP " + treetoken + " DP[pobj]))", + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(y,x) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slotP + "]>"}; String[] dpEntry2b = {token, - "(DP DET[det] (NP " + treetoken + " P:'of' DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + token + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slotP + "," + "SLOT_of/PROPERTY/" + "]>"}; + "(DP DET[det] (NP " + treetoken + " DP[pobj]))", + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), SLOT_of(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slotP + "," + "SLOT_of/PROPERTY/" + "]>"}; result.add(dpEntry1a); result.add(dpEntry1b); result.add(dpEntry2a); @@ -147,6 +148,16 @@ "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(passEntry); } + else if (pos.equals("GERUNDIN")) { + String[] gerundinEntry1 = {token, + "(NP NP* V:'" + token + "' DP[obj]))", + "<x,l1,t,[ l1:[ | SLOT_" + token + "(x,y) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; + String[] gerundinEntry2 = {token, + "(ADJ V:'" + token + "' DP[obj]))", + "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(x,y) ] ],[(l2,y,obj,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; + result.add(gerundinEntry1); + result.add(gerundinEntry2); + } else if (pos.equals("VPREP")) { String[] passEntry = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2011-05-03 14:57:22 UTC (rev 2780) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2011-05-03 15:27:07 UTC (rev 2781) @@ -76,8 +76,10 @@ the most || (DET DET:'the' DET:'most') || <y, l1, e, [ l1:[ | l2:[ y | ] THEMOST y l3:[|] ] ], [], [],[]> the least || (DET DET:'the' DET:'least') || <y, l1, e, [ l1:[ | l2:[ y | ] THELEAST y l3:[|] ] ], [], [],[]> + // COUNT more than || (DP DET:'more' DET:'than' NUM[num] NP[np]) || <x,l1,<<e,t>,t>,[ l1:[ c | count(y,c), greater(c,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> less than || (DP DET:'less' DET:'than' NUM[num] NP[np]) || <x,l1,<<e,t>,t>,[ l1:[ c | count(y,c), less(c,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> + // EMPTY STUFF This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |