From: <chr...@us...> - 2011-05-06 16:02:36
|
Revision: 2784 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2784&view=rev Author: christinaunger Date: 2011-05-06 16:02:30 +0000 (Fri, 06 May 2011) Log Message: ----------- Update: SPARQL Template Generation Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/POStagger.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-04 10:00:34 UTC (rev 2783) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-05-06 16:02:30 UTC (rev 2784) @@ -47,17 +47,22 @@ String condensedstring = taggedstring; Matcher m; - Pattern compAdjPattern = Pattern.compile("\\s(\\w+/RBR.([a-zA-Z_0-9]+)/JJ)"); - Pattern superAdjPattern = Pattern.compile("\\s(\\w+/RBS.([a-zA-Z_0-9]+)/JJ)"); - Pattern howAdjPattern = Pattern.compile("\\s(\\w+/WRB.([a-zA-Z_0-9]+)/JJ)"); - Pattern nprepPattern = Pattern.compile("\\s((\\w+)/NNS?.of/IN)"); - Pattern passivePattern1 = Pattern.compile("(((has)|(have)|(had))/VB[A-Z]?.been/VBN.(\\w+)/VBN.by/IN)"); - Pattern passivePattern2 = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(\\w+)/VBN.by/IN)"); - Pattern passpartPattern = Pattern.compile("\\s((\\w+)/VBN.by/IN)"); - Pattern vpassPattern = Pattern.compile("\\s(\\w+/VBD.(\\w+)/VBN)"); - Pattern vpassinPattern = Pattern.compile("\\s((\\w+)/VPASS.\\w+/IN)"); - Pattern gerundinPattern = Pattern.compile("\\s((\\w+)/((VBG)|(VBN)).\\w+/IN)"); - Pattern vprepPattern = Pattern.compile("\\s((\\w+)/V[A-Z]+\\s\\w+/IN)"); + Pattern compAdjPattern = Pattern.compile("\\s(\\w+/RBR.([a-zA-Z_0-9]+)/JJ)"); + Pattern superAdjPattern = Pattern.compile("\\s(\\w+/RBS.([a-zA-Z_0-9]+)/JJ)"); + Pattern howAdjPattern = Pattern.compile("\\s(\\w+/WRB.([a-zA-Z_0-9]+)/JJ)"); + Pattern nprepPattern = Pattern.compile("\\s((\\w+)/NNS?.of/IN)"); + Pattern didPattern = Pattern.compile("(?i)(\\s((did)|(do)|(does))/VB.?)\\s"); + Pattern passivePattern1a = Pattern.compile("(((has)|(have)|(had))/VB[A-Z]?.been/VBN.(\\w+)/VBN.by/IN)"); + Pattern passivePattern1b = Pattern.compile("(\\s((has)|(have)|(had))/VB[A-Z]?(.+\\s)been/VBN\\s(\\w+)/VB(N|D))"); + Pattern passivePattern2a = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(\\w+)/VBN.by/IN)"); + Pattern passivePattern2b = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(.+)(\\s\\w+)/VB(N|D))"); + Pattern passpartPattern = Pattern.compile("\\s((\\w+)/VBN.by/IN)"); + Pattern vpassPattern = Pattern.compile("\\s(\\w+/VBD.(\\w+)/VBN)"); + Pattern vpassinPattern = Pattern.compile("\\s((\\w+)/VPASS.\\w+/IN)"); + Pattern gerundinPattern = Pattern.compile("\\s((\\w+)/((VBG)|(VBN)).\\w+/IN)"); + Pattern vprepPattern = Pattern.compile("\\s((\\w+)/V[A-Z]+\\s\\w+/(IN|TO))"); + Pattern whenPattern = Pattern.compile("(?i)(when/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); + Pattern wherePattern = Pattern.compile("(?i)(where/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); m = compAdjPattern.matcher(condensedstring); while (m.find()) { @@ -75,14 +80,26 @@ while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/NPREP"); } - m = passivePattern1.matcher(condensedstring); + m = didPattern.matcher(condensedstring); while (m.find()) { + condensedstring = condensedstring.replaceFirst(m.group(1),""); + } + m = passivePattern1a.matcher(condensedstring); + while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6)+"/PASSIVE"); } - m = passivePattern2.matcher(condensedstring); + m = passivePattern1b.matcher(condensedstring); while (m.find()) { + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6) + m.group(7)+"/PASSIVE"); + } + m = passivePattern2a.matcher(condensedstring); + while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/PASSIVE"); } + m = passivePattern2b.matcher(condensedstring); + while (m.find()) { + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7) + m.group(8)+"/PASSIVE"); + } m = passpartPattern.matcher(condensedstring); while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/PASSPART"); @@ -103,47 +120,49 @@ while (m.find()) { condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPREP"); } + m = whenPattern.matcher(condensedstring); + while (m.find()) { + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHEN"); + } + m = wherePattern.matcher(condensedstring); + while (m.find()) { + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHERE"); + } return condensedstring; } - public static List<Pair<String,String>> condenseNominalPhrases(List<Pair<String,String>> tokenPOSpairs){ - List<Pair<String,String>> test = new ArrayList<Pair<String,String>>(); + public static List<Pair<String,String>> condenseNominalPhrases(List<Pair<String,String>> tokenPOSpairs) { - String nounPhrase = ""; - String phraseTag = ""; - for(Pair<String,String> pair : tokenPOSpairs){ - if(pair.snd.startsWith("NNP")){ - if(phraseTag.equals("NN")){ - if(!nounPhrase.isEmpty()){ - test.add(new Pair<String, String>(phraseTag.trim(), "NN")); - nounPhrase = ""; - } - } - phraseTag = "NNP"; - nounPhrase += " " + pair.fst; - } else if(pair.snd.startsWith("NN")){ - if(phraseTag.equals("NNP")){ - if(!nounPhrase.isEmpty()){ - test.add(new Pair<String, String>(phraseTag.trim(), "NNP")); - nounPhrase = ""; - } - } - phraseTag = "NN"; - nounPhrase += " " + pair.fst; - } else { - if(!nounPhrase.isEmpty()){ - test.add(new Pair<String, String>(nounPhrase.trim(), phraseTag)); - nounPhrase = ""; - } - test.add(pair); - } + List<Pair<String,String>> out = new ArrayList<Pair<String,String>>(); + + String flat = ""; + for (Pair<String,String> p : tokenPOSpairs) { + flat += " " + p.fst.trim() + "/" + p.snd.trim(); } - if(!nounPhrase.isEmpty()){ - test.add(new Pair<String, String>(nounPhrase.trim(), phraseTag)); - nounPhrase = ""; + flat = flat.trim(); + + Matcher m; + Pattern nnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+))/NNP[S]?"); + Pattern nnPattern = Pattern.compile("\\s?((\\w+)/NN[S]?\\s(\\w+))/NN[S]?"); + + m = nnpPattern.matcher(flat); + while (m.find()) { + flat = flat.replaceFirst(m.group(1),m.group(2) + "_" + m.group(3)); } + m = nnPattern.matcher(flat); + while (m.find()) { + flat = flat.replaceFirst(m.group(1),m.group(2) + "_" + m.group(3)); + } - return test; + System.out.println("NNP stuff: " + flat); + + String[] flatParts = flat.split(" "); + for (String part : flatParts) { + System.out.println(part); + out.add(new Pair<String,String>(part.substring(0,part.indexOf("/")).replaceAll("_"," "), part.substring(part.indexOf("/")+1))); + } + + return out; } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/POStagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/POStagger.java 2011-05-04 10:00:34 UTC (rev 2783) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/POStagger.java 2011-05-06 16:02:30 UTC (rev 2784) @@ -20,6 +20,7 @@ } public POStagger() throws IOException, ClassNotFoundException { taggermodel = "src/main/resources/tbsl/models/bidirectional-distsim-wsj-0-18.tagger"; + //taggermodel = "src/main/resources/tbsl/models/left3words-wsj-0-18.tagger"; tagger = new MaxentTagger(taggermodel); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-04 10:00:34 UTC (rev 2783) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2011-05-06 16:02:30 UTC (rev 2784) @@ -11,7 +11,7 @@ private WordNet wordnet; private String[] noun = {"NN","NNS","NNP","NNPS","NPREP"}; private String[] adjective = {"JJ","JJR","JJS","JJH"}; - private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP"}; + private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP","WHEN","WHERE"}; private String[] preps = {"IN"}; public SlotBuilder() { @@ -50,7 +50,9 @@ } List<String> words = new ArrayList<String>(); words.add(token); - words.addAll(wordnet.getBestSynonyms(token)); + if (!pos.equals("NNP") && !pos.equals("NNPS")) { + words.addAll(wordnet.getBestSynonyms(token)); + } String tokenfluent = token.replaceAll(" ",""); String slotX = "x/" + type + "/"; @@ -125,10 +127,14 @@ } } if (pos.equals("PASSIVE")) { - String[] passEntry = {token, + String[] passEntry1 = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; - result.add(passEntry); + String[] passEntry2 = {token, + "(S DP[wh] (VP DP[dp] V:'" + token + "'))", + "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(y,x) ] ],[(l2,x,wh,<<e,t>,t>),(l3,y,dp,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + result.add(passEntry1); + result.add(passEntry2); } else if (pos.equals("PASSPART")) { String[] passpartEntry = {token, @@ -164,7 +170,7 @@ "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; result.add(passEntry); } - else if (pos.equals("VBD") || pos.equals("VBZ") || pos.equals("VBP")) { + else if (pos.equals("VBD") || pos.equals("VBZ") || pos.equals("VBP") || pos.equals("VB")) { String[] vEntry = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; @@ -176,6 +182,32 @@ "<x,l1,t,[ l1:[ | SLOT_" + token + "(x,y) ] ],[(l2,y,dp,<<e,t>,t>)],[ l2=l1 ],[" + slot + "]>"}; result.add(gerEntry); } + else if (pos.equals("WHEN")) { + String dateSlot = "SLOT_" + token + "/PROPERTY/" + token + "Date"; + String tokenSlot = "SLOT_" + token + "/PROPERTY/" + token; + String[] whenEntry1 = {token, + "(S DP[subj] (VP V:'" + token + "'))", + "<x,l1,t,[ l1:[ ?y | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + dateSlot + " ]>"}; + String[] whenEntry2 = {token, + "(S DP[subj] (VP V:'" + token + "' DP[obj]))", + "<x,l1,t,[ l1:[|], l4:[ ?z | SLOT_" + token + "(x,y), SLOT_date(x,z) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ]," + + "[" + tokenSlot + ", SLOT_date/PROPERTY/date ]>"}; + result.add(whenEntry1); + result.add(whenEntry2); + } + else if (pos.equals("WHERE")) { + String placeSlot = "SLOT_" + token + "/PROPERTY/" + token + "Place"; + String tokenSlot = "SLOT_" + token + "/PROPERTY/" + token; + String[] whereEntry1 = {token, + "(S DP[subj] (VP V:'" + token + "'))", + "<x,l1,t,[ l1:[ ?y | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + placeSlot + " ]>"}; + String[] whereEntry2 = {token, + "(S DP[subj] (VP V:'" + token + "' DP[obj]))", + "<x,l1,t,[ l1:[|], l4:[ ?z | SLOT_" + token + "(x,y), SLOT_place(x,z) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ]," + + "[" + tokenSlot + ", SLOT_place/PROPERTY/place ]>"}; + result.add(whereEntry1); + result.add(whereEntry2); + } } /* ADJECTIVES */ Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2011-05-04 10:00:34 UTC (rev 2783) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2011-05-06 16:02:30 UTC (rev 2784) @@ -1,12 +1,6 @@ -// TEST -//------ - - mushrooms || (NP N:'mushrooms') || <x,l1,<e,t>,[ l1:[ | mushroom(x) ] ], [],[],[]> - - // TO BE -// -------- +// ------ is || (S DP[subject] (VP V:'is' DP[object])) || <x, l1, t, [ l1:[ | ], l2:[ | x=y ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> is || (S DP[subject] (VP V:'is' ADJ[comp])) || <x, l1, t, [ l1:[ | x=y ]], [ (l2,x,subject,<<e,t>,t>), (l3,y,comp,<e,t>) ], [ l2=l1, l3=l2 ],[]> @@ -24,7 +18,19 @@ is there || (S V:'is' C:'there' DP[dp]) || <x, l1, t, [ l1:[ | ] ], [ (l2,x,dp,<<e,t>,t>) ], [ l2=l1 ],[]> +// TO BE: YES/NO QUESTIONS + is || (S V:'is' DP[subject] DP[object]) || <x, l1, t, [ l1:[ | ], l2:[ | x=y ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> + is || (S V:'is' DP[subject] ADJ[comp]) || <x, l1, t, [ l1:[ | x=y ]], [ (l2,x,subject,<<e,t>,t>), (l3,y,comp,<e,t>) ], [ l2=l1, l3=l2 ],[]> + was || (S V:'was' DP[subject] DP[object]) || <x, l1, t, [ l1:[ | ], l2:[ | x=y ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> + was || (S V:'was' DP[subject] ADJ[comp]) || <x, l1, t, [ l1:[ | x=y ]], [ (l2,x,subject,<<e,t>,t>), (l3,y,comp,<e,t>) ], [ l2=l1, l3=l2 ],[]> + are || (S V:'are' DP[subject] DP[object]) || <x, l1, t, [ l1:[ | ], l2:[ | x=y ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> + are || (S V:'are' DP[subject] ADJ[comp]) || <x, l1, t, [ l1:[ | x=y ]], [ (l2,x,subject,<<e,t>,t>), (l3,y,comp,<e,t>) ], [ l2=l1, l3=l2 ],[]> + were || (S V:'were' DP[subject] DP[object]) || <x, l1, t, [ l1:[ | ], l2:[ | x=y ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> + were || (S V:'were' DP[subject] ADJ[comp]) || <x, l1, t, [ l1:[ | x=y ]], [ (l2,x,subject,<<e,t>,t>), (l3,y,comp,<e,t>) ], [ l2=l1, l3=l2 ],[]> + + did || (S V:'did' S*) || <x,l1,t,[ l1:[|] ],[],[],[]> + // IMPERATIVES // --------------------- @@ -80,18 +86,23 @@ more than || (DP DET:'more' DET:'than' NUM[num] NP[np]) || <x,l1,<<e,t>,t>,[ l1:[ c | count(y,c), greater(c,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> less than || (DP DET:'less' DET:'than' NUM[num] NP[np]) || <x,l1,<<e,t>,t>,[ l1:[ c | count(y,c), less(c,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> + // EMPTY STUFF +// ------------ also || (VP ADV:'also' VP*) || <x,l1,t,[ l1:[|] ],[],[],[]> + // WH WORDS // -------- - what || (DP WH:'what') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> - which || (DP WH:'which') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> + what || (DP WH:'what') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> + which || (DP WH:'which') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> how many || (DP WH:'how' ADJ:'many' NP[noun]) || <y, l1, <<e,t>,t>, [ l1:[ | l2:[ y | ] HOWMANY y l3:[|] ] ], [ (l4,y,noun,<e,t>) ], [ l4=l2 ],[]> - who || (DP WH:'who') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> + who || (DP WH:'who') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> + when || (S WH:'when' S[s]) || <x, l1, <<e,t>,t>, [ l1:[ ?x | SLOT_p(y,x) ] ], [(l2,y,s,t)], [l2=l1], [ SLOT_p/PROPERTY/date ]> + where || (S WH:'where' S[s]) || <x, l1, <<e,t>,t>, [ l1:[ ?x | SLOT_p(y,x) ] ], [(l2,y,s,t)], [l2=l1], [ SLOT_p/PROPERTY/place ]> // NEGATION @@ -104,7 +115,7 @@ do not || (VP V:'do' NEG:'not' VP*) || <x,l2,t,[ l1:[ | NOT l2:[|] ] ],[],[],[]> -// CONJUNCTION +// COORDINATION // ------------ and || (S S* CC:'and' S[s]) || <x,l1,t,[l1:[|]],[(l2,y,s,t)],[l1=l2],[]> @@ -113,6 +124,8 @@ and || (VP VP* CC:'and' VP[vp]) || - and || (ADJ ADJ* CC:'and' ADJ[adj]) || - + as well as || (NP NP* CC:'as' CC:'well' CC:'as' NP[np]) || <x,l1,<e,t>,[l1:[|]],[(l2,y,np,<e,t>)],[l1=l2],[]> + or || (S S* CC:'or' S[2]) || - or || (DP DP* CC:'or' DP[2]) || - or || (NP NP* CC:'or' NP[2]) || - Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java 2011-05-04 10:00:34 UTC (rev 2783) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/POStest.java 2011-05-06 16:02:30 UTC (rev 2784) @@ -8,13 +8,27 @@ public static void main(String[] args) throws IOException, ClassNotFoundException { + POStagger tagger = new POStagger(); - String sentence = "give me all cities in Germany"; + String sentence = "When did Nirvana record Nevermind?"; String tagged = tagger.tag(sentence); System.out.println(tagged); + +// Tagger tagger = new Tagger("en"); +// +// String s = ""; +// +// String[] words = s.split(" "); +// String[] tagged; +// +// tagged = tagger.tag(words); +// +// for (String string : tagged) { +// System.out.println(string); +// } } } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java 2011-05-04 10:00:34 UTC (rev 2783) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java 2011-05-06 16:02:30 UTC (rev 2784) @@ -11,13 +11,13 @@ public static void main(String[] args) { // TODO Auto-generated method stub - String s = "was/VBD developed/VBN by/IN"; + String s = "how/WRB many/JJ software/NN companies/NN are/VBP located/VBN in/IN New/NNP York/NNP"; - Pattern nprepPattern = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(\\w+)/VBN.by/IN)"); + Pattern nprepPattern = Pattern.compile("\\s((\\w+)/NN[S]?\\s(\\w+))/NN[S]?"); Matcher m = nprepPattern.matcher(s); while (m.find()) { System.out.println("Found!"); - s = s.replaceFirst(m.group(1),m.group(7)+"/PASSIVE"); + s = s.replaceFirst(m.group(1),m.group(2) + "_" + m.group(3)); } System.out.println(s); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java 2011-05-04 10:00:34 UTC (rev 2783) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateGenerationTest.java 2011-05-06 16:02:30 UTC (rev 2784) @@ -54,7 +54,7 @@ * @param args */ public static void main(String[] args) { - File file = new File("src/main/resources/tbsl/evaluation/dbpedia-train.xml"); + File file = new File("src/main/resources/tbsl/evaluation/dbpedia-test-questions.xml"); List<String> questions = readQuestions(file); Templator templateGenerator = new Templator(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |