From: <chr...@us...> - 2011-06-16 13:02:44
|
Revision: 2896
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2896&view=rev
Author:   christinaunger
Date:     2011-06-16 13:02:38 +0000 (Thu, 16 Jun 2011)

Log Message:
-----------
[tbsl] some small fixes

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java
    trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train-tagged(ideal).xml
    trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train.xml
    trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java
===================================================================
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-06-16 10:10:24 UTC (rev 2895)
+++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-06-16 13:02:38 UTC (rev 2896)
@@ -46,6 +46,7 @@
 /* condense:
 * x/RBR adj/JJ > adj/JJR, x/RBS adj/JJ > adj/JJS, x/WRB adj/JJ > x/JJH
 * nn/RBR of/IN > nn/NPREP
+ * usw.
 * */
 String condensedstring = taggedstring;
 Matcher m;
@@ -60,8 +61,9 @@
 Pattern passivePattern1b = Pattern.compile("(\\s((has)|(have)|(had))/VB[A-Z]?(.+\\s)been/VBN\\s(\\w+)/VB(N|D))");
 Pattern passivePattern2a = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(\\w+)/VBN.by/IN)");
 Pattern pseudopassPattern = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(\\w+)/VBN.\\w+/TO)");
- Pattern pseudopwhPattern = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(.+)\\s(\\w+)/VBN.\\w+/TO)");
- Pattern passivePattern2b = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(.+)(\\s\\w+)/VB(N|D))");
+ Pattern pseudopwhPattern = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.(.+)\\s(\\w+)/VB(N|D).\\w+/TO)");
+ Pattern saveIsThere = Pattern.compile("((is)|(are))/(VB[A-Z]?).there/(RB)");
+ Pattern passivePattern2b = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.((.+)\\s\\w+)/VB(N|D))");
 Pattern passpartPattern = Pattern.compile("\\s((\\w+)/VBN.by/IN)");
 Pattern vpassPattern = Pattern.compile("\\s(\\w+/VBD.(\\w+)/VBN)");
 Pattern vpassinPattern = Pattern.compile("\\s((\\w+)/VPASS.\\w+/IN)");
@@ -74,87 +76,112 @@
 m = compAdjPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"/JJR");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJR");
 }
 m = superAdjPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"/JJS");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJS");
 }
 m = howAdjPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"/JJH");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/JJH");
 }
 m = nprepPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"/NPREP");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/NPREP");
 }
 m = didPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by \"\"");
 condensedstring = condensedstring.replaceFirst(m.group(1),"");
 }
 m = prepfrontPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by \"\"");
 condensedstring = condensedstring.replaceFirst(m.group(1),"");
 }
 m = passivePattern1a.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(6)+"/PASSIVE");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6)+"/PASSIVE");
 }
 m = passivePattern1b.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(6)+m.group(7)+"/PASSIVE");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(6) + m.group(7)+"/PASSIVE");
 }
 m = passivePattern2a.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/PASSIVE");
 }
 m = pseudopassPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(7)+"/VPREP");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/VPREP");
 }
 m = pseudopwhPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(7)+m.group(8)+"/VPREP");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+" "+m.group(8)+"/VPREP");
 }
+ m = saveIsThere.matcher(condensedstring);
+ while (m.find()) {
+ condensedstring = condensedstring.replaceFirst(m.group(4),"LEX").replaceFirst(m.group(5),"LEX"); // TODO what a dirty hack!
+ }
 m = passivePattern2b.matcher(condensedstring);
 while (m.find()) {
- condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7) + m.group(8)+"/PASSIVE");
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(7)+"/PASSIVE");
+ condensedstring = condensedstring.replaceFirst(m.group(1),m.group(7)+"/PASSIVE");
 }
 m = passpartPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"/PASSPART");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/PASSPART");
 }
 m = vpassPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASS");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPASS");
 }
 m = vpassinPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"/VPASSIN");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPASSIN");
 }
 m = gerundinPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"/GERUNDIN");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/GERUNDIN");
 }
 m = vprepPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"/VPREP");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"/VPREP");
 }
 m = whenPattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHEN");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHEN");
 }
 m = wherePattern.matcher(condensedstring);
 while (m.find()) {
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHERE");
 condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHERE");
 }
 m = adjnounPattern.matcher(condensedstring);
 while (m.find()) {
- condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(4)+"/JJNN");
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN");
+ condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN");
 }
 m = adjnprepPattern.matcher(condensedstring);
 while (m.find()) {
- condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(4)+"/JJNPREP");
+ System.out.println("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNPREP");
+ condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNPREP");
 }
 return condensedstring;

Modified: trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train-tagged(ideal).xml
===================================================================
--- trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train-tagged(ideal).xml 2011-06-16 10:10:24 UTC (rev 2895)
+++ trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train-tagged(ideal).xml 2011-06-16 13:02:38 UTC (rev 2896)
@@ -19239,7 +19239,7 @@
 </answers>
 </question>
 <question id="15">
-<string>Which/WDT capitals/NNS in/IN Europe/NNP were/VBD host/NN cities/NNS of/IN the/DT summer/NN olympic/JJ games/NNS</string>
+<string>Which/WDT capitals/NNS in/IN Europe/NNP were/VBD host/NN cities/NNS of/IN the/DT Summer/NNP Olympic/NNP Games/NNPS</string>
 <query>
 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
 PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

Modified: trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train.xml
===================================================================
--- trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train.xml 2011-06-16 10:10:24 UTC (rev 2895)
+++ trunk/components-ext/src/main/resources/tbsl/evaluation/dbpedia-train.xml 2011-06-16 13:02:38 UTC (rev 2896)
@@ -19301,7 +19301,7 @@
 </question>
 <question id="15">
 <string>
-Which capitals in Europe were host cities of the summer olympic games?
+Which capitals in Europe were host cities of the Summer Olympic Games?
 </string>
 <query>
 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java
===================================================================
--- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java 2011-06-16 10:10:24 UTC (rev 2895)
+++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/PatternMatchingTest.java 2011-06-16 13:02:38 UTC (rev 2896)
@@ -12,13 +12,13 @@
 // TODO Auto-generated method stub
 String nep = "World";
- String s = "how/WRB many/JJ and/CONJ how/WRB big/JJ";
+ String s = "is/VBZ there/RB a/DT video/NN game/NN called/VBN Battle/NNP Chess/NNP";
- Pattern p = Pattern.compile("(\\w+/WRB.(\\w+)(?<!many)/JJ)");
+ Pattern p = Pattern.compile("(((is)|(are)|(was)|(were))/VB[A-Z]?.((.+)\\s\\w+)/VB(N|D))(?<!is/VBZ there/RB.+/VB(N|D))");
 Matcher m = p.matcher(s);
 while (m.find()) {
- System.out.println("Found! " + m.group(2));
- s = s.replaceFirst(m.group(2),nep+"/NNP");
+ System.out.println("Found! " + m.group(1) + " m.group(7): " + m.group(7));
+ s = s.replaceFirst(m.group(1),m.group(7)+"/NNP");
 }
 System.out.println(s);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|