From: <chr...@us...> - 2011-12-19 14:20:40
|
Revision: 3508 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3508&view=rev Author: christinaunger Date: 2011-12-19 14:20:34 +0000 (Mon, 19 Dec 2011) Log Message: ----------- [tbsl] parts of the user input enclosed in double quotes are treated as named entities (jetzt aber wirklich!) Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-12-19 13:18:28 UTC (rev 3507) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-12-19 14:20:34 UTC (rev 3508) @@ -226,9 +226,21 @@ String flat = s; Matcher m; - Pattern nnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+))/NNP[S]?(\\W|$)"); - Pattern nnPattern = Pattern.compile("\\s?((\\w+)/NN[S]?\\s(\\w+))/NN[S]?(\\W|$)"); - Pattern nnnnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?)\\s(\\w+)/NN[S]?(\\W|$)"); + Pattern quotePattern1 = Pattern.compile("``/``(\\s)?(\\w+(/\\w+\\s)).*''/''"); + Pattern quotePattern2 = Pattern.compile("(``/``((.*)_)''/'')"); + Pattern nnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+))/NNP[S]?(\\W|$)"); + Pattern nnPattern = Pattern.compile("\\s?((\\w+)/NN[S]?\\s(\\w+))/NN[S]?(\\W|$)"); + Pattern nnnnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?)\\s(\\w+)/NN[S]?(\\W|$)"); + + m = quotePattern1.matcher(flat); + while (m.find()) { + flat = flat.replaceFirst(m.group(3),"_"); + m = quotePattern1.matcher(flat); + } + m = quotePattern2.matcher(flat); + while (m.find()) { + flat = 
flat.replaceFirst(m.group(2),m.group(3)+"/NNP"); + } m = nnpPattern.matcher(flat); while (m.find()) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java 2011-12-19 13:18:28 UTC (rev 3507) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java 2011-12-19 14:20:34 UTC (rev 3508) @@ -14,6 +14,7 @@ import org.dllearner.algorithm.tbsl.ltag.parser.Preprocessor; import org.dllearner.algorithm.tbsl.nlp.ApachePartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.sem.drs.DRS; import org.dllearner.algorithm.tbsl.sem.drs.UDRS; import org.dllearner.algorithm.tbsl.sem.dudes.data.Dude; @@ -31,6 +32,9 @@ Parser p; Preprocessor pp; + DUDE2UDRS_Converter d2u; + DRS2BasicSPARQL_Converter d2s; + boolean ONE_SCOPE_ONLY = true; public boolean UNTAGGED_INPUT = true; @@ -42,8 +46,8 @@ g = LTAG_Constructor.construct(grammarFiles); -// tagger = new StanfordPartOfSpeechTagger(); - tagger = new ApachePartOfSpeechTagger(); + tagger = new StanfordPartOfSpeechTagger(); +// tagger = new ApachePartOfSpeechTagger(); p = new Parser(); p.SHOW_GRAMMAR = true; @@ -52,6 +56,9 @@ p.MODE = "BASIC"; pp = new Preprocessor(false); + + d2u = new DUDE2UDRS_Converter(); + d2s = new DRS2BasicSPARQL_Converter(); } public void setUNTAGGED_INPUT(boolean b) { @@ -59,9 +66,7 @@ } public Set<BasicQueryTemplate> buildBasicQueries(String s) { - - DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); - DRS2BasicSPARQL_Converter d2s = new DRS2BasicSPARQL_Converter(); + boolean clearAgain = true; String tagged; Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 
=================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2011-12-19 13:18:28 UTC (rev 3507) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2011-12-19 14:20:34 UTC (rev 3508) @@ -12,7 +12,7 @@ public class TestFrontend { static String[] GRAMMAR_FILES = {"src/main/resources/lexicon/english.lex"}; - static boolean BASIC_MODE = false; // true for BASIC mode, false for LEIPZIG mode + static boolean BASIC_MODE = true; // true for BASIC mode, false for LEIPZIG mode public static void main(String[] args) { @@ -32,7 +32,13 @@ if (BASIC_MODE) { for (BasicQueryTemplate temp : handler.buildBasicTemplates(s)) { - System.out.println(temp.toString()); + try { + System.out.println(temp.toString()); + } + catch (NullPointerException e) { + System.err.println("NullPointer in BasicQueryTemplate!"); + continue; + } } } else { for (Template temp : handler.buildTemplates(s)) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |