From: <chr...@us...> - 2012-05-09 15:28:37
|
Revision: 3698 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3698&view=rev Author: christinaunger Date: 2012-05-09 15:28:25 +0000 (Wed, 09 May 2012) Log Message: ----------- [tbsl] repaired parser/templator shortcomings (and probably broke something else ;) Modified Paths: -------------- trunk/components-ext/pom.xml trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserConstants.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex Added Paths: ----------- trunk/components-ext/src/main/javacc/ trunk/components-ext/src/main/javacc/DRSParser.jj trunk/components-ext/src/main/javacc/DUDE_Parser.jj trunk/components-ext/src/main/javacc/LTAG_Parser.jj Modified: trunk/components-ext/pom.xml =================================================================== --- trunk/components-ext/pom.xml 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/pom.xml 2012-05-09 15:28:25 UTC (rev 3698) @@ -165,7 +165,22 @@ </dependencies> <build> <plugins> + <!-- <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>javacc-maven-plugin</artifactId> + <version>2.6</version> + <executions> + <execution> + <id>javacc</id> + <goals> + <goal>javacc</goal> + </goals> + </execution> + </executions> + </plugin> + --> + <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-surefire-plugin</artifactId> <configuration> Modified: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-05-09 15:28:25 UTC (rev 3698) @@ -236,7 +236,7 @@ Pattern quotePattern2 = Pattern.compile("(``/``((.*)_)''/'')"); Pattern nnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+))/NNP[S]?(\\W|$)"); Pattern nnPattern = Pattern.compile("\\s?((\\w+)/NN[S]?\\s(\\w+))/NN[S]?(\\W|$)"); - Pattern nnnnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?)\\s(\\w+)/NN[S]?(\\W|$)"); + Pattern nnnnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+)/NN[S]?)(\\W|$)"); m = quotePattern1.matcher(flat); while (m.find()) { @@ -265,7 +265,7 @@ } m = nnnnpPattern.matcher(flat); while (m.find()) { - flat = flat.replaceFirst(m.group(1),m.group(2) + "/JJ"); + flat = flat.replaceFirst(m.group(1),m.group(2) + "_" + m.group(3) + "/NNP" + m.group(4)); m = nnnnpPattern.matcher(flat); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-05-09 15:28:25 UTC (rev 3698) @@ -209,7 +209,7 @@ jj_consume_token(2); referent = dr(); jj_consume_token(2); - word = jj_consume_token(WORD); + word = word(); jj_consume_token(2); type = Type(); jj_consume_token(8); @@ -237,7 +237,7 @@ type.setResultType(result); {if (true) return type;} } else if (jj_2_15(2)) { - word = jj_consume_token(WORD); + word = word(); ElementaryType type=null; if 
(word.toString().equals("e")) type = new ElementaryType(ElemType.e); @@ -344,7 +344,7 @@ DRS drs1; DRS drs2; if (jj_2_29(2)) { - predicate = jj_consume_token(WORD); + predicate = word(); jj_consume_token(10); dr_list = DR_List(); jj_consume_token(8); @@ -476,9 +476,9 @@ Token type; SlotType slottype = null; List<String> words = null; - ref = jj_consume_token(WORD); + ref = word(); jj_consume_token(14); - type = jj_consume_token(WORD); + type = word(); jj_consume_token(14); if (jj_2_35(2)) { words = Word_List(); @@ -502,7 +502,7 @@ final public List<String> Word_List() throws ParseException { Token word; List<String> words = null; - word = jj_consume_token(WORD); + word = word(); if (jj_2_36(2)) { jj_consume_token(15); words = Word_List(); @@ -522,19 +522,31 @@ final public Token dr() throws ParseException { Token t; if (jj_2_37(2)) { - t = jj_consume_token(WORD); + t = jj_consume_token(A); } else if (jj_2_38(2)) { - t = jj_consume_token(DR); - } else if (jj_2_39(2)) { - t = jj_consume_token(QUOTED_STRING); + t = jj_consume_token(C); } else { jj_consume_token(-1); throw new ParseException(); } - {if (true) return t;} + {if (true) return t;} throw new Error("Missing return statement in function"); } + final public Token word() throws ParseException { + Token t; + if (jj_2_39(2)) { + t = jj_consume_token(A); + } else if (jj_2_40(2)) { + t = jj_consume_token(B); + } else { + jj_consume_token(-1); + throw new ParseException(); + } + {if (true) return t;} + throw new Error("Missing return statement in function"); + } + private boolean jj_2_1(int xla) { jj_la = xla; jj_lastpos = jj_scanpos = token; try { return !jj_3_1(); } @@ -808,6 +820,13 @@ finally { jj_save(38, xla); } } + private boolean jj_2_40(int xla) { + jj_la = xla; jj_lastpos = jj_scanpos = token; + try { return !jj_3_40(); } + catch(LookaheadSuccess ls) { return true; } + finally { jj_save(39, xla); } + } + private boolean jj_3_12() { if (jj_scan_token(2)) return true; if (jj_3R_1()) return true; @@ -815,12 
+834,12 @@ } private boolean jj_3_16() { - if (jj_3R_7()) return true; + if (jj_3R_8()) return true; return false; } - private boolean jj_3R_12() { - if (jj_scan_token(WORD)) return true; + private boolean jj_3R_13() { + if (jj_3R_7()) return true; Token xsp; xsp = jj_scanpos; if (jj_3_36()) jj_scanpos = xsp; @@ -834,7 +853,7 @@ } private boolean jj_3_32() { - if (jj_3R_10()) return true; + if (jj_3R_11()) return true; return false; } @@ -848,13 +867,8 @@ return false; } - private boolean jj_3_39() { - if (jj_scan_token(QUOTED_STRING)) return true; - return false; - } - private boolean jj_3R_1() { - if (jj_3R_10()) return true; + if (jj_3R_11()) return true; return false; } @@ -865,38 +879,58 @@ private boolean jj_3_31() { if (jj_scan_token(13)) return true; - if (jj_3R_10()) return true; + if (jj_3R_11()) return true; return false; } - private boolean jj_3R_10() { + private boolean jj_3R_11() { if (jj_scan_token(LABEL)) return true; if (jj_scan_token(11)) return true; return false; } - private boolean jj_3_38() { - if (jj_scan_token(DR)) return true; + private boolean jj_3_40() { + if (jj_scan_token(B)) return true; return false; } - private boolean jj_3R_15() { - if (jj_scan_token(WORD)) return true; + private boolean jj_3R_16() { + if (jj_3R_7()) return true; if (jj_scan_token(14)) return true; return false; } + private boolean jj_3_38() { + if (jj_scan_token(C)) return true; + return false; + } + private boolean jj_3_23() { if (jj_scan_token(MOST)) return true; return false; } + private boolean jj_3_39() { + if (jj_scan_token(A)) return true; + return false; + } + private boolean jj_3_30() { - if (jj_3R_9()) return true; + if (jj_3R_10()) return true; if (jj_scan_token(6)) return true; return false; } + private boolean jj_3R_7() { + Token xsp; + xsp = jj_scanpos; + if (jj_3_39()) { + jj_scanpos = xsp; + if (jj_3_40()) return true; + } + return false; + } + private boolean jj_3_34() { if (jj_scan_token(2)) return true; if (jj_3R_4()) return true; @@ -904,29 
+938,26 @@ } private boolean jj_3_37() { - if (jj_scan_token(WORD)) return true; + if (jj_scan_token(A)) return true; return false; } - private boolean jj_3R_9() { + private boolean jj_3R_10() { Token xsp; xsp = jj_scanpos; if (jj_3_37()) { jj_scanpos = xsp; - if (jj_3_38()) { - jj_scanpos = xsp; - if (jj_3_39()) return true; + if (jj_3_38()) return true; } - } return false; } private boolean jj_3R_3() { - if (jj_3R_14()) return true; + if (jj_3R_15()) return true; return false; } - private boolean jj_3R_16() { + private boolean jj_3R_17() { Token xsp; xsp = jj_scanpos; if (jj_3_29()) { @@ -943,19 +974,19 @@ } private boolean jj_3_29() { - if (jj_scan_token(WORD)) return true; + if (jj_3R_7()) return true; if (jj_scan_token(10)) return true; return false; } private boolean jj_3_19() { if (jj_scan_token(2)) return true; - if (jj_3R_8()) return true; + if (jj_3R_9()) return true; return false; } private boolean jj_3_15() { - if (jj_scan_token(WORD)) return true; + if (jj_3R_7()) return true; return false; } @@ -966,7 +997,7 @@ } private boolean jj_3R_4() { - if (jj_3R_15()) return true; + if (jj_3R_16()) return true; return false; } @@ -1009,7 +1040,7 @@ private boolean jj_3_33() { if (jj_scan_token(2)) return true; - if (jj_3R_11()) return true; + if (jj_3R_12()) return true; return false; } @@ -1031,8 +1062,8 @@ return false; } - private boolean jj_3R_8() { - if (jj_3R_16()) return true; + private boolean jj_3R_9() { + if (jj_3R_17()) return true; return false; } @@ -1051,7 +1082,7 @@ return false; } - private boolean jj_3R_13() { + private boolean jj_3R_14() { if (jj_scan_token(10)) return true; if (jj_scan_token(LABEL)) return true; return false; @@ -1063,13 +1094,13 @@ return false; } - private boolean jj_3R_11() { - if (jj_3R_9()) return true; + private boolean jj_3R_12() { + if (jj_3R_10()) return true; return false; } private boolean jj_3_17() { - if (jj_3R_8()) return true; + if (jj_3R_9()) return true; return false; } @@ -1091,19 +1122,19 @@ } private 
boolean jj_3_35() { - if (jj_3R_12()) return true; + if (jj_3R_13()) return true; return false; } private boolean jj_3_36() { if (jj_scan_token(15)) return true; - if (jj_3R_12()) return true; + if (jj_3R_13()) return true; return false; } private boolean jj_3_18() { if (jj_scan_token(2)) return true; - if (jj_3R_7()) return true; + if (jj_3R_8()) return true; return false; } @@ -1112,7 +1143,7 @@ return false; } - private boolean jj_3R_14() { + private boolean jj_3R_15() { Token xsp; xsp = jj_scanpos; if (jj_3_5()) { @@ -1137,12 +1168,12 @@ } private boolean jj_3R_2() { - if (jj_3R_13()) return true; + if (jj_3R_14()) return true; return false; } - private boolean jj_3R_7() { - if (jj_3R_9()) return true; + private boolean jj_3R_8() { + if (jj_3R_10()) return true; Token xsp; xsp = jj_scanpos; if (jj_3_18()) jj_scanpos = xsp; @@ -1173,7 +1204,7 @@ private static void jj_la1_init_1() { jj_la1_1 = new int[] {}; } - final private JJCalls[] jj_2_rtns = new JJCalls[39]; + final private JJCalls[] jj_2_rtns = new JJCalls[40]; private boolean jj_rescan = false; private int jj_gc = 0; @@ -1357,7 +1388,7 @@ /** Generate ParseException. 
*/ public ParseException generateParseException() { jj_expentries.clear(); - boolean[] la1tokens = new boolean[33]; + boolean[] la1tokens = new boolean[34]; if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; @@ -1374,7 +1405,7 @@ } } } - for (int i = 0; i < 33; i++) { + for (int i = 0; i < 34; i++) { if (la1tokens[i]) { jj_expentry = new int[1]; jj_expentry[0] = i; @@ -1401,7 +1432,7 @@ private void jj_rescan_token() { jj_rescan = true; - for (int i = 0; i < 39; i++) { + for (int i = 0; i < 40; i++) { try { JJCalls p = jj_2_rtns[i]; do { @@ -1447,6 +1478,7 @@ case 36: jj_3_37(); break; case 37: jj_3_38(); break; case 38: jj_3_39(); break; + case 39: jj_3_40(); break; } } p = p.next; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-05-09 15:28:25 UTC (rev 3698) @@ -206,7 +206,7 @@ Type type; } { - "(" label=<LABEL> "," referent = dr() "," word=<WORD> "," type = Type() ")" + "(" label=<LABEL> "," referent = dr() "," word=word() "," type = Type() ")" { Argument argument = new Argument(); argument.setLabel(new Label(label.toString())); @@ -234,7 +234,7 @@ | - word = <WORD> + word = word() { ElementaryType type=null; if (word.toString().equals("e")) @@ -330,7 +330,7 @@ } { - predicate=<WORD> "(" dr_list=DR_List() ")" + predicate=word() "(" dr_list=DR_List() ")" { Simple_DRS_Condition condition; @@ -443,7 +443,7 @@ List<String> words = null; } { - ref = <WORD> "/" type = <WORD> "/" (words = Word_List())? + ref = word() "/" type = word() "/" (words = Word_List())? { if (words == null) { @@ -465,7 +465,7 @@ List<String> words = null; } { - word = <WORD> ("^" words = Word_List())? 
+ word = word() ("^" words = Word_List())? { if (words == null) { @@ -498,14 +498,20 @@ TOKEN: {<LABEL: "l"(["0"-"9"])+>} -TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":","0"-"9"])+>} +//TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":","0"-"9"])+>} +//TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} -TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} +TOKEN: {<A: (["a"-"z","A"-"Z","0"-"9"])+>} +TOKEN: {<B: (["a"-"z","A"-"Z","_",".","#","0"-"9"])+":"(["a"-"z","A"-"Z","_",".","#","0"-"9"])+>} +TOKEN: {<C: ["?","!"](["a"-"z","A"-"Z","0"-"9"])+>} +Token dr() : { Token t; }{ (t=<A> | t=<C>) { return t; } } +Token word() : { Token t; }{ (t=<A> | t=<B>) { return t; } } + +// Token label() : { Token t; }{ (t=<A> | t=<B> | t=<LABEL>) { return t; } } + TOKEN: {<QUOTED_STRING: "\'" (~["\'"])+ "\'" >} -Token dr() : { Token t; }{ (t=<WORD> | t=<DR> | t=<QUOTED_STRING>) { return t; } } - SKIP : { " " | "\t" | "\n" | "\r" } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserConstants.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserConstants.java 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserConstants.java 2012-05-09 15:28:25 UTC (rev 3698) @@ -31,11 +31,13 @@ /** RegularExpression Id. */ int LABEL = 25; /** RegularExpression Id. */ - int WORD = 26; + int A = 26; /** RegularExpression Id. */ - int DR = 27; + int B = 27; /** RegularExpression Id. */ - int QUOTED_STRING = 28; + int C = 28; + /** RegularExpression Id. */ + int QUOTED_STRING = 29; /** Lexical state. 
*/ int DEFAULT = 0; @@ -68,8 +70,9 @@ "\"NO\"", "\"HOWMANY\"", "<LABEL>", - "<WORD>", - "<DR>", + "<A>", + "<B>", + "<C>", "<QUOTED_STRING>", "\" \"", "\"\\t\"", Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java 2012-05-09 15:28:25 UTC (rev 3698) @@ -23,18 +23,15 @@ switch (pos) { case 0: - if ((active0 & 0x800L) != 0L) - { - jjmatchedKind = 26; - return 2; - } if ((active0 & 0x1ff2280L) != 0L) { jjmatchedKind = 26; - return 8; + return 4; } return -1; case 1: + if ((active0 & 0x802000L) != 0L) + return 4; if ((active0 & 0x17f0280L) != 0L) { if (jjmatchedPos != 1) @@ -42,58 +39,56 @@ jjmatchedKind = 26; jjmatchedPos = 1; } - return 8; + return 4; } - if ((active0 & 0x802000L) != 0L) - return 8; return -1; case 2: if ((active0 & 0x17f0280L) != 0L) { jjmatchedKind = 26; jjmatchedPos = 2; - return 8; + return 4; } if ((active0 & 0x2000L) != 0L) - return 8; + return 4; return -1; case 3: if ((active0 & 0x1190200L) != 0L) { jjmatchedKind = 26; jjmatchedPos = 3; - return 8; + return 4; } if ((active0 & 0x660000L) != 0L) - return 8; + return 4; return -1; case 4: + if ((active0 & 0x10000L) != 0L) + return 4; if ((active0 & 0x1180200L) != 0L) { jjmatchedKind = 26; jjmatchedPos = 4; - return 8; + return 4; } - if ((active0 & 0x10000L) != 0L) - return 8; return -1; case 5: if ((active0 & 0x1180000L) != 0L) { jjmatchedKind = 26; jjmatchedPos = 5; - return 8; + return 4; } return -1; case 6: + if ((active0 & 0x1080000L) != 0L) + return 4; if ((active0 & 0x100000L) != 0L) { jjmatchedKind = 26; jjmatchedPos = 6; - return 8; + return 4; } - if ((active0 & 0x1080000L) != 0L) - return 8; 
return -1; default : return -1; @@ -217,7 +212,7 @@ return jjMoveStringLiteralDfa3_0(active0, 0x20000L); case 84: if ((active0 & 0x2000L) != 0L) - return jjStartNfaWithStates_0(2, 13, 8); + return jjStartNfaWithStates_0(2, 13, 4); break; case 87: return jjMoveStringLiteralDfa3_0(active0, 0x1000000L); @@ -247,7 +242,7 @@ break; case 69: if ((active0 & 0x40000L) != 0L) - return jjStartNfaWithStates_0(3, 18, 8); + return jjStartNfaWithStates_0(3, 18, 4); break; case 76: return jjMoveStringLiteralDfa4_0(active0, 0x100000L); @@ -257,15 +252,15 @@ return jjMoveStringLiteralDfa4_0(active0, 0x10000L); case 84: if ((active0 & 0x20000L) != 0L) - return jjStartNfaWithStates_0(3, 17, 8); + return jjStartNfaWithStates_0(3, 17, 4); break; case 87: if ((active0 & 0x200000L) != 0L) - return jjStartNfaWithStates_0(3, 21, 8); + return jjStartNfaWithStates_0(3, 21, 4); break; case 89: if ((active0 & 0x400000L) != 0L) - return jjStartNfaWithStates_0(3, 22, 8); + return jjStartNfaWithStates_0(3, 22, 4); break; case 112: return jjMoveStringLiteralDfa4_0(active0, 0x200L); @@ -293,7 +288,7 @@ return jjMoveStringLiteralDfa5_0(active0, 0x80000L); case 89: if ((active0 & 0x10000L) != 0L) - return jjStartNfaWithStates_0(4, 16, 8); + return jjStartNfaWithStates_0(4, 16, 4); break; case 101: return jjMoveStringLiteralDfa5_0(active0, 0x200L); @@ -343,11 +338,11 @@ return jjMoveStringLiteralDfa7_0(active0, 0x100000L); case 84: if ((active0 & 0x80000L) != 0L) - return jjStartNfaWithStates_0(6, 19, 8); + return jjStartNfaWithStates_0(6, 19, 4); break; case 89: if ((active0 & 0x1000000L) != 0L) - return jjStartNfaWithStates_0(6, 24, 8); + return jjStartNfaWithStates_0(6, 24, 4); break; default : break; @@ -367,7 +362,7 @@ { case 84: if ((active0 & 0x100000L) != 0L) - return jjStartNfaWithStates_0(7, 20, 8); + return jjStartNfaWithStates_0(7, 20, 4); break; default : break; @@ -388,7 +383,7 @@ private int jjMoveNfa_0(int startState, int curPos) { int startsAt = 0; - jjnewStateCnt = 8; + jjnewStateCnt 
= 11; int i = 1; jjstateSet[0] = startState; int kind = 0x7fffffff; @@ -403,33 +398,27 @@ { switch(jjstateSet[--i]) { - case 0: - if ((0x3ff400000000000L & l) != 0L) + case 4: + if ((0x3ff400800000000L & l) != 0L) + jjCheckNAddTwoStates(3, 4); + else if (curChar == 58) + jjCheckNAdd(5); + if ((0x3ff000000000000L & l) != 0L) { - if (kind > 27) - kind = 27; - jjCheckNAdd(4); - } - else if (curChar == 39) - jjCheckNAdd(6); - else if ((0x8000000200000000L & l) != 0L) - jjCheckNAdd(4); - if ((0x400400800000000L & l) != 0L) - { if (kind > 26) kind = 26; jjCheckNAdd(2); } break; - case 8: - if ((0x3ff400000000000L & l) != 0L) + case 0: + if ((0x3ff400800000000L & l) != 0L) + jjCheckNAddTwoStates(3, 4); + else if (curChar == 39) + jjCheckNAdd(9); + else if ((0x8000000200000000L & l) != 0L) + jjCheckNAdd(7); + if ((0x3ff000000000000L & l) != 0L) { - if (kind > 27) - kind = 27; - jjCheckNAdd(4); - } - if ((0x400400800000000L & l) != 0L) - { if (kind > 26) kind = 26; jjCheckNAdd(2); @@ -443,35 +432,46 @@ jjstateSet[jjnewStateCnt++] = 1; break; case 2: - if ((0x400400800000000L & l) == 0L) + if ((0x3ff000000000000L & l) == 0L) break; if (kind > 26) kind = 26; jjCheckNAdd(2); break; case 3: - if ((0x8000000200000000L & l) != 0L) - jjCheckNAdd(4); + if ((0x3ff400800000000L & l) != 0L) + jjCheckNAddTwoStates(3, 4); break; - case 4: - if ((0x3ff400000000000L & l) == 0L) + case 5: + if ((0x3ff400800000000L & l) == 0L) break; if (kind > 27) kind = 27; - jjCheckNAdd(4); + jjCheckNAdd(5); break; - case 5: - if (curChar == 39) - jjCheckNAdd(6); - break; case 6: - if ((0xffffff7fffffffffL & l) != 0L) - jjCheckNAddTwoStates(6, 7); + if ((0x8000000200000000L & l) != 0L) + jjCheckNAdd(7); break; case 7: - if (curChar == 39 && kind > 28) + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 28) kind = 28; + jjCheckNAdd(7); break; + case 8: + if (curChar == 39) + jjCheckNAdd(9); + break; + case 9: + if ((0xffffff7fffffffffL & l) != 0L) + jjCheckNAddTwoStates(9, 10); + break; + case 10: 
+ if (curChar == 39 && kind > 29) + kind = 29; + break; default : break; } } while(i != startsAt); @@ -483,51 +483,54 @@ { switch(jjstateSet[--i]) { - case 0: + case 4: if ((0x7fffffe87fffffeL & l) != 0L) + jjCheckNAddTwoStates(3, 4); + if ((0x7fffffe07fffffeL & l) != 0L) { if (kind > 26) kind = 26; jjCheckNAdd(2); } - if ((0x7fffffe07fffffeL & l) != 0L) - { - if (kind > 27) - kind = 27; - jjCheckNAdd(4); - } - if (curChar == 108) - jjstateSet[jjnewStateCnt++] = 1; break; - case 8: + case 0: if ((0x7fffffe87fffffeL & l) != 0L) + jjCheckNAddTwoStates(3, 4); + if ((0x7fffffe07fffffeL & l) != 0L) { if (kind > 26) kind = 26; jjCheckNAdd(2); } - if ((0x7fffffe07fffffeL & l) != 0L) - { - if (kind > 27) - kind = 27; - jjCheckNAdd(4); - } + if (curChar == 108) + jjstateSet[jjnewStateCnt++] = 1; break; case 2: - if ((0x7fffffe87fffffeL & l) == 0L) + if ((0x7fffffe07fffffeL & l) == 0L) break; if (kind > 26) kind = 26; jjCheckNAdd(2); break; - case 4: - if ((0x7fffffe07fffffeL & l) == 0L) + case 3: + if ((0x7fffffe87fffffeL & l) != 0L) + jjCheckNAddTwoStates(3, 4); + break; + case 5: + if ((0x7fffffe87fffffeL & l) == 0L) break; if (kind > 27) kind = 27; - jjCheckNAdd(4); + jjstateSet[jjnewStateCnt++] = 5; break; - case 6: + case 7: + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 28) + kind = 28; + jjstateSet[jjnewStateCnt++] = 7; + break; + case 9: jjAddStates(0, 1); break; default : break; @@ -542,7 +545,7 @@ { switch(jjstateSet[--i]) { - case 6: + case 9: if ((jjbitVec0[i2] & l2) != 0L) jjAddStates(0, 1); break; @@ -557,14 +560,14 @@ kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 8 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 11 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } } } static final int[] jjnextStates = { - 6, 7, + 9, 10, }; /** Token literal values. 
*/ @@ -573,21 +576,21 @@ "\163\143\157\160\145\50", "\50", "\72\133", "\174", "\116\117\124", "\57", "\136", "\105\126\105\122\131", "\115\117\123\124", "\123\117\115\105", "\124\110\105\115\117\123\124", "\124\110\105\114\105\101\123\124", "\101\106\105\127", "\115\101\116\131", "\116\117", -"\110\117\127\115\101\116\131", null, null, null, null, null, null, null, null, }; +"\110\117\127\115\101\116\131", null, null, null, null, null, null, null, null, null, }; /** Lexer state names. */ public static final String[] lexStateNames = { "DEFAULT", }; static final long[] jjtoToken = { - 0x1fffffffL, + 0x3fffffffL, }; static final long[] jjtoSkip = { - 0x1e0000000L, + 0x3c0000000L, }; protected SimpleCharStream input_stream; -private final int[] jjrounds = new int[8]; -private final int[] jjstateSet = new int[16]; +private final int[] jjrounds = new int[11]; +private final int[] jjstateSet = new int[22]; protected char curChar; /** Constructor. */ public DUDE_ParserTokenManager(SimpleCharStream stream){ @@ -614,7 +617,7 @@ { int i; jjround = 0x80000001; - for (i = 8; i-- > 0;) + for (i = 11; i-- > 0;) jjrounds[i] = 0x80000000; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2012-05-09 15:28:25 UTC (rev 3698) @@ -35,6 +35,14 @@ String type = "UNSPEC"; String slot; + /* PRONOUN HACK */ + if (pos.equals("PRP") || pos.equals("PRP$")) { + String[] pronEntry = {token, + "(DET DET:'" + token.toLowerCase() + "')", + "<x,l1,e,[ l1:[ x | ] ],[],[],[]>"}; + result.add(pronEntry); + } + /* NOUNS */ if (equalsOneOf(pos,noun)) { @@ -329,12 +337,12 @@ slot = "SLOT_" + token + "/PROPERTY/"; String[] npAdjunct = {token, "(NP NP* 
(PP P:'" + token.toLowerCase() + "' DP[pobj]))", - // "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + - "<x,l1,<e,t>,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; + "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + + " ;; <x,l1,<e,t>,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; String[] vpAdjunct = {token, "(VP VP* (PP P:'" + token.toLowerCase() + "' DP[pobj]))", - // "<x,l1,t,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + - "<x,l1,t,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; + "<x,l1,t,[ l1:[ | SLOT_" + token + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],["+slot+"]>" + + " ;; <x,l1,t,[ l1:[ | empty(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[]>"}; result.add(npAdjunct); result.add(vpAdjunct); } Added: trunk/components-ext/src/main/javacc/DRSParser.jj =================================================================== --- trunk/components-ext/src/main/javacc/DRSParser.jj (rev 0) +++ trunk/components-ext/src/main/javacc/DRSParser.jj 2012-05-09 15:28:25 UTC (rev 3698) @@ -0,0 +1,259 @@ + +options { + LOOKAHEAD = 2; + CHOICE_AMBIGUITY_CHECK = 2; + OTHER_AMBIGUITY_CHECK = 1; + STATIC = false; + DEBUG_PARSER = false; + DEBUG_LOOKAHEAD = false; + DEBUG_TOKEN_MANAGER = false; + ERROR_REPORTING = true; + JAVA_UNICODE_ESCAPE = false; + UNICODE_INPUT = false; + IGNORE_CASE = false; + USER_TOKEN_MANAGER = false; + USER_CHAR_STREAM = false; + BUILD_PARSER = true; + BUILD_TOKEN_MANAGER = true; + SANITY_CHECK = true; + FORCE_LA_CHECK = false; +} + +PARSER_BEGIN(DRSParser) + +package org.dllearner.algorithm.tbsl.sem.drs.reader; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.dllearner.algorithm.tbsl.sem.drs.*; +import org.dllearner.algorithm.tbsl.sem.util.Label; + +public class DRSParser { + + /** Main 
entry point. */ + public static void main(String args[]) throws ParseException { + DRSParser parser = new DRSParser(System.in); + parser.Input(); + } + +} + +PARSER_END(DRSParser) + +/** Root production. */ +void Input() : +{} +{ + DRS() <EOF> +} + +/** DRS */ +DRS DRS() : +{ + Set<DiscourseReferent> dr_set = null; + Set<DRS_Condition> conditions = null; + DRS drs; + Token label = null; + +} +{ + (label=<LABEL> ":")? "[" (dr_set=DR_Set())? "|" (conditions=Condition_List())? "]" + { + if (dr_set == null) + { + dr_set = new HashSet<DiscourseReferent>(); + } + drs = new DRS(); + if (label != null) + { + drs.setLabel(label.toString()); + } + drs.setDiscourseReferents(dr_set); + if (conditions != null) + { + drs.setDRSConditions(conditions); + } + return drs; + } +} + +/** DR_Set*/ +Set<DiscourseReferent> DR_Set() : +{ + Token dr; + Set<DiscourseReferent> dr_set=null; +} +{ + dr = dr() ("," dr_set=DR_Set())? + { + if (dr_set == null) + { + dr_set= new HashSet<DiscourseReferent>(); + } + if (dr.toString().startsWith("?")) + { + dr_set.add(new DiscourseReferent(dr.toString().substring(1),true,false)); + } + else if (dr.toString().startsWith("!")) + { + dr_set.add(new DiscourseReferent(dr.toString().substring(1),false,true)); + } + else + { + dr_set.add(new DiscourseReferent(dr.toString(),false,false)); + } + return dr_set; + } +} + +Set<DRS_Condition> Condition_List() : +{ + DRS_Condition condition= null; + Set<DRS_Condition> conditions = null; +} +{ + condition=Condition() ("," conditions=Condition_List())? 
+ { + if (conditions == null) + { + conditions = new HashSet<DRS_Condition>(); + } + conditions.add(condition); + return conditions; + } +} + +DRS_Condition Condition() : +{ + List<DiscourseReferent> dr_list; + Token dr1; + Token dr2; + Token dr; + Token predicate; + Token quantifier; + DRS drs1; + DRS drs2; +} +{ + + predicate=<WORD> "(" dr_list=DR_List() ")" + { + Simple_DRS_Condition condition; + + condition = new Simple_DRS_Condition(); + condition.setPredicate(predicate.toString()); + condition.setArguments(dr_list); + return condition; + } + + | + + dr1 = dr() "=" dr2 = dr() + { + Simple_DRS_Condition condition; + + condition = new Simple_DRS_Condition(); + condition.setPredicate("equal"); + condition.addArgument(new DiscourseReferent(dr1.toString())); + condition.addArgument(new DiscourseReferent(dr2.toString())); + return condition; + } + + | + + "NOT" drs1=DRS() + { + Negated_DRS drs = new Negated_DRS(); + drs.setDRS(drs1); + return drs; + } + + | + + drs1=DRS() (quantifier=<EVERY> | quantifier=<SOME> | quantifier=<AFEW> | quantifier=<MOST> | quantifier=<THEMOST> | quantifier=<THELEAST> | + quantifier=<HOWMANY> | quantifier=<MANY> | quantifier=<NO>) dr=dr() drs2=DRS() + { + Complex_DRS_Condition drs; + drs = new Complex_DRS_Condition(); + drs.setRestrictor(drs1); + drs.setScope(drs2); + drs.setReferent(new DiscourseReferent(dr.toString())); + + if (quantifier.toString().equals("EVERY")) {drs.setQuantifier(DRS_Quantifier.EVERY);} + if (quantifier.toString().equals("SOME")) {drs.setQuantifier(DRS_Quantifier.SOME);} + if (quantifier.toString().equals("MOST")) {drs.setQuantifier(DRS_Quantifier.MOST);} + if (quantifier.toString().equals("THEMOST")) {drs.setQuantifier(DRS_Quantifier.THEMOST);} + if (quantifier.toString().equals("THELEAST")) {drs.setQuantifier(DRS_Quantifier.THELEAST);} + if (quantifier.toString().equals("AFEW")) {drs.setQuantifier(DRS_Quantifier.FEW);} + if (quantifier.toString().equals("MANY")) {drs.setQuantifier(DRS_Quantifier.MANY);} + if 
(quantifier.toString().equals("HOWMANY")) {drs.setQuantifier(DRS_Quantifier.HOWMANY);} + if (quantifier.toString().equals("NO")) {drs.setQuantifier(DRS_Quantifier.NO);} + + return drs; + + } +} + +/** DR_List*/ +List<DiscourseReferent> DR_List() : +{ + Token dr; + List<DiscourseReferent> dr_list=null; +} +{ + dr = dr() ("," dr_list=DR_List())? + { + if (dr_list == null) + { + dr_list= new ArrayList<DiscourseReferent>(); + } + + if (dr.toString().startsWith("?")) { + dr_list.add(0,new DiscourseReferent(dr.toString().substring(1),true,false)); + } + else if (dr.toString().startsWith("?")) { + dr_list.add(0,new DiscourseReferent(dr.toString().substring(1),false,true)); + } + else { + dr_list.add(0,new DiscourseReferent(dr.toString(),false,false)); + } + + return dr_list; + } +} + + +TOKEN: {<EVERY: "EVERY">} + +TOKEN: {<MOST: "MOST">} + +TOKEN: {<SOME: "SOME">} + +TOKEN: {<THEMOST: "THEMOST">} + +TOKEN: {<THELEAST: "THELEAST">} + +TOKEN: {<AFEW: "AFEW">} + +TOKEN: {<MANY: "MANY">} + +TOKEN: {<NO: "NO">} + +TOKEN: {<HOWMANY: "HOWMANY">} + +TOKEN: {<LABEL: "l"(["0"-"9"])+>} + +TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":"])+>} + +TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} + +TOKEN: {<QUOTED_STRING: "\'" (~["\'"])+ "\'" >} + +Token dr() : { Token t; }{ (t=<WORD> | t=<DR> | t=<QUOTED_STRING>) { return t; } } + +SKIP : { " " | "\t" | "\n" | "\r" } + + Property changes on: trunk/components-ext/src/main/javacc/DRSParser.jj ___________________________________________________________________ Added: svn:executable + * Copied: trunk/components-ext/src/main/javacc/DUDE_Parser.jj (from rev 3694, trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj) =================================================================== --- trunk/components-ext/src/main/javacc/DUDE_Parser.jj (rev 0) +++ trunk/components-ext/src/main/javacc/DUDE_Parser.jj 2012-05-09 15:28:25 UTC (rev 3698) @@ -0,0 +1,513 @@ + +options { + LOOKAHEAD = 2; + 
CHOICE_AMBIGUITY_CHECK = 2; + OTHER_AMBIGUITY_CHECK = 1; + STATIC = false; + DEBUG_PARSER = false; + DEBUG_LOOKAHEAD = false; + DEBUG_TOKEN_MANAGER = false; + ERROR_REPORTING = true; + JAVA_UNICODE_ESCAPE = false; + UNICODE_INPUT = false; + IGNORE_CASE = false; + USER_TOKEN_MANAGER = false; + USER_CHAR_STREAM = false; + BUILD_PARSER = true; + BUILD_TOKEN_MANAGER = true; + SANITY_CHECK = true; + FORCE_LA_CHECK = false; +} + +PARSER_BEGIN(DUDE_Parser) + +package org.dllearner.algorithm.tbsl.sem.dudes.reader; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.dllearner.algorithm.tbsl.sem.drs.*; +import org.dllearner.algorithm.tbsl.sem.dudes.data.*; +import org.dllearner.algorithm.tbsl.sem.util.*; +import org.dllearner.algorithm.tbsl.sparql.Slot; +import org.dllearner.algorithm.tbsl.sparql.SlotType; + +public class DUDE_Parser { + + /** Main entry point. */ + public static void main(String args[]) throws ParseException { + DUDE_Parser parser = new DUDE_Parser(System.in); + parser.Input(); + } + +} + +PARSER_END(DUDE_Parser) + +/** Root production. */ +void Input() : +{} +{ + DUDE() <EOF> +} + +/** DUDE */ +Dude DUDE() : +{ + Token referent; + Token label; + Type type; + List<DRS> drs_list = null; + List<DominanceConstraint> constraints = null; + List<Argument> arg_list = null; + List<Slot> slots = null; +} +{ + "<" referent = dr() "," label=<LABEL> "," type=Type() "," "[" (drs_list=DRS_List())? "]" "," "[" (arg_list = Arg_List())? "]" + "," "[" (constraints = DC_List())? "]" "," "[" (slots = Slot_List())? 
"]" ">" + { + Dude dude = new Dude(); + dude.setReferent(referent.toString()); + dude.setType(type); + if (drs_list != null) dude.setComponents(drs_list); + dude.setLabel(new Label(label.toString())); + if (arg_list != null) dude.setArguments(arg_list); + if (constraints != null) dude.setDominanceConstraints(constraints); + if (slots != null) dude.setSlots(slots); + return dude; + } +} + +DominanceConstraint DominanceConstraint() : +{ + Label label1; + Label label2; + Token domType; + DominanceConstraint dc = null; +} +{ + label1 = Label() "<" label2 = Label() + { + dc = new DominanceConstraint(label1,label2); + return dc; + } + + | + + label1 = Label() ">" label2 = Label() + { + dc = new DominanceConstraint(label2,label1); + return dc; + } + + | + + label1 = Label() "=" label2 = Label() + { + dc = new DominanceConstraint(label1,label2); + dc.setType(DomType.equal); + return dc; + } +} + + +Label Label() : +{ + Token label; +} +{ + + label = <LABEL> + { + return new Label(label.toString()); + } + + | + + "res(" label = <LABEL> ")" + { + return new Label(label.toString(),Position.res); + } + + | + + "scope(" label = <LABEL> ")" + { + return new Label(label.toString(),Position.scope); + } +} + + +List<DominanceConstraint> DC_List() : +{ +List<DominanceConstraint> dc_list = null; +DominanceConstraint dc = null; +} +{ + dc = DominanceConstraint() ("," dc_list = DC_List())? + { + if (dc_list == null) + { + dc_list = new ArrayList<DominanceConstraint>(); + } + + dc_list.add(0,dc); + return dc_list; + } + + +} + + +List<DRS> DRS_List() : +{ + DRS drs; + List<DRS> drs_list = null; +} +{ + drs = DRS() ("," drs_list = DRS_List())? + { + if (drs_list == null) + { + drs_list = new ArrayList<DRS>(); + } + + drs_list.add(0,drs); + return drs_list; + } +} + +List<Argument> Arg_List() : +{ + Argument argument; + List<Argument> arg_list = null; +} +{ + argument = Argument() ("," arg_list = Arg_List())? 
+ { + if (arg_list == null) + { + arg_list = new ArrayList<Argument>(); + } + + arg_list.add(0,argument); + return arg_list; + } + +} + +Argument Argument() : +{ + Token label; + Token word; + Token referent; + Type type; +} +{ + "(" label=<LABEL> "," referent = dr() "," word=word() "," type = Type() ")" + { + Argument argument = new Argument(); + argument.setLabel(new Label(label.toString())); + argument.setReferent(referent.toString()); + argument.setAnchor(word.toString()); + argument.setType(type); + return argument; + } +} + +Type Type() : +{ + Type argument; + Type result; + Token word; +} +{ + "<" argument = Type() "," result = Type() ">" + { + CompositeType type = new CompositeType(); + type.setArgumentType(argument); + type.setResultType(result); + return type; + } + + | + + word = word() + { + ElementaryType type=null; + if (word.toString().equals("e")) + type = new ElementaryType(ElemType.e); + if (word.toString().equals("t")) + type = new ElementaryType(ElemType.t); + return type; + } +} + + +/** DRS */ +DRS DRS() : +{ + Set<DiscourseReferent> dr_set = null; + Set<DRS_Condition> conditions = null; + DRS drs; + Token label; + +} +{ + label=<LABEL> ":[" (dr_set=DR_Set())? "|" (conditions=Condition_List())? "]" + { + if (dr_set == null) + { + dr_set = new HashSet<DiscourseReferent>(); + } + drs = new DRS(); + drs.setLabel(label.toString()); + drs.setDiscourseReferents(dr_set); + if (conditions != null) + drs.setDRSConditions(conditions); + return drs; + } +} + +/** DR_Set*/ +Set<DiscourseReferent> DR_Set() : +{ + Token dr; + Set<DiscourseReferent> dr_set=null; +} +{ + dr = dr() ("," dr_set=DR_Set())? 
+ { + if (dr_set == null) + { + dr_set= new HashSet<DiscourseReferent>(); + } + if (dr.toString().startsWith("?")) + { + dr_set.add(new DiscourseReferent(dr.toString().substring(1),true,false)); + } + else if (dr.toString().startsWith("!")) + { + dr_set.add(new DiscourseReferent(dr.toString().substring(1),false,true)); + } + else + { + dr_set.add(new DiscourseReferent(dr.toString(),false,false)); + } + return dr_set; + } +} + +Set<DRS_Condition> Condition_List() : +{ + DRS_Condition condition= null; + Set<DRS_Condition> conditions = null; +} +{ + condition=Condition() ("," conditions=Condition_List())? + { + if (conditions == null) + { + conditions = new HashSet<DRS_Condition>(); + } + conditions.add(condition); + return conditions; + } +} + +DRS_Condition Condition() : +{ + List<DiscourseReferent> dr_list; + Token dr1; + Token dr2; + Token dr; + Token predicate; + Token quantifier; + DRS drs1; + DRS drs2; +} +{ + + predicate=word() "(" dr_list=DR_List() ")" + { + Simple_DRS_Condition condition; + + condition = new Simple_DRS_Condition(); + condition.setPredicate(predicate.toString()); + condition.setArguments(dr_list); + return condition; + } + + | + + dr1 = dr() "=" dr2 = dr() + { + Simple_DRS_Condition condition; + + condition = new Simple_DRS_Condition(); + condition.setPredicate("equal"); + condition.addArgument(new DiscourseReferent(dr1.toString())); + condition.addArgument(new DiscourseReferent(dr2.toString())); + return condition; + } + + | + + "NOT" drs1=DRS() + { + Negated_DRS drs = new Negated_DRS(); + drs.setDRS(drs1); + return drs; + } + + | + + drs1=DRS() (quantifier=<EVERY> | quantifier=<SOME> | quantifier=<AFEW> | quantifier=<MOST> | quantifier=<THEMOST> | quantifier=<THELEAST> | + quantifier=<HOWMANY> | quantifier=<MANY> | quantifier=<NO>) dr=dr() drs2=DRS() + { + Complex_DRS_Condition drs; + drs = new Complex_DRS_Condition(); + drs.setRestrictor(drs1); + drs.setScope(drs2); + drs.setReferent(new DiscourseReferent(dr.toString())); + + if 
(quantifier.toString().equals("EVERY")) {drs.setQuantifier(DRS_Quantifier.EVERY);} + if (quantifier.toString().equals("SOME")) {drs.setQuantifier(DRS_Quantifier.SOME);} + if (quantifier.toString().equals("MOST")) {drs.setQuantifier(DRS_Quantifier.MOST);} + if (quantifier.toString().equals("THEMOST")) {drs.setQuantifier(DRS_Quantifier.THEMOST);} + if (quantifier.toString().equals("THELEAST")) {drs.setQuantifier(DRS_Quantifier.THELEAST);} + if (quantifier.toString().equals("AFEW")) {drs.setQuantifier(DRS_Quantifier.FEW);} + if (quantifier.toString().equals("MANY")) {drs.setQuantifier(DRS_Quantifier.MANY);} + if (quantifier.toString().equals("HOWMANY")) {drs.setQuantifier(DRS_Quantifier.HOWMANY);} + if (quantifier.toString().equals("NO")) {drs.setQuantifier(DRS_Quantifier.NO);} + + return drs; + + } +} + +/** DR_List: parses a comma-separated list of discourse referents and returns them in source order (each referent is inserted at index 0 after the rest of the list has been parsed). A leading "?" or "!" is stripped from the referent name and sets the second or the third DiscourseReferent constructor flag respectively; names without a prefix set neither flag. */ +List<DiscourseReferent> DR_List() : +{ + Token dr; + List<DiscourseReferent> dr_list=null; +} +{ + dr = dr() ("," dr_list=DR_List())? + { + if (dr_list == null) + { + dr_list= new ArrayList<DiscourseReferent>(); + } + + if (dr.toString().startsWith("?")) { + dr_list.add(0,new DiscourseReferent(dr.toString().substring(1),true,false)); + } + else if (dr.toString().startsWith("!")) { + dr_list.add(0,new DiscourseReferent(dr.toString().substring(1),false,true)); + } + else { + dr_list.add(0,new DiscourseReferent(dr.toString(),false,false)); + } + + return dr_list; + } +} + + +List<Slot> Slot_List() : +{ + Slot slot; + List<Slot> slots = null; +} +{ + slot = Slot() ("," slots = Slot_List())? + { + if (slots == null) + { + slots = new ArrayList<Slot>(); + } + + slots.add(slot); + return slots; + } +} + +Slot Slot() : +{ + Token ref; + Token type; + SlotType slottype = null; + List<String> words = null; +} +{ + ref = word() "/" type = word() "/" (words = Word_List())? 
+ { + if (words == null) + { + words = new ArrayList<String>(); + } + if (type.toString().equals("CLASS")) { slottype = SlotType.CLASS; } + else if (type.toString().equals("RESOURCE")) { slottype = SlotType.RESOURCE; } + else if (type.toString().equals("PROPERTY")) { slottype = SlotType.PROPERTY; } + else if (type.toString().equals("SYMPROPERTY")) { slottype = SlotType.SYMPROPERTY; } + else { slottype = SlotType.UNSPEC; } + + return new Slot(ref.toString(),slottype,words); + } +} + +List<String> Word_List() : +{ + Token word; + List<String> words = null; +} +{ + word = word() ("^" words = Word_List())? + { + if (words == null) + { + words = new ArrayList<String>(); + } + + words.add(0,word.toString()); + return words; + } +} + + +TOKEN: {<EVERY: "EVERY">} + +TOKEN: {<MOST: "MOST">} + +TOKEN: {<SOME: "SOME">} + +TOKEN: {<THEMOST: "THEMOST">} + +TOKEN: {<THELEAST: "THELEAST">} + +TOKEN: {<AFEW: "AFEW">} + +TOKEN: {<MANY: "MANY">} + +TOKEN: {<NO: "NO">} + +TOKEN: {<HOWMANY: "HOWMANY">} + +TOKEN: {<LABEL: "l"(["0"-"9"])+>} + +TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":","0"-"9"])+>} + +TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} + +TOKEN: {<QUOTED_STRING: "\'" (~["\'"])+ "\'" >} + +Token dr() : { Token t; }{ (t=<WORD> | t=<DR> | t=<QUOTED_STRING>) { return t; } } +Token word() : { Token t; }{ (t=<WORD> | t=<DR> | t=<QUOTED_STRING>) { return t; } } + +SKIP : { " " | "\t" | "\n" | "\r" } + + + Added: trunk/components-ext/src/main/javacc/LTAG_Parser.jj =================================================================== --- trunk/components-ext/src/main/javacc/LTAG_Parser.jj (rev 0) +++ trunk/components-ext/src/main/javacc/LTAG_Parser.jj 2012-05-09 15:28:25 UTC (rev 3698) @@ -0,0 +1,212 @@ + +options { + LOOKAHEAD = 5; + CHOICE_AMBIGUITY_CHECK = 2; + OTHER_AMBIGUITY_CHECK = 1; + STATIC = false; + DEBUG_PARSER = false; + DEBUG_LOOKAHEAD = false; + DEBUG_TOKEN_MANAGER = false; + ERROR_REPORTING = true; + JAVA_UNICODE_ESCAPE = false; + UNICODE_INPUT = 
false; + IGNORE_CASE = false; + USER_TOKEN_MANAGER = false; + USER_CHAR_STREAM = false; + BUILD_PARSER = true; + BUILD_TOKEN_MANAGER = true; + SANITY_CHECK = true; + FORCE_LA_CHECK = false; +} + +PARSER_BEGIN(LTAGTreeParser) + +package org.dllearner.algorithm.tbsl.ltag.reader; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.dllearner.algorithm.tbsl.ltag.data.*; +import org.dllearner.algorithm.tbsl.ltag.agreement.*; + +public class LTAGTreeParser { + + /** Main entry point. */ + public static void main(String args[]) throws ParseException { + LTAGTreeParser parser = new LTAGTreeParser(System.in); + parser.Input(); + } + +} + +PARSER_END(LTAGTreeParser) + +/** Root production. */ +void Input() : +{} +{ + Tree() <EOF> +} + +/** Tree */ +TreeNode Tree() : +{ + Category category; + String terminal = ""; + List<TreeNode> treelist; + Token word; + Feature feature = null; +} + +{ + // SubstNode with case constraints (e.g. DP[subj]|nom) + category = Cat() "[" word=<WORD> "]" ("{" feature=Feat() "}")? + { + SubstNode substnode = new SubstNode(word.toString(),category,feature); + return substnode; + } + + | + + // FootNode (e.g. S*) + category = Cat() "*" + { + FootNode footnode = new FootNode(category); + return footnode; + } + + | + + // FootNode with no adjunction allowed (e.g. ^S*) + "^" category = Cat() "*" + { + FootNode footnode = new FootNode(category); + footnode.setAdjConstraint(true); + return footnode; + } + + | + + // TreeNode which has case feature marked (e.g. (S|nom ...)) + "(" category = Cat() ("{" feature=Feat() "}")? treelist=TreeList() ")" + { + TreeNode tree = new Tree(); + tree.setCategory(category); + tree.setChildren(treelist); + tree.setParentForTree(); + tree.setFeature(feature); + return tree; + } + + | + + // TreeNode with no case feature an no adjunction allowed (e.g. 
(^S DP...)) + "(" "^" category = Cat() treelist=TreeList() ")" + { + TreeNode tree = new Tree(); + tree.setCategory(category); + tree.setChildren(treelist); + tree.setParentForTree(); + tree.setAdjConstraint(true); + return tree; + } + + | + + // TerminalNode with case feature (e.g. N|nom:'house') + category = Cat() ("{" feature=Feat() "}")? ":" "'" (terminal = Terminal())? "'" + { + TerminalNode node = new TerminalNode(terminal, category); + node.setCategory(category); + node.setFeature(feature); + return node; + } + +} + + +String Terminal() : +{ + Token word; + String terminal=null; +} +{ + word = <WORD> (terminal=Terminal())? + { + if (terminal != null) return word.toString() + " " + terminal; + return word.toString(); + } +} + +List<TreeNode> TreeList() : +{ + List<TreeNode> treelist = null; + TreeNode tree; +} +{ + + tree = Tree() (treelist=TreeList())? + { + if (treelist == null) + { + treelist = new ArrayList<TreeNode>(); + } + + treelist.add(0,tree); + return treelist; + } +} + + +Category Cat() : +{ + Token cat; +} +{ + cat=<CATEGORY> + { + if (cat.toString().equals("DP")) return Category.DP; + if (cat.toString().equals("NP")) return Category.NP; + if (cat.toString().equals("N")) return Category.N; + if (cat.toString().equals("S")) return Category.S; + if (cat.toString().equals("V")) return Category.V; + if (cat.toString().equals("P")) return Category.P; + if (cat.toString().equals("VP")) return Category.VP; + if (cat.toString().equals("PP")) return Category.PP; + if (cat.toString().equals("DET")) return Category.DET; + if (cat.toString().equals("WH")) return Category.WH; + if (cat.toString().equals("ADV")) return Category.ADV; + if (cat.toString().equals("ADJ")) return Category.ADJ; + if (cat.toString().equals("ADJCOMP")) return Category.ADJCOMP; + if (cat.toString().equals("PART")) return Category.PART; + if (cat.toString().equals("PUNCT")) return Category.PUNCT; + if (cat.toString().equals("CC")) return Category.CC; + if (cat.toString().equals("EX")) 
return Category.EX; + if (cat.toString().equals("NUM")) return Category.NUM; + if (cat.toString().equals("C")) return Category.C; + if (cat.toString().equals("NEG")) return Category.NEG; + } + +} + +Feature Feat() : +{ + Token raw; +} +{ + + raw=<WORD> + { + + return Feature.construct(raw.toString()); + } +} + +TOKEN: {<WORD: (["a"-"z"]|["0"-"9"]|["?"]|["-"]|["_"]|["!"]|[","]|[";"]|["."]|[":"]|["/"])+>} + +TOKEN: {<CATEGORY: (["A"-"Z"])+>} + +SKIP : { " " | "\t" | "\n" | "\r" } + + + Property changes on: trunk/components-ext/src/main/javacc/LTAG_Parser.jj ___________________________________________________________________ Added: svn:executable + * Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex 2012-05-08 11:53:28 UTC (rev 3697) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex 2012-05-09 15:28:25 UTC (rev 3698) @@ -90,7 +90,7 @@ the least || (DET DET:'the' DET:'least') || <y, l1, e, [ l1:[ | l2:[ y | ] THELEAST y l3:[|] ] ], [], [],[]> // NECESSARY "CHEAT" - highest || (NP ADJ:'highest' NP*) || <x, l1, e, [ l1:[ | maximum(x) ] ], [], [],[]> ;; <x, l1, e, [ l1:[ j | SLOT_high(x,j), maximum(j) ] ],[],[],[ SLOT_high/PROPERTY/height ]> + highest || (NP ADJ:'highest' NP*) || <x, l1, e, [ l1:[ j | SLOT_high(x,j), maximum(j) ] ],[],[],[ SLOT_high/PROPERTY/height ]> ;; <x, l1, e, [ l1:[ | maximum(x) ] ], [], [],[]> // COUNT more than || (DP DET:'more' DET:'than' NUM[num] NP[np]) || <y,l1,<<e,t>,t>,[ l1:[ y,c | count_greater(y,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> ;; <y,l1,<<e,t>,t>,[ l1:[ y | greater(y,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> @@ -125,6 +125,7 @@ what || (DP WH:'what') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> which || (DP WH:'which') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> how many || (DP WH:'how' 
ADJ:'many' NP[noun]) || <y, l1, <<e,t>,t>, [ l1:[ | l2:[ y | ] HOWMANY y l3:[|] ] ], [ (l4,y,noun,<e,t>) ], [ l4=l2 ],[]> + how many || (DP WH:'how' ADJ:'many' NP[noun]) || <y, l1, <<e,t>,t>, [ l1:[ ?y | ] ], [ (l4,y,noun,<e,t>) ], [ l4=l1 ],[]> who || (DP WH:'who') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> whom || (DP WH:'whom') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> when || (S WH:'when' S[s]) || <x, l1, t, [ l1:[ ?x | SLOT_p(y,x) ] ], [(l2,y,s,t)], [l2=l1], [ SLOT_p/PROPERTY/date ]> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |