From: <jen...@us...> - 2011-11-02 14:07:37
|
Revision: 3361 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3361&view=rev Author: jenslehmann Date: 2011-11-02 14:07:30 +0000 (Wed, 02 Nov 2011) Log Message: ----------- - fixed a hard-to-find bug in the refinement operator for string datatypes (related to an optimisation using property ranges) - added support for single-quoted strings in the Manchester parser (allows strings to be used in conf files) Modified Paths: -------------- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java trunk/components-core/src/main/java/org/dllearner/parser/ManchesterSyntaxParser.java trunk/components-core/src/main/java/org/dllearner/parser/ManchesterSyntaxParserTokenManager.java trunk/components-core/src/main/java/org/dllearner/parser/manchester.jj trunk/components-core/src/main/java/org/dllearner/refinementoperators/RhoDRDown.java trunk/components-core/src/test/java/org/dllearner/test/junit/ParserTests.java trunk/examples/nlp2rdf/reuters_gold_vs_copper/learn.conf Modified: trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java 2011-11-02 12:46:04 UTC (rev 3360) +++ trunk/components-core/src/main/java/org/dllearner/algorithms/celoe/CELOE.java 2011-11-02 14:07:30 UTC (rev 3361) @@ -453,7 +453,7 @@ // for(Description refinement : refinements) { // System.out.println("refinement: " + refinement); // } -// if(loop % 100 == 0) { +// if((loop+1) % 500 == 0) { // System.out.println(getMinimumHorizontalExpansion() + " - " + getMaximumHorizontalExpansion()); // System.exit(0); // } Modified: trunk/components-core/src/main/java/org/dllearner/parser/ManchesterSyntaxParser.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/parser/ManchesterSyntaxParser.java 2011-11-02 12:46:04 UTC (rev 3360) +++ 
trunk/components-core/src/main/java/org/dllearner/parser/ManchesterSyntaxParser.java 2011-11-02 14:07:30 UTC (rev 3361) @@ -190,11 +190,6 @@ finally { jj_save(5, xla); } } - private boolean jj_3R_3() { - if (jj_scan_token(STRING)) return true; - return false; - } - private boolean jj_3R_8() { if (jj_scan_token(21)) return true; if (jj_3R_2()) return true; @@ -366,6 +361,11 @@ return false; } + private boolean jj_3R_3() { + if (jj_scan_token(STRING)) return true; + return false; + } + /** Generated Token Manager. */ public ManchesterSyntaxParserTokenManager token_source; SimpleCharStream jj_input_stream; Modified: trunk/components-core/src/main/java/org/dllearner/parser/ManchesterSyntaxParserTokenManager.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/parser/ManchesterSyntaxParserTokenManager.java 2011-11-02 12:46:04 UTC (rev 3360) +++ trunk/components-core/src/main/java/org/dllearner/parser/ManchesterSyntaxParserTokenManager.java 2011-11-02 14:07:30 UTC (rev 3361) @@ -18,11 +18,11 @@ { case 0: if ((active0 & 0x40000L) != 0L) - return 31; + return 34; return -1; case 1: if ((active0 & 0x40000L) != 0L) - return 31; + return 34; return -1; default : return -1; @@ -83,7 +83,7 @@ if ((active0 & 0x20000L) != 0L) return jjStopAtPos(1, 17); else if ((active0 & 0x40000L) != 0L) - return jjStartNfaWithStates_0(1, 18, 31); + return jjStartNfaWithStates_0(1, 18, 34); break; case 79: return jjMoveStringLiteralDfa2_0(active0, 0xc00L); @@ -220,7 +220,7 @@ private int jjMoveNfa_0(int startState, int curPos) { int startsAt = 0; - jjnewStateCnt = 31; + jjnewStateCnt = 34; int i = 1; jjstateSet[0] = startState; int kind = 0x7fffffff; @@ -235,15 +235,6 @@ { switch(jjstateSet[--i]) { - case 31: - if ((0xbfffffffffffdbffL & l) != 0L) - jjCheckNAddTwoStates(4, 5); - else if (curChar == 62) - { - if (kind > 20) - kind = 20; - } - break; case 0: if ((0x3fe000000000000L & l) != 0L) { @@ -255,15 +246,26 @@ { if 
(kind > 8) kind = 8; - jjCheckNAdd(28); + jjCheckNAdd(31); } else if (curChar == 47) jjAddStates(3, 5); else if (curChar == 60) + jjCheckNAddTwoStates(7, 8); + else if (curChar == 39) jjCheckNAddTwoStates(4, 5); else if (curChar == 34) jjCheckNAddTwoStates(1, 2); break; + case 34: + if ((0xbfffffffffffdbffL & l) != 0L) + jjCheckNAddTwoStates(7, 8); + else if (curChar == 62) + { + if (kind > 20) + kind = 20; + } + break; case 1: if ((0xfffffffbffffdbffL & l) != 0L) jjCheckNAddTwoStates(1, 2); @@ -273,131 +275,143 @@ kind = 19; break; case 3: - if (curChar == 60) + if (curChar == 39) jjCheckNAddTwoStates(4, 5); break; case 4: - if ((0xbfffffffffffdbffL & l) != 0L) + if ((0xfffffffbffffdbffL & l) != 0L) jjCheckNAddTwoStates(4, 5); break; case 5: + if (curChar == 39 && kind > 19) + kind = 19; + break; + case 6: + if (curChar == 60) + jjCheckNAddTwoStates(7, 8); + break; + case 7: + if ((0xbfffffffffffdbffL & l) != 0L) + jjCheckNAddTwoStates(7, 8); + break; + case 8: if (curChar == 62 && kind > 20) kind = 20; break; - case 6: + case 9: if (curChar == 47) jjAddStates(3, 5); break; - case 7: + case 10: if (curChar == 47) jjCheckNAddStates(6, 8); break; - case 8: + case 11: if ((0xffffffffffffdbffL & l) != 0L) jjCheckNAddStates(6, 8); break; - case 9: + case 12: if ((0x2400L & l) != 0L && kind > 5) kind = 5; break; - case 10: + case 13: if (curChar == 10 && kind > 5) kind = 5; break; - case 11: + case 14: if (curChar == 13) - jjstateSet[jjnewStateCnt++] = 10; + jjstateSet[jjnewStateCnt++] = 13; break; - case 12: + case 15: if (curChar == 42) - jjCheckNAddTwoStates(13, 14); + jjCheckNAddTwoStates(16, 17); break; - case 13: + case 16: if ((0xfffffbffffffffffL & l) != 0L) - jjCheckNAddTwoStates(13, 14); + jjCheckNAddTwoStates(16, 17); break; - case 14: + case 17: if (curChar == 42) jjCheckNAddStates(9, 11); break; - case 15: + case 18: if ((0xffff7bffffffffffL & l) != 0L) - jjCheckNAddTwoStates(16, 14); + jjCheckNAddTwoStates(19, 17); break; - case 16: + case 19: if 
((0xfffffbffffffffffL & l) != 0L) - jjCheckNAddTwoStates(16, 14); + jjCheckNAddTwoStates(19, 17); break; - case 17: + case 20: if (curChar == 47 && kind > 6) kind = 6; break; - case 18: + case 21: if (curChar == 42) - jjstateSet[jjnewStateCnt++] = 12; + jjstateSet[jjnewStateCnt++] = 15; break; - case 19: + case 22: if (curChar == 42) - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(23, 24); break; - case 20: + case 23: if ((0xfffffbffffffffffL & l) != 0L) - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(23, 24); break; - case 21: + case 24: if (curChar == 42) jjCheckNAddStates(12, 14); break; - case 22: + case 25: if ((0xffff7bffffffffffL & l) != 0L) - jjCheckNAddTwoStates(23, 21); + jjCheckNAddTwoStates(26, 24); break; - case 23: + case 26: if ((0xfffffbffffffffffL & l) != 0L) - jjCheckNAddTwoStates(23, 21); + jjCheckNAddTwoStates(26, 24); break; - case 24: + case 27: if (curChar == 47 && kind > 7) kind = 7; break; - case 25: + case 28: if ((0x3fe000000000000L & l) == 0L) break; if (kind > 8) kind = 8; jjCheckNAddStates(0, 2); break; - case 26: + case 29: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 8) kind = 8; - jjCheckNAdd(26); + jjCheckNAdd(29); break; - case 27: + case 30: if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(27, 28); + jjCheckNAddTwoStates(30, 31); break; - case 28: + case 31: if (curChar != 46) break; if (kind > 9) kind = 9; - jjCheckNAdd(29); + jjCheckNAdd(32); break; - case 29: + case 32: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 9) kind = 9; - jjCheckNAdd(29); + jjCheckNAdd(32); break; - case 30: + case 33: if (curChar != 48) break; if (kind > 8) kind = 8; - jjCheckNAdd(28); + jjCheckNAdd(31); break; default : break; } @@ -410,32 +424,36 @@ { switch(jjstateSet[--i]) { - case 31: - case 4: + case 34: + case 7: if ((0xffffffffefffffffL & l) != 0L) - jjCheckNAddTwoStates(4, 5); + jjCheckNAddTwoStates(7, 8); break; case 1: if ((0xffffffffefffffffL & l) != 0L) jjAddStates(15, 16); break; - case 8: + case 
4: + if ((0xffffffffefffffffL & l) != 0L) + jjAddStates(17, 18); + break; + case 11: jjAddStates(6, 8); break; - case 13: - jjCheckNAddTwoStates(13, 14); - break; - case 15: case 16: - jjCheckNAddTwoStates(16, 14); + jjCheckNAddTwoStates(16, 17); break; - case 20: - jjCheckNAddTwoStates(20, 21); + case 18: + case 19: + jjCheckNAddTwoStates(19, 17); break; - case 22: case 23: - jjCheckNAddTwoStates(23, 21); + jjCheckNAddTwoStates(23, 24); break; + case 25: + case 26: + jjCheckNAddTwoStates(26, 24); + break; default : break; } } while(i != startsAt); @@ -448,37 +466,41 @@ { switch(jjstateSet[--i]) { - case 31: - case 4: + case 34: + case 7: if ((jjbitVec0[i2] & l2) != 0L) - jjCheckNAddTwoStates(4, 5); + jjCheckNAddTwoStates(7, 8); break; case 1: if ((jjbitVec0[i2] & l2) != 0L) jjAddStates(15, 16); break; - case 8: + case 4: if ((jjbitVec0[i2] & l2) != 0L) - jjAddStates(6, 8); + jjAddStates(17, 18); break; - case 13: + case 11: if ((jjbitVec0[i2] & l2) != 0L) - jjCheckNAddTwoStates(13, 14); + jjAddStates(6, 8); break; - case 15: case 16: if ((jjbitVec0[i2] & l2) != 0L) - jjCheckNAddTwoStates(16, 14); + jjCheckNAddTwoStates(16, 17); break; - case 20: + case 18: + case 19: if ((jjbitVec0[i2] & l2) != 0L) - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(19, 17); break; - case 22: case 23: if ((jjbitVec0[i2] & l2) != 0L) - jjCheckNAddTwoStates(23, 21); + jjCheckNAddTwoStates(23, 24); break; + case 25: + case 26: + if ((jjbitVec0[i2] & l2) != 0L) + jjCheckNAddTwoStates(26, 24); + break; default : break; } } while(i != startsAt); @@ -490,15 +512,15 @@ kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 31 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 34 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } } } static final int[] jjnextStates = { - 26, 27, 28, 7, 18, 19, 8, 9, 11, 14, 15, 17, 21, 22, 24, 1, - 2, + 29, 30, 31, 10, 21, 22, 11, 
12, 14, 17, 18, 20, 24, 25, 27, 1, + 2, 4, 5, }; /** Token literal values. */ @@ -518,8 +540,8 @@ 0xfeL, }; protected SimpleCharStream input_stream; -private final int[] jjrounds = new int[31]; -private final int[] jjstateSet = new int[62]; +private final int[] jjrounds = new int[34]; +private final int[] jjstateSet = new int[68]; protected char curChar; /** Constructor. */ public ManchesterSyntaxParserTokenManager(SimpleCharStream stream){ @@ -546,7 +568,7 @@ { int i; jjround = 0x80000001; - for (i = 31; i-- > 0;) + for (i = 34; i-- > 0;) jjrounds[i] = 0x80000000; } Modified: trunk/components-core/src/main/java/org/dllearner/parser/manchester.jj =================================================================== --- trunk/components-core/src/main/java/org/dllearner/parser/manchester.jj 2011-11-02 12:46:04 UTC (rev 3360) +++ trunk/components-core/src/main/java/org/dllearner/parser/manchester.jj 2011-11-02 14:07:30 UTC (rev 3361) @@ -64,7 +64,8 @@ | < NOT: "not" > | < GE: ">=" > | < LE: "<=" > - | < STRING: "\"" (~["\"","\\","\n","\r"])* "\"" > + // support single quotes and double quotes + | < STRING: "\"" (~["\"","\\","\n","\r"])* "\"" | "'" (~["\"","\\","\n","\r"])* "'"> | < URI: "<" (~[">","\\","\n","\r"])* ">" > } Modified: trunk/components-core/src/main/java/org/dllearner/refinementoperators/RhoDRDown.java =================================================================== --- trunk/components-core/src/main/java/org/dllearner/refinementoperators/RhoDRDown.java 2011-11-02 12:46:04 UTC (rev 3360) +++ trunk/components-core/src/main/java/org/dllearner/refinementoperators/RhoDRDown.java 2011-11-02 14:07:30 UTC (rev 3361) @@ -145,6 +145,7 @@ private Map<NamedClass, Set<ObjectProperty>> appOP = new TreeMap<NamedClass, Set<ObjectProperty>>(); private Map<NamedClass, Set<DatatypeProperty>> appBD = new TreeMap<NamedClass, Set<DatatypeProperty>>(); private Map<NamedClass, Set<DatatypeProperty>> appDD = new TreeMap<NamedClass, Set<DatatypeProperty>>(); + private 
Map<NamedClass, Set<DatatypeProperty>> appSD = new TreeMap<NamedClass, Set<DatatypeProperty>>(); // most general applicable properties private Map<NamedClass,Set<ObjectProperty>> mgr = new TreeMap<NamedClass,Set<ObjectProperty>>(); @@ -838,6 +839,7 @@ private void computeTopRefinements(int maxLength, NamedClass domain) { long topComputationTimeStartNs = System.nanoTime(); +// System.out.println("computing top refinements for " + domain + " up to length " + maxLength); if(domain == null && m.size() == 0) computeM(); @@ -957,6 +959,12 @@ topARefinementsLength.put(domain,maxLength); topComputationTimeNs += System.nanoTime() - topComputationTimeStartNs; + +// if(domain == null) { +// System.out.println("computed top refinements: " + topRefinementsCumulative.get(maxLength)); +// } else { +// System.out.println("computed top refinements: " + topARefinementsCumulative.get(domain).get(maxLength)); +// } } // compute M_\top @@ -1046,6 +1054,8 @@ } m.put(4,m4); +// System.out.println("m: " + m); + mComputationTimeNs += System.nanoTime() - mComputationTimeStartNs; } @@ -1183,7 +1193,7 @@ } mA.get(nc).put(4,m4); -// System.out.println(mA.get(nc)); +// System.out.println("m for " + nc + ": " + mA.get(nc)); mComputationTimeNs += System.nanoTime() - mComputationTimeStartNs; } @@ -1330,7 +1340,7 @@ private void computeMgsdRecursive(NamedClass domain, Set<DatatypeProperty> currProperties, Set<DatatypeProperty> mgsdTmp) { for(DatatypeProperty prop : currProperties) { - if(appDD.get(domain).contains(prop)) + if(appSD.get(domain).contains(prop)) mgsdTmp.add(prop); else computeMgsdRecursive(domain, reasoner.getSubProperties(prop), mgsdTmp); @@ -1372,7 +1382,19 @@ if(!isDisjoint(domain,d)) applicableDDPs.add(role); } - appDD.put(domain, applicableDDPs); + appDD.put(domain, applicableDDPs); + + // string datatype properties + Set<DatatypeProperty> mostGeneralSDPs = reasoner.getStringDatatypeProperties(); + Set<DatatypeProperty> applicableSDPs = new TreeSet<DatatypeProperty>(); + 
for(DatatypeProperty role : mostGeneralSDPs) { +// Description d = (NamedClass) rs.getDomain(role); + Description d = reasoner.getDomain(role); +// System.out.println("domain: " + d); + if(!isDisjoint(domain,d)) + applicableSDPs.add(role); + } + appSD.put(domain, applicableSDPs); } // returns true of the intersection contains elements disjoint Modified: trunk/components-core/src/test/java/org/dllearner/test/junit/ParserTests.java =================================================================== --- trunk/components-core/src/test/java/org/dllearner/test/junit/ParserTests.java 2011-11-02 12:46:04 UTC (rev 3360) +++ trunk/components-core/src/test/java/org/dllearner/test/junit/ParserTests.java 2011-11-02 14:07:30 UTC (rev 3361) @@ -68,6 +68,8 @@ "(<http://example.com/prop> some (<http://example.com/class1> and <http://example.com/class2>))", // another nested expression "(<http://nlp2rdf.lod2.eu/schema/string/Document> and (<http://nlp2rdf.lod2.eu/schema/string/subStringTrans> some <http://www.w3.org/2002/07/owl#Thing>))", + // a test with a single quoted string + "(<http://nlp2rdf.lod2.eu/schema/string/Document> and (<http://nlp2rdf.lod2.eu/schema/string/subStringTrans> some ( <http://nlp2rdf.lod2.eu/schema/sso/lemma> value 'copper')))" }; // loop through all test cases Modified: trunk/examples/nlp2rdf/reuters_gold_vs_copper/learn.conf =================================================================== --- trunk/examples/nlp2rdf/reuters_gold_vs_copper/learn.conf 2011-11-02 12:46:04 UTC (rev 3360) +++ trunk/examples/nlp2rdf/reuters_gold_vs_copper/learn.conf 2011-11-02 14:07:30 UTC (rev 3361) @@ -36,9 +36,10 @@ alg.type = "celoe" // alg.writeSearchTree = true alg.replaceSearchTree = true -alg.maxExecutionTimeInSeconds = 100 +alg.maxExecutionTimeInSeconds = 10 alg.maxNrOfResults = 10 alg.startClass = "(<http://nlp2rdf.lod2.eu/schema/string/Document> and (<http://nlp2rdf.lod2.eu/schema/string/subStringTrans> some <http://www.w3.org/2002/07/owl#Thing>))" +// solution: 
+// alg.startClass = "(<http://nlp2rdf.lod2.eu/schema/string/Document> and (<http://nlp2rdf.lod2.eu/schema/string/subStringTrans> some ( <http://nlp2rdf.lod2.eu/schema/sso/lemma> value 'copper')))" - This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |