From: <chr...@us...> - 2012-08-27 12:13:51
|
Revision: 3838 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3838&view=rev Author: christinaunger Date: 2012-08-27 12:13:42 +0000 (Mon, 27 Aug 2012) Log Message: ----------- extension of the final filtering process that throws out those templates that don't make sense Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-08-26 12:24:54 UTC (rev 3837) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-08-27 12:13:42 UTC (rev 3838) @@ -504,6 +504,9 @@ else if (type.toString().equals("PROPERTY")) { slottype = SlotType.PROPERTY; } else if (type.toString().equals("SYMPROPERTY")) { slottype = SlotType.SYMPROPERTY; } else if (type.toString().equals("LITERAL")) { slottype = SlotType.LITERAL; } + else if (type.toString().equals("STRING")) { slottype = SlotType.STRING; } + else if (type.toString().equals("INTEGER")) { slottype = SlotType.INTEGER; } + else if (type.toString().equals("BOOLEAN")) { slottype = SlotType.BOOLEAN; } else { slottype = SlotType.UNSPEC; } {if (true) return new Slot(ref.toString(),slottype,words);} @@ -884,13 +887,18 @@ return false; } + private boolean jj_3_42() { + if (jj_scan_token(B)) return true; + return false; + } + private boolean jj_3_3() { if (jj_3R_3()) return true; return false; } - private boolean jj_3_42() { - if (jj_scan_token(B)) return true; + private boolean jj_3_40() { + if (jj_scan_token(C)) return true; return false; } @@ -910,11 +918,21 @@ return false; } - private boolean jj_3_40() { - if (jj_scan_token(C)) return true; + private boolean jj_3_41() { + if (jj_scan_token(A)) return true; return false; } + private boolean jj_3R_7() { + Token xsp; + xsp = jj_scanpos; + if (jj_3_41()) { + jj_scanpos = xsp; + if (jj_3_42()) return true; + } + return false; + } + private boolean jj_3R_16() { if (jj_3R_7()) return true; if (jj_scan_token(14)) return true; @@ -927,21 +945,6 @@ return false; } - private boolean jj_3_41() { - if (jj_scan_token(A)) return true; - return false; - } - - private boolean jj_3R_7() { - Token xsp; - xsp = jj_scanpos; - if (jj_3_41()) { - jj_scanpos = xsp; - if (jj_3_42()) return true; - } - return false; - } - private boolean jj_3_39() { if (jj_scan_token(A)) return true; return false; @@ -1131,18 +1134,18 @@ return false; } + private boolean jj_3_38() { + if (jj_scan_token(15)) return true; + if (jj_3R_13()) return true; + return false; + } + private boolean jj_3_7() { if (jj_3R_5()) return true; if (jj_scan_token(6)) return true; return false; } - private boolean jj_3_38() { - if (jj_scan_token(15)) return true; - if (jj_3R_13()) return true; - return false; - } - private boolean jj_3_37() { if (jj_3R_13()) return true; return false; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-08-26 12:24:54 UTC (rev 3837) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-08-27 12:13:42 UTC (rev 3838) @@ -458,6 +458,9 @@ else if (type.toString().equals("PROPERTY")) { slottype = SlotType.PROPERTY; } else if (type.toString().equals("SYMPROPERTY")) { slottype = SlotType.SYMPROPERTY; } else if (type.toString().equals("LITERAL")) { slottype = SlotType.LITERAL; } + else if (type.toString().equals("STRING")) { slottype = SlotType.STRING; } + else if (type.toString().equals("INTEGER")) { slottype = SlotType.INTEGER; } + else if (type.toString().equals("BOOLEAN")) { slottype = SlotType.BOOLEAN; } else { slottype = SlotType.UNSPEC; } return new Slot(ref.toString(),slottype,words); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java 2012-08-26 12:24:54 UTC (rev 3837) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java 2012-08-27 12:13:42 UTC (rev 3838) @@ -1,6 +1,6 @@ package org.dllearner.algorithm.tbsl.sparql; public enum SlotType { - RESOURCE, CLASS, OBJECTPROPERTY, DATATYPEPROPERTY, PROPERTY, LITERAL, UNSPEC, + RESOURCE, CLASS, OBJECTPROPERTY, DATATYPEPROPERTY, PROPERTY, LITERAL, STRING, INTEGER, BOOLEAN, UNSPEC, SYMPROPERTY // TODO don't use them anymore } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-08-26 12:24:54 UTC (rev 3837) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-08-27 12:13:42 UTC (rev 3838) @@ -40,12 +40,12 @@ // check for clash (v=LITERAL && v=RESOURCE) for (Slot s : slots) { if ((s.words.get(0).equals(slot.words.get(0)) || s.anchor.equals(slot.words.get(0))) - && !s.type.equals(slot.type)) + && ((slot.type.equals(SlotType.RESOURCE) && isLiteral(s.type)) || (s.type.equals(SlotType.RESOURCE) && isLiteral(slot.type)))) // !s.type.equals(slot.type)) return null; } // check for clash (v=LITERAL && p(...,v)=OBJECTPROPERTY) || (v=RESOURCE && p(...,v)=DATATYPEPROPERTY) SlotType clashing = null; - if (slot.type.equals(SlotType.LITERAL)) clashing = SlotType.OBJECTPROPERTY; + if (isLiteral(slot.type)) clashing = SlotType.OBJECTPROPERTY; else if (slot.type.equals(SlotType.RESOURCE)) clashing = SlotType.DATATYPEPROPERTY; for (Slot s : slots) { if (clashing != null && s.type.equals(clashing)) { @@ -57,6 +57,27 @@ } } } + // check for clashes with FILTERS + for (SPARQL_Filter filter : query.filter) { + for (SPARQL_Pair ts : filter.getTerms()) { + if (ts.a.getName().equals(var) && (isIntegerType(ts.type) || ts.type.equals(SPARQL_PairType.REGEX))) { + // clash 1: counting a literal + for (SPARQL_Term sel : query.selTerms) { + if (sel.name.equals(var) && sel.aggregate.equals(SPARQL_Aggregate.COUNT)) + return null; + } + // clash 2: FILTER regex(?var,...) and FILTER (?var > ...) + for (SPARQL_Filter f : query.filter) { + if (!f.equals(filter)) { + for (SPARQL_Pair p : f.getTerms()) { + if (p.a.name.equals(var) && (p.type.equals(SPARQL_PairType.REGEX) && isIntegerType(ts.type)) || (ts.type.equals(SPARQL_PairType.REGEX) && isIntegerType(p.type))) + return null; + } + } + } + } + } + } } for (Slot slot : slots) { @@ -65,7 +86,7 @@ for (SPARQL_Triple triple : query.conditions) { if (triple.property.toString().equals("rdf:type") && triple.value.toString().equals("?"+slot.anchor)) { for (Slot s : argslots) { - if (s.words.contains(triple.variable.toString().replace("?","")) && s.type.equals(SlotType.LITERAL)) + if (s.words.contains(triple.variable.toString().replace("?","")) && isLiteral(s.type)) return null; } } @@ -81,7 +102,7 @@ for (String arg : args) { for (Slot s : argslots) { if (s.words.contains(arg.replace("?",""))) { - if (s.type.equals(SlotType.LITERAL)) slot.type = SlotType.DATATYPEPROPERTY; + if (isLiteral(s.type)) slot.type = SlotType.DATATYPEPROPERTY; else if (s.type.equals(SlotType.RESOURCE)) slot.type = SlotType.OBJECTPROPERTY; } } @@ -111,8 +132,29 @@ } slots = keep; + // additionally, filter out those templates that count a var that does not occur in the triples + // (these templates should not be built in the first place, but they are...) + for (SPARQL_Term t : query.selTerms) { + if (t.aggregate.equals(SPARQL_Aggregate.COUNT)) { + String v = t.name; + boolean fine = false; + for (SPARQL_Triple triple : query.conditions) { + if ((triple.variable.name.equals(v) || triple.value.name.equals(v))) { + fine = true; break; + } + } + if (!fine) return null; + } + } + return this; } + private boolean isLiteral(SlotType st) { + return st.equals(SlotType.STRING) || st.equals(SlotType.INTEGER) || st.equals(SlotType.LITERAL); + } + private boolean isIntegerType(SPARQL_PairType p) { + return p.equals(SPARQL_PairType.GT) || p.equals(SPARQL_PairType.LT) || p.equals(SPARQL_PairType.GTEQ) || p.equals(SPARQL_PairType.LTEQ); + } public String toString() { Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-08-26 12:24:54 UTC (rev 3837) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-08-27 12:13:42 UTC (rev 3838) @@ -31,7 +31,7 @@ below .+ pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> below .+ pounds || (PP P:'below' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> from .+ to .+ pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/prices ]> + between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ z | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> with || (PP P:'with' DP[dp]) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ z | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |