From: <chr...@us...> - 2012-06-14 15:30:10
|
Revision: 3741 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3741&view=rev Author: christinaunger Date: 2012-06-14 15:30:01 +0000 (Thu, 14 Jun 2012) Log Message: ----------- [tbsl] basic update for regex descriptions, more filtering conditions on templates, and ASK -> SELECT hack Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-14 15:30:01 UTC (rev 3741) @@ -1,9 +1,6 @@ package org.dllearner.algorithm.tbsl.converter; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; import org.dllearner.algorithm.tbsl.sem.drs.Complex_DRS_Condition; import org.dllearner.algorithm.tbsl.sem.drs.DRS; @@ -30,8 +27,8 @@ public class DRS2SPARQL_Converter { - // suppresses console output - private boolean silent = true; + private boolean silent = true; // suppresses console output + private boolean oxford = true; List<Slot> slots; Template template; List<Integer> usedInts; @@ -100,7 +97,8 @@ if (!restructureEmpty(drs)) { return null; } -// System.out.println("--- DRS (after) : " + drs); // DEBUG + replaceRegextoken(drs); + // System.out.println("--- DRS (after) : " + drs); // DEBUG for (DiscourseReferent referent : drs.collectDRs()) { if (referent.isMarked()) { @@ -318,6 +316,21 @@ new SPARQL_Term(simple.getArguments().get(1).getValue(),true), SPARQL_PairType.REGEX))); } + else if (predicate.equals("regextoken")) { + String arg = simple.getArguments().get(1).getValue(); + String regex = null; + for (Slot slot : slots) { + if (slot.getAnchor().equals(arg)) { + if (!slot.getWords().isEmpty()) regex = slot.getWords().get(0); + } + } + if (regex != null) { + query.addFilter(new SPARQL_Filter(new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), + new SPARQL_Term("'"+regex+"'",false), + SPARQL_PairType.REGEX))); + } + } else { if (arity == 1) { SPARQL_Term term = new SPARQL_Term(simple.getArguments().get(0).getValue(),false);term.setIsVariable(true); @@ -333,6 +346,45 @@ } } } + + // TODO this is a hack in order to avoid ASK queries if DP is parsed + if (oxford) { + Hashtable<String,Integer> vs = new Hashtable<String,Integer>(); + String v1; String v2; + for (SPARQL_Triple c : query.getConditions()) { + v1 = c.getVariable().toString().replace("?",""); + v2 = c.getValue().toString().replace("?",""); + // is it a slot variable? + boolean v1isSlotVar = false; + boolean v2isSlotVar = false; + for (Slot s : slots) { + if (s.getAnchor().equals(v1)) v1isSlotVar = true; + if (s.getAnchor().equals(v2)) v2isSlotVar = true; + } + if (!v1isSlotVar && !v1.matches("[0..9]+") && !v1.contains("count")) { + if (vs.containsKey(v1)) vs.put(v1,vs.get(v1)+1); + else vs.put(v1,1); + } + if (!v2isSlotVar && !v2.matches("[0..9]+") && !v2.contains("count")) { + if (vs.containsKey(v2)) vs.put(v2,vs.get(v2)+1); + else vs.put(v2,1); + } + } + + int max = 0; String maxvar = null; + for (String var : vs.keySet()) { + if (vs.get(var) > max) { + max = vs.get(var); + maxvar = var; + } + } + if (maxvar != null) { + SPARQL_Term term = new SPARQL_Term(maxvar); + term.setIsVariable(true); + query.addSelTerm(term); + } + } + return query; } @@ -340,41 +392,41 @@ Set<Simple_DRS_Condition> equalsConditions = new HashSet<Simple_DRS_Condition>(); for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { - if(c.getPredicate().equals("equal")) { - equalsConditions.add(c); - } + if(c.getPredicate().equals("equal")) equalsConditions.add(c); } DiscourseReferent firstArg; DiscourseReferent secondArg; boolean firstIsURI; boolean secondIsURI; + boolean firstIsInt; + boolean secondIsInt; for (Simple_DRS_Condition c : equalsConditions) { - firstArg = c.getArguments().get(0); + firstArg = c.getArguments().get(0); secondArg = c.getArguments().get(1); firstIsURI = isUri(firstArg.getValue()); secondIsURI = isUri(secondArg.getValue()); + firstIsInt = firstArg.getValue().matches("[0..9]+"); + secondIsInt = secondArg.getValue().matches("[0..9]+"); - boolean oneArgIsInt = firstArg.toString().matches("[0..9]") || secondArg.toString().matches("[0..9]"); - drs.removeCondition(c); - if (firstIsURI) { - drs.replaceEqualRef(secondArg, firstArg, false); + if (firstIsURI || firstIsInt) { + drs.replaceEqualRef(secondArg, firstArg, true); for (Slot s : slots) { if (s.getAnchor().equals(secondArg.getValue())) { s.setAnchor(firstArg.getValue()); } } - } else if (secondIsURI) { - drs.replaceEqualRef(firstArg, secondArg, false); + } else if (secondIsURI || secondIsInt) { + drs.replaceEqualRef(firstArg, secondArg, true); for (Slot s : slots) { if (s.getAnchor().equals(firstArg.getValue())) { s.setAnchor(secondArg.getValue()); } } - } else if (!oneArgIsInt) { + } else { drs.replaceEqualRef(firstArg, secondArg, false); for (Slot s : slots) { if (s.getAnchor().equals(firstArg.getValue())) { @@ -396,6 +448,55 @@ } } + private void replaceRegextoken(DRS drs) { + + Set<Simple_DRS_Condition> cs = new HashSet<Simple_DRS_Condition>(); + for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { + if(c.getPredicate().equals("regextoken")) cs.add(c); + } + + String var; + String newvar; + String regex = ""; + String[] forbidden = {"regextoken","regex","count","minimum","maximum","greater","less","greaterorequal","lessorequal","equal","sum"}; + Set<Simple_DRS_Condition> used = new HashSet<Simple_DRS_Condition>(); + + for (Simple_DRS_Condition c : cs) { + var = c.getArguments().get(1).getValue(); + newvar = c.getArguments().get(0).getValue(); + for (Simple_DRS_Condition cond : drs.getAllSimpleConditions()) { + boolean takeit = false; + for (DiscourseReferent dr : cond.getArguments()) { + if (dr.getValue().equals(var)) { + takeit = true; + for (String f : forbidden) if (f.equals(cond.getPredicate())) takeit= false; + } + } + if (takeit) { + regex += cond.getPredicate().replace("SLOT","") + " "; + used.add(cond); + } + else { + for (DiscourseReferent dr : cond.getArguments()) { + if (dr.getValue().equals(var)) dr.setValue(newvar); + } + } + } + if (!regex.isEmpty()) { + c.getArguments().remove(1); + c.getArguments().add(new DiscourseReferent("'"+regex.trim()+"'")); + c.setPredicate("regex"); + } + for (Slot s : slots) { + if (s.getWords().contains(var)) { + s.getWords().remove(var); + s.getWords().add(newvar); + } + } + } + for (Simple_DRS_Condition cond : used) drs.removeCondition(cond); + } + private boolean restructureEmpty(DRS drs) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-14 15:30:01 UTC (rev 3741) @@ -18,6 +18,9 @@ static final String[] genericReplacements = { "[!?.,;]", "" }; static final String[] englishReplacements = { "don't", "do not", "doesn't", "does not" }; + static final String[] hackReplacements = { " 1 "," one "," 2 "," two "," 3 "," three "," 4 "," four "," 5 "," five "," 6 "," six "," 7 "," seven ", + " 8 "," eight "," 9 "," nine "," 10 "," ten "," 11 "," eleven "," 12 "," twelve "," 13 "," thirteen "," 14 "," fourteen "," 15 "," fifteen ", + " 16 "," sixteen "," 17 "," seventeen "," 18 "," eighteen "," 19 "," nineteen "," 20 "," twenty "}; static boolean USE_NER; static boolean VERBOSE; static NER ner; @@ -49,7 +52,9 @@ replacements.addAll(Arrays.asList(repl)); replacements.addAll(Arrays.asList(englishReplacements)); replacements.addAll(Arrays.asList(genericReplacements)); + replacements.addAll(Arrays.asList(hackReplacements)); + s = s.replaceAll(",\\s"," and "); for (int i = 0; i < replacements.size(); i += 2) { s = s.replaceAll(replacements.get(i), replacements.get(i + 1)); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-06-14 15:30:01 UTC (rev 3741) @@ -33,7 +33,28 @@ public Template checkandrefine() { Set<Slot> argslots = new HashSet<Slot>(); - for (Slot slot : slots) if (slot.anchor.equals("SLOT_arg")) argslots.add(slot); + for (Slot slot : slots) if (slot.anchor.equals("SLOT_arg")) { + String var = slot.words.get(0); + // check for clash (v=LITERAL && v=RESOURCE) + for (Slot s : argslots) { + if (s.words.get(0).equals(slot.words.get(0)) && !s.type.equals(slot.type)) + return null; + } + // check for clash (v=LITERAL && p(...,v)=OBJECTPROPERTY) || (v=RESOURCE && p(...,v)=DATATYPEPROPERTY) + SlotType clashing = null; + if (slot.type.equals(SlotType.LITERAL)) clashing = SlotType.OBJECTPROPERTY; + else if (slot.type.equals(SlotType.RESOURCE)) clashing = SlotType.DATATYPEPROPERTY; + for (Slot s : slots) { + if (clashing != null && s.type.equals(clashing)) { + for (SPARQL_Triple triple : query.conditions) { + if (triple.property.toString().equals("?"+s.anchor)) { + if (triple.value.toString().equals("?"+var)) return null; + } + } + } + } + argslots.add(slot); + } for (Slot slot : slots) { // check for clashes Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-14 15:30:01 UTC (rev 3741) @@ -32,9 +32,9 @@ public class Templator { private static final Logger logger = Logger.getLogger(Templator.class); + + String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; - String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; - private String[] noun = {"NN","NNS","NNP","NNPS","NPREP","JJNN","JJNPREP"}; private String[] adjective = {"JJ","JJR","JJS","JJH"}; private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP","WHEN","WHERE"}; @@ -49,8 +49,8 @@ WordNet wordnet; LingPipeLemmatizer lem = new LingPipeLemmatizer(); - DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); - DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); + DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); + DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); boolean ONE_SCOPE_ONLY = true; boolean UNTAGGED_INPUT = true; Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-14 15:30:01 UTC (rev 3741) @@ -172,6 +172,16 @@ eight || (NP NUM:'eight' NP*) || <x,l1,<e,t>,[l1:[x|count(x,8)]],[],[],[ SLOT_arg/RESOURCE/x ]> nine || (NP NUM:'nine' NP*) || <x,l1,<e,t>,[l1:[x|count(x,9)]],[],[],[ SLOT_arg/RESOURCE/x ]> ten || (NP NUM:'ten' NP*) || <x,l1,<e,t>,[l1:[x|count(x,10)]],[],[],[ SLOT_arg/RESOURCE/x ]> + eleven || (NP NUM:'one' NP*) || <x,l1,<e,t>,[l1:[x|count(x,11)]],[],[],[ SLOT_arg/RESOURCE/x ]> + twelve || (NP NUM:'two' NP*) || <x,l1,<e,t>,[l1:[x|count(x,12)]],[],[],[ SLOT_arg/RESOURCE/x ]> + thirteen || (NP NUM:'thirteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,13)]],[],[],[ SLOT_arg/RESOURCE/x ]> + fourteen || (NP NUM:'fourteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,14)]],[],[],[ SLOT_arg/RESOURCE/x ]> + fifteen || (NP NUM:'fifteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,15)]],[],[],[ SLOT_arg/RESOURCE/x ]> + sixteen || (NP NUM:'sixteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,16)]],[],[],[ SLOT_arg/RESOURCE/x ]> + seventeen || (NP NUM:'seventeen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,17)]],[],[],[ SLOT_arg/RESOURCE/x ]> + eighteen || (NP NUM:'eighteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,18)]],[],[],[ SLOT_arg/RESOURCE/x ]> + nineteen || (NP NUM:'nineteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,19)]],[],[],[ SLOT_arg/RESOURCE/x ]> + twenty || (NP NUM:'twenty' NP*) || <x,l1,<e,t>,[l1:[x|count(x,20)]],[],[],[ SLOT_arg/RESOURCE/x ]> one || (NUM NUM:'one') || <x,l1,e,[l1:[x|equal(x,1)]],[],[],[ SLOT_arg/LITERAL/x ]> two || (NUM NUM:'two') || <x,l1,e,[l1:[x|equal(x,2)]],[],[],[ SLOT_arg/LITERAL/x ]> @@ -183,4 +193,13 @@ eight || (NUM NUM:'eight') || <x,l1,e,[l1:[x|equal(x,8)]],[],[],[ SLOT_arg/LITERAL/x ]> nine || (NUM NUM:'nine') || <x,l1,e,[l1:[x|equal(x,9)]],[],[],[ SLOT_arg/LITERAL/x ]> ten || (NUM NUM:'ten') || <x,l1,e,[l1:[x|equal(x,10)]],[],[],[ SLOT_arg/LITERAL/x ]> - + eleven || (NUM NUM:'eleven') || <x,l1,e,[l1:[x|equal(x,11)]],[],[],[ SLOT_arg/LITERAL/x ]> + twelve || (NUM NUM:'twelve') || <x,l1,e,[l1:[x|equal(x,12)]],[],[],[ SLOT_arg/LITERAL/x ]> + thirteen || (NUM NUM:'thirteen') || <x,l1,e,[l1:[x|equal(x,13)]],[],[],[ SLOT_arg/LITERAL/x ]> + fourteen || (NUM NUM:'fourteen') || <x,l1,e,[l1:[x|equal(x,14)]],[],[],[ SLOT_arg/LITERAL/x ]> + fifteen || (NUM NUM:'fifteen') || <x,l1,e,[l1:[x|equal(x,15)]],[],[],[ SLOT_arg/LITERAL/x ]> + sixteen || (NUM NUM:'sixteen') || <x,l1,e,[l1:[x|equal(x,16)]],[],[],[ SLOT_arg/LITERAL/x ]> + seventeen || (NUM NUM:'seventeen') || <x,l1,e,[l1:[x|equal(x,17)]],[],[],[ SLOT_arg/LITERAL/x ]> + eighteen || (NUM NUM:'eighteen') || <x,l1,e,[l1:[x|equal(x,18)]],[],[],[ SLOT_arg/LITERAL/x ]> + nineteen || (NUM NUM:'nineteen') || <x,l1,e,[l1:[x|equal(x,19)]],[],[],[ SLOT_arg/LITERAL/x ]> + twenty || (NUM NUM:'twenty') || <x,l1,e,[l1:[x|equal(x,20)]],[],[],[ SLOT_arg/LITERAL/x ]> Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-14 15:30:01 UTC (rev 3741) @@ -10,7 +10,7 @@ for less than . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> from . to . pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> + with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> square meters || (DP N:'square' N:'meters') || <x,l1,<<e,t>,t>>, [l1:[ | SLOT_size(x,y) ]], [],[],[SLOT_size/DATATYPEPROPERTY/size ]> // MONTHS @@ -26,4 +26,4 @@ september || (DP DP:'september') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,9) ]], [],[],[]> october || (DP DP:'october') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,10) ]], [],[],[]> november || (DP DP:'november') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,11) ]], [],[],[]> - december || (DP DP:'december') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,12) ]], [],[],[]> \ No newline at end of file + december || (DP DP:'december') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,12) ]], [],[],[]> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |