You can subscribe to this list here.
| 2007 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug (120) | Sep (36) | Oct (116) | Nov (17) | Dec (44) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2008 | Jan (143) | Feb (192) | Mar (74) | Apr (84) | May (105) | Jun (64) | Jul (49) | Aug (120) | Sep (159) | Oct (156) | Nov (51) | Dec (28) |
| 2009 | Jan (17) | Feb (55) | Mar (33) | Apr (57) | May (54) | Jun (28) | Jul (6) | Aug (16) | Sep (38) | Oct (30) | Nov (26) | Dec (52) |
| 2010 | Jan (7) | Feb (91) | Mar (65) | Apr (2) | May (14) | Jun (25) | Jul (38) | Aug (48) | Sep (80) | Oct (70) | Nov (75) | Dec (77) |
| 2011 | Jan (68) | Feb (53) | Mar (51) | Apr (35) | May (65) | Jun (101) | Jul (29) | Aug (230) | Sep (95) | Oct (49) | Nov (110) | Dec (63) |
| 2012 | Jan (41) | Feb (42) | Mar (25) | Apr (46) | May (51) | Jun (44) | Jul (45) | Aug (29) | Sep (12) | Oct (9) | Nov (17) | Dec (2) |
| 2013 | Jan (12) | Feb (14) | Mar (7) | Apr (16) | May (54) | Jun (27) | Jul (11) | Aug (5) | Sep (85) | Oct (27) | Nov (37) | Dec (32) |
| 2014 | Jan (8) | Feb (29) | Mar (5) | Apr (3) | May (22) | Jun (3) | Jul (4) | Aug (3) | Sep | Oct | Nov | Dec |
From: <chr...@us...> - 2012-06-18 07:44:34
|
Revision: 3758 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3758&view=rev Author: christinaunger Date: 2012-06-18 07:44:24 +0000 (Mon, 18 Jun 2012) Log Message: ----------- [tbsl] . -> .+ Modified Paths: -------------- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-18 07:37:27 UTC (rev 3757) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-18 07:44:24 UTC (rev 3758) @@ -11,15 +11,15 @@ in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> - for . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for more than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for less than . 
pounds || (NP NP* (PP P:'for' DET:'less' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for .+ pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for more than .+ pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than .+ pounds || (NP NP* (PP P:'for' DET:'less' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - cheaper than . pounds || (NP NP* (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - below . pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - from . to . pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - between . and . 
pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + cheaper than .+ pounds || (NP NP* (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + below .+ pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + from .+ to .+ pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> square meters || (DP N:'square' N:'meters') || <x,l1,<<e,t>,t>>, [l1:[ | SLOT_size(x,y) ]], [],[],[SLOT_size/DATATYPEPROPERTY/size ]> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-18 07:37:36
|
Revision: 3757 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3757&view=rev Author: christinaunger Date: 2012-06-18 07:37:27 +0000 (Mon, 18 Jun 2012) Log Message: ----------- [tbsl] "Oh look... is it a stockbroker? Is it a quantity Surveyor? Is it a church warden? NO! It's Bicycle Repair Man!" Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-18 07:16:33 UTC (rev 3756) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-18 07:37:27 UTC (rev 3757) @@ -474,7 +474,7 @@ } } if (takeit) { - regex += cond.getPredicate().replace("SLOT","") + " "; + regex += cond.getPredicate().replace("SLOT_","") + " "; used.add(cond); } else { @@ -488,6 +488,7 @@ c.getArguments().add(new DiscourseReferent("'"+regex.trim()+"'")); c.setPredicate("regex"); } + else {} // TODO should not happen! 
for (Slot s : slots) { if (s.getWords().contains(var)) { s.getWords().remove(var); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-18 07:16:33 UTC (rev 3756) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-18 07:37:27 UTC (rev 3757) @@ -223,7 +223,7 @@ while (m.find()) { // if (!m.group(4).startsWith("NNP")) { if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); - condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN"); + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN "); // } } m = adjnprepPattern.matcher(condensedstring); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java 2012-06-18 07:16:33 UTC (rev 3756) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java 2012-06-18 07:37:27 UTC (rev 3757) @@ -33,7 +33,7 @@ case B: return "BOUND(" + a + ")"; case EQ: - return a + " == " + b; + return a + " = " + b; case GT: return a + " > " + b; case GTEQ: Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-18 07:16:33 UTC (rev 3756) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-18 07:37:27 UTC (rev 3757) @@ -68,7 +68,7 @@ many || (DP DET:'many' NP[noun]) || <y, l1, <<e,t>,t>, [ l1:[ | l2:[ y | ] MANY y l3:[|] ] ], [ 
(l4,y,noun,<e,t>) ], [ l4=l2 ],[]> the || (DP DET:'the' NP[noun]) || <x, l1, <<e,t>,t>, [ l1:[x|] ], [ (l2,x,noun,<e,t>) ], [ l2=l1 ],[]> at least || (DP DET:'at' DET:'least' NUM[num] NP[noun]) || <y,l1,<<e,t>,t>,[l1:[ y,j |count(a,y,j), greaterorequal(j,x) ]],[(l2,y,noun,<e,t>),(l3,x,num,e)],[ l1=l2, l2=l3 ],[]> - at most || (DP DET:'at' DET:'most' NUM[num] NP[noun]) || <y,l1,<<e,t>,t>,[l1:[ y,j | count(a,y,j), smallerorequal(j,x) ]],[(l2,y,noun,<e,t>),(l3,x,num,e)],[ l1=l2, l2=l3 ],[]> + at most || (DP DET:'at' DET:'most' NUM[num] NP[noun]) || <y,l1,<<e,t>,t>,[l1:[ y,j | count(a,y,j), lessorequal(j,x) ]],[(l2,y,noun,<e,t>),(l3,x,num,e)],[ l1=l2, l2=l3 ],[]> exactly || (DP DET:'exactly' NUM[num] NP[noun]) || <y,l1,<<e,t>,t>,[l1:[ y,j | count(y,j), equals(j,x) ]],[(l2,y,noun,<e,t>),(l3,x,num,e)],[ l1=l2, l2=l3 ],[]> other || (NP ADJ:'other' NP*) || <x,l1,<e,t>,[ l1:[ | ] ], [],[],[]> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-18 07:16:43
|
Revision: 3756 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3756&view=rev Author: lorenz_b Date: 2012-06-18 07:16:33 +0000 (Mon, 18 Jun 2012) Log Message: ----------- Added isEmpty() method. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/common/index/IndexResultSet.java Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/IndexResultSet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/IndexResultSet.java 2012-06-17 20:10:02 UTC (rev 3755) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/IndexResultSet.java 2012-06-18 07:16:33 UTC (rev 3756) @@ -35,5 +35,9 @@ public String toString() { return items.toString(); } + + public boolean isEmpty(){ + return items.isEmpty(); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-17 20:10:09
|
Revision: 3755 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3755&view=rev Author: lorenz_b Date: 2012-06-17 20:10:02 +0000 (Sun, 17 Jun 2012) Log Message: ----------- Started faster implementation of template generation process. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-16 11:21:55 UTC (rev 3754) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-17 20:10:02 UTC (rev 3755) @@ -9,6 +9,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -86,7 +87,8 @@ private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); - private Monitor mon = MonitorFactory.getTimeMonitor("tbsl"); + private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); + private Monitor sparqlMon = MonitorFactory.getTimeMonitor("sparql"); private boolean useRemoteEndpointValidation; private boolean stopIfQueryResultNotEmpty; @@ -129,6 +131,11 @@ private String currentlyExecutedQuery; + private boolean dropZeroScoredQueries = true; + private boolean useManualMappingsIfExistOnly = true; + + private boolean multiThreaded = true; + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, 
classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } @@ -280,16 +287,23 @@ template2Queries = new HashMap<Template, Collection<? extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); currentlyExecutedQuery = null; + +// templateMon.reset(); +// sparqlMon.reset(); } public void learnSPARQLQueries() throws NoTemplateFoundException{ reset(); //generate SPARQL query templates logger.info("Generating SPARQL query templates..."); - mon.start(); - templates = templateGenerator.buildTemplates(question); - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); + templateMon.start(); + if(multiThreaded){ + templates = templateGenerator.buildTemplatesMultiThreaded(question); + } else { + templates = templateGenerator.buildTemplates(question); + } + templateMon.stop(); + logger.info("Done in " + templateMon.getLastValue() + "ms."); if(templates.isEmpty()){ throw new NoTemplateFoundException(); } @@ -672,8 +686,16 @@ } } - for(WeightedQuery q : queries){ - q.setScore(q.getScore()/t.getSlots().size()); + for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { + WeightedQuery wQ = iterator.next(); + if(dropZeroScoredQueries){ + if(wQ.getScore() == 0){ + iterator.remove(); + } + } else { + wQ.setScore(wQ.getScore()/t.getSlots().size()); + } + } allQueries.addAll(queries); List<Query> qList = new ArrayList<Query>(); @@ -752,7 +774,7 @@ private List<String> getLemmatizedWords(List<String> words){ logger.info("Pruning word list " + words + "..."); - mon.start(); +// mon.start(); List<String> pruned = new ArrayList<String>(); for(String word : words){ //currently only stem single words @@ -766,8 +788,8 @@ } } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); +// mon.stop(); +// logger.info("Done in " + mon.getLastValue() + "ms."); logger.info("Pruned list: " + pruned); return pruned; } @@ -806,46 +828,51 @@ private void validate(List<String> queries, SPARQL_QueryType queryType){ logger.info("Testing 
candidate SPARQL queries on remote endpoint..."); - mon.start(); + sparqlMon.start(); if(queryType == SPARQL_QueryType.SELECT){ for(String query : queries){ - logger.info("Testing query:\n" + query); - com.hp.hpl.jena.query.Query q = QueryFactory.create(query, Syntax.syntaxARQ); - q.setLimit(1); - ResultSet rs = executeSelect(q.toString());//executeSelect(query); - - List<String> results = new ArrayList<String>(); - QuerySolution qs; - String projectionVar; - while(rs.hasNext()){ - qs = rs.next(); - projectionVar = qs.varNames().next(); - if(qs.get(projectionVar).isLiteral()){ - results.add(qs.get(projectionVar).asLiteral().getLexicalForm()); - } else if(qs.get(projectionVar).isURIResource()){ - results.add(qs.get(projectionVar).asResource().getURI()); + List<String> results; + try { + logger.info("Testing query:\n" + query); + com.hp.hpl.jena.query.Query q = QueryFactory.create(query, Syntax.syntaxARQ); + q.setLimit(1); + ResultSet rs = executeSelect(q.toString());//executeSelect(query); + + results = new ArrayList<String>(); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + if(qs.get(projectionVar).isLiteral()){ + results.add(qs.get(projectionVar).asLiteral().getLexicalForm()); + } else if(qs.get(projectionVar).isURIResource()){ + results.add(qs.get(projectionVar).asResource().getURI()); + } + } - - } - if(!results.isEmpty()){ - try{ - int cnt = Integer.parseInt(results.get(0)); - if(cnt > 0){learnedPos = queries.indexOf(query); + if(!results.isEmpty()){ + try{ + int cnt = Integer.parseInt(results.get(0)); + if(cnt > 0){learnedPos = queries.indexOf(query); + learnedSPARQLQueries.put(query, results); + if(stopIfQueryResultNotEmpty){ + return; + } + } + } catch (NumberFormatException e){ learnedSPARQLQueries.put(query, results); + learnedPos = queries.indexOf(query); if(stopIfQueryResultNotEmpty){ return; } } - } catch (NumberFormatException e){ - learnedSPARQLQueries.put(query, results); 
- learnedPos = queries.indexOf(query); - if(stopIfQueryResultNotEmpty){ - return; - } + logger.info("Result: " + results); } - + } catch (Exception e) { + e.printStackTrace(); } - logger.info("Result: " + results); + } } else if(queryType == SPARQL_QueryType.ASK){ for(String query : queries){ @@ -862,8 +889,8 @@ } } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); + sparqlMon.stop(); + logger.info("Done in " + sparqlMon.getLastValue() + "ms."); } private boolean executeAskQuery(String query){ @@ -976,15 +1003,19 @@ rs.add(mappingIndex.getResourcesWithScores(word)); } } - if(slot.getSlotType() == SlotType.RESOURCE){ - rs.add(index.getResourcesWithScores(word, 50)); - } else { - if(slot.getSlotType() == SlotType.CLASS){ - word = PlingStemmer.stem(word); + //use the non manual indexes only if mapping based resultset is not empty and option is set + if(!useManualMappingsIfExistOnly || rs.isEmpty()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs.add(index.getResourcesWithScores(word, 50)); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); + } + rs.add(index.getResourcesWithScores(word, 20)); } - rs.add(index.getResourcesWithScores(word, 20)); } + for(IndexResultItem item : rs.getItems()){ double similarity = Similarity.getSimilarity(word, item.getLabel()); // //get the labels of the redirects and compute the highest similarity @@ -1012,6 +1043,10 @@ } + public String getTaggedInput(){ + return templateGenerator.getTaggedInput(); + } + private boolean isDatatypeProperty(String uri){ Boolean isDatatypeProperty = null; if(mappingIndex != null){ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-06-16 11:21:55 UTC (rev 3754) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-06-17 20:10:02 UTC (rev 3755) @@ -2,6 +2,8 @@ import java.util.ArrayList; import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.ltag.data.TreeNode; @@ -9,6 +11,9 @@ import org.dllearner.algorithm.tbsl.sem.dudes.reader.ParseException; import org.dllearner.algorithm.tbsl.sem.util.Pair; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; + public class Parser { private static final Logger logger = Logger.getLogger(Parser.class); @@ -91,7 +96,54 @@ return derivationTrees; } + + public List<DerivationTree> parseMultiThreaded(String taggeduserinput, LTAGLexicon grammar) { + derivationTrees.clear(); + derivedTrees.clear(); + dudes.clear(); + temporaryEntries.clear(); + + if (!VERBOSE) GrammarFilter.VERBOSE = false; + + /* + * create a local copy of the grammar with own treeIDs. This is + * necessary since if an input string contains the same token multiple + * times, a tree for each token is added. Both trees need to have + * different treeIDs for the parser to work correctly. 
+ */ + parseGrammar = GrammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); + + String inputNoTags = ""; + for (String s : taggeduserinput.split(" ")) { + inputNoTags += s.substring(0,s.indexOf("/")) + " "; + } + + this.input = ("# ".concat(inputNoTags.replaceAll("'","").trim())).split(" "); + int n = this.input.length; + + + if (SHOW_GRAMMAR) { + logger.trace(parseGrammar); + } + if (SHOW_LEXICAL_COVERAGE) { + logger.trace("# OF TREES FOUND: " + parseGrammar.size()); + logger.trace("# OF INPUT TOKENS: " + n); + } + + List<Pair<TreeNode, Short>> initTrees = parseGrammar.getInitTrees(); + + internalParseMultiThreaded(initTrees, n); + + if (USE_DPS_AS_INITTREES && derivationTrees.isEmpty()) { + internalParseMultiThreaded(parseGrammar.getDPInitTrees(), n); + } + + if (VERBOSE) logger.trace("Constructed " + derivationTrees.size() + " derivation trees.\n"); + return derivationTrees; + + } + private void internalParse(List<Pair<TreeNode, Short>> initTrees, int n) { TREELOOP: for (int k = 0; k < initTrees.size(); k++) { @@ -211,6 +263,23 @@ } } + + private void internalParseMultiThreaded(List<Pair<TreeNode, Short>> initTrees, int n) { + Monitor parseMon = MonitorFactory.getTimeMonitor("parse"); + ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + parseMon.start(); + for (int k = 0; k < initTrees.size(); k++) { + Pair<TreeNode, Short> pair = initTrees.get(k); + TreeNode tree = pair.getFirst(); + short tid = pair.getSecond(); + threadPool.execute(new TreeProcessor(tree, tid, n)); + } + threadPool.shutdown(); + while(!threadPool.isTerminated()){ + + } + parseMon.start(); + } private List<List<ParseState>> makeStateSets() { @@ -298,7 +367,21 @@ return derivedTrees; } + + public List<TreeNode> buildDerivedTreesMultiThreaded(LTAGLexicon G) throws ParseException { + ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + for (DerivationTree dTree : 
derivationTrees) { + threadPool.execute(new DerivationTreeProcessor(dTree, G)); + } + threadPool.shutdown(); + while(!threadPool.isTerminated()){ + + } + return derivedTrees; + + } + /** * get List of Dudes parallely constructed by Parser.buildDerivedTrees() */ @@ -342,5 +425,169 @@ grammar.clear(temps); } + + class TreeProcessor implements Runnable{ + + private TreeNode tree; + private short tid; + private int n; + + public TreeProcessor(TreeNode tree, short tid, int n) { + this.tree = tree; + this.tid = tid; + this.n = n; + } + + @Override + public void run() { + List<List<ParseState>> stateSets = makeStateSets(); + + ParseState start = new ParseState(tree, tid); + // the inittree is already used + start.getUsedTrees().add(tid); + + stateSets.get(0).add(start); + boolean skip = false; + for (int i = 0; i < n; i++) { + + if (i > 0) { + stateSets.get(i - 1).clear(); + if (USE_LESS_MEMORY) { + System.gc(); + } + } + + List<ParseState> localStateSet = new ArrayList<ParseState>( + stateSets.get(i)); + List<ParseState> localStateSet2 = new ArrayList<ParseState>(); + + stateSets.get(i).clear(); + + while (localStateSet.size() > 0) { + + for (int j = 0; j < localStateSet.size(); j++) { + ParseState state = localStateSet.get(j); + + List<ParseState> newStates; + + OPLOOP: for (Class<?> c : operations) { + + try { + + ParserOperation op = (ParserOperation) c + .newInstance(); + + newStates = (op.go(i, state, input, + parseGrammar)); + + if (!newStates.isEmpty()) { + + for (ParseState newState : newStates) { + if (newState.i.equals(i)) { + localStateSet2.add(newState); + } + + if ((op instanceof Scanner) + || (newState.isEndState() && newState.i == n - 1)) { + stateSets.get(newState.i).add( + newState); + } + } + + op = null; + break OPLOOP; + + } + + } catch (InstantiationException e) { + e.printStackTrace(); + + } catch (IllegalAccessException e) { + e.printStackTrace(); + + } + + } + + } + + localStateSet = null; + localStateSet = new 
ArrayList<ParseState>(localStateSet2); + localStateSet2 = new ArrayList<ParseState>(); + + } + + localStateSet = null; + localStateSet2 = null; + + /* + * if the parser could not scan the next input token this run / + * initial tree is rejected + */ + if (i < n - 1 && stateSets.get(i + 1).isEmpty()) { + + stateSets.get(i).clear(); + skip = true; + break; + + } + + } + + if(!skip){ + for (ParseState state : stateSets.get(n - 1)) { + + +// if (state.isEndState() && state.t.equals(tree)) { + if (state.isEndState()) { + if (state.t.equals(tree)) { + + derivationTrees.add(createDerivationTree(state, + parseGrammar)); + + } + } + + } + } + + + } + + } + + class DerivationTreeProcessor implements Runnable{ + + private DerivationTree dTree; + private LTAGLexicon lexicon; + + public DerivationTreeProcessor(DerivationTree dTree, LTAGLexicon lexicon) { + this.dTree = dTree; + this.lexicon = lexicon; + } + + @Override + public void run() { + try { + List<Pair<TreeNode, Dude>> pairs = DerivedTree.build(dTree, parseGrammar, lexicon, CONSTRUCT_SEMANTICS); + + for (Pair<TreeNode,Dude> pair : pairs) { + TreeNode x = pair.getFirst(); + Dude dude = pair.getSecond(); + + if (!derivedTrees.contains(x) || !dudes.contains(dude)) { + derivedTrees.add(x); + dudes.add(dude); + } + + } + } catch (ParseException e) { + e.printStackTrace(); + } + + } + + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-16 11:21:55 UTC (rev 3754) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-17 20:10:02 UTC (rev 3755) @@ -6,11 +6,12 @@ import java.util.Hashtable; import java.util.List; import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import net.didion.jwnl.data.POS; import 
org.apache.log4j.Logger; - import org.dllearner.algorithm.tbsl.converter.DRS2SPARQL_Converter; import org.dllearner.algorithm.tbsl.converter.DUDE2UDRS_Converter; import org.dllearner.algorithm.tbsl.ltag.parser.LTAGLexicon; @@ -58,6 +59,11 @@ boolean USE_WORDNET = true; boolean VERBOSE = true; + private String taggedInput; + + private Set<Template> templates; + private Set<DRS> drses; + public Templator() { this(new StanfordPartOfSpeechTagger(), new WordNet()); } @@ -141,7 +147,7 @@ tagged = s; s = extractSentence(tagged); } - + taggedInput = tagged; String newtagged; if (USE_NER) { newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); @@ -244,9 +250,6 @@ newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); } } - if(newwords.isEmpty()){ - - } if (newwords.isEmpty()) { newwords.add(slot.getWords().get(0)); } @@ -271,11 +274,171 @@ if (clearAgain) { p.clear(g,p.getTemps()); } - System.gc(); +// System.gc(); return templates; } + public Set<Template> buildTemplatesMultiThreaded(String s) { + + boolean clearAgain = true; + + String tagged; + if (UNTAGGED_INPUT) { + s = pp.normalize(s); + tagged = tagger.tag(s); + if (VERBOSE) logger.trace("Tagged input: " + tagged); + } + else { + tagged = s; + s = extractSentence(tagged); + } + taggedInput = tagged; + String newtagged; + if (USE_NER) { + newtagged = pp.condenseNominals(pp.findNEs(tagged,s)); + } + else newtagged = pp.condenseNominals(tagged); + + newtagged = pp.condense(newtagged); + if (VERBOSE) logger.trace("Preprocessed: " + newtagged); + + p.parseMultiThreaded(newtagged,g); + + if (p.getDerivationTrees().isEmpty()) { + p.clear(g,p.getTemps()); + clearAgain = false; + if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); + } + else { + try { + p.buildDerivedTreesMultiThreaded(g); + } catch (ParseException e) { + if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); + } + } + + // build pairs <String,POStag> from tagged + Hashtable<String,String> 
postable = new Hashtable<String,String>(); + for (String st : newtagged.split(" ")) { + postable.put(st.substring(0,st.indexOf("/")).toLowerCase(),st.substring(st.indexOf("/")+1));; + } + // + + drses = new HashSet<DRS>(); + templates = new HashSet<Template>(); + +// ExecutorService threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); +// for (Dude dude : p.getDudes()) { +// threadPool.execute(new DudeProcessor(dude, postable)); +// } +// threadPool.shutdown(); +// while(!threadPool.isTerminated()){} + + for (Dude dude : p.getDudes()) { + + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { +// // DEBUG + if (VERBOSE) { + System.out.println(dude); + System.out.println(drs); + for (Slot sl : slots) { + System.out.println(sl.toString()); + } + } +// // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + temp = temp.checkandrefine(); + if (temp == null) { + continue; + } + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if (wordnetpos != null && 
!slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + + } + } + + + if (clearAgain) { + p.clear(g,p.getTemps()); + } +// System.gc(); + + return templates; + } + + public String getTaggedInput() { + return taggedInput; + } + private List<String> getLemmatizedWords(List<String> words){ List<String> stemmed = new ArrayList<String>(); for(String word : words){ @@ -330,5 +493,107 @@ return taggedSentence; } + + class DudeProcessor implements Runnable{ + + private Dude dude; + private Hashtable<String,String> postable; + + public DudeProcessor(Dude dude, Hashtable<String,String> postable) { + this.dude = dude; + this.postable = postable; + } + @Override + public void run() { + UDRS udrs = d2u.convert(dude); + if (udrs != null) { + + for (DRS drs : udrs.initResolve()) { + + List<Slot> slots = new ArrayList<Slot>(); + slots.addAll(dude.getSlots()); + d2s.setSlots(slots); + d2s.redundantEqualRenaming(drs); + + if (!containsModuloRenaming(drses,drs)) { +// // DEBUG + if (VERBOSE) { + System.out.println(dude); + System.out.println(drs); + for (Slot sl : slots) { + System.out.println(sl.toString()); + } + } +// // + drses.add(drs); + + try { + Template temp = d2s.convert(drs,slots); + temp = temp.checkandrefine(); + if (temp == null) { + continue; + } + + if (USE_WORDNET) { // find WordNet synonyms + List<String> newwords; + String word; + String pos; + for (Slot slot : temp.getSlots()) { + if (!slot.getWords().isEmpty()) { + + word = 
slot.getWords().get(0); + pos = postable.get(word.toLowerCase().replace(" ","_")); + + POS wordnetpos = null; + if (pos != null) { + if (equalsOneOf(pos,noun)) { + wordnetpos = POS.NOUN; + } + else if (equalsOneOf(pos,adjective)) { + wordnetpos = POS.ADJECTIVE; + } + else if (equalsOneOf(pos,verb)) { + wordnetpos = POS.VERB; + } + } + + List<String> strings = new ArrayList<String>(); + if (wordnetpos != null && wordnetpos.equals(POS.ADJECTIVE)) { + strings = wordnet.getAttributes(word); + } + + newwords = new ArrayList<String>(); + newwords.addAll(slot.getWords()); + newwords.addAll(strings); + + if (wordnetpos != null && !slot.getSlotType().equals(SlotType.RESOURCE)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,getLemmatizedWord(word))); + for (String att : getLemmatizedWords(strings)) { + newwords.addAll(wordnet.getBestSynonyms(wordnetpos,att)); + } + } + if (newwords.isEmpty()) { + newwords.add(slot.getWords().get(0)); + } + List<String> newwordslist = new ArrayList<String>(); + newwordslist.addAll(newwords); + slot.setWords(newwordslist); + } + } + } + // + + templates.add(temp); + } catch (java.lang.ClassCastException e) { + continue; + } + if (ONE_SCOPE_ONLY) { break; } + } + } + } + } + + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-16 11:22:01
|
Revision: 3754 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3754&view=rev Author: christinaunger Date: 2012-06-16 11:21:55 +0000 (Sat, 16 Jun 2012) Log Message: ----------- [tbsl] repaired resource slot problem Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-16 10:15:00 UTC (rev 3753) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-16 11:21:55 UTC (rev 3754) @@ -117,12 +117,12 @@ // System.out.println("--- referent: " + referent.toString()); // DEBUG for (Slot s : slots) { // System.out.println("--- slot: " + s.toString()); // DEBUG - if (s.getAnchor().equals(referent.getValue()) || s.getAnchor().equals(referent.toString())) { + if (s.getAnchor().equals(referent.getValue()) || s.getAnchor().equals(referent.toString())) { // System.out.println(" fits!"); // DEBUG - template.addSlot(s); - break; - } - } + template.addSlot(s); + break; + } + } } for (Slot s : slots) if (s.getAnchor().equals("SLOT_arg")) template.addSlot(s); @@ -410,16 +410,22 @@ if (firstIsURI || firstIsInt) { drs.replaceEqualRef(secondArg, firstArg, true); for (Slot s : slots) { - if (s.getAnchor().equals(secondArg.getValue())) { - s.setAnchor(firstArg.getValue()); - } + if (s.getAnchor().equals(secondArg.getValue())) + s.setAnchor(firstArg.getValue()); + if (s.getWords().contains(secondArg.getValue())) { + s.getWords().remove(secondArg.getValue()); + 
s.getWords().add(firstArg.getValue()); + } } } else if (secondIsURI || secondIsInt) { drs.replaceEqualRef(firstArg, secondArg, true); for (Slot s : slots) { - if (s.getAnchor().equals(firstArg.getValue())) { - s.setAnchor(secondArg.getValue()); - } + if (s.getAnchor().equals(firstArg.getValue())) + s.setAnchor(secondArg.getValue()); + if (s.getWords().contains(firstArg.getValue())) { + s.getWords().remove(firstArg.getValue()); + s.getWords().add(secondArg.getValue()); + } } } else { drs.replaceEqualRef(firstArg, secondArg, false); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-16 10:15:00 UTC (rev 3753) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-16 11:21:55 UTC (rev 3754) @@ -96,8 +96,8 @@ Pattern whenPattern = Pattern.compile("\\A(when/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); Pattern wherePattern = Pattern.compile("\\A(where/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); Pattern adjsPattern = Pattern.compile("((\\w+)/JJ.(\\w+)/JJ)"); - Pattern adjnnpPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NNP(S)?)"); - Pattern adjnounPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NN(S)?)"); +// Pattern adjnnpPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NNP(S)?)"); + Pattern adjnounPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NN(S)?(\\s|\\z))"); Pattern adjnprepPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NPREP)"); m = compAdjPattern.matcher(condensedstring); @@ -219,15 +219,12 @@ if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJ"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJ"); } - m = adjnnpPattern.matcher(condensedstring); - while (m.find()) { - 
if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NNP"); - condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/NNP"); - } m = adjnounPattern.matcher(condensedstring); while (m.find()) { +// if (!m.group(4).startsWith("NNP")) { if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN"); +// } } m = adjnprepPattern.matcher(condensedstring); while (m.find()) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-06-16 10:15:00 UTC (rev 3753) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-06-16 11:21:55 UTC (rev 3754) @@ -33,11 +33,14 @@ public Template checkandrefine() { Set<Slot> argslots = new HashSet<Slot>(); - for (Slot slot : slots) if (slot.anchor.equals("SLOT_arg")) { + for (Slot slot : slots) if (slot.anchor.equals("SLOT_arg")) argslots.add(slot); + + for (Slot slot : argslots) { String var = slot.words.get(0); // check for clash (v=LITERAL && v=RESOURCE) - for (Slot s : argslots) { - if (s.words.get(0).equals(slot.words.get(0)) && !s.type.equals(slot.type)) + for (Slot s : slots) { + if ((s.words.get(0).equals(slot.words.get(0)) || s.anchor.equals(slot.words.get(0))) + && !s.type.equals(slot.type)) return null; } // check for clash (v=LITERAL && p(...,v)=OBJECTPROPERTY) || (v=RESOURCE && p(...,v)=DATATYPEPROPERTY) @@ -53,7 +56,6 @@ } } } - argslots.add(slot); } for (Slot slot : slots) { @@ -99,7 +101,14 @@ } // finally remove all argslots - slots.removeAll(argslots); +// slots.removeAll(argslots); // removes all (argslots + resource slots) +// for (Slot sl : argslots) slots.remove(sl); // removes resource 
slots + List<Slot> keep = new ArrayList<Slot>(); + for (Slot s : slots) { + if (!s.anchor.startsWith("SLOT_arg")) + keep.add(s); + } + slots = keep; return this; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-16 10:15:06
|
Revision: 3753 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3753&view=rev Author: christinaunger Date: 2012-06-16 10:15:00 +0000 (Sat, 16 Jun 2012) Log Message: ----------- [tbsl] repaired count problem Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS_Quantifier.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-16 04:19:36 UTC (rev 3752) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-16 10:15:00 UTC (rev 3753) @@ -251,7 +251,17 @@ if (predicate.equals("count")) { // COUNT(?x) AS ?c - query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, simple.getArguments().get(1).getValue())); + if (simple.getArguments().get(1).getValue().matches("[0-9]+")) { + String fresh = "v"+createFresh(); + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, fresh)); + query.addFilter(new SPARQL_Filter( + new 
SPARQL_Pair( + new SPARQL_Term(fresh,false), + new SPARQL_Term(simple.getArguments().get(1).getValue(),literal), + SPARQL_PairType.EQ))); + } else { + query.addSelTerm(new SPARQL_Term(simple.getArguments().get(0).getValue(), SPARQL_Aggregate.COUNT, simple.getArguments().get(1).getValue())); + } return query; } else if (predicate.equals("sum")) { query.addSelTerm(new SPARQL_Term(simple.getArguments().get(1).getValue(), SPARQL_Aggregate.SUM)); @@ -313,25 +323,9 @@ query.addFilter(new SPARQL_Filter( new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),false), - new SPARQL_Term(simple.getArguments().get(1).getValue(),true), + new SPARQL_Term(simple.getArguments().get(1).getValue().replace("_","").trim(),true), SPARQL_PairType.REGEX))); } - else if (predicate.equals("regextoken")) { - String arg = simple.getArguments().get(1).getValue(); - String regex = null; - for (Slot slot : slots) { - if (slot.getAnchor().equals(arg)) { - if (!slot.getWords().isEmpty()) regex = slot.getWords().get(0); - } - } - if (regex != null) { - query.addFilter(new SPARQL_Filter( - new SPARQL_Pair( - new SPARQL_Term(simple.getArguments().get(0).getValue(),false), - new SPARQL_Term("'"+regex+"'",false), - SPARQL_PairType.REGEX))); - } - } else { if (arity == 1) { SPARQL_Term term = new SPARQL_Term(simple.getArguments().get(0).getValue(),false);term.setIsVariable(true); @@ -409,8 +403,8 @@ secondArg = c.getArguments().get(1); firstIsURI = isUri(firstArg.getValue()); secondIsURI = isUri(secondArg.getValue()); - firstIsInt = firstArg.getValue().matches("[0..9]+"); - secondIsInt = secondArg.getValue().matches("[0..9]+"); + firstIsInt = firstArg.getValue().matches("(\\?)?[0..9]+"); + secondIsInt = secondArg.getValue().matches("(\\?)?[0..9]+"); drs.removeCondition(c); if (firstIsURI || firstIsInt) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS_Quantifier.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS_Quantifier.java 2012-06-16 04:19:36 UTC (rev 3752) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/DRS_Quantifier.java 2012-06-16 10:15:00 UTC (rev 3753) @@ -1,5 +1,5 @@ package org.dllearner.algorithm.tbsl.sem.drs; public enum DRS_Quantifier { - SOME, EVERY, MOST, MANY, HOWMANY, THEMOST, FEW, NO, THELEAST + SOME, EVERY, MOST, MANY, HOWMANY, THEMOST, FEW, NO, THELEAST, OR } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.java 2012-06-16 04:19:36 UTC (rev 3752) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.java 2012-06-16 10:15:00 UTC (rev 3753) @@ -194,6 +194,7 @@ if (quantifier.toString().equals("MANY")) {drs.setQuantifier(DRS_Quantifier.MANY);} if (quantifier.toString().equals("HOWMANY")) {drs.setQuantifier(DRS_Quantifier.HOWMANY);} if (quantifier.toString().equals("NO")) {drs.setQuantifier(DRS_Quantifier.NO);} + if (quantifier.toString().equals("OR")) {drs.setQuantifier(DRS_Quantifier.OR);} {if (true) return drs;} } else { @@ -523,6 +524,11 @@ return false; } + private boolean jj_3_22() { + if (jj_scan_token(A)) return true; + return false; + } + private boolean jj_3_12() { if (jj_scan_token(HOWMANY)) return true; return false; @@ -544,8 +550,13 @@ return false; } - private boolean jj_3_22() { - if (jj_scan_token(A)) return true; + private boolean jj_3R_3() { + Token xsp; + xsp = jj_scanpos; + if (jj_3_22()) { + jj_scanpos = xsp; + if (jj_3_23()) return true; + } return false; } @@ -560,16 +571,6 @@ return false; } - private boolean jj_3R_3() { - Token xsp; - xsp = jj_scanpos; - if (jj_3_22()) { - jj_scanpos = xsp; - if (jj_3_23()) return true; - } - return false; - } - private boolean jj_3_5() { if 
(jj_scan_token(5)) return true; if (jj_3R_2()) return true; @@ -581,11 +582,6 @@ return false; } - private boolean jj_3_14() { - if (jj_scan_token(NO)) return true; - return false; - } - private boolean jj_3R_4() { Token xsp; xsp = jj_scanpos; @@ -596,6 +592,11 @@ return false; } + private boolean jj_3_14() { + if (jj_scan_token(NO)) return true; + return false; + } + private boolean jj_3R_1() { if (jj_3R_4()) return true; Token xsp; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.jj 2012-06-16 04:19:36 UTC (rev 3752) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/drs/reader/DRSParser.jj 2012-06-16 10:15:00 UTC (rev 3753) @@ -191,6 +191,7 @@ if (quantifier.toString().equals("MANY")) {drs.setQuantifier(DRS_Quantifier.MANY);} if (quantifier.toString().equals("HOWMANY")) {drs.setQuantifier(DRS_Quantifier.HOWMANY);} if (quantifier.toString().equals("NO")) {drs.setQuantifier(DRS_Quantifier.NO);} + if (quantifier.toString().equals("OR")) {drs.setQuantifier(DRS_Quantifier.OR);} return drs; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-06-16 04:19:36 UTC (rev 3752) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-06-16 10:15:00 UTC (rev 3753) @@ -412,6 +412,7 @@ if (quantifier.toString().equals("MANY")) {drs.setQuantifier(DRS_Quantifier.MANY);} if (quantifier.toString().equals("HOWMANY")) {drs.setQuantifier(DRS_Quantifier.HOWMANY);} if (quantifier.toString().equals("NO")) {drs.setQuantifier(DRS_Quantifier.NO);} + if 
(quantifier.toString().equals("OR")) {drs.setQuantifier(DRS_Quantifier.OR);} {if (true) return drs;} } else { @@ -830,14 +831,6 @@ finally { jj_save(39, xla); } } - private boolean jj_3R_13() { - if (jj_3R_7()) return true; - Token xsp; - xsp = jj_scanpos; - if (jj_3_36()) jj_scanpos = xsp; - return false; - } - private boolean jj_3_12() { if (jj_scan_token(2)) return true; if (jj_3R_1()) return true; @@ -891,17 +884,17 @@ return false; } + private boolean jj_3_38() { + if (jj_scan_token(C)) return true; + return false; + } + private boolean jj_3R_11() { if (jj_scan_token(LABEL)) return true; if (jj_scan_token(11)) return true; return false; } - private boolean jj_3_38() { - if (jj_scan_token(C)) return true; - return false; - } - private boolean jj_3R_16() { if (jj_3R_7()) return true; if (jj_scan_token(14)) return true; @@ -923,19 +916,13 @@ return false; } - private boolean jj_3_23() { - if (jj_scan_token(MOST)) return true; - return false; - } - private boolean jj_3_37() { if (jj_scan_token(A)) return true; return false; } - private boolean jj_3_30() { - if (jj_3R_10()) return true; - if (jj_scan_token(6)) return true; + private boolean jj_3_23() { + if (jj_scan_token(MOST)) return true; return false; } @@ -949,6 +936,12 @@ return false; } + private boolean jj_3_30() { + if (jj_3R_10()) return true; + if (jj_scan_token(6)) return true; + return false; + } + private boolean jj_3_34() { if (jj_scan_token(2)) return true; if (jj_3R_4()) return true; @@ -993,17 +986,17 @@ return false; } + private boolean jj_3R_4() { + if (jj_3R_16()) return true; + return false; + } + private boolean jj_3_10() { if (jj_scan_token(9)) return true; if (jj_scan_token(LABEL)) return true; return false; } - private boolean jj_3R_4() { - if (jj_3R_16()) return true; - return false; - } - private boolean jj_3_28() { if (jj_scan_token(NO)) return true; return false; @@ -1175,6 +1168,14 @@ return false; } + private boolean jj_3R_13() { + if (jj_3R_7()) return true; + Token xsp; + xsp = 
jj_scanpos; + if (jj_3_36()) jj_scanpos = xsp; + return false; + } + private boolean jj_3R_8() { if (jj_3R_10()) return true; Token xsp; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-06-16 04:19:36 UTC (rev 3752) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-06-16 10:15:00 UTC (rev 3753) @@ -382,6 +382,7 @@ if (quantifier.toString().equals("MANY")) {drs.setQuantifier(DRS_Quantifier.MANY);} if (quantifier.toString().equals("HOWMANY")) {drs.setQuantifier(DRS_Quantifier.HOWMANY);} if (quantifier.toString().equals("NO")) {drs.setQuantifier(DRS_Quantifier.NO);} + if (quantifier.toString().equals("OR")) {drs.setQuantifier(DRS_Quantifier.OR);} return drs; @@ -504,8 +505,8 @@ //TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":","0"-"9"])+>} //TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} -TOKEN: {<A: (["a"-"z","A"-"Z","0"-"9","'","_"])+>} -TOKEN: {<B: (["a"-"z","A"-"Z","_",".","#","0"-"9"])+":"(["a"-"z","A"-"Z","_",".","#","0"-"9"])+>} // oder eher: SLOT_([...])+ +TOKEN: {<A: (["a"-"z","A"-"Z","0"-"9","'","_","-"])+>} +TOKEN: {<B: (["a"-"z","A"-"Z","_",".","#","0"-"9"])+":"(["a"-"z","A"-"Z","_",".","#","0"-"9","-"])+>} // oder eher: SLOT_([...])+ TOKEN: {<C: ["?","!"](["a"-"z","A"-"Z","0"-"9"])+>} Token dr() : { Token t; }{ (t=<A> | t=<C>) { return t; } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java 2012-06-16 04:19:36 UTC (rev 3752) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_ParserTokenManager.java 2012-06-16 10:15:00 UTC (rev 3753) @@ -403,7 +403,7 @@ jjCheckNAddTwoStates(3, 4); else if (curChar == 58) jjCheckNAdd(5); - if ((0x3ff008000000000L & l) != 0L) + if ((0x3ff208000000000L & l) != 0L) { if (kind > 26) kind = 26; @@ -417,7 +417,7 @@ jjCheckNAdd(9); else if ((0x8000000200000000L & l) != 0L) jjCheckNAdd(7); - if ((0x3ff008000000000L & l) != 0L) + if ((0x3ff208000000000L & l) != 0L) { if (kind > 26) kind = 26; @@ -432,7 +432,7 @@ jjstateSet[jjnewStateCnt++] = 1; break; case 2: - if ((0x3ff008000000000L & l) == 0L) + if ((0x3ff208000000000L & l) == 0L) break; if (kind > 26) kind = 26; @@ -443,7 +443,7 @@ jjCheckNAddTwoStates(3, 4); break; case 5: - if ((0x3ff400800000000L & l) == 0L) + if ((0x3ff600800000000L & l) == 0L) break; if (kind > 27) kind = 27; Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-16 04:19:36 UTC (rev 3752) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-16 10:15:00 UTC (rev 3753) @@ -142,8 +142,6 @@ and || (S S* CC:'and' S[s]) || <x,l1,t,[l1:[|]],[(l2,y,s,t)],[l1=l2],[]> and || (DP DP* CC:'and' DP[dp]) || <x,l1,<<e,t>,t>,[l1:[|]],[(l2,y,dp,<<e,t>,t>)],[l1=l2],[]> and || (NP NP* CC:'and' NP[np]) || <x,l1,<e,t>,[l1:[|x=y]],[(l2,y,np,<e,t>)],[l1=l2],[]> - and || (VP VP* CC:'and' VP[vp]) || - - and || (ADJ ADJ* CC:'and' ADJ[adj]) || - but || (S S* CC:'but' S[s]) || <x,l1,t,[l1:[|]],[(l2,y,s,t)],[l1=l2],[]> but || (DP DP* CC:'but' DP[dp]) || <x,l1,<<e,t>,t>,[l1:[|]],[(l2,y,dp,<<e,t>,t>)],[l1=l2],[]> @@ -151,11 +149,10 @@ as well as || (NP NP* CC:'as' CC:'well' CC:'as' NP[np]) || <x,l1,<e,t>,[l1:[|]],[(l2,y,np,<e,t>)],[l1=l2],[]> - or || (S S* CC:'or' S[2]) || - - or || (DP DP* CC:'or' DP[2]) || - - or || (NP NP* CC:'or' NP[2]) || - - 
or || (VP VP* CC:'or' VP[2]) || - - or || (ADJ ADJ* CC:'or' ADJ[2]) || - + or || (S S* CC:'or' S[cc]) || <y, l2, <<e,t>,t>, [ l1:[ | l2:[ | ] OR y l3:[|] ] ], [ (l4,y,cc,<e,t>) ], [ l4=l3 ],[]> + or || (DP DP* CC:'or' DP[cc]) || <y, l2, <<e,t>,t>, [ l1:[ | l2:[ | ] OR y l3:[|] ] ], [ (l4,y,cc,<e,t>) ], [ l4=l3 ],[]> + or || (NP NP* CC:'or' NP[cc]) || <y, l2, <<e,t>,t>, [ l1:[ | l2:[ | ] OR y l3:[|] ] ], [ (l4,y,cc,<e,t>) ], [ l4=l3 ],[]> + or || (ADJ ADJ* CC:'or' ADJ[cc]) || - // EXISTENTIAL Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-16 04:19:36 UTC (rev 3752) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-16 10:15:00 UTC (rev 3753) @@ -4,10 +4,10 @@ close to || (NP NP* (PP P:'close' P:'to' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_closeto(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_closeto/OBJECTPROPERTY/near ]> near || (NP NP* (PP P:'near' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> nearby || (NP NP* (PP P:'nearby' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_nearby(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_nearby/OBJECTPROPERTY/near ]> - within walking distance from || (NP NP* (PP P:'within' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> - within minutes of || (NP NP* (PP P:'within' (NP N:'minutes' P:'of' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> - in walking distance from || (NP NP* (PP P:'in' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> - 
at walking distance from || (NP NP* (PP P:'at' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> + within walking distance from || (NP NP* (PP P:'within' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> + within minutes of || (NP NP* (PP P:'within' (NP N:'minutes' P:'of' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> + in walking distance from || (NP NP* (PP P:'in' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> + at walking distance from || (NP NP* (PP P:'at' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2012-06-16 04:19:42
|
Revision: 3752 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3752&view=rev Author: jenslehmann Date: 2012-06-16 04:19:36 +0000 (Sat, 16 Jun 2012) Log Message: ----------- removed subjective question Modified Paths: -------------- trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt Modified: trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt 2012-06-15 16:39:44 UTC (rev 3751) +++ trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt 2012-06-16 04:19:36 UTC (rev 3752) @@ -76,10 +76,8 @@ question: freehold houses with 2 bedrooms and a living room in Banbury -question: houses in Oxford city centre with at most 2 bedrooms +question: houses in Oxford city centre with at most 2 bedrooms -question: houses with garage within minutes of Oxford schools and in a quiet road - question: victorian town houses in north Oxford question: terrace houses with west facing garden @@ -116,9 +114,9 @@ question: show me the retirement houses for sale near Oxford. -question: give me a flat which is far from the river. +question: give me a flat with at least two bathrooms for less than 1000 pounds. -question: show me a flat which is close to three bars. +question: show me a flat which is close to 3 bars. question: what is the average price of furnished 1 bedroom apartments in Heddington? @@ -176,7 +174,7 @@ question: house with a courtyard -question: house with fireplace and a garden that is not communal +question: house with a fireplace and a garden that is not communal question: house with parking but not Victorian This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-15 16:39:51
|
Revision: 3751 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3751&view=rev Author: christinaunger Date: 2012-06-15 16:39:44 +0000 (Fri, 15 Jun 2012) Log Message: ----------- [tbsl] final changes Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-15 16:39:44 UTC (rev 3751) @@ -54,7 +54,7 @@ replacements.addAll(Arrays.asList(genericReplacements)); replacements.addAll(Arrays.asList(hackReplacements)); - s = s.replaceAll(",\\s"," and "); + s = s.replaceAll(",\\s"," and ").replaceAll(" and but "," but "); for (int i = 0; i < replacements.size(); i += 2) { s = s.replaceAll(replacements.get(i), replacements.get(i + 1)); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-15 16:39:44 UTC 
(rev 3751) @@ -287,6 +287,10 @@ "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + symslot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; + String[] adjEntry = {token, + "(NP ADJ:'"+token+"' NP*)", + "<x,l1,<e,t>,[ l1:[ | SLOT_description(x,y), regex(y,'"+ token +"')] ],[],[],[ SLOT_description/DATATYPEPROPERTY/description ]>"}; + result.add(adjEntry); result.add(vEntry); } else if (pos.equals("VB")) { Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-15 16:39:44 UTC (rev 3751) @@ -45,6 +45,7 @@ show || (S (VP V:'show' DP[object])) || <x,l1,t,[ l1:[ ?x | x=y ] ],[ (l2,y,object,<<e,t>,t>) ],[ l1=l2 ],[]> list me || (S (VP V:'list' (DP N:'me') DP[object])) || <x,l1,t,[ l1:[ ?x | x=y ] ],[ (l2,y,object,<<e,t>,t>) ],[ l1=l2 ],[]> list || (S (VP V:'list' DP[object])) || <x,l1,t,[ l1:[ ?x | x=y ] ],[ (l2,y,object,<<e,t>,t>) ],[ l1=l2 ],[]> + find || (S (VP V:'find' DP[object])) || <x,l1,t,[ l1:[ ?x | x=y ] ],[ (l2,y,object,<<e,t>,t>) ],[ l1=l2 ],[]> // DETERMINER @@ -143,6 +144,10 @@ and || (NP NP* CC:'and' NP[np]) || <x,l1,<e,t>,[l1:[|x=y]],[(l2,y,np,<e,t>)],[l1=l2],[]> and || (VP VP* CC:'and' VP[vp]) || - and || (ADJ ADJ* CC:'and' ADJ[adj]) || - + + but || (S S* CC:'but' S[s]) || <x,l1,t,[l1:[|]],[(l2,y,s,t)],[l1=l2],[]> + but || (DP DP* CC:'but' DP[dp]) || <x,l1,<<e,t>,t>,[l1:[|]],[(l2,y,dp,<<e,t>,t>)],[l1=l2],[]> + but || (NP NP* CC:'but' NP[np]) || 
<x,l1,<e,t>,[l1:[|x=y]],[(l2,y,np,<e,t>)],[l1=l2],[]> as well as || (NP NP* CC:'as' CC:'well' CC:'as' NP[np]) || <x,l1,<e,t>,[l1:[|]],[(l2,y,np,<e,t>)],[l1=l2],[]> Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-15 16:39:44 UTC (rev 3751) @@ -13,10 +13,13 @@ for . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for more than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for less than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than . pounds || (NP NP* (PP P:'for' DET:'less' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + cheaper than . 
pounds || (NP NP* (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + below . pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> from . to . pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + between . and . pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> square meters || (DP N:'square' N:'meters') || <x,l1,<<e,t>,t>>, [l1:[ | SLOT_size(x,y) ]], [],[],[SLOT_size/DATATYPEPROPERTY/size ]> Modified: trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt 2012-06-15 16:39:44 UTC (rev 3751) @@ -38,7 +38,7 @@ question: Give me all houses with gated entrance or in a gated community -question: Give me all recently refurbished houses with lift (access) +question: Give me all recently refurbished houses with lift question: Give me all houses 
with balcony and vaulted ceiling @@ -46,17 +46,17 @@ question: Give me all houses in a corner or end-of-terrace plot -question: Give me all houses with 3 bedrooms, but cheaper than 150000 GBP +question: Give me all houses with 3 bedrooms, but cheaper than 150000 pounds question: Give me all houses with at least 2 reception rooms and a garden -question: Give me all houses in/with a courtyard +question: Give me all houses with a courtyard question: Give me all houses with fireplace and a garden that is not communal question: Give me all houses with parking but not Victorian -//question: Give me all houses with 2-5 bedrooms, a balcony, and a price less than 450000 GBP +//question: Give me all houses with 2 to 5 bedrooms, a balcony, and a price less than 450000 pounds question: Give me all houses with 3 bathrooms, but not en-suite @@ -64,9 +64,9 @@ question: Give me all houses with double glazing and central heating -question: Give me all houses with “no upper chain” (means immediately available) +question: Give me all houses immediately available -question: Give me all houses listed Grade I or Grade II (means “under protection for age …”) +question: Give me all houses listed Grade I or Grade II question: Give me all houses that have recently replumbed @@ -76,4 +76,4 @@ question: Give me all houses in Banbury Road -question: Give me all houses in the area of (John) Radcliffe hospital \ No newline at end of file +question: Give me all houses in the area of John Radcliffe hospital \ No newline at end of file Modified: trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt 2012-06-15 16:39:44 UTC (rev 3751) @@ -58,7 +58,7 @@ question: houses close to The King's Arms pub -question: houses with garden large at least 
2 acres +question: houses with a garden that is at least 2 acres big question: houses with many reception rooms @@ -68,13 +68,13 @@ question: houses with double glazed windows -question: 2 bedroom houses near oxford train station +question: 2 bedroom houses near Oxford train station question: 4 bedroom detached houses in Oxford -question: studio apartments in summertown, Oxford +question: studio apartments in Summertown, Oxford -question: freehold houses with 2 bedrooms and a living room in banbury +question: freehold houses with 2 bedrooms and a living room in Banbury question: houses in Oxford city centre with at most 2 bedrooms @@ -94,7 +94,7 @@ question: flats near supermarket -question: give me flats in central Oxford with at least one bedroom below 1000 GBP a month? +question: give me flats in central Oxford with at least one bedroom below 1000 pounds a month? question: find 2 bedroom flats in walking distance from the computer science departement! @@ -122,7 +122,7 @@ question: what is the average price of furnished 1 bedroom apartments in Heddington? -question: how many flats are offered in central Oxford below 1000 GBP a month? +question: how many flats are offered in central Oxford below 1000 pounds a month? 
question: houses close to The Ashmolean Museum, with more than one bedroom @@ -162,7 +162,7 @@ question: houses with gated entrance or in a gated community -question: recently refurbished houses with lift (access) +question: recently refurbished houses with lift question: house with balcony and vaulted ceiling @@ -170,7 +170,7 @@ question: house in a corner or end-of-terrace plot -question: house with 3 bedrooms, but cheaper than 150000 GBP +question: house with 3 bedrooms, but cheaper than 150000 pounds question: house with at least 2 reception rooms and a garden @@ -186,11 +186,11 @@ question: house with double glazing and central heating -question: house with “no upper chain” (means immediately available) +question: house with immediately available -question: house listed Grade I or Grade II (means “under protection for age …”) +question: house listed Grade I or Grade II -question: house that has recently replumbed +question: house that has been recently replumbed question: house in Witney or Wolvercote @@ -198,4 +198,4 @@ question: house in Banbury Road -question: house in the area of (John) Radcliffe hospital +question: house in the area of John Radcliffe hospital Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-15 16:39:44 UTC (rev 3751) @@ -63,9 +63,9 @@ while((question = in.readLine()) != null){ question = question.replace("question:", "").trim(); if(question.isEmpty() || question.startsWith("//")) continue; - if(!question.toLowerCase().contains("Give me all") && Character.isLowerCase(question.charAt(0))){ - question = "Give me all " + question; - } + //if(!question.toLowerCase().contains("Give me all") && 
Character.isLowerCase(question.charAt(0))){ + // question = "Give me all " + question; + //} System.out.println("########################################################"); questionCnt++; System.out.println(question); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-15 14:46:56
|
Revision: 3750 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3750&view=rev Author: christinaunger Date: 2012-06-15 14:46:46 +0000 (Fri, 15 Jun 2012) Log Message: ----------- [tbsl] a few more lexical extensions Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-15 14:20:33 UTC (rev 3749) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-15 14:46:46 UTC (rev 3750) @@ -470,7 +470,7 @@ for (DiscourseReferent dr : cond.getArguments()) { if (dr.getValue().equals(var)) { takeit = true; - for (String f : forbidden) if (f.equals(cond.getPredicate())) takeit= false; + for (String f : forbidden) if (cond.getPredicate().contains(f)) takeit= false; } } if (takeit) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-15 14:20:33 UTC (rev 3749) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-15 14:46:46 UTC (rev 3750) @@ -96,6 +96,7 @@ Pattern whenPattern = Pattern.compile("\\A(when/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); Pattern wherePattern = 
Pattern.compile("\\A(where/WRB\\s(.+\\s)(\\w+)/((V[A-Z]+)|(PASS[A-Z]+)))"); Pattern adjsPattern = Pattern.compile("((\\w+)/JJ.(\\w+)/JJ)"); + Pattern adjnnpPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NNP(S)?)"); Pattern adjnounPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NN(S)?)"); Pattern adjnprepPattern = Pattern.compile("((\\w+)(?<!many)/JJ.(\\w+)/NPREP)"); @@ -217,7 +218,12 @@ while (m.find()) { if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJ"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJ"); - } + } + m = adjnnpPattern.matcher(condensedstring); + while (m.find()) { + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NNP"); + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/NNP"); + } m = adjnounPattern.matcher(condensedstring); while (m.find()) { if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-15 14:20:33 UTC (rev 3749) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-15 14:46:46 UTC (rev 3750) @@ -79,14 +79,16 @@ } // treetoken String treetoken = "N:'" + token.toLowerCase() + "'"; - if (token.trim().contains(" ")) { - String[] tokenParts = token.split(" "); - treetoken = ""; - for (String t : tokenParts) { - treetoken += " N:'" + t.toLowerCase() + "'"; - } - treetoken = treetoken.trim(); + String[] tokenParts; + if (token.trim().contains(" ")) tokenParts = token.split(" "); +// else if (token.contains("_")) tokenParts = token.split("_"); + else { tokenParts = new String[1]; tokenParts[0] = token; } + 
treetoken = ""; + for (String t : tokenParts) { + treetoken += " N:'" + t.toLowerCase() + "'"; } + treetoken = treetoken.trim(); + // if (pos.equals("NN") || pos.equals("NNS")) { /* DP */ @@ -165,7 +167,7 @@ else if(pos.equals("JJNN") && token.contains("_")) { String[] tokens = token.split("_"); String nntoken = tokens[tokens.length-1]; - String jjtoken = token.replace("SLOT_","").replace(nntoken,"").replace("_"," ").trim(); + String jjtoken = token.replace("SLOT_","").replace(nntoken,"").trim(); String slotfluent = "SLOT_" + tokenfluent + "/CLASS/" + token; String slotnn = "SLOT_" + nntoken + "/CLASS/" + nntoken; // String semantics = "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotfluent + "]> " + @@ -302,6 +304,10 @@ String[] wasGerEntry = {token, "(S DP[comp] (VP V:'was' DP[subject] V:'" + token + "'))", "<y,l1,t,[ l1:[ | SLOT_" + token + "(y,z) ] ],[(l2,y,comp,<<e,t>,t>), (l3,z,subject,<<e,t>,t>) ],[ l2=l1, l3=l1 ],[" + symslot + "]>"}; + String[] adjEntry = {token, + "(NP ADJ:'"+token+"' NP*)", + "<x,l1,<e,t>,[ l1:[ | SLOT_description(x,y), regex(y,'"+ token +"')] ],[],[],[ SLOT_description/DATATYPEPROPERTY/description ]>"}; + result.add(adjEntry); result.add(gerEntry); result.add(wasGerEntry); } @@ -331,6 +337,8 @@ result.add(whereEntry1); result.add(whereEntry2); } + + // TODO relative clauses missing! 
} /* ADJECTIVES */ Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-15 14:20:33 UTC (rev 3749) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-15 14:46:46 UTC (rev 3750) @@ -2,12 +2,18 @@ // PREPOSITIONS close to || (NP NP* (PP P:'close' P:'to' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_closeto(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_closeto/OBJECTPROPERTY/near ]> + near || (NP NP* (PP P:'near' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> + nearby || (NP NP* (PP P:'nearby' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_nearby(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_nearby/OBJECTPROPERTY/near ]> + within walking distance from || (NP NP* (PP P:'within' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> + within minutes of || (NP NP* (PP P:'within' (NP N:'minutes' P:'of' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> + in walking distance from || (NP NP* (PP P:'in' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> + at walking distance from || (NP NP* (PP P:'at' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near ]> in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ 
l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> for . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for more than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for less than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> from . to . pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jen...@us...> - 2012-06-15 14:20:44
|
Revision: 3749 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3749&view=rev Author: jenslehmann Date: 2012-06-15 14:20:33 +0000 (Fri, 15 Jun 2012) Log Message: ----------- updated eval file Modified Paths: -------------- trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt Modified: trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt 2012-06-15 14:14:08 UTC (rev 3748) +++ trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt 2012-06-15 14:20:33 UTC (rev 3749) @@ -16,12 +16,8 @@ question: houses with more than 1 reception room -question: houses in Didcot furnished to a high standard - question: houses with conservatory room and less than 900000 pounds -question: detached bungalows in Oxfordshire - question: houses in Old Marston question: family houses with more than 2 bathrooms and more than 4 bedrooms @@ -72,11 +68,9 @@ question: houses with double glazed windows -question: houses far from city centre - question: 2 bedroom houses near oxford train station -question: 4 bedroom detached houses in oxford +question: 4 bedroom detached houses in Oxford question: studio apartments in summertown, Oxford @@ -96,54 +90,112 @@ question: houses available from June 15th. -question: houses on rawlinson road +question: houses on Rawlinson Road question: flats near supermarket -question: flats with bill included +question: give me flats in central Oxford with at least one bedroom below 1000 GBP a month? +question: find 2 bedroom flats in walking distance from the computer science departement! +question: find a property with 2 bedrooms close to some park. -question: give me all flats in central Oxford with at least one bedroom below 1000 GBP a month. +question: find a flat in the area around Cowley Road with 2 bedrooms. 
-question: Give me all 2 bedroom flats in walking distance from the computer science departement! +question: find a furnished place with one bedroom close to the Radcliffe Camera. -//question: Give me all houses with 3 bedrooms or more, close to the train station with good shopping opportunities. +question: find an unfurnished house with at least 2 bedrooms in Summertown. -question: find a property with 2 bedrooms close to some park. +question: find a furnished flat with one bedroom for smokers. -//question: Give me all flats at roughly 1300 GBP the month, equally close to the computer science department and Christ Church College. +question: find a flat with parking in central Oxford. -question: Give me all flats in the area around Cowley Road with 2 bedrooms. +question: find a representative house in Summertown. -question: Give me all furnished places with one bedroom close to the Radcliffe Camera. +question: which places are offered close to the train station? -question: Give me all unfurnished houses with at least 2 bedrooms in Summertown. +question: show me the retirement houses for sale near Oxford. -question: Give me all furnished flats with one bedroom for smokers. +question: give me a flat which is far from the river. -question: Give me all flats with parking in central Oxford. +question: show me a flat which is close to three bars. -question: Give me all cheap places in Cowley. +question: what is the average price of furnished 1 bedroom apartments in Heddington? -question: Give me all representative houses in Summertown. +question: how many flats are offered in central Oxford below 1000 GBP a month? -//question: find a property for sale, with 2 bedrooms, parking, close to shops. 
+question: houses close to The Ashmolean Museum, with more than one bedroom -//question: Give me all flats with a garden, one bedroom, in walking distance to the computer science departement +question: houses in Botley Road -question: Give me all places offered close to the train station? +question: houses in Littlemore -question: Give me all retirement houses for sale near Oxford. +question: houses that cost between 600000 and 800000 pounds -question: Give me all houses that I can BBQ. +question: houses with 3 bedrooms in Florence Park Road -question: Give me all flats which are far from the river. +question: houses with front garden and rear garden -question: Give me all flats which are close to three bars. +question: houses in Oxford, close to Railway Station -question: What is the average price of furnished 1 bedroom apartments in Heddington? +question: houses with ample parking -question: How many flats are offered in central Oxford below 1000 GBP a month? +question: house with electric central heating system -question: Which area in Oxford has the cheapest offers close to the Keble college [or some other landmark]? 
+question: houses with three en-suites near Oxford + +question: houses with countryside views + +question: houses with farmland views + +question: houses nearby River Thames + +question: houses having one utility room or cloakroom + +question: houses in Oxfordshire with fireplaces + +question: houses with open plan kitchen near Oxford + +question: houses with walled garden near Oxford + +question: houses with river views + +question: houses with gated entrance or in a gated community + +question: recently refurbished houses with lift (access) + +question: house with balcony and vaulted ceiling + +question: house that is a maisonette and on Gloucester Green + +question: house in a corner or end-of-terrace plot + +question: house with 3 bedrooms, but cheaper than 150000 GBP + +question: house with at least 2 reception rooms and a garden + +question: house with a courtyard + +question: house with fireplace and a garden that is not communal + +question: house with parking but not Victorian + +question: house with 3 bathrooms, but not en-suite + +question: house in a retirement complex + +question: house with double glazing and central heating + +question: house with “no upper chain” (means immediately available) + +question: house listed Grade I or Grade II (means “under protection for age …”) + +question: house that has recently replumbed + +question: house in Witney or Wolvercote + +question: house with more than 2 bedrooms, but not in Marston + +question: house in Banbury Road + +question: house in the area of (John) Radcliffe hospital This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-15 14:14:14
|
Revision: 3748
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3748&view=rev
Author:   lorenz_b
Date:     2012-06-15 14:14:08 +0000 (Fri, 15 Jun 2012)
Log Message:
-----------
Set name of eval queries file.

Modified Paths:
--------------
    trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java

Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java
===================================================================
--- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-15 14:01:20 UTC (rev 3747)
+++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-15 14:14:08 UTC (rev 3748)
@@ -52,7 +52,7 @@
  int learnedQuestions = 0;
  Map<String, String> question2QueryMap = new HashMap<String, String>();

- BufferedReader in = new BufferedReader(new FileReader(new File(QUERIES_FILE2)));
+ BufferedReader in = new BufferedReader(new FileReader(new File(QUERIES_FILE1)));
  BufferedWriter out = new BufferedWriter(new FileWriter(new File(LOG_DIRECTORY + LOG_FILE), false));

  int questionCnt = 0;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
From: <lor...@us...> - 2012-06-15 14:01:27
|
Revision: 3747 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3747&view=rev Author: lorenz_b Date: 2012-06-15 14:01:20 +0000 (Fri, 15 Jun 2012) Log Message: ----------- Updates for oxford eval. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Added Paths: ----------- trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt Removed Paths: ------------- trunk/components-ext/src/main/resources/tbsl/evaluation.txt Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-14 18:41:30 UTC (rev 3746) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-15 14:01:20 UTC (rev 3747) @@ -123,10 +123,12 @@ private Map<Template, Collection<? extends Query>> template2Queries; private Map<Slot, List<String>> slot2URI; - private Set<WeightedQuery> generatedQueries; + private SortedSet<WeightedQuery> generatedQueries; private SPARQLReasoner reasoner; + private String currentlyExecutedQuery; + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } @@ -277,6 +279,7 @@ learnedSPARQLQueries = new HashMap<String, Object>(); template2Queries = new HashMap<Template, Collection<? 
extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); + currentlyExecutedQuery = null; } public void learnSPARQLQueries() throws NoTemplateFoundException{ @@ -317,12 +320,12 @@ } - public Set<WeightedQuery> getGeneratedQueries() { + public SortedSet<WeightedQuery> getGeneratedQueries() { return generatedQueries; } - public Set<WeightedQuery> getGeneratedQueries(int topN) { - Set<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); + public SortedSet<WeightedQuery> getGeneratedQueries(int topN) { + SortedSet<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); int max = Math.min(topN, generatedQueries.size()); for(WeightedQuery wQ : generatedQueries){ topNQueries.add(wQ); @@ -382,7 +385,7 @@ } - private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ logger.info("Generating SPARQL query candidates..."); Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @@ -399,7 +402,7 @@ slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); - Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); + SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); Set<Allocation> allocations; @@ -864,6 +867,7 @@ } private boolean executeAskQuery(String query){ + currentlyExecutedQuery = query; QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); for(String uri : endpoint.getDefaultGraphURIs()){ qe.addDefaultGraph(uri); @@ -873,6 +877,7 @@ } private ResultSet executeSelect(String query) { + currentlyExecutedQuery = query; ResultSet rs; if (model == null) { if (cache == null) { @@ -890,6 +895,9 @@ return rs; } + public String getCurrentlyExecutedQuery() { + return currentlyExecutedQuery; + } public int getLearnedPosition() { if(learnedPos >= 0){ Deleted: trunk/components-ext/src/main/resources/tbsl/evaluation.txt 
=================================================================== --- trunk/components-ext/src/main/resources/tbsl/evaluation.txt 2012-06-14 18:41:30 UTC (rev 3746) +++ trunk/components-ext/src/main/resources/tbsl/evaluation.txt 2012-06-15 14:01:20 UTC (rev 3747) @@ -1,149 +0,0 @@ -question: houses in Headington - -question: houses in Abingdon with more than 2 bedrooms - -question: houses with garden in Wheatley - -question: detached houses in Oxford - -question: Victorian houses in Oxfordshire - -question: Edwardian house in Oxfordshire for less than 1000000 - -question: houses with double garage - -question: houses with large garden and equipped kitchen - -question: houses with more than 1 reception room - -question: houses in Didcot furnished to a high standard - -question: houses with conservatory room and less than 900000 pounds - -question: detached bungalows in Oxfordshire - -question: houses in Old Marston - -question: family houses with more than 2 bathrooms and more than 4 bedrooms - -question: houses close to Iffley Sport Centre - -question: houses in Oxford close to the train station - -question: houses in Summertown for less than 400000 pounds - -question: two floors houses in East Oxford - -question: brand new houses in Oxford for less than 500000 pounds - -question: houses close to Brookes University - -question: houses in Jericho area - -question: house close to Headington hospitals - -question: modern houses with gas central heating - -question: houses with electric heating - -question: houses less than 500000 within area OX16 - -question: houses close to an Italian restaurant - -question: houses at walking distance from a pharmacy - -question: houses at walking distance from Tesco or Sainsburys shops - -question: houses nearby Sheldonian Theatre - -question: houses with underfloor heating - -question: houses with wood floor - -question: houses close to The King's Arms pub - -question: houses with garden large at least 2 acres - -question: houses 
with many reception rooms - -question: houses built around 1950 - -question: houses with balcony - -question: houses with double glazed windows - -question: houses far from city centre - -question: 2 bedroom houses near oxford train station - -question: 4 bedroom detached houses in oxford - -question: studio apartments in summertown, Oxford - -question: freehold houses with 2 bedrooms and a living room in banbury - -question: houses in Oxford city centre with at most 2 bedrooms - -question: houses with garage within minutes of Oxford schools and in a quiet road - -question: victorian town houses in north Oxford - -question: terrace houses with west facing garden - -question: modernised end terrace houses with private parking - -question: three bedroom houses with open fireplace - -question: houses available from June 15th. - -question: houses on rawlinson road - -question: flats near supermarket - -question: flats with bill included - - - -question: give me flats in central Oxford with at least one bedroom below 1000 GBP a month? - -question: Give me all 2 bedroom flats in walking distance from the computer science departement! - -question: Give me all houses with 3 bedrooms or more, close to the train station with good shopping opportunities. - -question: find a property with 2 bedrooms close to some park. - -question: Give me all flats at roughly 1300 GBP the month, equally close to the computer science department and Christ Church College. - -question: Give me all flats in the area around Cowley Road with 2 bedrooms. - -question: Give me all furnished places with one bedroom close to the Radcliffe Camera. - -question: Give me all unfurnished houses with at least 2 bedrooms in Summertown. - -question: Give me all furnished flats with one bedroom for smokers. - -question: Give me all flats with parking in central Oxford. - -question: Give me all cheap places in Cowley. - -question: Give me all representative houses in Summertown. 
- -question: find a property for sale, with 2 bedrooms, parking, close to shops. - -question: Give me all flats with a garden, one bedroom, in walking distance to the computer science departement - -question: Give me all places offered close to the train station? - -question: Give me all retirement houses for sale near Oxford. - -question: Give me all houses that I can BBQ. - -question: Give me all flats which are far from the river. - -question: Give me all flats which are close to three bars. - -question: What is the average price of furnished 1 bedroom apartments in Heddington? - -question: How many flats are offered in central Oxford below 1000 GBP a month? - -question: Which area in Oxford has the cheapest offers close to the Keble college [or some other landmark]? Added: trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt 2012-06-15 14:01:20 UTC (rev 3747) @@ -0,0 +1,79 @@ +question: Give me all houses close to The Ashmolean Museum, with more than one bedroom + +question: Give me all houses in Botley Road + +question: Give me all houses in Littlemore + +question: Give me all houses that cost between 600000 and 800000 pounds + +question: Give me all houses with 3 bedrooms in Florence Park Road + +question: Give me all houses with front garden and rear garden + +//question: Give me all houses with 2 bathrooms, 5 bedrooms and at least 1 reception + +question: Give me all houses in Oxford, close to Railway Station + +question: Give me all houses with ample parking + +question: Give me all house with electric central heating system + +question: Give me all houses with three en-suites near Oxford + +question: Give me all houses with countryside views + +question: Give me all houses with farmland 
views + +question: Give me all houses nearby River Thames + +question: Give me all houses having one utility room or cloakroom + +question: Give me all houses in Oxfordshire with fireplaces + +question: Give me all houses with open plan kitchen near Oxford + +question: Give me all houses with walled garden near Oxford + +question: Give me all houses with river views + +question: Give me all houses with gated entrance or in a gated community + +question: Give me all recently refurbished houses with lift (access) + +question: Give me all houses with balcony and vaulted ceiling + +question: Give me all houses that are a maisonette and on Gloucester Green + +question: Give me all houses in a corner or end-of-terrace plot + +question: Give me all houses with 3 bedrooms, but cheaper than 150000 GBP + +question: Give me all houses with at least 2 reception rooms and a garden + +question: Give me all houses in/with a courtyard + +question: Give me all houses with fireplace and a garden that is not communal + +question: Give me all houses with parking but not Victorian + +//question: Give me all houses with 2-5 bedrooms, a balcony, and a price less than 450000 GBP + +question: Give me all houses with 3 bathrooms, but not en-suite + +question: Give me all houses in a retirement complex + +question: Give me all houses with double glazing and central heating + +question: Give me all houses with “no upper chain” (means immediately available) + +question: Give me all houses listed Grade I or Grade II (means “under protection for age …”) + +question: Give me all houses that have recently replumbed + +question: Give me all houses in Witney or Wolvercote + +question: Give me all houses with more than 2 bedrooms, but not in Marston + +question: Give me all houses in Banbury Road + +question: Give me all houses in the area of (John) Radcliffe hospital \ No newline at end of file Copied: trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt (from rev 3746, 
trunk/components-ext/src/main/resources/tbsl/evaluation.txt) =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt 2012-06-15 14:01:20 UTC (rev 3747) @@ -0,0 +1,149 @@ +question: houses in Headington + +question: houses in Abingdon with more than 2 bedrooms + +question: houses with garden in Wheatley + +question: detached houses in Oxford + +question: Victorian houses in Oxfordshire + +question: Edwardian house in Oxfordshire for less than 1000000 + +question: houses with double garage + +question: houses with large garden and equipped kitchen + +question: houses with more than 1 reception room + +question: houses in Didcot furnished to a high standard + +question: houses with conservatory room and less than 900000 pounds + +question: detached bungalows in Oxfordshire + +question: houses in Old Marston + +question: family houses with more than 2 bathrooms and more than 4 bedrooms + +question: houses close to Iffley Sport Centre + +question: houses in Oxford close to the train station + +question: houses in Summertown for less than 400000 pounds + +question: two floors houses in East Oxford + +question: brand new houses in Oxford for less than 500000 pounds + +question: houses close to Brookes University + +question: houses in Jericho area + +question: house close to Headington hospitals + +question: modern houses with gas central heating + +question: houses with electric heating + +question: houses less than 500000 within area OX16 + +question: houses close to an Italian restaurant + +question: houses at walking distance from a pharmacy + +question: houses at walking distance from Tesco or Sainsburys shops + +question: houses nearby Sheldonian Theatre + +question: houses with underfloor heating + +question: houses with wood floor + +question: houses close to The King's Arms pub + +question: houses 
with garden large at least 2 acres + +question: houses with many reception rooms + +question: houses built around 1950 + +question: houses with balcony + +question: houses with double glazed windows + +question: houses far from city centre + +question: 2 bedroom houses near oxford train station + +question: 4 bedroom detached houses in oxford + +question: studio apartments in summertown, Oxford + +question: freehold houses with 2 bedrooms and a living room in banbury + +question: houses in Oxford city centre with at most 2 bedrooms + +question: houses with garage within minutes of Oxford schools and in a quiet road + +question: victorian town houses in north Oxford + +question: terrace houses with west facing garden + +question: modernised end terrace houses with private parking + +question: three bedroom houses with open fireplace + +question: houses available from June 15th. + +question: houses on rawlinson road + +question: flats near supermarket + +question: flats with bill included + + + +question: give me all flats in central Oxford with at least one bedroom below 1000 GBP a month. + +question: Give me all 2 bedroom flats in walking distance from the computer science departement! + +//question: Give me all houses with 3 bedrooms or more, close to the train station with good shopping opportunities. + +question: find a property with 2 bedrooms close to some park. + +//question: Give me all flats at roughly 1300 GBP the month, equally close to the computer science department and Christ Church College. + +question: Give me all flats in the area around Cowley Road with 2 bedrooms. + +question: Give me all furnished places with one bedroom close to the Radcliffe Camera. + +question: Give me all unfurnished houses with at least 2 bedrooms in Summertown. + +question: Give me all furnished flats with one bedroom for smokers. + +question: Give me all flats with parking in central Oxford. + +question: Give me all cheap places in Cowley. 
+ +question: Give me all representative houses in Summertown. + +//question: find a property for sale, with 2 bedrooms, parking, close to shops. + +//question: Give me all flats with a garden, one bedroom, in walking distance to the computer science departement + +question: Give me all places offered close to the train station? + +question: Give me all retirement houses for sale near Oxford. + +question: Give me all houses that I can BBQ. + +question: Give me all flats which are far from the river. + +question: Give me all flats which are close to three bars. + +question: What is the average price of furnished 1 bedroom apartments in Heddington? + +question: How many flats are offered in central Oxford below 1000 GBP a month? + +question: Which area in Oxford has the cheapest offers close to the Keble college [or some other landmark]? Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-14 18:41:30 UTC (rev 3746) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-15 14:01:20 UTC (rev 3747) @@ -10,6 +10,7 @@ import java.util.HashMap; import java.util.Map; +import org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SPARQLIndex; @@ -19,13 +20,20 @@ import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; +import com.hp.hpl.jena.query.QueryParseException; +import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; + public class OxfordEvaluation { - private static final String QUERIES_FILE = OxfordEvaluation.class.getClassLoader().getResource("tbsl/evaluation.txt").getPath(); + private static final String 
QUERIES_FILE1 = OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_eval_queries.txt").getPath(); + private static final String QUERIES_FILE2 = OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_eval_additional_queries.txt").getPath(); + private static final String LOG_DIRECTORY = "log/oxford/"; + private static final String LOG_FILE = "evaluation.txt"; public static void main(String[] args) throws Exception{ SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); ExtractionDBCache cache = new ExtractionDBCache("cache"); + new File(LOG_DIRECTORY).mkdirs(); SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); @@ -44,45 +52,69 @@ int learnedQuestions = 0; Map<String, String> question2QueryMap = new HashMap<String, String>(); - BufferedReader in = new BufferedReader(new FileReader(new File(QUERIES_FILE))); - BufferedWriter out = new BufferedWriter(new FileWriter(new File("log/oxford_eval.txt"))); + BufferedReader in = new BufferedReader(new FileReader(new File(QUERIES_FILE2))); + BufferedWriter out = new BufferedWriter(new FileWriter(new File(LOG_DIRECTORY + LOG_FILE), false)); - int questionNr = 0; + int questionCnt = 0; int errorCnt = 0; - int noQueryCnt = 0; + int noTemplateFoundCnt = 0; + int noQueryWithNonEmptyResultSetCnt = 0; String question = null; while((question = in.readLine()) != null){ question = question.replace("question:", "").trim(); - if(question.isEmpty()) continue; + if(question.isEmpty() || question.startsWith("//")) continue; if(!question.toLowerCase().contains("Give me all") && Character.isLowerCase(question.charAt(0))){ question = "Give me all " + question; } System.out.println("########################################################"); - questionNr++; + questionCnt++; System.out.println(question); try { + 
out.write("****************************************\n"); + out.write("QUESTION: " + question + "\n"); learner.setQuestion(question); learner.learnSPARQLQueries(); String learnedQuery = learner.getBestSPARQLQuery(); if(learnedQuery != null){ question2QueryMap.put(question, learnedQuery); learnedQuestions++; - out.write("****************************************\n" + question + "\n" + learnedQuery + "\n****************************************"); + out.write("ANSWER FOUND: YES\n"); + out.write(learnedQuery + "\n"); } else { - noQueryCnt++; - out.write("****************************************\n" + question + "\nNO QUERY WITH NON-EMPTY RESULTSET FOUND\n****************************************"); + noQueryWithNonEmptyResultSetCnt++; + out.write("ANSWER FOUND: NO\n"); + out.write("REASON: NO SPARQL QUERY WITH NON-EMPTY RESULTSET FOUND\n"); + out.write("SPARQL QUERY WITH HIGHEST SCORE TESTED:\n" + learner.getGeneratedQueries().first()); + } } catch (Exception e) { e.printStackTrace(); + out.write("ANSWER FOUND: NO\n"); + if(e instanceof NoTemplateFoundException){ + noTemplateFoundCnt++; + out.write("REASON: NO TEMPLATE FOUND"); + } else { + errorCnt++; + out.write("REASON: ERROR OCCURED (" + e.getClass() + ")\n"); + if(e instanceof QueryExceptionHTTP || e instanceof QueryParseException){ + out.write("\nLast tested SPARQL query: " + learner.getCurrentlyExecutedQuery()); + } + } + } catch (Error e){ + e.printStackTrace(); + out.write("ANSWER FOUND: NO\n"); errorCnt++; - out.write("****************************************\n" + question + "\nERROR: " + e.getClass() + "\n****************************************"); + out.write("REASON: ERROR OCCURED (" + e.getClass() + ")\n"); } + out.write("\n****************************************"); out.flush(); } - out.write("################################\n"); - out.write("Questions with answer: " + learnedQuestions + "\n"); - out.write("Questions with no answer (and no error): " + noQueryCnt + "\n"); - out.write("Questions with error: " 
+ errorCnt + "\n"); + out.write("\n\n###################SUMMARY################\n"); + out.write("Questions tested:\t" + questionCnt + "\n"); + out.write("Questions with answer:\t" + learnedQuestions + "\n"); + out.write("Questions with no answer (and no error):\t" + noQueryWithNonEmptyResultSetCnt + "\n"); + out.write("Questions with no templates:\t" + noTemplateFoundCnt + "\n"); + out.write("Questions with other errors:\t" + errorCnt + "\n"); in.close(); out.close(); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-14 18:41:30 UTC (rev 3746) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-15 14:01:20 UTC (rev 3747) @@ -118,6 +118,8 @@ String question = "Give me all houses near a school."; question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; question = "Give me all Victorian houses in Oxfordshire"; + question = "Give me all Edwardian house in Oxfordshire for less than 1000000."; +// question = "Give me all family houses with more than 2 bathrooms and more than 4 bedrooms"; learner.setQuestion(question); learner.learnSPARQLQueries(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-14 18:41:36
|
Revision: 3746 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3746&view=rev Author: lorenz_b Date: 2012-06-14 18:41:30 +0000 (Thu, 14 Jun 2012) Log Message: ----------- Added mapping entry for gr:description property. Modified Paths: -------------- trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt Modified: trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-06-14 18:39:50 UTC (rev 3745) +++ trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-06-14 18:41:30 UTC (rev 3746) @@ -1,2 +1,3 @@ http://www.w3.org/2006/vcard/ns#street-address|address, location, postal code -http://www.w3.org/2006/vcard/ns#locality|address, location \ No newline at end of file +http://www.w3.org/2006/vcard/ns#locality|address, location +http://purl.org/goodrelations/v1#description|description \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-14 18:39:56
|
Revision: 3745 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3745&view=rev Author: lorenz_b Date: 2012-06-14 18:39:50 +0000 (Thu, 14 Jun 2012) Log Message: ----------- Changed path of oxford test queries file to project internal file tbsl/evaluation.txt. Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-14 18:36:51 UTC (rev 3744) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-14 18:39:50 UTC (rev 3745) @@ -21,7 +21,7 @@ public class OxfordEvaluation { - private static final String QUERIES_FILE = "/home/lorenz/evaluation.txt"; + private static final String QUERIES_FILE = OxfordEvaluation.class.getClassLoader().getResource("tbsl/evaluation.txt").getPath(); public static void main(String[] args) throws Exception{ SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-14 18:36:57
|
Revision: 3744 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3744&view=rev Author: lorenz_b Date: 2012-06-14 18:36:51 +0000 (Thu, 14 Jun 2012) Log Message: ----------- Removed own description hack, because now templates are generated for this case. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-14 18:35:22 UTC (rev 3743) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-14 18:36:51 UTC (rev 3744) @@ -625,9 +625,8 @@ if(!predicate.isVariable()){//only consider triple where predicate is URI String predicateURI = predicate.getName().replace("<", "").replace(">", ""); if(isDatatypeProperty(predicateURI)){//if data property - String objectVar = triple.getValue().getName(); q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - new SPARQL_Term(objectVar), "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); + object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); } } } @@ -635,35 +634,36 @@ } - } else if(slot.getSlotType() == SlotType.CLASS){ - String token = slot.getWords().get(0); - if(slot.getToken().contains("house")){ - String regexToken = token.replace("houses", "").replace("house", "").trim(); - try { - Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); - SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); - if(alloc != null && !alloc.isEmpty()){ - String uri = alloc.first().getUri(); - for(WeightedQuery query : queries){ - Query q = query.getQuery(); - for(SPARQL_Triple triple : 
q.getTriplesWithVar(slot.getAnchor())){ - SPARQL_Term subject = triple.getVariable(); - SPARQL_Term object = new SPARQL_Term("desc"); - object.setIsVariable(true); - object.setIsURI(false); - q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); - } - q.replaceVarWithURI(slot.getAnchor(), uri); - - } - } - } catch (Exception e) { - e.printStackTrace(); - } - } - } + } +// else if(slot.getSlotType() == SlotType.CLASS){ +// String token = slot.getWords().get(0); +// if(slot.getToken().contains("house")){ +// String regexToken = token.replace("houses", "").replace("house", "").trim(); +// try { +// Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); +// SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); +// if(alloc != null && !alloc.isEmpty()){ +// String uri = alloc.first().getUri(); +// for(WeightedQuery query : queries){ +// Query q = query.getQuery(); +// for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ +// SPARQL_Term subject = triple.getVariable(); +// SPARQL_Term object = new SPARQL_Term("desc"); +// object.setIsVariable(true); +// object.setIsURI(false); +// q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); +// q.addFilter(new SPARQL_Filter(new SPARQL_Pair( +// object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); +// } +// q.replaceVarWithURI(slot.getAnchor(), uri); +// +// } +// } +// } catch (Exception e) { +// e.printStackTrace(); +// } +// } +// } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-14 18:35:28
|
Revision: 3743 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3743&view=rev Author: lorenz_b Date: 2012-06-14 18:35:22 +0000 (Thu, 14 Jun 2012) Log Message: ----------- Added constructor. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2012-06-14 17:14:55 UTC (rev 3742) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2012-06-14 18:35:22 UTC (rev 3743) @@ -24,9 +24,18 @@ super(name); this.name = name.replace("?","").replace("!",""); isURI = uri; + setIsVariable(true); alias = name; } + public SPARQL_Term(String name, boolean uri, boolean variable) { + super(name); + this.name = name.replace("?","").replace("!",""); + isURI = uri; + alias = name; + setIsVariable(variable); + } + public SPARQL_Term(String name, SPARQL_Aggregate aggregate) { super(name); this.aggregate = aggregate; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-14 17:15:02
|
Revision: 3742 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3742&view=rev Author: christinaunger Date: 2012-06-14 17:14:55 +0000 (Thu, 14 Jun 2012) Log Message: ----------- [tbsl] some more hacks - äh... domain adaptations Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-14 15:30:01 UTC (rev 3741) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-14 17:14:55 UTC (rev 3742) @@ -98,7 +98,7 @@ return null; } replaceRegextoken(drs); - // System.out.println("--- DRS (after) : " + drs); // DEBUG +// System.out.println("--- DRS (after) : " + drs); // DEBUG for (DiscourseReferent referent : drs.collectDRs()) { if (referent.isMarked()) { @@ -325,7 +325,8 @@ } } if (regex != null) { - query.addFilter(new SPARQL_Filter(new SPARQL_Pair( + query.addFilter(new SPARQL_Filter( + new SPARQL_Pair( new SPARQL_Term(simple.getArguments().get(0).getValue(),false), new SPARQL_Term("'"+regex+"'",false), SPARQL_PairType.REGEX))); @@ -458,7 +459,7 @@ String var; String newvar; String regex = ""; - String[] forbidden = {"regextoken","regex","count","minimum","maximum","greater","less","greaterorequal","lessorequal","equal","sum"}; + String[] forbidden = {"regextoken","regex","count","minimum","maximum","greater","less","greaterorequal","lessorequal","equal","sum","location","description"}; 
Set<Simple_DRS_Condition> used = new HashSet<Simple_DRS_Condition>(); for (Simple_DRS_Condition c : cs) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-14 15:30:01 UTC (rev 3741) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-14 17:14:55 UTC (rev 3742) @@ -220,8 +220,8 @@ } m = adjnounPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NN"); - condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/NN"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN"); } m = adjnprepPattern.matcher(condensedstring); while (m.find()) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-14 15:30:01 UTC (rev 3741) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-14 17:14:55 UTC (rev 3742) @@ -68,7 +68,7 @@ // words.addAll(wordnet.getBestSynonyms(token)); } - String tokenfluent = token.replaceAll(" ","").replaceAll("_",""); + String tokenfluent = token.replaceAll(" ","_");//.replaceAll("_",""); String slotX = "x/" + type + "/"; String slotP = "SLOT_" + tokenfluent + "/" + type + "/"; String slotC = "SLOT_" + tokenfluent + "/CLASS/"; @@ -165,23 +165,30 @@ else if(pos.equals("JJNN") && token.contains("_")) { String[] tokens = token.split("_"); String nntoken = 
tokens[tokens.length-1]; + String jjtoken = token.replace("SLOT_","").replace(nntoken,"").replace("_"," ").trim(); String slotfluent = "SLOT_" + tokenfluent + "/CLASS/" + token; String slotnn = "SLOT_" + nntoken + "/CLASS/" + nntoken; - String semantics = "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotfluent + "]> " + - ";; <x,l1,<e,t>,[ l1:[ | SLOT_" + nntoken + "(x)"; - String slots = slotnn; - for (int i=0; i<(tokens.length-1); i++) { - semantics += ", SLOT_" + tokens[i] + "(x)"; - slots += ",SLOT_" + tokens[i] + "/CLASS/" + tokens[i]; - } - semantics += "] ],[],[],[" + slots + "]>"; +// String semantics = "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotfluent + "]> " + +// ";; <x,l1,<e,t>,[ l1:[ | SLOT_" + nntoken + "(x)"; +// String slots = slotnn; +// for (int i=0; i<(tokens.length-1); i++) { +// semantics += ", SLOT_" + tokens[i] + "(x)"; +// slots += ",SLOT_" + tokens[i] + "/CLASS/" + tokens[i]; +// } +// semantics += "] ],[],[],[" + slots + "]>"; String[] npEntry = {token, "(NP " + treetoken + " )", - semantics }; -// "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotfluent + "]> ;; " + -// "<x,l1,<e,t>,[ l1:[ | SLOT_" + nntoken + "(x), SLOT_" + jjtoken + "(x) ] ],[],[],[" + slotnn + "," + slotjj + "]>"}; + // semantics }; + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotfluent + "]> ;; " + + "<x,l1,<e,t>,[ l1:[ | SLOT_" + nntoken + "(x), SLOT_description(x,y), regex(y,'"+ jjtoken +"')] ],[],[],[" + slotnn + ",SLOT_description/DATATYPEPROPERTY/description ]>"}; + String[] dpEntry = {token, + "(DP (NP " + treetoken + " ))", + // semantics }; + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slotfluent + "]> ;; " + + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + nntoken + "(x), SLOT_description(x,y), regex(y,'"+ jjtoken +"')] ],[],[],[" + slotnn + ",SLOT_description/DATATYPEPROPERTY/description ]>"}; result.add(npEntry); + result.add(dpEntry); } else if 
(pos.equals("NNSAME")) { String slot = "SLOT_" + token + "/" + type + "/" + token; Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-14 15:30:01 UTC (rev 3741) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-14 17:14:55 UTC (rev 3742) @@ -6,8 +6,10 @@ since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> for . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for more than . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for less than . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for more than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than . 
pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> from . to . pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-14 15:30:10
|
Revision: 3741 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3741&view=rev Author: christinaunger Date: 2012-06-14 15:30:01 +0000 (Thu, 14 Jun 2012) Log Message: ----------- [tbsl] basic update for regex descriptions, more filtering conditions on templates, and ASK -> SELECT hack Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-14 15:30:01 UTC (rev 3741) @@ -1,9 +1,6 @@ package org.dllearner.algorithm.tbsl.converter; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; import org.dllearner.algorithm.tbsl.sem.drs.Complex_DRS_Condition; import org.dllearner.algorithm.tbsl.sem.drs.DRS; @@ -30,8 +27,8 @@ public class DRS2SPARQL_Converter { - // suppresses console output - private boolean silent = true; + private boolean silent = true; // suppresses console output + private boolean oxford = true; List<Slot> slots; Template template; List<Integer> usedInts; @@ -100,7 +97,8 @@ if (!restructureEmpty(drs)) { return null; } -// System.out.println("--- DRS (after) : " + drs); // DEBUG + 
replaceRegextoken(drs); + // System.out.println("--- DRS (after) : " + drs); // DEBUG for (DiscourseReferent referent : drs.collectDRs()) { if (referent.isMarked()) { @@ -318,6 +316,21 @@ new SPARQL_Term(simple.getArguments().get(1).getValue(),true), SPARQL_PairType.REGEX))); } + else if (predicate.equals("regextoken")) { + String arg = simple.getArguments().get(1).getValue(); + String regex = null; + for (Slot slot : slots) { + if (slot.getAnchor().equals(arg)) { + if (!slot.getWords().isEmpty()) regex = slot.getWords().get(0); + } + } + if (regex != null) { + query.addFilter(new SPARQL_Filter(new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), + new SPARQL_Term("'"+regex+"'",false), + SPARQL_PairType.REGEX))); + } + } else { if (arity == 1) { SPARQL_Term term = new SPARQL_Term(simple.getArguments().get(0).getValue(),false);term.setIsVariable(true); @@ -333,6 +346,45 @@ } } } + + // TODO this is a hack in order to avoid ASK queries if DP is parsed + if (oxford) { + Hashtable<String,Integer> vs = new Hashtable<String,Integer>(); + String v1; String v2; + for (SPARQL_Triple c : query.getConditions()) { + v1 = c.getVariable().toString().replace("?",""); + v2 = c.getValue().toString().replace("?",""); + // is it a slot variable? 
+ boolean v1isSlotVar = false; + boolean v2isSlotVar = false; + for (Slot s : slots) { + if (s.getAnchor().equals(v1)) v1isSlotVar = true; + if (s.getAnchor().equals(v2)) v2isSlotVar = true; + } + if (!v1isSlotVar && !v1.matches("[0..9]+") && !v1.contains("count")) { + if (vs.containsKey(v1)) vs.put(v1,vs.get(v1)+1); + else vs.put(v1,1); + } + if (!v2isSlotVar && !v2.matches("[0..9]+") && !v2.contains("count")) { + if (vs.containsKey(v2)) vs.put(v2,vs.get(v2)+1); + else vs.put(v2,1); + } + } + + int max = 0; String maxvar = null; + for (String var : vs.keySet()) { + if (vs.get(var) > max) { + max = vs.get(var); + maxvar = var; + } + } + if (maxvar != null) { + SPARQL_Term term = new SPARQL_Term(maxvar); + term.setIsVariable(true); + query.addSelTerm(term); + } + } + return query; } @@ -340,41 +392,41 @@ Set<Simple_DRS_Condition> equalsConditions = new HashSet<Simple_DRS_Condition>(); for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { - if(c.getPredicate().equals("equal")) { - equalsConditions.add(c); - } + if(c.getPredicate().equals("equal")) equalsConditions.add(c); } DiscourseReferent firstArg; DiscourseReferent secondArg; boolean firstIsURI; boolean secondIsURI; + boolean firstIsInt; + boolean secondIsInt; for (Simple_DRS_Condition c : equalsConditions) { - firstArg = c.getArguments().get(0); + firstArg = c.getArguments().get(0); secondArg = c.getArguments().get(1); firstIsURI = isUri(firstArg.getValue()); secondIsURI = isUri(secondArg.getValue()); + firstIsInt = firstArg.getValue().matches("[0..9]+"); + secondIsInt = secondArg.getValue().matches("[0..9]+"); - boolean oneArgIsInt = firstArg.toString().matches("[0..9]") || secondArg.toString().matches("[0..9]"); - drs.removeCondition(c); - if (firstIsURI) { - drs.replaceEqualRef(secondArg, firstArg, false); + if (firstIsURI || firstIsInt) { + drs.replaceEqualRef(secondArg, firstArg, true); for (Slot s : slots) { if (s.getAnchor().equals(secondArg.getValue())) { s.setAnchor(firstArg.getValue()); } } - } 
else if (secondIsURI) { - drs.replaceEqualRef(firstArg, secondArg, false); + } else if (secondIsURI || secondIsInt) { + drs.replaceEqualRef(firstArg, secondArg, true); for (Slot s : slots) { if (s.getAnchor().equals(firstArg.getValue())) { s.setAnchor(secondArg.getValue()); } } - } else if (!oneArgIsInt) { + } else { drs.replaceEqualRef(firstArg, secondArg, false); for (Slot s : slots) { if (s.getAnchor().equals(firstArg.getValue())) { @@ -396,6 +448,55 @@ } } + private void replaceRegextoken(DRS drs) { + + Set<Simple_DRS_Condition> cs = new HashSet<Simple_DRS_Condition>(); + for (Simple_DRS_Condition c : drs.getAllSimpleConditions()) { + if(c.getPredicate().equals("regextoken")) cs.add(c); + } + + String var; + String newvar; + String regex = ""; + String[] forbidden = {"regextoken","regex","count","minimum","maximum","greater","less","greaterorequal","lessorequal","equal","sum"}; + Set<Simple_DRS_Condition> used = new HashSet<Simple_DRS_Condition>(); + + for (Simple_DRS_Condition c : cs) { + var = c.getArguments().get(1).getValue(); + newvar = c.getArguments().get(0).getValue(); + for (Simple_DRS_Condition cond : drs.getAllSimpleConditions()) { + boolean takeit = false; + for (DiscourseReferent dr : cond.getArguments()) { + if (dr.getValue().equals(var)) { + takeit = true; + for (String f : forbidden) if (f.equals(cond.getPredicate())) takeit= false; + } + } + if (takeit) { + regex += cond.getPredicate().replace("SLOT","") + " "; + used.add(cond); + } + else { + for (DiscourseReferent dr : cond.getArguments()) { + if (dr.getValue().equals(var)) dr.setValue(newvar); + } + } + } + if (!regex.isEmpty()) { + c.getArguments().remove(1); + c.getArguments().add(new DiscourseReferent("'"+regex.trim()+"'")); + c.setPredicate("regex"); + } + for (Slot s : slots) { + if (s.getWords().contains(var)) { + s.getWords().remove(var); + s.getWords().add(newvar); + } + } + } + for (Simple_DRS_Condition cond : used) drs.removeCondition(cond); + } + private boolean 
restructureEmpty(DRS drs) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-14 15:30:01 UTC (rev 3741) @@ -18,6 +18,9 @@ static final String[] genericReplacements = { "[!?.,;]", "" }; static final String[] englishReplacements = { "don't", "do not", "doesn't", "does not" }; + static final String[] hackReplacements = { " 1 "," one "," 2 "," two "," 3 "," three "," 4 "," four "," 5 "," five "," 6 "," six "," 7 "," seven ", + " 8 "," eight "," 9 "," nine "," 10 "," ten "," 11 "," eleven "," 12 "," twelve "," 13 "," thirteen "," 14 "," fourteen "," 15 "," fifteen ", + " 16 "," sixteen "," 17 "," seventeen "," 18 "," eighteen "," 19 "," nineteen "," 20 "," twenty "}; static boolean USE_NER; static boolean VERBOSE; static NER ner; @@ -49,7 +52,9 @@ replacements.addAll(Arrays.asList(repl)); replacements.addAll(Arrays.asList(englishReplacements)); replacements.addAll(Arrays.asList(genericReplacements)); + replacements.addAll(Arrays.asList(hackReplacements)); + s = s.replaceAll(",\\s"," and "); for (int i = 0; i < replacements.size(); i += 2) { s = s.replaceAll(replacements.get(i), replacements.get(i + 1)); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-06-14 15:30:01 UTC (rev 3741) @@ -33,7 +33,28 @@ public Template checkandrefine() { Set<Slot> argslots = new HashSet<Slot>(); - for (Slot 
slot : slots) if (slot.anchor.equals("SLOT_arg")) argslots.add(slot); + for (Slot slot : slots) if (slot.anchor.equals("SLOT_arg")) { + String var = slot.words.get(0); + // check for clash (v=LITERAL && v=RESOURCE) + for (Slot s : argslots) { + if (s.words.get(0).equals(slot.words.get(0)) && !s.type.equals(slot.type)) + return null; + } + // check for clash (v=LITERAL && p(...,v)=OBJECTPROPERTY) || (v=RESOURCE && p(...,v)=DATATYPEPROPERTY) + SlotType clashing = null; + if (slot.type.equals(SlotType.LITERAL)) clashing = SlotType.OBJECTPROPERTY; + else if (slot.type.equals(SlotType.RESOURCE)) clashing = SlotType.DATATYPEPROPERTY; + for (Slot s : slots) { + if (clashing != null && s.type.equals(clashing)) { + for (SPARQL_Triple triple : query.conditions) { + if (triple.property.toString().equals("?"+s.anchor)) { + if (triple.value.toString().equals("?"+var)) return null; + } + } + } + } + argslots.add(slot); + } for (Slot slot : slots) { // check for clashes Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-14 15:30:01 UTC (rev 3741) @@ -32,9 +32,9 @@ public class Templator { private static final Logger logger = Logger.getLogger(Templator.class); + + String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; - String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; - private String[] noun = {"NN","NNS","NNP","NNPS","NPREP","JJNN","JJNPREP"}; private String[] adjective = {"JJ","JJR","JJS","JJH"}; private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP","WHEN","WHERE"}; @@ -49,8 +49,8 @@ WordNet wordnet; 
LingPipeLemmatizer lem = new LingPipeLemmatizer(); - DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); - DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); + DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); + DRS2SPARQL_Converter d2s = new DRS2SPARQL_Converter(); boolean ONE_SCOPE_ONLY = true; boolean UNTAGGED_INPUT = true; Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-14 15:30:01 UTC (rev 3741) @@ -172,6 +172,16 @@ eight || (NP NUM:'eight' NP*) || <x,l1,<e,t>,[l1:[x|count(x,8)]],[],[],[ SLOT_arg/RESOURCE/x ]> nine || (NP NUM:'nine' NP*) || <x,l1,<e,t>,[l1:[x|count(x,9)]],[],[],[ SLOT_arg/RESOURCE/x ]> ten || (NP NUM:'ten' NP*) || <x,l1,<e,t>,[l1:[x|count(x,10)]],[],[],[ SLOT_arg/RESOURCE/x ]> + eleven || (NP NUM:'one' NP*) || <x,l1,<e,t>,[l1:[x|count(x,11)]],[],[],[ SLOT_arg/RESOURCE/x ]> + twelve || (NP NUM:'two' NP*) || <x,l1,<e,t>,[l1:[x|count(x,12)]],[],[],[ SLOT_arg/RESOURCE/x ]> + thirteen || (NP NUM:'thirteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,13)]],[],[],[ SLOT_arg/RESOURCE/x ]> + fourteen || (NP NUM:'fourteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,14)]],[],[],[ SLOT_arg/RESOURCE/x ]> + fifteen || (NP NUM:'fifteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,15)]],[],[],[ SLOT_arg/RESOURCE/x ]> + sixteen || (NP NUM:'sixteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,16)]],[],[],[ SLOT_arg/RESOURCE/x ]> + seventeen || (NP NUM:'seventeen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,17)]],[],[],[ SLOT_arg/RESOURCE/x ]> + eighteen || (NP NUM:'eighteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,18)]],[],[],[ SLOT_arg/RESOURCE/x ]> + nineteen || (NP NUM:'nineteen' NP*) || <x,l1,<e,t>,[l1:[x|count(x,19)]],[],[],[ SLOT_arg/RESOURCE/x ]> + twenty || (NP NUM:'twenty' NP*) || <x,l1,<e,t>,[l1:[x|count(x,20)]],[],[],[ 
SLOT_arg/RESOURCE/x ]> one || (NUM NUM:'one') || <x,l1,e,[l1:[x|equal(x,1)]],[],[],[ SLOT_arg/LITERAL/x ]> two || (NUM NUM:'two') || <x,l1,e,[l1:[x|equal(x,2)]],[],[],[ SLOT_arg/LITERAL/x ]> @@ -183,4 +193,13 @@ eight || (NUM NUM:'eight') || <x,l1,e,[l1:[x|equal(x,8)]],[],[],[ SLOT_arg/LITERAL/x ]> nine || (NUM NUM:'nine') || <x,l1,e,[l1:[x|equal(x,9)]],[],[],[ SLOT_arg/LITERAL/x ]> ten || (NUM NUM:'ten') || <x,l1,e,[l1:[x|equal(x,10)]],[],[],[ SLOT_arg/LITERAL/x ]> - + eleven || (NUM NUM:'eleven') || <x,l1,e,[l1:[x|equal(x,11)]],[],[],[ SLOT_arg/LITERAL/x ]> + twelve || (NUM NUM:'twelve') || <x,l1,e,[l1:[x|equal(x,12)]],[],[],[ SLOT_arg/LITERAL/x ]> + thirteen || (NUM NUM:'thirteen') || <x,l1,e,[l1:[x|equal(x,13)]],[],[],[ SLOT_arg/LITERAL/x ]> + fourteen || (NUM NUM:'fourteen') || <x,l1,e,[l1:[x|equal(x,14)]],[],[],[ SLOT_arg/LITERAL/x ]> + fifteen || (NUM NUM:'fifteen') || <x,l1,e,[l1:[x|equal(x,15)]],[],[],[ SLOT_arg/LITERAL/x ]> + sixteen || (NUM NUM:'sixteen') || <x,l1,e,[l1:[x|equal(x,16)]],[],[],[ SLOT_arg/LITERAL/x ]> + seventeen || (NUM NUM:'seventeen') || <x,l1,e,[l1:[x|equal(x,17)]],[],[],[ SLOT_arg/LITERAL/x ]> + eighteen || (NUM NUM:'eighteen') || <x,l1,e,[l1:[x|equal(x,18)]],[],[],[ SLOT_arg/LITERAL/x ]> + nineteen || (NUM NUM:'nineteen') || <x,l1,e,[l1:[x|equal(x,19)]],[],[],[ SLOT_arg/LITERAL/x ]> + twenty || (NUM NUM:'twenty') || <x,l1,e,[l1:[x|equal(x,20)]],[],[],[ SLOT_arg/LITERAL/x ]> Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-14 14:54:54 UTC (rev 3740) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-14 15:30:01 UTC (rev 3741) @@ -10,7 +10,7 @@ for less than . 
pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> from . to . pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> + with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> square meters || (DP N:'square' N:'meters') || <x,l1,<<e,t>,t>>, [l1:[ | SLOT_size(x,y) ]], [],[],[SLOT_size/DATATYPEPROPERTY/size ]> // MONTHS @@ -26,4 +26,4 @@ september || (DP DP:'september') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,9) ]], [],[],[]> october || (DP DP:'october') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,10) ]], [],[],[]> november || (DP DP:'november') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,11) ]], [],[],[]> - december || (DP DP:'december') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,12) ]], [],[],[]> \ No newline at end of file + december || (DP DP:'december') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,12) ]], [],[],[]> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-14 14:55:06
|
Revision: 3740 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3740&view=rev Author: lorenz_b Date: 2012-06-14 14:54:54 +0000 (Thu, 14 Jun 2012) Log Message: ----------- Tried some to answer some more questions. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoClassesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoDatatypePropertiesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoObjectPropertiesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoPropertiesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoResourcesIndex.java trunk/components-ext/src/main/resources/tbsl/evaluation.txt Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 
2012-06-14 14:54:54 UTC (rev 3740) @@ -34,9 +34,11 @@ import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Term; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -51,6 +53,9 @@ import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; +import org.dllearner.common.index.VirtuosoObjectPropertiesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblem; import org.dllearner.core.SparqlQueryLearningAlgorithm; @@ -150,8 +155,13 @@ setOptions(options); if(propertiesIndex instanceof SPARQLPropertiesIndex){ - datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + 
} } else { datatypePropertiesIndex = propertiesIndex; objectPropertiesIndex = propertiesIndex; @@ -186,8 +196,13 @@ setOptions(options); if(propertiesIndex instanceof SPARQLPropertiesIndex){ - datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } } else { datatypePropertiesIndex = propertiesIndex; objectPropertiesIndex = propertiesIndex; @@ -419,7 +434,9 @@ } } + executor.shutdown(); + /*for(Slot slot : t.getSlots()){ allocations = slot2Allocations2.get(slot); if(allocations == null){ @@ -559,9 +576,30 @@ } }*/ - if(!drop){ - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + SPARQL_Value object = triple.getValue(); + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + SPARQL_Property predicate = triple.getProperty(); + if(!predicate.isVariable()){//only consider triple where predicate is URI + String predicateURI = predicate.getName().replace("<", "").replace(">", ""); + if(isDatatypeProperty(predicateURI)){//if data property + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); + } else { + 
q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } WeightedQuery w = new WeightedQuery(q); double newScore = query.getScore() + a.getScore(); w.setScore(newScore); @@ -581,14 +619,50 @@ for(WeightedQuery query : queries){ Query q = query.getQuery(); for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - new SPARQL_Term(objectVar), "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - + SPARQL_Value object = triple.getValue(); + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + SPARQL_Property predicate = triple.getProperty(); + if(!predicate.isVariable()){//only consider triple where predicate is URI + String predicateURI = predicate.getName().replace("<", "").replace(">", ""); + if(isDatatypeProperty(predicateURI)){//if data property + String objectVar = triple.getValue().getName(); + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + new SPARQL_Term(objectVar), "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); + } + } + } } } + } else if(slot.getSlotType() == SlotType.CLASS){ + String token = slot.getWords().get(0); + if(slot.getToken().contains("house")){ + String regexToken = token.replace("houses", "").replace("house", "").trim(); + try { + Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); + SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); + if(alloc != null && !alloc.isEmpty()){ + String uri = alloc.first().getUri(); + for(WeightedQuery query : queries){ + Query q = query.getQuery(); + for(SPARQL_Triple triple : 
q.getTriplesWithVar(slot.getAnchor())){ + SPARQL_Term subject = triple.getVariable(); + SPARQL_Term object = new SPARQL_Term("desc"); + object.setIsVariable(true); + object.setIsURI(false); + q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); + } + q.replaceVarWithURI(slot.getAnchor(), uri); + + } + } + } catch (Exception e) { + e.printStackTrace(); + } + } } @@ -733,7 +807,10 @@ if(queryType == SPARQL_QueryType.SELECT){ for(String query : queries){ logger.info("Testing query:\n" + query); - ResultSet rs = executeSelect(query); + com.hp.hpl.jena.query.Query q = QueryFactory.create(query, Syntax.syntaxARQ); + q.setLimit(1); + ResultSet rs = executeSelect(q.toString());//executeSelect(query); + List<String> results = new ArrayList<String>(); QuerySolution qs; String projectionVar; @@ -927,9 +1004,16 @@ } - private boolean isDatatypePropeprty(String uri){ - String query = "ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty>}."; - return executeAskQuery(query); + private boolean isDatatypeProperty(String uri){ + Boolean isDatatypeProperty = null; + if(mappingIndex != null){ + isDatatypeProperty = mappingIndex.isDataProperty(uri); + } + if(isDatatypeProperty == null){ + String query = String.format("ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty> .}", uri); + isDatatypeProperty = executeAskQuery(query); + } + return isDatatypeProperty; } /** Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -378,6 +378,8 @@ 
object.setIsVariable(false); if(object instanceof SPARQL_Term){ ((SPARQL_Term) object).setIsURI(true); + } else if(object instanceof SPARQL_Property){ + ((SPARQL_Property) object).setIsVariable(false); } } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -6,12 +6,12 @@ { private static final long serialVersionUID = -1255754209857823420L; - public SPARQL_Term a; + public SPARQL_Value a; public Object b; public SPARQL_PairType type; - public SPARQL_Pair(SPARQL_Term a, Object b, SPARQL_PairType type) + public SPARQL_Pair(SPARQL_Value a, Object b, SPARQL_PairType type) { super(); this.a = a; @@ -19,7 +19,7 @@ this.type = type; } - public SPARQL_Pair(SPARQL_Term a, SPARQL_PairType type) + public SPARQL_Pair(SPARQL_Value a, SPARQL_PairType type) { super(); this.a = a; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -103,7 +103,7 @@ if (isString()) { return name.replaceAll("_"," "); } - else if (isURI) { + else if (isURI || !isVariable()) { return name; } else return "?"+name.toLowerCase(); Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 
2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -127,7 +127,7 @@ return irs; } - private ResultSet executeSelect(String query){//System.out.println(query); + private ResultSet executeSelect(String query){System.out.println(query); ResultSet rs; if(model == null){ if(cache == null){ Added: trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoClassesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoClassesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoClassesIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,38 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class VirtuosoClassesIndex extends SPARQLIndex{ + + public VirtuosoClassesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public VirtuosoClassesIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + super(endpoint, cache); + init(); + } + + public VirtuosoClassesIndex(Model model) { + super(model); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + + "?s a ?uri.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + + "?s a ?uri.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoDatatypePropertiesIndex.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoDatatypePropertiesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoDatatypePropertiesIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,41 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class VirtuosoDatatypePropertiesIndex extends SPARQLPropertiesIndex{ + + public VirtuosoDatatypePropertiesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public VirtuosoDatatypePropertiesIndex(Model model) { + super(model); + init(); + } + + public VirtuosoDatatypePropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT ?uri WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:DatatypeProperty.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:DatatypeProperty.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + } + + +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoObjectPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoObjectPropertiesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoObjectPropertiesIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,42 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class VirtuosoObjectPropertiesIndex extends SPARQLPropertiesIndex{ + + public 
VirtuosoObjectPropertiesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public VirtuosoObjectPropertiesIndex(Model model) { + super(model); + init(); + } + + public VirtuosoObjectPropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT ?uri WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:ObjectProperty.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label." + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:ObjectProperty.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + } + + + +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoPropertiesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoPropertiesIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,46 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class VirtuosoPropertiesIndex extends SPARQLIndex{ + + public VirtuosoPropertiesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public VirtuosoPropertiesIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + super(endpoint, cache); + init(); + } + + public VirtuosoPropertiesIndex(Model model) { + super(model); + init(); + } + + public VirtuosoPropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri 
<http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT ?uri ?label WHERE {\n" + + "?s ?uri ?o.\n" + +// "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + } + + +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoResourcesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoResourcesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoResourcesIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,43 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class VirtuosoResourcesIndex extends SPARQLIndex{ + + public VirtuosoResourcesIndex(SparqlEndpoint endpoint) { + this(endpoint, null); + } + + public VirtuosoResourcesIndex(Model model) { + super(model); + init(); + } + + public VirtuosoResourcesIndex(VirtuosoResourcesIndex index) { + super(index); + } + + public VirtuosoResourcesIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + super(endpoint, cache); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + + "?uri a ?type.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + + "?uri a ?type.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + } + + +} Added: 
trunk/components-ext/src/main/resources/tbsl/evaluation.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/evaluation.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/evaluation.txt 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,149 @@ +question: houses in Headington + +question: houses in Abingdon with more than 2 bedrooms + +question: houses with garden in Wheatley + +question: detached houses in Oxford + +question: Victorian houses in Oxfordshire + +question: Edwardian house in Oxfordshire for less than 1000000 + +question: houses with double garage + +question: houses with large garden and equipped kitchen + +question: houses with more than 1 reception room + +question: houses in Didcot furnished to a high standard + +question: houses with conservatory room and less than 900000 pounds + +question: detached bungalows in Oxfordshire + +question: houses in Old Marston + +question: family houses with more than 2 bathrooms and more than 4 bedrooms + +question: houses close to Iffley Sport Centre + +question: houses in Oxford close to the train station + +question: houses in Summertown for less than 400000 pounds + +question: two floors houses in East Oxford + +question: brand new houses in Oxford for less than 500000 pounds + +question: houses close to Brookes University + +question: houses in Jericho area + +question: house close to Headington hospitals + +question: modern houses with gas central heating + +question: houses with electric heating + +question: houses less than 500000 within area OX16 + +question: houses close to an Italian restaurant + +question: houses at walking distance from a pharmacy + +question: houses at walking distance from Tesco or Sainsburys shops + +question: houses nearby Sheldonian Theatre + +question: houses with underfloor heating + +question: houses with wood floor + +question: houses close to The King's Arms pub + +question: houses with garden large at 
least 2 acres + +question: houses with many reception rooms + +question: houses built around 1950 + +question: houses with balcony + +question: houses with double glazed windows + +question: houses far from city centre + +question: 2 bedroom houses near oxford train station + +question: 4 bedroom detached houses in oxford + +question: studio apartments in summertown, Oxford + +question: freehold houses with 2 bedrooms and a living room in banbury + +question: houses in Oxford city centre with at most 2 bedrooms + +question: houses with garage within minutes of Oxford schools and in a quiet road + +question: victorian town houses in north Oxford + +question: terrace houses with west facing garden + +question: modernised end terrace houses with private parking + +question: three bedroom houses with open fireplace + +question: houses available from June 15th. + +question: houses on rawlinson road + +question: flats near supermarket + +question: flats with bill included + + + +question: give me flats in central Oxford with at least one bedroom below 1000 GBP a month? + +question: Give me all 2 bedroom flats in walking distance from the computer science departement! + +question: Give me all houses with 3 bedrooms or more, close to the train station with good shopping opportunities. + +question: find a property with 2 bedrooms close to some park. + +question: Give me all flats at roughly 1300 GBP the month, equally close to the computer science department and Christ Church College. + +question: Give me all flats in the area around Cowley Road with 2 bedrooms. + +question: Give me all furnished places with one bedroom close to the Radcliffe Camera. + +question: Give me all unfurnished houses with at least 2 bedrooms in Summertown. + +question: Give me all furnished flats with one bedroom for smokers. + +question: Give me all flats with parking in central Oxford. + +question: Give me all cheap places in Cowley. + +question: Give me all representative houses in Summertown. 
+ +question: find a property for sale, with 2 bedrooms, parking, close to shops. + +question: Give me all flats with a garden, one bedroom, in walking distance to the computer science departement + +question: Give me all places offered close to the train station? + +question: Give me all retirement houses for sale near Oxford. + +question: Give me all houses that I can BBQ. + +question: Give me all flats which are far from the river. + +question: Give me all flats which are close to three bars. + +question: What is the average price of furnished 1 bedroom apartments in Heddington? + +question: How many flats are offered in central Oxford below 1000 GBP a month? + +question: Which area in Oxford has the cheapest offers close to the Keble college [or some other landmark]? Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-14 14:54:54 UTC (rev 3740) @@ -2,7 +2,7 @@ // PREPOSITIONS close to || (NP NP* (PP P:'close' P:'to' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_closeto(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_closeto/OBJECTPROPERTY/near ]> - in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address ]> + in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> for . 
pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> Modified: trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1 @@ +http://dbpedia.org/property/near|close, near \ No newline at end of file Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -1,20 +1,21 @@ package org.dllearner.algorithm.tbsl; import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; +import java.io.FileWriter; import java.net.URL; import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.Map.Entry; import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; -import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SPARQLClassesIndex; import org.dllearner.common.index.SPARQLIndex; -import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.common.index.VirtuosoClassesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; +import org.dllearner.common.index.VirtuosoResourcesIndex; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; @@ 
-26,14 +27,15 @@ SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); ExtractionDBCache cache = new ExtractionDBCache("cache"); - SPARQLIndex resourcesIndex = new SPARQLIndex(endpoint, cache); - SPARQLIndex classesIndex = new SPARQLClassesIndex(endpoint, cache); - SPARQLIndex propertiesIndex = new SPARQLPropertiesIndex(endpoint, cache); + SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); + SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); MappingBasedIndex mappingIndex= new MappingBasedIndex( OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), - OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath(), - OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath()); + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + ); SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.setMappingIndex(mappingIndex); @@ -42,11 +44,14 @@ int learnedQuestions = 0; Map<String, String> question2QueryMap = new HashMap<String, String>(); - BufferedReader br = new BufferedReader(new FileReader(new File(QUERIES_FILE))); + BufferedReader in = new BufferedReader(new FileReader(new File(QUERIES_FILE))); + BufferedWriter out = new BufferedWriter(new FileWriter(new File("log/oxford_eval.txt"))); int questionNr = 0; + int errorCnt = 0; + int noQueryCnt = 0; String question = 
null; - while((question = br.readLine()) != null){ + while((question = in.readLine()) != null){ question = question.replace("question:", "").trim(); if(question.isEmpty()) continue; if(!question.toLowerCase().contains("Give me all") && Character.isLowerCase(question.charAt(0))){ @@ -62,17 +67,27 @@ if(learnedQuery != null){ question2QueryMap.put(question, learnedQuery); learnedQuestions++; + out.write("****************************************\n" + question + "\n" + learnedQuery + "\n****************************************"); + } else { + noQueryCnt++; + out.write("****************************************\n" + question + "\nNO QUERY WITH NON-EMPTY RESULTSET FOUND\n****************************************"); } } catch (Exception e) { e.printStackTrace(); + errorCnt++; + out.write("****************************************\n" + question + "\nERROR: " + e.getClass() + "\n****************************************"); } + out.flush(); } - System.out.println("Generated SPARQL queries for " + learnedQuestions + " questions."); - for(Entry<String, String> entry : question2QueryMap.entrySet()){ - System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); - System.out.println(entry.getKey()); - System.out.println(entry.getValue()); - } + out.write("################################\n"); + out.write("Questions with answer: " + learnedQuestions + "\n"); + out.write("Questions with no answer (and no error): " + noQueryCnt + "\n"); + out.write("Questions with error: " + errorCnt + "\n"); + + in.close(); + out.close(); + + } } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -10,10 +10,15 @@ import 
org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; import org.dllearner.common.index.Index; +import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLClassesIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.common.index.VirtuosoClassesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; +import org.dllearner.common.index.VirtuosoResourcesIndex; +import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.junit.Test; @@ -34,24 +39,24 @@ protected void setUp() throws Exception { super.setUp(); endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); - model = ModelFactory.createOntologyModel(); - File dir = new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/data"); - try { - for(File f : dir.listFiles()){ - if(f.isFile()){ - System.out.println("Loading file " + f.getName()); - try { - model.read(new FileInputStream(f), null, "TURTLE"); - } catch (Exception e) { - System.err.println("Parsing failed."); - e.printStackTrace(); - } - } - } - model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/ontology.ttl")), null, "TURTLE"); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } +// model = ModelFactory.createOntologyModel(); +// File dir = new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/data"); +// try { +// for(File f : dir.listFiles()){ +// if(f.isFile()){ +// System.out.println("Loading file " + f.getName()); +// try { +// model.read(new FileInputStream(f), null, "TURTLE"); +// } catch (Exception e) { +// System.err.println("Parsing failed."); +// e.printStackTrace(); +// } +// } +// } +// model.read(new 
FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/ontology.ttl")), null, "TURTLE"); +// } catch (FileNotFoundException e) { +// e.printStackTrace(); +// } } @Test @@ -95,17 +100,24 @@ @Test public void testOxfordRemote() throws Exception{ + ExtractionDBCache cache = new ExtractionDBCache("cache"); + SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); + SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); + MappingBasedIndex mappingIndex= new MappingBasedIndex( + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + ); - Index resourcesIndex = new SPARQLIndex(endpoint); - Index classesIndex = new SPARQLClassesIndex(endpoint); - Index propertiesIndex = new SPARQLPropertiesIndex(endpoint); - SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); + learner.setMappingIndex(mappingIndex); learner.init(); String question = "Give me all houses near a school."; question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; - question = "Give me all houses with large garden and equipped kitchen"; + question = "Give me all Victorian houses in Oxfordshire"; learner.setQuestion(question); learner.learnSPARQLQueries(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-13 20:51:12
|
Revision: 3739 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3739&view=rev Author: lorenz_b Date: 2012-06-13 20:51:06 +0000 (Wed, 13 Jun 2012) Log Message: ----------- Added hack for REGEX filter when resource slot is empty and predicate is dataproperty - has to be tested. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-13 14:56:37 UTC (rev 3738) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-13 20:51:06 UTC (rev 3739) @@ -31,7 +31,12 @@ import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.sparql.Allocation; import org.dllearner.algorithm.tbsl.sparql.Query; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Term; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -452,6 +457,11 @@ } } for(Slot slot : t.getSlots()){ + if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY || slot.getSlotType() == SlotType.DATATYPEPROPERTY){ + sortedSlots.add(slot); + } + } + for(Slot slot : t.getSlots()){ if(!sortedSlots.contains(slot)){ sortedSlots.add(slot); } @@ -566,6 +576,22 @@ queries.clear(); queries.addAll(tmp);//System.out.println(tmp); tmp.clear(); 
+ } else {//Add REGEX FILTER if resource slot is empty and predicate is datatype property + if(slot.getSlotType() == SlotType.RESOURCE){ + for(WeightedQuery query : queries){ + Query q = query.getQuery(); + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + String objectVar = triple.getValue().getName(); + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + new SPARQL_Term(objectVar), "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); + + } + + } + + } + + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-13 14:56:49
|
Revision: 3738 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3738&view=rev Author: lorenz_b Date: 2012-06-13 14:56:37 +0000 (Wed, 13 Jun 2012) Log Message: ----------- Integration of manual mappings. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/common/index/MappingBasedIndex.java trunk/components-ext/src/main/resources/tbsl/oxford_class_mappings.txt trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt trunk/components-ext/src/main/resources/tbsl/oxford_resource_mappings.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -14,7 +14,7 @@ public class TestFrontend { // MODE ::= BASIC | LEIPZIG - static String MODE = "BASIC"; + static String MODE = "LEIPZIG"; public static void 
main(String[] args) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -41,6 +41,7 @@ import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; +import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; @@ -96,6 +97,8 @@ private Index datatypePropertiesIndex; private Index objectPropertiesIndex; + private MappingBasedIndex mappingIndex; + private Templator templateGenerator; private Lemmatizer lemmatizer; private PartOfSpeechTagger posTagger; @@ -192,6 +195,10 @@ lemmatizer = new LingPipeLemmatizer(); } + public void setMappingIndex(MappingBasedIndex mappingIndex) { + this.mappingIndex = mappingIndex; + } + /* * Only for Evaluation useful. 
*/ @@ -369,6 +376,7 @@ } } }); + slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); @@ -385,11 +393,13 @@ long startTime = System.currentTimeMillis(); for (Slot slot : t.getSlots()) { - if(!slot2Allocations.containsKey(slot)){ + if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); list.add(submit); - } + } else { + System.out.println("CACHE HIT"); + } } for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { @@ -840,27 +850,42 @@ IndexResultSet rs; for(String word : slot.getWords()){ + rs = new IndexResultSet(); + if(mappingIndex != null){ + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + rs.add(mappingIndex.getClassesWithScores(word)); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + rs.add(mappingIndex.getPropertiesWithScores(word)); + } else if(type == SlotType.DATATYPEPROPERTY){ + rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); + } else if(type == SlotType.OBJECTPROPERTY){ + rs.add(mappingIndex.getObjectPropertiesWithScores(word)); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + rs.add(mappingIndex.getResourcesWithScores(word)); + } + } if(slot.getSlotType() == SlotType.RESOURCE){ - rs = index.getResourcesWithScores(word, 50); + rs.add(index.getResourcesWithScores(word, 50)); } else { if(slot.getSlotType() == SlotType.CLASS){ word = PlingStemmer.stem(word); } - rs = index.getResourcesWithScores(word, 20); + rs.add(index.getResourcesWithScores(word, 20)); } for(IndexResultItem item : rs.getItems()){ double similarity = Similarity.getSimilarity(word, item.getLabel()); - //get the labels of the redirects and compute the highest similarity - 
if(slot.getSlotType() == SlotType.RESOURCE){ - Set<String> labels = getRedirectLabels(item.getUri()); - for(String label : labels){ - double tmp = Similarity.getSimilarity(word, label); - if(tmp > similarity){ - similarity = tmp; - } - } - } +// //get the labels of the redirects and compute the highest similarity +// if(slot.getSlotType() == SlotType.RESOURCE){ +// Set<String> labels = getRedirectLabels(item.getUri()); +// for(String label : labels){ +// double tmp = Similarity.getSimilarity(word, label); +// if(tmp > similarity){ +// similarity = tmp; +// } +// } +// } double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); allocations.add(new Allocation(item.getUri(), prominence, similarity)); } @@ -876,6 +901,11 @@ } + private boolean isDatatypePropeprty(String uri){ + String query = "ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty>}."; + return executeAskQuery(query); + } + /** * @param args * @throws NoTemplateFoundException Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -103,7 +103,8 @@ return new Slot(anchor,type,newWords); } - @Override + + /*@Override public int hashCode() { final int prime = 31; int result = 1; @@ -143,7 +144,31 @@ } else if (!words.equals(other.words)) return false; return true; + }*/ + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Slot other = (Slot) obj; + if(other.type == type && other.token == token){ + return true; + } + return false; } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = 
prime * result + ((token == null) ? 0 : token.hashCode()); + result = prime * result + ((type == null) ? 0 : type.hashCode()); + return result; + } + } Added: trunk/components-ext/src/main/java/org/dllearner/common/index/MappingBasedIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/MappingBasedIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/MappingBasedIndex.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -0,0 +1,211 @@ +package org.dllearner.common.index; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +public class MappingBasedIndex { + + private Map<String, List<String>> classUri2TokensMap = new HashMap<String, List<String>>(); + private Map<String, List<String>> resourceUri2TokensMap = new HashMap<String, List<String>>(); + private Map<String, List<String>> datatypePropertyUri2TokensMap = new HashMap<String, List<String>>(); + private Map<String, List<String>> objectPropertyUri2TokensMap = new HashMap<String, List<String>>(); + + public MappingBasedIndex(String classMappingsFile, String resourceMappingsFile, + String dataPropertyMappingsFile, String objectPropertyMappingsFile) { + BufferedReader br = null; + String line = null; + try { + //load class mappings + if(classMappingsFile != null){ + br = new BufferedReader(new FileReader(new File(classMappingsFile))); + while((line = br.readLine()) != null){ + int split = line.indexOf("|"); + //get the URI + String uri = line.substring(0, split); + //get the list of tokens + List<String> tokens = new ArrayList<String>(); + String tokenString = line.substring(split + 1); + String[] tokenArray = tokenString.split(","); + for(String token : tokenArray){ + 
tokens.add(token.trim()); + } + + classUri2TokensMap.put(uri, tokens); + } + } + + //load resource mappings + if(resourceMappingsFile != null){ + br = new BufferedReader(new FileReader(new File(resourceMappingsFile))); + while((line = br.readLine()) != null){ + int split = line.indexOf("|"); + //get the URI + String uri = line.substring(0, split); + //get the list of tokens + List<String> tokens = new ArrayList<String>(); + String tokenString = line.substring(split + 1); + String[] tokenArray = tokenString.split(","); + for(String token : tokenArray){ + tokens.add(token.trim()); + } + + resourceUri2TokensMap.put(uri, tokens); + } + } + + //load object property mappings + if(objectPropertyMappingsFile != null){ + br = new BufferedReader(new FileReader(new File(objectPropertyMappingsFile))); + while((line = br.readLine()) != null){ + int split = line.indexOf("|"); + //get the URI + String uri = line.substring(0, split); + //get the list of tokens + List<String> tokens = new ArrayList<String>(); + String tokenString = line.substring(split + 1); + String[] tokenArray = tokenString.split(","); + for(String token : tokenArray){ + tokens.add(token.trim()); + } + + objectPropertyUri2TokensMap.put(uri, tokens); + } + } + + //load datatype property mappings + if(dataPropertyMappingsFile != null){ + br = new BufferedReader(new FileReader(new File(dataPropertyMappingsFile))); + while((line = br.readLine()) != null){ + int split = line.indexOf("|"); + //get the URI + String uri = line.substring(0, split); + //get the list of tokens + List<String> tokens = new ArrayList<String>(); + String tokenString = line.substring(split + 1); + String[] tokenArray = tokenString.split(","); + for(String token : tokenArray){ + tokens.add(token.trim()); + } + + datatypePropertyUri2TokensMap.put(uri, tokens); + } + } + + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public List<String> getClasses(String token){ + 
List<String> uris = new ArrayList<String>(); + for(Entry<String, List<String>> entry : classUri2TokensMap.entrySet()){ + if(entry.getValue().contains(token)){ + uris.add(entry.getKey()); + } + } + return uris; + } + + public List<String> getResources(String token){ + List<String> uris = new ArrayList<String>(); + for(Entry<String, List<String>> entry : resourceUri2TokensMap.entrySet()){ + if(entry.getValue().contains(token)){ + uris.add(entry.getKey()); + } + } + return uris; + } + + public List<String> getObjectProperties(String token){ + List<String> uris = new ArrayList<String>(); + for(Entry<String, List<String>> entry : objectPropertyUri2TokensMap.entrySet()){ + if(entry.getValue().contains(token)){ + uris.add(entry.getKey()); + } + } + return uris; + } + + public List<String> getDatatypeProperties(String token){ + List<String> uris = new ArrayList<String>(); + for(Entry<String, List<String>> entry : datatypePropertyUri2TokensMap.entrySet()){ + if(entry.getValue().contains(token)){ + uris.add(entry.getKey()); + } + } + return uris; + } + + public List<String> getProperties(String token){ + List<String> uris = new ArrayList<String>(); + uris.addAll(getObjectProperties(token)); + uris.addAll(getDatatypeProperties(token)); + return uris; + } + + public IndexResultSet getClassesWithScores(String token){ + IndexResultSet rs = new IndexResultSet(); + for(String uri : getClasses(token)){ + rs.addItem(new IndexResultItem(uri, token, 1f)); + } + return rs; + } + + public IndexResultSet getResourcesWithScores(String token){ + IndexResultSet rs = new IndexResultSet(); + for(String uri : getResources(token)){ + rs.addItem(new IndexResultItem(uri, token, 1f)); + } + return rs; + } + + public IndexResultSet getObjectPropertiesWithScores(String token){ + IndexResultSet rs = new IndexResultSet(); + for(String uri : getObjectProperties(token)){ + rs.addItem(new IndexResultItem(uri, token, 1f)); + } + return rs; + } + + public IndexResultSet 
getDatatypePropertiesWithScores(String token){ + IndexResultSet rs = new IndexResultSet(); + for(String uri : getDatatypeProperties(token)){ + rs.addItem(new IndexResultItem(uri, token, 1f)); + } + return rs; + } + + public IndexResultSet getPropertiesWithScores(String token){ + IndexResultSet rs = new IndexResultSet(); + for(String uri : getProperties(token)){ + rs.addItem(new IndexResultItem(uri, token, 1f)); + } + return rs; + } + + public Boolean isDataProperty(String uri){ + if(datatypePropertyUri2TokensMap.containsKey(uri)) { + return true; + } else if(objectPropertyUri2TokensMap.containsKey(uri)){ + return false; + } + return null; + } + + public static void main(String[] args) { + MappingBasedIndex index = new MappingBasedIndex(MappingBasedIndex.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), null, null, null); + System.out.println(index.getClasses("flat")); + } + +} Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -1,5 +1,6 @@ package org.dllearner.common.index; +import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import com.hp.hpl.jena.rdf.model.Model; @@ -8,23 +9,20 @@ public SPARQLClassesIndex(SparqlEndpoint endpoint) { super(endpoint); - - super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + - "?s a ?uri.\n" + - "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + - "FILTER(REGEX(STR(?label), '%s'))}\n" + - "LIMIT %d OFFSET %d"; - - super.queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + - "?s a ?uri.\n" + - "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - 
"FILTER(REGEX(STR(?label), '%s'))}\n" + - "LIMIT %d OFFSET %d"; + init(); } + public SPARQLClassesIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + super(endpoint, cache); + init(); + } + public SPARQLClassesIndex(Model model) { super(model); - + init(); + } + + private void init(){ super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + "?s a ?uri.\n" + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -60,6 +60,10 @@ this.cache = cache; } + public void setCache(ExtractionDBCache cache) { + this.cache = cache; + } + @Override public List<String> getResources(String searchTerm) { return getResources(searchTerm, DEFAULT_LIMIT); Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -1,5 +1,6 @@ package org.dllearner.common.index; +import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import com.hp.hpl.jena.rdf.model.Model; @@ -11,6 +12,11 @@ init(); } + public SPARQLPropertiesIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + super(endpoint, cache); + init(); + } + public SPARQLPropertiesIndex(Model model) { super(model); init(); @@ -28,7 +34,7 @@ "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; - 
super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT ?uri ?label WHERE {\n" + "?s ?uri ?o.\n" + // "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-13 14:56:37 UTC (rev 3738) @@ -2,7 +2,7 @@ // PREPOSITIONS close to || (NP NP* (PP P:'close' P:'to' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_closeto(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_closeto/OBJECTPROPERTY/near ]> - in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code ]> + in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> for . 
pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> Added: trunk/components-ext/src/main/resources/tbsl/oxford_class_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_class_mappings.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/oxford_class_mappings.txt 2012-06-13 14:56:37 UTC (rev 3738) @@ -0,0 +1 @@ +http://diadem.cs.ox.ac.uk/ontologies/real-estate#House|house, houses, flat, flats, appartement, appartements \ No newline at end of file Added: trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-06-13 14:56:37 UTC (rev 3738) @@ -0,0 +1,2 @@ +http://www.w3.org/2006/vcard/ns#street-address|address, location, postal code +http://www.w3.org/2006/vcard/ns#locality|address, location \ No newline at end of file Added: trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt =================================================================== Added: trunk/components-ext/src/main/resources/tbsl/oxford_resource_mappings.txt =================================================================== Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -0,0 +1,78 @@ +package org.dllearner.algorithm.tbsl; + +import java.io.BufferedReader; +import 
java.io.File; +import java.io.FileReader; +import java.net.URL; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; +import org.dllearner.common.index.Index; +import org.dllearner.common.index.MappingBasedIndex; +import org.dllearner.common.index.SPARQLClassesIndex; +import org.dllearner.common.index.SPARQLIndex; +import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; + +public class OxfordEvaluation { + + private static final String QUERIES_FILE = "/home/lorenz/evaluation.txt"; + + public static void main(String[] args) throws Exception{ + SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + ExtractionDBCache cache = new ExtractionDBCache("cache"); + + SPARQLIndex resourcesIndex = new SPARQLIndex(endpoint, cache); + SPARQLIndex classesIndex = new SPARQLClassesIndex(endpoint, cache); + SPARQLIndex propertiesIndex = new SPARQLPropertiesIndex(endpoint, cache); + MappingBasedIndex mappingIndex= new MappingBasedIndex( + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath()); + + SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); + learner.setMappingIndex(mappingIndex); + learner.init(); + + int learnedQuestions = 0; + Map<String, String> question2QueryMap = new HashMap<String, 
String>(); + + BufferedReader br = new BufferedReader(new FileReader(new File(QUERIES_FILE))); + + int questionNr = 0; + String question = null; + while((question = br.readLine()) != null){ + question = question.replace("question:", "").trim(); + if(question.isEmpty()) continue; + if(!question.toLowerCase().contains("Give me all") && Character.isLowerCase(question.charAt(0))){ + question = "Give me all " + question; + } + System.out.println("########################################################"); + questionNr++; + System.out.println(question); + try { + learner.setQuestion(question); + learner.learnSPARQLQueries(); + String learnedQuery = learner.getBestSPARQLQuery(); + if(learnedQuery != null){ + question2QueryMap.put(question, learnedQuery); + learnedQuestions++; + } + } catch (Exception e) { + e.printStackTrace(); + } + } + System.out.println("Generated SPARQL queries for " + learnedQuestions + " questions."); + for(Entry<String, String> entry : question2QueryMap.entrySet()){ + System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); + System.out.println(entry.getKey()); + System.out.println(entry.getValue()); + } + } + +} Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -44,6 +44,7 @@ model.read(new FileInputStream(f), null, "TURTLE"); } catch (Exception e) { System.err.println("Parsing failed."); + e.printStackTrace(); } } } @@ -83,7 +84,7 @@ SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(model, resourcesIndex, classesIndex, propertiesIndex); learner.init(); - String question = "Give me all houses with more than 2 bedrooms."; + String question = "Give me all houses with 
more than 3 bathrooms and more than 2 bedrooms."; learner.setQuestion(question); learner.learnSPARQLQueries(); @@ -104,6 +105,7 @@ String question = "Give me all houses near a school."; question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; + question = "Give me all houses with large garden and equipped kitchen"; learner.setQuestion(question); learner.learnSPARQLQueries(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-13 10:32:01
|
Revision: 3737 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3737&view=rev Author: christinaunger Date: 2012-06-13 10:31:55 +0000 (Wed, 13 Jun 2012) Log Message: ----------- [tbsl] "with ... and ..." (jetzt aber!) Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-06-12 17:24:58 UTC (rev 3736) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-06-13 10:31:55 UTC (rev 3737) @@ -1,9 +1,6 @@ package org.dllearner.algorithm.tbsl.ltag.parser; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; +import java.util.*; import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.ltag.data.Category; @@ -129,20 +126,20 @@ * input token matches the regular expression "\d", a new * auxiliary tree is added to the parseGrammar. 
*/ - - try { - TreeNode tree = c.construct("NUM:'" + token + "'"); - TreeNode tree2 = c.construct("(NP NUM:'" + token + "' NP*)"); - + Set<Pair<String,String>> ps = new HashSet<Pair<String,String>>(); + ps.add(new Pair<String,String>("NUM:'" + token + "'","<x,l1,e,[l1:[ x | equal(x," + token + ")]],[],[],[ SLOT_arg/LITERAL/x ]>")); + ps.add(new Pair<String,String>("(NP NUM:'" + token + "' NP*)","<x,l1,<e,t>,[l1:[ | count(x," + token + ")]],[],[],[ SLOT_arg/RESOURCE/x ]>")); + + for (Pair<String,String> p : ps) { + + try { + TreeNode tree = c.construct(p.getFirst()); + int gid = grammar.addTree(grammar.size(), new Pair<String,TreeNode>(token,tree), - Collections.singletonList("<x,l1,e,[l1:[ x | equal(x," + token + ")]],[],[],[ SLOT_arg/LITERAL/x ]>")); + Collections.singletonList(p.getSecond())); add(parseG, tree, gid-1, localID); + temps.add(gid-1); localID++; - -// int gid = grammar.addTree(grammar.size(), new Pair<String,TreeNode>(token,tree2), -// Collections.singletonList("<x,l1,<e,t>,[l1:[ | count(x," + token + ")]],[],[],[ SLOT_arg/RESOURCE/x ]>")); -// add(parseG, tree2, gid-1, localID); -// localID++; foundCandidates = true; coveredTokens.add(token); @@ -150,6 +147,7 @@ } catch (ParseException e) { e.printStackTrace(); } + } } else { /* Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-12 17:24:58 UTC (rev 3736) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-13 10:31:55 UTC (rev 3737) @@ -11,8 +11,6 @@ from . to . 
pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> - with || (NP NP* (PP P:'with' NUM[num] DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y), count(y,z) ] ], [ (l2,y,dp,<<e,t>,t>),(l3,z,num,e) ], [ l2=l1,l3=l1 ],[ SLOT_arg/RESOURCE/y ]> - with || (NP NP* (PP P:'with' NUM[num] DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y), equal(y,z) ] ], [ (l2,y,dp,<<e,t>,t>),(l3,z,num,e) ], [ l2=l1,l3=l1 ],[ SLOT_arg/LITERAL/y ]> square meters || (DP N:'square' N:'meters') || <x,l1,<<e,t>,t>>, [l1:[ | SLOT_size(x,y) ]], [],[],[SLOT_size/DATATYPEPROPERTY/size ]> // MONTHS This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-12 17:25:04
|
Revision: 3736 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3736&view=rev Author: christinaunger Date: 2012-06-12 17:24:58 +0000 (Tue, 12 Jun 2012) Log Message: ----------- [tbsl] corrected templates for "with ... and ..." Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-12 13:20:43 UTC (rev 3735) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-12 17:24:58 UTC (rev 3736) @@ -427,7 +427,7 @@ sc.setArguments(newargs); success = true; globalsuccess = true; - break loop; +// break loop; } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-12 13:20:51
|
Revision: 3735 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3735&view=rev Author: lorenz_b Date: 2012-06-12 13:20:43 +0000 (Tue, 12 Jun 2012) Log Message: ----------- Added local-model based test case for Oxford example. Modified Paths: -------------- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-12 13:19:03 UTC (rev 3734) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-12 13:20:43 UTC (rev 3735) @@ -35,9 +35,19 @@ super.setUp(); endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); model = ModelFactory.createOntologyModel(); + File dir = new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/data"); try { + for(File f : dir.listFiles()){ + if(f.isFile()){ + System.out.println("Loading file " + f.getName()); + try { + model.read(new FileInputStream(f), null, "TURTLE"); + } catch (Exception e) { + System.err.println("Parsing failed."); + } + } + } model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/ontology.ttl")), null, "TURTLE"); - model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/data/wwagency-letting-triple.ttl")), "http://diadem.cs.ox.ac.uk/ontologies/real-estate#", "TURTLE"); } catch (FileNotFoundException e) { e.printStackTrace(); } @@ -88,12 +98,12 @@ Index resourcesIndex = new SPARQLIndex(endpoint); Index classesIndex = new SPARQLClassesIndex(endpoint); Index propertiesIndex = new SPARQLPropertiesIndex(endpoint); - System.out.println(propertiesIndex.getResources("near")); SPARQLTemplateBasedLearner2 
learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.init(); String question = "Give me all houses near a school."; + question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; learner.setQuestion(question); learner.learnSPARQLQueries(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-12 13:19:15
|
Revision: 3734 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3734&view=rev Author: lorenz_b Date: 2012-06-12 13:19:03 +0000 (Tue, 12 Jun 2012) Log Message: ----------- Some modification to deal with new template generation. Added separate index classes for object and datatype properties. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLDatatypePropertiesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLObjectPropertiesIndex.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-11 19:01:59 UTC (rev 3733) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-12 13:19:03 UTC (rev 3734) @@ -4,7 +4,6 @@ import java.io.IOException; import java.net.URL; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -32,8 +31,6 @@ import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.sparql.Allocation; import org.dllearner.algorithm.tbsl.sparql.Query; -import org.dllearner.algorithm.tbsl.sparql.RatedQuery; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; @@ -43,12 +40,13 @@ import 
org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; -import org.dllearner.common.index.IndexResultItemComparator; import org.dllearner.common.index.IndexResultSet; import org.dllearner.common.index.SOLRIndex; +import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; +import org.dllearner.common.index.SPARQLObjectPropertiesIndex; +import org.dllearner.common.index.SPARQLPropertiesIndex; import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblem; -import org.dllearner.core.Oracle; import org.dllearner.core.SparqlQueryLearningAlgorithm; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; @@ -71,22 +69,14 @@ public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm{ - //for debugging - List<String> exclusions = Arrays.asList(new String[]{"http://dbpedia.org/ontology/GeopoliticalOrganisation", - "http://dbpedia.org/ontology/Non-ProfitOrganisation"}); - enum Ranking{ LUCENE, SIMILARITY, NONE } - private static final String OPTIONS_FILE = SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/tbsl.properties").getPath(); private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); private Monitor mon = MonitorFactory.getTimeMonitor("tbsl"); - private static final int MAX_URIS_PER_SLOT = 10; - - private Ranking ranking; private boolean useRemoteEndpointValidation; private boolean stopIfQueryResultNotEmpty; private int maxTestedQueriesPerTemplate = 50; @@ -103,6 +93,9 @@ private Index classesIndex; private Index propertiesIndex; + private Index datatypePropertiesIndex; + private Index objectPropertiesIndex; + private Templator templateGenerator; private Lemmatizer lemmatizer; private PartOfSpeechTagger posTagger; @@ -111,12 +104,6 @@ private String question; private int learnedPos = -1; - private Oracle oracle; - - private Map<String, 
IndexResultSet> resourcesURICache; - private Map<String, IndexResultSet> classesURICache; - private Map<String, IndexResultSet> propertiesURICache; - private Map<String, Object> learnedSPARQLQueries; private Set<Template> templates; private Collection<Query> sparqlQueryCandidates; @@ -153,6 +140,14 @@ this.cache = cache; setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } } public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ @@ -181,6 +176,14 @@ this.cache = cache; setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } } @Override @@ -202,7 +205,6 @@ maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); - ranking = Ranking.valueOf(options.get("learning.ranking", "similarity").toUpperCase()); useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? 
true : false; stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); @@ -244,15 +246,8 @@ this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate; } - public void setRanking(Ranking ranking) { - this.ranking = ranking; - } - private void reset(){ learnedSPARQLQueries = new HashMap<String, Object>(); - resourcesURICache = new HashMap<String, IndexResultSet>(); - classesURICache = new HashMap<String, IndexResultSet>(); - propertiesURICache = new HashMap<String, IndexResultSet>(); template2Queries = new HashMap<Template, Collection<? extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); } @@ -273,10 +268,6 @@ logger.info(t); } -// //generate SPARQL query candidates, but select only a fixed number per template -// template2Queries = getSPARQLQueryCandidates(templates, ranking); -// sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); - //get the weighted query candidates generatedQueries = getWeightedSPARQLQueries(templates); sparqlQueryCandidates = new ArrayList<Query>(); @@ -315,38 +306,6 @@ return topNQueries; } - public List<String> getSPARQLQueries() throws NoTemplateFoundException{ - logger.info("Generating SPARQL query templates..."); - mon.start(); - templates = templateGenerator.buildTemplates(question); - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - if(templates.isEmpty()){ - throw new NoTemplateFoundException(); - } - logger.info("Templates:"); - for(Template t : templates){ - logger.info(t); - } - - //generate SPARQL query candidates - logger.info("Generating SPARQL query candidates..."); - mon.start(); - Map<Template, Collection<? 
extends Query>> template2Queries = getSPARQLQueryCandidates(templates, ranking); - sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); - - - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - - List<String> queries = new ArrayList<String>(); - for(Query q : sparqlQueryCandidates){ - queries.add(q.toString()); - } - - return queries; - } - public Set<Template> getTemplates(){ return templates; } @@ -368,228 +327,6 @@ return slot2URI; } - - private Map<Template,Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates, Ranking ranking){ - switch(ranking){ - case LUCENE: return getSPARQLQueryCandidatesSortedByLucene(templates); - case SIMILARITY: return getSPARQLQueryCandidatesSortedBySimilarity(templates); - case NONE: return getSPARQLQueryCandidates(templates); - default: return null; - } - } - - /* - private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - double alpha = 0.8; - double beta = 1 - alpha; - Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); - - Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - - Set<Allocation> allAllocations; - for(Template t : templates){ - allAllocations = new HashSet<Allocation>(); - - for(Slot slot : t.getSlots()){ - Set<Allocation> allocations = computeAllocation(slot); - allAllocations.addAll(allocations); - slot2Allocations.put(slot, allocations); - } - - int min = Integer.MAX_VALUE; - int max = Integer.MIN_VALUE; - for(Allocation a : allAllocations){ - if(a.getInDegree() < min){ - min = a.getInDegree(); - } - if(a.getInDegree() > max){ - max = a.getInDegree(); - } - } - for(Allocation a : allAllocations){ - double prominence = a.getInDegree()/(max-min); - a.setProminence(prominence); - - double score = alpha * a.getSimilarity() + beta * a.getProminence(); - a.setScore(score); - - } -// System.out.println(allAllocations); - - Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); - Query 
cleanQuery = t.getQuery(); - queries.add(new WeightedQuery(cleanQuery)); - - Set<WeightedQuery> tmp = new HashSet<WeightedQuery>(); - List<Slot> sortedSlots = new ArrayList<Slot>(); - Set<Slot> classSlots = new HashSet<Slot>(); - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.CLASS){ - sortedSlots.add(slot); - classSlots.add(slot); - } - } - for(Slot slot : t.getSlots()){ - if(!sortedSlots.contains(slot)){ - sortedSlots.add(slot); - } - } - for(Slot slot : sortedSlots){ - if(!slot2Allocations.get(slot).isEmpty()){ - for(Allocation a : slot2Allocations.get(slot)){ - for(WeightedQuery query : queries){ - //check if the query is possible - if(slot.getSlotType() == SlotType.SYMPROPERTY){ - Query reversedQuery = new Query(query.getQuery()); - reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); - - boolean drop = false; - for(SPARQL_Triple triple : reversedQuery.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : reversedQuery.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("RANGES: " + ranges); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + reversedQuery.toString()); - } - } - } - for(SPARQL_Triple typeTriple : reversedQuery.getRDFTypeTriples(subjectVar)){ -// 
System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); -// System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("DOMAINS: " + domains); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + reversedQuery.toString()); - } - } - } - } - - if(!drop){ - reversedQuery.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(reversedQuery); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - tmp.add(w); - } - - } - Query q = new Query(query.getQuery()); - - boolean drop = false; - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("RANGES: " + ranges); -// System.out.println("TYPES: " + allTypes); - - 
if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + q.toString()); - } - } - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ -// System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); -// System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("DOMAINS: " + domains); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + q.toString()); - } - } - } - } - } - - - if(!drop){ - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - tmp.add(w); - } - - - } - } - queries.clear(); - queries.addAll(tmp);System.out.println(tmp); - tmp.clear(); - } - - } - for(WeightedQuery q : queries){ - q.setScore(q.getScore()/t.getSlots().size()); - } - allQueries.addAll(queries); - List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); - qList.add(wQ.getQuery()); - } - template2Queries.put(t, qList); - } - return allQueries; - } - */ - private void normProminenceValues(Set<Allocation> allocations){ double min = 0; double max = 0; @@ -621,7 +358,7 @@ private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ logger.info("Generating SPARQL query candidates..."); - Map<Slot, Set<Allocation>> slot2Allocations2 = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { + Map<Slot, 
Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @Override public int compare(Slot o1, Slot o2) { @@ -634,8 +371,6 @@ }); - Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); - Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); Set<Allocation> allocations; @@ -654,7 +389,7 @@ Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); list.add(submit); - } + } } for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { @@ -838,106 +573,6 @@ return allQueries; } -/* - * for(SPARQL_Triple triple : t.getQuery().getTriplesWithVar(slot.getAnchor())){System.out.println(triple); - for(SPARQL_Triple typeTriple : t.getQuery().getRDFTypeTriples(triple.getVariable().getName())){ - System.out.println(typeTriple); - for(Allocation a : allocations){ - Set<String> domains = getDomains(a.getUri()); - System.out.println(a); - System.out.println(domains); - for(Slot s : classSlots){ - if(s.getAnchor().equals(triple.getVariable().getName())){ - for(Allocation all : slot2Allocations.get(s)){ - if(!domains.contains(all.getUri())){ - System.out.println("DROP " + a); - } - } - } - } - } - - - } - */ - - private SortedSet<Allocation> computeAllocations(Slot slot){ - SortedSet<Allocation> allocations = new TreeSet<Allocation>(); - - Index index = getIndexBySlotType(slot); - - IndexResultSet rs; - for(String word : slot.getWords()){ - if(slot.getSlotType() == SlotType.RESOURCE){ - rs = index.getResourcesWithScores(word, 250); - } else { - if(slot.getSlotType() == SlotType.CLASS){ - word = PlingStemmer.stem(word); - } - rs = index.getResourcesWithScores(word, 20); - } - - - //debugging -// for(Iterator<SolrQueryResultItem> iter = rs.getItems().iterator();iter.hasNext();){ -// SolrQueryResultItem item = iter.next(); -// if(exclusions.contains(item.getUri())){ -// iter.remove(); -// } -// } - - 
for(IndexResultItem item : rs.getItems()){ - double similarity = Similarity.getSimilarity(word, item.getLabel()); - //get the labels of the redirects and compute the highest similarity - if(slot.getSlotType() == SlotType.RESOURCE){ - Set<String> labels = getRedirectLabels(item.getUri()); - for(String label : labels){ - double tmp = Similarity.getSimilarity(word, label); - if(tmp > similarity){ - similarity = tmp; - } - } - } - double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); - allocations.add(new Allocation(item.getUri(), prominence, similarity)); - } - - } - - normProminenceValues(allocations); - - computeScore(allocations); - return new TreeSet<Allocation>(allocations); - } - - private Set<Allocation> computeAllocations(Slot slot, int limit){ - logger.info("Computing allocations for " + slot); - SortedSet<Allocation> allocations = computeAllocations(slot); - - if(allocations.isEmpty()){ - logger.info("...done."); - return allocations; - } - - ArrayList<Allocation> l = new ArrayList<Allocation>(allocations); - Collections.sort(l, new Comparator<Allocation>() { - - @Override - public int compare(Allocation o1, Allocation o2) { - double dif = o1.getScore() - o2.getScore(); - if(dif < 0){ - return 1; - } else if(dif > 0){ - return -1; - } else { - return o1.getUri().compareTo(o2.getUri()); - } - } - }); - logger.info("...done."); - return new TreeSet<Allocation>(l.subList(0, Math.min(limit, allocations.size()))); - } - private Set<String> getRedirectLabels(String uri){ Set<String> labels = new HashSet<String>(); String query = String.format("SELECT ?label WHERE {?s <http://dbpedia.org/ontology/wikiPageRedirects> <%s>. 
?s <%s> ?label.}", uri, RDFS.label.getURI()); @@ -956,7 +591,8 @@ String query = null; if(type == SlotType.CLASS){ query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY + || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; @@ -979,233 +615,7 @@ } - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - Set<Query> queries = new HashSet<Query>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - for(Template template : templates){ - queries = new HashSet<Query>(); - queries.add(template.getQuery()); - template2Queries.put(template, queries); - for(Slot slot : template.getSlots()){ - Set<Query> tmp = new HashSet<Query>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(IndexResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(Query query : queries){ - Query newQuery = new Query(query); - newQuery.replaceVarWithURI(var, item.getUri()); - tmp.add(newQuery); - } - } - if(!words.isEmpty()){ - queries.clear(); - queries.addAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return template2Queries; - } - private Map<String, Float> getCandidateRatedSPARQLQueries(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - Map<String, Float> query2Score = new HashMap<String, Float>(); - - Query query; - for(Template template : templates){ - query = template.getQuery(); - query2Score.put(query.toString(), Float.valueOf(0)); - for(Slot slot : 
template.getSlots()){ - Map<String, Float> tmp = new HashMap<String, Float>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(IndexResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(Entry<String, Float> entry2 : query2Score.entrySet()){ - tmp.put(entry2.getKey().replace("?" + var, "<" + item.getUri() + ">"), item.getScore() + entry2.getValue()); - } - } - if(!words.isEmpty()){ - query2Score.clear(); - query2Score.putAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return query2Score; - } - - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidatesSortedByLucene(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - SortedSet<RatedQuery> ratedQueries = new TreeSet<RatedQuery>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - - Query query; - for(Template template : templates){ - query = template.getQuery(); - ratedQueries = new TreeSet<RatedQuery>(); - ratedQueries.add(new RatedQuery(query, 0)); - template2Queries.put(template, ratedQueries); - for(Slot slot : template.getSlots()){ - Set<RatedQuery> tmp = new HashSet<RatedQuery>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(IndexResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(RatedQuery rQ : ratedQueries){ - RatedQuery newRQ = new RatedQuery(rQ, rQ.getScore()); - newRQ.replaceVarWithURI(var, item.getUri()); - newRQ.setScore(newRQ.getScore() + item.getScore()); - tmp.add(newRQ); - } - } - if(!words.isEmpty()){ - ratedQueries.clear(); - ratedQueries.addAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return template2Queries; - } - - private Map<Template, Collection<? 
extends Query>> getSPARQLQueryCandidatesSortedBySimilarity(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - List<Query> queries = new ArrayList<Query>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - List<String> uriCandidates; - for(Template template : templates){ - queries = new ArrayList<Query>(); - queries.add(template.getQuery()); - template2Queries.put(template, queries); - for(Slot slot : template.getSlots()){ - List<Query> tmp = new ArrayList<Query>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - SPARQL_Prefix prefix = null; - uriCandidates = getCandidateURIsSortedBySimilarity(slot); - for(String uri : uriCandidates){ -// for(Entry<String, String> uri2prefix : prefixMap.entrySet()){ -// if(uri.startsWith(uri2prefix.getKey())){ -// prefix = new SPARQL_Prefix(uri2prefix.getValue(), uri2prefix.getKey()); -// uri = uri.replace(uri2prefix.getKey(), uri2prefix.getValue() + ":"); -// break; -// } -// } - for(Query query : queries){ - if(slot.getSlotType() == SlotType.SYMPROPERTY){ - Query reversedQuery = new Query(query); - reversedQuery.getTriplesWithVar(var).iterator().next().reverse(); -// logger.info("NORMAL QUERY:\n" + query.toString()); -// logger.info("REVERSED QUERY:\n" + reversedQuery.toString()); - if(prefix != null){ - reversedQuery.addPrefix(prefix); - reversedQuery.replaceVarWithPrefixedURI(var, uri); - } else { - reversedQuery.replaceVarWithURI(var, uri); - } - tmp.add(reversedQuery); - } - Query newQuery = new Query(query); - if(prefix != null){ - newQuery.addPrefix(prefix); - newQuery.replaceVarWithPrefixedURI(var, uri); - } else { - newQuery.replaceVarWithURI(var, uri); - } - tmp.add(newQuery); - } - prefix = null; - } - if(!words.isEmpty() && !uriCandidates.isEmpty()){ - queries.clear(); - queries.addAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - 
return template2Queries; - } - - private IndexResultSet getCandidateURIs(Slot slot, int limit){ - logger.info("Generating candidate URIs for " + slot.getWords() + "..."); - mon.start(); - Index index = null; - if(slot.getSlotType() == SlotType.CLASS){ - index = classesIndex; - } else if(slot.getSlotType() == SlotType.PROPERTY){ - index = propertiesIndex; - } else if(slot.getSlotType() == SlotType.RESOURCE){ - index = resourcesIndex; - } - IndexResultSet rs = new IndexResultSet(); - for(String word : slot.getWords()){ - rs.add(index.getResourcesWithScores(word, limit)); - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return rs; - } - - private List<String> getCandidateURIsSortedBySimilarity(Slot slot){ - logger.info("Generating URI candidates for " + slot.getWords() + "..."); - mon.start(); - List<String> sortedURIs = new ArrayList<String>(); - //get the appropriate index based on slot type - Index index = getIndexBySlotType(slot); - //get the appropriate cache for URIs to avoid redundant queries to index - Map<String, IndexResultSet> uriCache = getCacheBySlotType(slot); - - SortedSet<IndexResultItem> tmp; - IndexResultSet rs; - - //prune the word list only when slot type is not RESOURCE - List<String> words; - if(slot.getSlotType() == SlotType.RESOURCE){ - words = slot.getWords(); - } else { -// words = pruneList(slot.getWords());//getLemmatizedWords(slot.getWords()); - words = pruneList(slot.getWords()); - } - - for(String word : words){ - tmp = new TreeSet<IndexResultItem>(new IndexResultItemComparator(word)); - rs = uriCache.get(word); - - if(rs == null){ - rs = index.getResourcesWithScores(word, 50); - uriCache.put(word, rs); - } - - tmp.addAll(rs.getItems()); - - for(IndexResultItem item : tmp){ - if(!sortedURIs.contains(item.getUri())){ - sortedURIs.add(item.getUri()); - } - if(sortedURIs.size() == MAX_URIS_PER_SLOT){ - break; - } - - } - tmp.clear(); - - } - - slot2URI.put(slot, sortedURIs); - mon.stop(); - logger.info("Done in " 
+ mon.getLastValue() + "ms."); - logger.info("URIs: " + sortedURIs); - return sortedURIs; - } - private List<String> pruneList(List<String> words){ List<String> prunedList = new ArrayList<String>(); for(String w1 : words){ @@ -1257,65 +667,16 @@ index = classesIndex; } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ index = propertiesIndex; + } else if(type == SlotType.DATATYPEPROPERTY){ + index = datatypePropertiesIndex; + } else if(type == SlotType.OBJECTPROPERTY){ + index = objectPropertiesIndex; } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ index = resourcesIndex; } return index; } - private Map<String, IndexResultSet> getCacheBySlotType(Slot slot){ - Map<String, IndexResultSet> cache = null; - SlotType type = slot.getSlotType(); - if(type == SlotType.CLASS){ - cache = classesURICache; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - cache = propertiesURICache; - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - cache = resourcesURICache; - } - return cache; - } - - private IndexResultSet getCandidateURIsWithScore(Slot slot){ - logger.info("Generating candidate URIs for " + slot.getWords() + "..."); - mon.start(); - Index index = null; - Map<String, Float> uri2Score = new HashMap<String, Float>(); - boolean sorted = false; - if(slot.getSlotType() == SlotType.CLASS){ - index = classesIndex; - } else if(slot.getSlotType() == SlotType.PROPERTY){ - index = propertiesIndex; - } else if(slot.getSlotType() == SlotType.RESOURCE){ - index = resourcesIndex; - sorted = true; - } - IndexResultSet resultSet = new IndexResultSet(); - for(String word : slot.getWords()){ - resultSet.add(index.getResourcesWithScores("label:" + word)); - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - logger.info("Candidate URIs: " + uri2Score.keySet()); - return resultSet; - } - - private List<Query> getNBestQueryCandidatesForTemplates(Map<Template, Collection<? 
extends Query>> template2Queries){ - List<Query> queries = new ArrayList<Query>(); - for(Entry<Template, Collection<? extends Query>> entry : template2Queries.entrySet()){ - int max = Math.min(maxTestedQueriesPerTemplate, entry.getValue().size()); - int i = 0; - for(Query q : entry.getValue()){ - queries.add(q); - i++; - if(i == max){ - break; - } - } - } - return queries; - } - private void validateAgainstRemoteEndpoint(Collection<? extends Query> queries){ List<String> queryStrings = new ArrayList<String>(); SPARQL_QueryType queryType = SPARQL_QueryType.SELECT; @@ -1471,6 +832,48 @@ return result; } + private SortedSet<Allocation> computeAllocations(Slot slot){ + logger.info("Computing allocations for slot: " + slot); + SortedSet<Allocation> allocations = new TreeSet<Allocation>(); + + Index index = getIndexBySlotType(slot); + + IndexResultSet rs; + for(String word : slot.getWords()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs = index.getResourcesWithScores(word, 50); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); + } + rs = index.getResourcesWithScores(word, 20); + } + + for(IndexResultItem item : rs.getItems()){ + double similarity = Similarity.getSimilarity(word, item.getLabel()); + //get the labels of the redirects and compute the highest similarity + if(slot.getSlotType() == SlotType.RESOURCE){ + Set<String> labels = getRedirectLabels(item.getUri()); + for(String label : labels){ + double tmp = Similarity.getSimilarity(word, label); + if(tmp > similarity){ + similarity = tmp; + } + } + } + double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); + allocations.add(new Allocation(item.getUri(), prominence, similarity)); + } + + } + + normProminenceValues(allocations); + + computeScore(allocations); + logger.info("Found " + allocations.size() + " allocations for slot " + slot); + return new TreeSet<Allocation>(allocations); + } + } /** Added: 
trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLDatatypePropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLDatatypePropertiesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLDatatypePropertiesIndex.java 2012-06-12 13:19:03 UTC (rev 3734) @@ -0,0 +1,41 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class SPARQLDatatypePropertiesIndex extends SPARQLPropertiesIndex{ + + public SPARQLDatatypePropertiesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public SPARQLDatatypePropertiesIndex(Model model) { + super(model); + init(); + } + + public SPARQLDatatypePropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT ?uri WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:DatatypeProperty." + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:DatatypeProperty." 
+ + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + + "LIMIT %d OFFSET %d"; + } + + +} Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-11 19:01:59 UTC (rev 3733) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-12 13:19:03 UTC (rev 3734) @@ -47,6 +47,14 @@ this.model = model; } + public SPARQLIndex(SPARQLIndex index) { + if(index.getModel() != null){ + this.model = index.getModel(); + } else { + this.endpoint = index.getEndpoint(); + } + } + public SPARQLIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { this.endpoint = endpoint; this.cache = cache; @@ -115,7 +123,7 @@ return irs; } - private ResultSet executeSelect(String query){System.out.println(query); + private ResultSet executeSelect(String query){//System.out.println(query); ResultSet rs; if(model == null){ if(cache == null){ @@ -131,4 +139,12 @@ return rs; } + public SparqlEndpoint getEndpoint() { + return endpoint; + } + + public Model getModel() { + return model; + } + } Added: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLObjectPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLObjectPropertiesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLObjectPropertiesIndex.java 2012-06-12 13:19:03 UTC (rev 3734) @@ -0,0 +1,42 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class SPARQLObjectPropertiesIndex extends SPARQLPropertiesIndex{ + + public SPARQLObjectPropertiesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public 
SPARQLObjectPropertiesIndex(Model model) { + super(model); + init(); + } + + public SPARQLObjectPropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT ?uri WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:ObjectProperty." + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:ObjectProperty." + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + + "LIMIT %d OFFSET %d"; + } + + + +} Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-11 19:01:59 UTC (rev 3733) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-12 13:19:03 UTC (rev 3734) @@ -8,33 +8,29 @@ public SPARQLPropertiesIndex(SparqlEndpoint endpoint) { super(endpoint); - - super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + - "?s ?uri ?o.\n" + - "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + - "LIMIT %d OFFSET %d"; - - super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + - "?s ?uri ?o.\n" + -// "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + - "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + - "LIMIT %d OFFSET %d"; + init(); } public SPARQLPropertiesIndex(Model model) { super(model); - - super.queryTemplate = "SELECT ?uri WHERE {\n" + + init(); + } + + public SPARQLPropertiesIndex(SPARQLIndex 
index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + "?s ?uri ?o.\n" + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + -// "?s ?uri ?o.\n" + - "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + + "?s ?uri ?o.\n" + +// "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |