From: <lor...@us...> - 2011-11-05 07:05:30
|
Revision: 3375 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3375&view=rev Author: lorenz_b Date: 2011-11-05 07:05:23 +0000 (Sat, 05 Nov 2011) Log Message: ----------- Some changes in eval output. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-04 15:22:54 UTC (rev 3374) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-05 07:05:23 UTC (rev 3375) @@ -119,6 +119,8 @@ private Map<Template, Collection<? extends Query>> template2Queries; private Map<Slot, List<String>> slot2URI; + private Set<WeightedQuery> generatedQueries; + private Map<String, String> prefixMap; private Lemmatizer lemmatizer = new LingPipeLemmatizer();// StanfordLemmatizer(); @@ -282,10 +284,10 @@ // sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); //get the weighted query candidates - Set<WeightedQuery> weightedQueries = getWeightedSPARQLQueries(templates); + generatedQueries = getWeightedSPARQLQueries(templates); sparqlQueryCandidates = new ArrayList<Query>(); int i = 0; - for(WeightedQuery wQ : weightedQueries){System.out.println(wQ); + for(WeightedQuery wQ : generatedQueries){System.out.println(wQ); sparqlQueryCandidates.add(wQ.getQuery()); if(i == maxTestedQueries){ break; @@ -302,6 +304,22 @@ } + public Set<WeightedQuery> getGeneratedQueries() { + return generatedQueries; + } + + public Set<WeightedQuery> getGeneratedQueries(int topN) { + Set<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); + int max = Math.min(topN, 
generatedQueries.size()); + for(WeightedQuery wQ : generatedQueries){ + topNQueries.add(wQ); + if(topNQueries.size() == max){ + break; + } + } + return topNQueries; + } + public List<String> getSPARQLQueries() throws NoTemplateFoundException{ logger.info("Generating SPARQL query templates..."); mon.start(); @@ -1253,7 +1271,10 @@ logger.info("Testing query:\n" + query); boolean result = executeAskQuery(query); learnedSPARQLQueries.put(query, result); - if(stopIfQueryResultNotEmpty && result){ +// if(stopIfQueryResultNotEmpty && result){ +// return; +// } + if(stopIfQueryResultNotEmpty){ return; } logger.info("Result: " + result); @@ -1400,7 +1421,8 @@ // String question = "Who/WP was/VBD the/DT wife/NN of/IN president/NN Lincoln/NNP"; // String question = "Who/WP produced/VBD the/DT most/JJS films/NNS"; // String question = "Which/WDT country/NN does/VBZ the/DT Airedale/NNP Terrier/NNP come/VBP from/IN"; - String question = "When/WRB was/VBD Capcom/NNP founded/VBD"; +// String question = "When/WRB was/VBD Capcom/NNP founded/VBD"; + String question = "Is/VBZ there/RB a/DT video/NN game/NN called/VBN Battle/NNP Chess/NNP"; // String question = "Which/WDT software/NN has/VBZ been/VBN developed/VBN by/IN organizations/NNS founded/VBN in/IN California/NNP"; // String question = "How/WRB many/JJ films/NNS did/VBD Leonardo/NNP DiCaprio/NNP star/VB in/IN"; // String question = "Which/WDT music/NN albums/NNS contain/VBP the/DT song/NN Last/NNP Christmas/NNP"; Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-11-04 15:22:54 UTC (rev 3374) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-11-05 07:05:23 UTC (rev 3375) @@ -37,8 +37,10 @@ import org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; import 
org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner; import org.dllearner.algorithm.tbsl.sparql.Query; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; import org.dllearner.algorithm.tbsl.util.LatexWriter; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; @@ -49,8 +51,10 @@ import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.hp.hpl.jena.sparql.vocabulary.FOAF; @@ -369,32 +373,34 @@ latex.endEnumeration(); //get the generated SPARQL query candidates - Map<Template, Collection<? extends Query>> template2Queries = stbl.getTemplates2SPARQLQueries(); + Set<WeightedQuery> generatedQueries = stbl.getGeneratedQueries(15); //write generated queries subsection - latex.beginSubsection("Top " + topN2Print + " generated queries per template"); - int k = 1; - List<Query> queries; - for(Template t : templates){ - latex.beginSubSubsection("Template " + k); - queries = new ArrayList<Query>(template2Queries.get(t)); - if(!queries.isEmpty()){ - latex.beginEnumeration(); - } - //print top n queries to latex file - int max = Math.min(topN2Print, queries.size()); - for(int j = 0; j < max; j++){ + latex.beginSubsection("Top " + 15 + " generated queries"); + if(!generatedQueries.isEmpty()){ + latex.beginEnumeration(); + for(WeightedQuery wQ : generatedQueries){ latex.beginEnumerationItem(); - latex.addListing(queries.get(j).toString()); + com.hp.hpl.jena.query.Query q = QueryFactory.create(wQ.getQuery().toString()); + if(q.toString().contains("http://dbpedia.org/property/")){ + 
q.setPrefix("dbp", "http://dbpedia.org/property/"); + } + if(q.toString().contains("http://dbpedia.org/ontology/")){ + q.setPrefix("dbo", "http://dbpedia.org/ontology/"); + } + if(q.toString().contains("http://dbpedia.org/resource/")){ + q.setPrefix("dbr", "http://dbpedia.org/resource/"); + } + String queryString = q.toString(); + String requestURL = new QueryEngineHTTP(endpoint.getURL().toString(), queryString).toString(); +// System.out.println(requestURL); + queryString = queryString + "\n" + "Score(" + wQ.getScore() + ")"; + latex.addListing(queryString); latex.endEnumerationItem(); } - if(!queries.isEmpty()){ - latex.endEnumeration(); - } - k++; + latex.endEnumeration(); } - //get the URIs for each template slot latex.beginSubsection("Covered entities"); Map<Slot, List<String>> slot2URIsMap = stbl.getSlot2URIs(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-05 07:57:23
|
Revision: 3376 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3376&view=rev Author: lorenz_b Date: 2011-11-05 07:57:16 +0000 (Sat, 05 Nov 2011) Log Message: ----------- Added GROUP BY to query syntax, to be SPARQL 1.1 conform. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-11-05 07:05:23 UTC (rev 3375) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-11-05 07:57:16 UTC (rev 3376) @@ -143,6 +143,8 @@ public String toString() { + String groupBy = null; + String retVal = ""; for (SPARQL_Prefix prefix : prefixes) { @@ -153,9 +155,14 @@ { retVal += "\nSELECT "; + String lastSelectTerm = null; for (SPARQL_Term term : selTerms) { retVal += term.toString() + " "; + if(selTerms.size() > 1 && term.toString().contains("COUNT")){ + groupBy = lastSelectTerm; + } + lastSelectTerm = term.toString(); } } else retVal += "\nASK "; @@ -175,6 +182,10 @@ } retVal += "}\n"; + + if(groupBy != null){ + retVal += "GROUP BY (" + groupBy + ")\n"; + } if (orderBy != null && !orderBy.isEmpty()) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-11-05 07:05:23 UTC (rev 3375) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2011-11-05 07:57:16 UTC (rev 3376) @@ -87,7 +87,7 @@ // System.err.println("SPARQL_Term: 
name="+name+",alias="+alias+",agg="+aggregate+",orderBy="+orderBy); // DEBUG if (aggregate != SPARQL_Aggregate.NONE) { if (alias != null && !alias.equals(name)) - return aggregate+"(?"+name.toLowerCase()+") AS ?" + alias; + return "(" + aggregate+"(?"+name.toLowerCase()+") AS ?" + alias + ")"; else return aggregate+"(?"+name.toLowerCase()+")"; } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-11-05 07:05:23 UTC (rev 3375) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-11-05 07:57:16 UTC (rev 3376) @@ -381,7 +381,7 @@ latex.beginEnumeration(); for(WeightedQuery wQ : generatedQueries){ latex.beginEnumerationItem(); - com.hp.hpl.jena.query.Query q = QueryFactory.create(wQ.getQuery().toString()); + com.hp.hpl.jena.query.Query q = QueryFactory.create(wQ.getQuery().toString(), Syntax.syntaxARQ); if(q.toString().contains("http://dbpedia.org/property/")){ q.setPrefix("dbp", "http://dbpedia.org/property/"); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2011-11-07 16:07:41
|
Revision: 3385 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3385&view=rev Author: lorenz_b Date: 2011-11-07 16:07:31 +0000 (Mon, 07 Nov 2011) Log Message: ----------- Some changes to get position of learned query if exists. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-07 10:59:39 UTC (rev 3384) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-07 16:07:31 UTC (rev 3385) @@ -105,6 +105,7 @@ private Templator templateGenerator; private String question; + private int learnedPos = -1; private Oracle oracle; @@ -1305,7 +1306,7 @@ if(!results.isEmpty()){ try{ int cnt = Integer.parseInt(results.get(0)); - if(cnt > 0){ + if(cnt > 0){learnedPos = queries.indexOf(query); learnedSPARQLQueries.put(query, results); if(stopIfQueryResultNotEmpty){ return; @@ -1313,6 +1314,7 @@ } } catch (NumberFormatException e){ learnedSPARQLQueries.put(query, results); + learnedPos = queries.indexOf(query); if(stopIfQueryResultNotEmpty){ return; } @@ -1329,12 +1331,13 @@ // if(stopIfQueryResultNotEmpty && result){ // return; // } - if(stopIfQueryResultNotEmpty){ + if(stopIfQueryResultNotEmpty){learnedPos = queries.indexOf(query); return; } logger.info("Result: " + result); } } + mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); } @@ -1398,6 +1401,10 @@ return resources; } + public int getLearnedPosition() { + return learnedPos+1; + } + private List<String> getResultFromLocalModel(String query, Model model){ List<String> resources = new 
ArrayList<String>(); QueryExecution qe = QueryExecutionFactory.create(query, model); @@ -1411,29 +1418,36 @@ } private Set<String> getDomains(String property){ + String tmp = property; + if(property.startsWith("http://dbpedia.org/property/")){ + tmp = "http://dbpedia.org/ontology" + property.substring(property.lastIndexOf("/")); + } Set<String> domains = new HashSet<String>(); - String query = String.format("SELECT ?domain WHERE {<%s> <%s> ?domain}", property, RDFS.domain.getURI()); + String query = String.format("SELECT ?domain WHERE {<%s> <%s> ?domain}", tmp, RDFS.domain.getURI()); ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); QuerySolution qs; while(rs.hasNext()){ qs = rs.next(); domains.add(qs.getResource("domain").getURI()); } - return domains; } private Set<String> getRanges(String property){ - Set<String> domains = new HashSet<String>(); - String query = String.format("SELECT ?range WHERE {<%s> <%s> ?range}", property, RDFS.range.getURI()); + String tmp = property; + if(property.startsWith("http://dbpedia.org/property/")){ + tmp = "http://dbpedia.org/ontology" + property.substring(property.lastIndexOf("/")); + } + Set<String> ranges = new HashSet<String>(); + String query = String.format("SELECT ?range WHERE {<%s> <%s> ?range}", tmp, RDFS.range.getURI()); ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); QuerySolution qs; while(rs.hasNext()){ qs = rs.next(); - domains.add(qs.getResource("range").getURI()); + ranges.add(qs.getResource("range").getURI()); } - return domains; + return ranges; } private boolean isObjectProperty(String property){ @@ -1479,10 +1493,10 @@ // String question = "Which/WDT organizations/NNS were/VBD founded/VBN in/IN 1950/CD"; // String question = "Is/VBZ there/RB a/DT video/NN game/NN called/VBN Battle/NNP Chess/NNP"; // String question = "Which/WDT software/NN has/VBZ been/VBN developed/VBN by/IN organizations/NNS founded/VBN in/IN 
California/NNP"; -// String question = "How/WRB many/JJ films/NNS did/VBD Leonardo/NNP DiCaprio/NNP star/VB in/IN"; + String question = "How/WRB many/JJ films/NNS did/VBD Leonardo/NNP DiCaprio/NNP star/VB in/IN"; // String question = "Which/WDT music/NN albums/NNS contain/VBP the/DT song/NN Last/NNP Christmas/NNP"; // String question = "Which/WDT companies/NNS are/VBP located/VBN in/IN California/NNP USA/NNP"; - String question = "Who/WP wrote/VBD the/DT book/NN The/NNP pillars/NNP of/NNP the/NNP Earth/NNP"; +// String question = "Who/WP wrote/VBD the/DT book/NN The/NNP pillars/NNP of/NNP the/NNP Earth/NNP"; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner();learner.setUseIdealTagger(true); // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), // Collections.<String>singletonList(""), Collections.<String>emptyList()); @@ -1493,6 +1507,7 @@ learner.learnSPARQLQueries(); System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); + System.out.println(learner.getLearnedPosition()); } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-11-07 10:59:39 UTC (rev 3384) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-11-07 16:07:31 UTC (rev 3385) @@ -321,6 +321,7 @@ String errorCode = ""; LatexWriter latex = new LatexWriter(); int i = 0; + int cnt = 0; for(Entry<Integer, String> entry : id2Question.entrySet()){//if(entry.getKey()==50)continue; if((testID != -1 && entry.getKey() != testID) || (yagoExclusions.contains(entry.getKey())))continue; try { @@ -361,6 +362,11 @@ //get the used templates List<Template> templates = new 
ArrayList<Template>(stbl.getTemplates()); + if(stbl.getLearnedPosition() == -1 || stbl.getLearnedPosition() > 10){ + cnt++; + } + i++; + //start output //write templates subsection latex.beginSubsection("Templates (" + templates.size() + ")"); @@ -392,8 +398,6 @@ q.setPrefix("dbr", "http://dbpedia.org/resource/"); } String queryString = q.toString(); - String requestURL = new QueryEngineHTTP(endpoint.getURL().toString(), queryString).toString(); -// System.out.println(requestURL); queryString = queryString + "\n" + "Score(" + wQ.getScore() + ")"; latex.addListing(queryString); latex.endEnumerationItem(); @@ -483,7 +487,7 @@ latex.addSummaryTableEntry(questionId, extractSentence(question), precision, recall, errorCode); } } - + System.out.println(cnt + "/" + i); latex.write("log/evaluation_" + System.currentTimeMillis()+ ".tex", Calendar.getInstance().getTime().toString(), correctAnswers); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-11-16 15:02:47
|
Revision: 3408 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3408&view=rev Author: christinaunger Date: 2011-11-16 15:02:41 +0000 (Wed, 16 Nov 2011) Log Message: ----------- changed TemplateSerializationTest to Hashtable<Template,String>-serialization Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateSerializationTest.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-11-16 12:55:46 UTC (rev 3407) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2011-11-16 15:02:41 UTC (rev 3408) @@ -10,6 +10,7 @@ import net.didion.jwnl.data.POS; import org.apache.log4j.Logger; + import org.dllearner.algorithm.tbsl.converter.DRS2SPARQL_Converter; import org.dllearner.algorithm.tbsl.converter.DUDE2UDRS_Converter; import org.dllearner.algorithm.tbsl.ltag.parser.LTAGLexicon; Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2011-11-16 12:55:46 UTC (rev 3407) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2011-11-16 15:02:41 UTC (rev 3408) @@ -121,7 +121,8 @@ when || (S WH:'when' S[s]) || <x, l1, t, [ l1:[ ?x | SLOT_p(y,x) ] ], [(l2,y,s,t)], [l2=l1], [ SLOT_p/PROPERTY/date ]> when || (DP WH:'when') || <y, l1, <<e,t>,t>, [ l1:[ ?x | SLOT_p(y,x) ] ], [], [], [ SLOT_p/PROPERTY/date ]> where || (S WH:'where' S[s]) || <x, l1, t, [ l1:[ ?x | SLOT_p(y,x) ] ], [(l2,y,s,t)], [l2=l1], [ SLOT_p/PROPERTY/place ]> - where || (DP WH:'where') || <y, 
l1, <<e,t>,t>, [ l1:[ ?x | SLOT_p(y,x) ] ], [], [], [ SLOT_p/PROPERTY/date ]> + where || (DP WH:'where') || <y, l1, <<e,t>,t>, [ l1:[ ?x | SLOT_p(y,x) ] ], [], [], [ SLOT_p/PROPERTY/place ]> + where in || (DP WH:'where' (PP P:'in' DP[dp])) || <y, l1, <<e,t>,t>, [ l1:[ ?x | SLOT_p(y,x), SLOT_in(x,z) ] ], [(l2,z,dp,<<e,t>,t>)], [l2=l1], [ SLOT_p/PROPERTY/place ]> // NEGATION Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateSerializationTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateSerializationTest.java 2011-11-16 12:55:46 UTC (rev 3407) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TemplateSerializationTest.java 2011-11-16 15:02:41 UTC (rev 3408) @@ -5,6 +5,7 @@ import java.io.FileOutputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.util.Hashtable; import java.util.Set; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -16,24 +17,34 @@ * @param args */ public static void main(String[] args) throws Exception{ + Templator templateGenerator = new Templator(); templateGenerator.setUNTAGGED_INPUT(true); + Hashtable<Template,String> testcorpus = new Hashtable<Template,String>(); + //generate templates - Set<Template> templates = templateGenerator.buildTemplates("Give me all soccer clubs in Premier League."); + String q = "Give me all soccer clubs in Premier League."; + Set<Template> templates = templateGenerator.buildTemplates(q); for(Template t : templates){ System.out.println(t); + testcorpus.put(t,q); } //serialize ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File("templates.out"))); - oos.writeObject(templates); +// oos.writeObject(templates); + oos.writeObject(testcorpus); //deserialize ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File("templates.out"))); - templates = (Set<Template>) ois.readObject(); +// 
templates = (Set<Template>) ois.readObject(); + testcorpus = (Hashtable<Template,String>) ois.readObject(); - for(Template t : templates){ +// for(Template t : templates){ +// System.out.println(t); +// } + for (Template t : testcorpus.keySet()) { System.out.println(t); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-12-19 14:20:40
|
Revision: 3508 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3508&view=rev Author: christinaunger Date: 2011-12-19 14:20:34 +0000 (Mon, 19 Dec 2011) Log Message: ----------- [tbsl] parts of the user input enclosed in double quotes is treated as named entity (jetzt aber wirklich!) Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-12-19 13:18:28 UTC (rev 3507) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-12-19 14:20:34 UTC (rev 3508) @@ -226,9 +226,21 @@ String flat = s; Matcher m; - Pattern nnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+))/NNP[S]?(\\W|$)"); - Pattern nnPattern = Pattern.compile("\\s?((\\w+)/NN[S]?\\s(\\w+))/NN[S]?(\\W|$)"); - Pattern nnnnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?)\\s(\\w+)/NN[S]?(\\W|$)"); + Pattern quotePattern1 = Pattern.compile("``/``(\\s)?(\\w+(/\\w+\\s)).*''/''"); + Pattern quotePattern2 = Pattern.compile("(``/``((.*)_)''/'')"); + Pattern nnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?\\s(\\w+))/NNP[S]?(\\W|$)"); + Pattern nnPattern = Pattern.compile("\\s?((\\w+)/NN[S]?\\s(\\w+))/NN[S]?(\\W|$)"); + Pattern nnnnpPattern = Pattern.compile("\\s?((\\w+)/NNP[S]?)\\s(\\w+)/NN[S]?(\\W|$)"); + + m = quotePattern1.matcher(flat); + while (m.find()) { + flat = flat.replaceFirst(m.group(3),"_"); + m = quotePattern1.matcher(flat); + } + m = quotePattern2.matcher(flat); + while (m.find()) { + flat = 
flat.replaceFirst(m.group(2),m.group(3)+"/NNP"); + } m = nnpPattern.matcher(flat); while (m.find()) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java 2011-12-19 13:18:28 UTC (rev 3507) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java 2011-12-19 14:20:34 UTC (rev 3508) @@ -14,6 +14,7 @@ import org.dllearner.algorithm.tbsl.ltag.parser.Preprocessor; import org.dllearner.algorithm.tbsl.nlp.ApachePartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.sem.drs.DRS; import org.dllearner.algorithm.tbsl.sem.drs.UDRS; import org.dllearner.algorithm.tbsl.sem.dudes.data.Dude; @@ -31,6 +32,9 @@ Parser p; Preprocessor pp; + DUDE2UDRS_Converter d2u; + DRS2BasicSPARQL_Converter d2s; + boolean ONE_SCOPE_ONLY = true; public boolean UNTAGGED_INPUT = true; @@ -42,8 +46,8 @@ g = LTAG_Constructor.construct(grammarFiles); -// tagger = new StanfordPartOfSpeechTagger(); - tagger = new ApachePartOfSpeechTagger(); + tagger = new StanfordPartOfSpeechTagger(); +// tagger = new ApachePartOfSpeechTagger(); p = new Parser(); p.SHOW_GRAMMAR = true; @@ -52,6 +56,9 @@ p.MODE = "BASIC"; pp = new Preprocessor(false); + + d2u = new DUDE2UDRS_Converter(); + d2s = new DRS2BasicSPARQL_Converter(); } public void setUNTAGGED_INPUT(boolean b) { @@ -59,9 +66,7 @@ } public Set<BasicQueryTemplate> buildBasicQueries(String s) { - - DUDE2UDRS_Converter d2u = new DUDE2UDRS_Converter(); - DRS2BasicSPARQL_Converter d2s = new DRS2BasicSPARQL_Converter(); + boolean clearAgain = true; String tagged; Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 
=================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2011-12-19 13:18:28 UTC (rev 3507) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2011-12-19 14:20:34 UTC (rev 3508) @@ -12,7 +12,7 @@ public class TestFrontend { static String[] GRAMMAR_FILES = {"src/main/resources/lexicon/english.lex"}; - static boolean BASIC_MODE = false; // true for BASIC mode, false for LEIPZIG mode + static boolean BASIC_MODE = true; // true for BASIC mode, false for LEIPZIG mode public static void main(String[] args) { @@ -32,7 +32,13 @@ if (BASIC_MODE) { for (BasicQueryTemplate temp : handler.buildBasicTemplates(s)) { - System.out.println(temp.toString()); + try { + System.out.println(temp.toString()); + } + catch (NullPointerException e) { + System.err.println("NullPointer in BasicQueryTemplate!"); + continue; + } } } else { for (Template temp : handler.buildTemplates(s)) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2011-12-19 16:07:22
|
Revision: 3510 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3510&view=rev Author: christinaunger Date: 2011-12-19 16:07:11 +0000 (Mon, 19 Dec 2011) Log Message: ----------- [tbsl] corrections in BasicTemplate building Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Path.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2011-12-19 14:55:37 UTC (rev 3509) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2011-12-19 16:07:11 UTC (rev 3510) @@ -19,9 +19,11 @@ import org.dllearner.algorithm.tbsl.sparql.SPARQL_OrderBy; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Term; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; @@ -120,6 +122,14 @@ for (DRS_Condition cond : scope.getConditions()) { temp = convertCondition(cond,temp); } + // preserve marked referents from restrictor and scope + Set<DiscourseReferent> tokeep = 
restrictor.collectDRs(); + tokeep.addAll(scope.collectDRs()); + for (DiscourseReferent dr : tokeep) { + if (dr.isMarked()) { + temp.addSelTerm(new SPARQL_Term(dr.getValue())); + } + } // add the quantifier at last DiscourseReferent ref = complex.getReferent(); String sref = ref.getValue(); @@ -165,7 +175,7 @@ } else { Simple_DRS_Condition simple = (Simple_DRS_Condition) condition; - + String predicate = simple.getPredicate(); if (predicate.startsWith("SLOT")) { for (Slot s : slots) { @@ -287,6 +297,28 @@ SPARQL_PairType.EQ))); return temp; } + else if (predicate.equals("DATE")) { + temp.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), + new SPARQL_Term("'^"+simple.getArguments().get(1).getValue()+"'",true), + SPARQL_PairType.REGEX))); + } + else if (predicate.equals("regex")) { + temp.addFilter(new SPARQL_Filter( + new SPARQL_Pair( + new SPARQL_Term(simple.getArguments().get(0).getValue(),false), + new SPARQL_Term(simple.getArguments().get(1).getValue(),true), + SPARQL_PairType.REGEX))); + } + else if (predicate.equals("ISA")) { + temp.addConditions(new Path(simple.getArguments().get(0).getValue(),"isA",simple.getArguments().get(1).getValue())); + } +// else { +// if (simple.getArguments().size() == 1) { +// temp.addConditions(new Path(simple.getArguments().get(0).getValue(),"rdf:type",simple.getPredicate())); +// } +// } } return temp; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-12-19 14:55:37 UTC (rev 3509) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-12-19 16:07:11 UTC (rev 3510) @@ -63,6 +63,7 @@ * x/RBR adj/JJ > adj/JJR, x/RBS adj/JJ > adj/JJS, x/WRB adj/JJ > x/JJH * nn/RBR of/IN > nn/NPREP * usw. 
+ * Note: Matching these patterns is order-dependent! * */ String condensedstring = taggedstring.replaceAll("``/``","").replaceAll("''/''","").replaceAll(" "," "); Matcher m; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Path.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Path.java 2011-12-19 14:55:37 UTC (rev 3509) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Path.java 2011-12-19 16:07:11 UTC (rev 3510) @@ -28,11 +28,14 @@ } public String toString() { + String v; + if (via.equals("isA")) v = via; else v = "?"+via; + if (via.isEmpty()) { return "?" + start + " -- " + "?" + target; } else { - return "?" + start + " -- ?" + via + " -- ?" + target; + return "?" + start + " -- " + v + " -- ?" + target; } } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2011-12-19 14:55:37 UTC (rev 3509) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2011-12-19 16:07:11 UTC (rev 3510) @@ -59,19 +59,19 @@ /* DP */ String[] dpEntry1 = {token, "(DP (NP " + treetoken + "))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slot + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ x,p | SLOT_" + tokenfluent + "(p), ISA(x,p) ] ],[],[],[" + slot + "]>"}; String[] dpEntry2 = {token, "(DP (NP " + treetoken + " DP[name]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slot + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ x,p | SLOT_" + tokenfluent + "(p), ISA(x,p), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slot + "]>"}; result.add(dpEntry1); result.add(dpEntry2); /* NP */ String[] 
npEntry1 = {token, "(NP " + treetoken + ")", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ p | SLOT_" + tokenfluent + "(p), ISA(x,p) ] ],[],[],[" + slot + "]>"}; String[] npEntry2 = {token, "(NP " + treetoken + " DP[name])", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ p | SLOT_" + tokenfluent + "(p), ISA(x,p), equal(x,y) ] ],[ (l2,y,name,<<e,t>,t>) ],[l2=l1],[" + slot + "]>"}; result.add(npEntry1); result.add(npEntry2); } @@ -89,13 +89,13 @@ else if (pos.equals("NPREP")) { String[] dpEntry1 = {token, "(DP (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ x | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; String[] dpEntry2 = {token, "(DP DET[det] (NP " + treetoken + " DP[pobj]))", - "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_" + tokenfluent + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>),(l3,x,det,e)],[l2=l1,l3=l1],[" + slot + "]>"}; String[] npEntry = {token, "(NP " + treetoken + " DP[pobj])", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(p), p(x,y) ] ],[(l2,y,pobj,<<e,t>,t>)],[l2=l1],[" + slot + "]>"}; result.add(dpEntry1); result.add(dpEntry2); result.add(npEntry); @@ -116,10 +116,10 @@ result.add(npEntry); } else if(pos.equals("JJNN") && token.contains("_")) { - slot = "SLOT_" + tokenfluent + "/USNPEC/" + token; + slot = "SLOT_" + tokenfluent + "/UNSPEC/" + token; String[] npEntry = {token, "(NP " + treetoken + " )", - "<x,l1,<e,t>,[ l1:[ | SLOT_" + tokenfluent + "(x) ] ],[],[],[" + slot + "]>"}; + "<x,l1,<e,t>,[ l1:[ p 
| SLOT_" + tokenfluent + "(p), ISA(x,p) ] ],[],[],[" + slot + "]>"}; result.add(npEntry); } @@ -160,10 +160,14 @@ result.add(passEntry); } else if (pos.equals("VPASSIN")) { - String[] passEntry = {token, + String[] passEntry1 = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; - result.add(passEntry); + String[] passEntry2 = {token, + "(S DP[dp] (VP V:'" + token + "' NUM[num]))", + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(y,z) ] ],[(l2,x,dp,<<e,t>,t>),(l3,z,num,e)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; + result.add(passEntry1); + result.add(passEntry2); } else if (pos.equals("GERUNDIN")) { String[] gerundinEntry1 = {token, @@ -178,15 +182,19 @@ result.add(gerundinEntry2); } else if (pos.equals("VPREP")) { - String[] passEntry = {token, + String[] passEntry1 = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; + String[] passEntry2 = {token, + "(S DP[subj] (VP V:'" + token + "' NUM[num]))", + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y), DATE(y,z) ] ],[(l2,x,subj,<<e,t>,t>),(l3,z,num,e)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>"}; String[] whEntry = {token, "(S DP[obj] (VP DP[subj] V:'" + token + "'))", "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(p), p(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; - 
result.add(passEntry); + result.add(passEntry1); + result.add(passEntry2); result.add(whEntry); } else if (pos.equals("VBD") || pos.equals("VBZ") || pos.equals("VBP")) { @@ -239,13 +247,13 @@ if (pos.equals("JJ")) { String[] adjEntry = {token, "(NP ADJ:'" + token.toLowerCase() + "' NP*)", - "<x,l1,<e,t>,[ l1:[ j | SLOT_" + token + "(j) ] ],[],[],["+slot+"]>"}; + "<x,l1,<e,t>,[ l1:[ j | SLOT_" + token + "(p), p(x,j) ] ],[],[],["+slot+"]>"}; result.add(adjEntry); } if (pos.equals("JJH")) { String[] howEntry = {"how "+token, "(DP ADJ:'" + token.toLowerCase() + "')", - "<x,l1,<<e,t>,t>,[ l1:[ ?j,x | SLOT_" + token + "(j) ] ],[],[],["+slot+"]>"}; + "<x,l1,<<e,t>,t>,[ l1:[ ?j,x,p | SLOT_" + token + "(p), p(x,j) ] ],[],[],["+slot+"]>"}; result.add(howEntry); } /* COMPARATIVE */ Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2011-12-19 14:55:37 UTC (rev 3509) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2011-12-19 16:07:11 UTC (rev 3510) @@ -29,6 +29,21 @@ if (s.equals(":q")) { System.exit(0); } + if (s.equals(":mode")) { + if (BASIC_MODE) System.out.println("Mode: BASIC"); + else System.out.println("Current mode is: LEIPZIG"); + continue; + } + if (s.equals(":mode BASIC")) { + BASIC_MODE = true; + System.out.println("Current mode is switched to BASIC."); + continue; + } + else if (s.equals(":mode LEIPZIG")) { + BASIC_MODE = false; + System.out.println("Current mode is switched to LEIPZIG."); + continue; + } if (BASIC_MODE) { for (BasicQueryTemplate temp : handler.buildBasicTemplates(s)) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-05-07 13:15:30
|
Revision: 3694 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3694&view=rev Author: christinaunger Date: 2012-05-07 11:56:11 +0000 (Mon, 07 May 2012) Log Message: ----------- [tbsl] repaired some stuff causing parse failures Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/GoldTagger.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2012-05-06 21:35:52 UTC (rev 3693) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2BasicSPARQL_Converter.java 2012-05-07 11:56:11 UTC (rev 3694) @@ -1,9 +1,6 @@ package org.dllearner.algorithm.tbsl.converter; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; import org.dllearner.algorithm.tbsl.sem.drs.Complex_DRS_Condition; import org.dllearner.algorithm.tbsl.sem.drs.DRS; @@ -241,6 +238,35 @@ p.setTarget(simple.getArguments().get(1).getValue()); temp.addConditions(p); } + else if (simple.getArguments().size() == 3) { + Path p = new Path(); + p.setStart(simple.getArguments().get(0).getValue()); + 
p.setVia(predicate); + String newword = null; + Slot del = null; + for (Slot s : slots) { + if (s.getAnchor().equals(simple.getArguments().get(1).getValue())) { + newword = s.getWords().get(0); + del = s; + break; + } + } + if (newword != null) { + for (Slot s : slots) { + if (s.getAnchor().equals(predicate)) { + boolean date = false; + if (s.getWords().get(0).endsWith(" date")) date = true; + newword = s.getWords().get(0).replace(" date","") + " " + newword; + if (date) newword += " date"; + s.setWords(Arrays.asList(newword)); + break; + } + } + if (del != null) slots.remove(del); + } + p.setTarget(simple.getArguments().get(2).getValue()); + temp.addConditions(p); + } } else if (predicate.equals("count")) { if (simple.getArguments().size() == 1) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-05-06 21:35:52 UTC (rev 3693) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-05-07 11:56:11 UTC (rev 3694) @@ -151,11 +151,11 @@ */ String[] tokenParts = token.split(" "); if (tokenParts.length > 2) { - + for (String anchor : grammar.getWildCardAnchors()) { if (token.matches(anchor)) { - + foundCandidates = true; coveredTokens.add(token); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-05-06 21:35:52 UTC (rev 3693) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-05-07 11:56:11 UTC (rev 3694) @@ -195,8 +195,13 @@ } m = whenPattern.matcher(condensedstring); while (m.find()) { + if (m.group(5).equals("VPREP")) { + if 
(VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHENPREP"); + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHENPREP"); + } else { if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+m.group(3)+"/WHEN"); condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2) + m.group(3)+"/WHEN"); + } } m = wherePattern.matcher(condensedstring); while (m.find()) { @@ -210,13 +215,13 @@ } m = adjnounPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNN"); - condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNN"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NN"); + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/NN"); } m = adjnprepPattern.matcher(condensedstring); while (m.find()) { - if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/JJNPREP"); - condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/JJNPREP"); + if (VERBOSE) logger.trace("Replacing " + m.group(1) + " by " + m.group(2)+"_"+m.group(3)+"/NPREP"); + condensedstring = condensedstring.replaceFirst(m.group(1),m.group(2)+"_"+m.group(3)+"/NPREP"); } return condensedstring; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-05-06 21:35:52 UTC (rev 3693) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-05-07 11:56:11 UTC (rev 3694) @@ -52,44 +52,44 @@ { DUDE() <EOF> } - -/** DUDE */ -Dude DUDE() : -{ - Token referent; - Token label; - Type type; - 
List<DRS> drs_list = null; - List<DominanceConstraint> constraints = null; + +/** DUDE */ +Dude DUDE() : +{ + Token referent; + Token label; + Type type; + List<DRS> drs_list = null; + List<DominanceConstraint> constraints = null; List<Argument> arg_list = null; - List<Slot> slots = null; -} -{ + List<Slot> slots = null; +} +{ "<" referent = dr() "," label=<LABEL> "," type=Type() "," "[" (drs_list=DRS_List())? "]" "," "[" (arg_list = Arg_List())? "]" - "," "[" (constraints = DC_List())? "]" "," "[" (slots = Slot_List())? "]" ">" - { - Dude dude = new Dude(); - dude.setReferent(referent.toString()); - dude.setType(type); - if (drs_list != null) dude.setComponents(drs_list); - dude.setLabel(new Label(label.toString())); - if (arg_list != null) dude.setArguments(arg_list); + "," "[" (constraints = DC_List())? "]" "," "[" (slots = Slot_List())? "]" ">" + { + Dude dude = new Dude(); + dude.setReferent(referent.toString()); + dude.setType(type); + if (drs_list != null) dude.setComponents(drs_list); + dude.setLabel(new Label(label.toString())); + if (arg_list != null) dude.setArguments(arg_list); if (constraints != null) dude.setDominanceConstraints(constraints); - if (slots != null) dude.setSlots(slots); - return dude; - } -} - -DominanceConstraint DominanceConstraint() : -{ - Label label1; + if (slots != null) dude.setSlots(slots); + return dude; + } +} + +DominanceConstraint DominanceConstraint() : +{ + Label label1; Label label2; Token domType; - DominanceConstraint dc = null; -} + DominanceConstraint dc = null; +} { label1 = Label() "<" label2 = Label() - { + { dc = new DominanceConstraint(label1,label2); return dc; } @@ -105,21 +105,21 @@ | label1 = Label() "=" label2 = Label() - { + { dc = new DominanceConstraint(label1,label2); dc.setType(DomType.equal); return dc; - } -} - - -Label Label() : -{ - Token label; -} + } +} + + +Label Label() : { - - label = <LABEL> + Token label; +} +{ + + label = <LABEL> { return new Label(label.toString()); } @@ -127,219 +127,219 
@@ | "res(" label = <LABEL> ")" - { + { return new Label(label.toString(),Position.res); } | "scope(" label = <LABEL> ")" - { + { return new Label(label.toString(),Position.scope); } -} - - -List<DominanceConstraint> DC_List() : -{ -List<DominanceConstraint> dc_list = null; -DominanceConstraint dc = null; -} -{ - dc = DominanceConstraint() ("," dc_list = DC_List())? - { - if (dc_list == null) - { - dc_list = new ArrayList<DominanceConstraint>(); - } - - dc_list.add(0,dc); - return dc_list; - } - - } - -List<DRS> DRS_List() : -{ - DRS drs; - List<DRS> drs_list = null; -} -{ - drs = DRS() ("," drs_list = DRS_List())? - { - if (drs_list == null) - { - drs_list = new ArrayList<DRS>(); - } - - drs_list.add(0,drs); - return drs_list; - } -} - -List<Argument> Arg_List() : -{ - Argument argument; - List<Argument> arg_list = null; -} -{ - argument = Argument() ("," arg_list = Arg_List())? - { - if (arg_list == null) - { - arg_list = new ArrayList<Argument>(); - } - - arg_list.add(0,argument); - return arg_list; - } - -} - -Argument Argument() : -{ - Token label; - Token word; - Token referent; - Type type; -} -{ - "(" label=<LABEL> "," referent = dr() "," word=<WORD> "," type = Type() ")" - { - Argument argument = new Argument(); - argument.setLabel(new Label(label.toString())); - argument.setReferent(referent.toString()); - argument.setAnchor(word.toString()); - argument.setType(type); - return argument; - } -} - -Type Type() : -{ - Type argument; - Type result; - Token word; -} -{ - "<" argument = Type() "," result = Type() ">" - { - CompositeType type = new CompositeType(); - type.setArgumentType(argument); - type.setResultType(result); - return type; - } - - | - - word = <WORD> - { - ElementaryType type=null; - if (word.toString().equals("e")) - type = new ElementaryType(ElemType.e); - if (word.toString().equals("t")) - type = new ElementaryType(ElemType.t); - return type; - } -} - +List<DominanceConstraint> DC_List() : +{ +List<DominanceConstraint> dc_list = null; 
+DominanceConstraint dc = null; +} +{ + dc = DominanceConstraint() ("," dc_list = DC_List())? + { + if (dc_list == null) + { + dc_list = new ArrayList<DominanceConstraint>(); + } + + dc_list.add(0,dc); + return dc_list; + } + + +} + + +List<DRS> DRS_List() : +{ + DRS drs; + List<DRS> drs_list = null; +} +{ + drs = DRS() ("," drs_list = DRS_List())? + { + if (drs_list == null) + { + drs_list = new ArrayList<DRS>(); + } + + drs_list.add(0,drs); + return drs_list; + } +} + +List<Argument> Arg_List() : +{ + Argument argument; + List<Argument> arg_list = null; +} +{ + argument = Argument() ("," arg_list = Arg_List())? + { + if (arg_list == null) + { + arg_list = new ArrayList<Argument>(); + } + + arg_list.add(0,argument); + return arg_list; + } + +} + +Argument Argument() : +{ + Token label; + Token word; + Token referent; + Type type; +} +{ + "(" label=<LABEL> "," referent = dr() "," word=<WORD> "," type = Type() ")" + { + Argument argument = new Argument(); + argument.setLabel(new Label(label.toString())); + argument.setReferent(referent.toString()); + argument.setAnchor(word.toString()); + argument.setType(type); + return argument; + } +} + +Type Type() : +{ + Type argument; + Type result; + Token word; +} +{ + "<" argument = Type() "," result = Type() ">" + { + CompositeType type = new CompositeType(); + type.setArgumentType(argument); + type.setResultType(result); + return type; + } + + | + + word = <WORD> + { + ElementaryType type=null; + if (word.toString().equals("e")) + type = new ElementaryType(ElemType.e); + if (word.toString().equals("t")) + type = new ElementaryType(ElemType.t); + return type; + } +} + + /** DRS */ DRS DRS() : -{ - Set<DiscourseReferent> dr_set = null; - Set<DRS_Condition> conditions = null; - DRS drs; - Token label; - +{ + Set<DiscourseReferent> dr_set = null; + Set<DRS_Condition> conditions = null; + DRS drs; + Token label; + } -{ - label=<LABEL> ":[" (dr_set=DR_Set())? "|" (conditions=Condition_List())? 
"]" - { - if (dr_set == null) - { - dr_set = new HashSet<DiscourseReferent>(); - } - drs = new DRS(); - drs.setLabel(label.toString()); - drs.setDiscourseReferents(dr_set); - if (conditions != null) - drs.setDRSConditions(conditions); - return drs; +{ + label=<LABEL> ":[" (dr_set=DR_Set())? "|" (conditions=Condition_List())? "]" + { + if (dr_set == null) + { + dr_set = new HashSet<DiscourseReferent>(); + } + drs = new DRS(); + drs.setLabel(label.toString()); + drs.setDiscourseReferents(dr_set); + if (conditions != null) + drs.setDRSConditions(conditions); + return drs; } -} - +} + /** DR_Set*/ Set<DiscourseReferent> DR_Set() : -{ - Token dr; - Set<DiscourseReferent> dr_set=null; +{ + Token dr; + Set<DiscourseReferent> dr_set=null; } -{ - dr = dr() ("," dr_set=DR_Set())? - { - if (dr_set == null) - { - dr_set= new HashSet<DiscourseReferent>(); - } - if (dr.toString().startsWith("?")) - { +{ + dr = dr() ("," dr_set=DR_Set())? + { + if (dr_set == null) + { + dr_set= new HashSet<DiscourseReferent>(); + } + if (dr.toString().startsWith("?")) + { dr_set.add(new DiscourseReferent(dr.toString().substring(1),true,false)); } else if (dr.toString().startsWith("!")) { dr_set.add(new DiscourseReferent(dr.toString().substring(1),false,true)); } - else - { - dr_set.add(new DiscourseReferent(dr.toString(),false,false)); - } - return dr_set; + else + { + dr_set.add(new DiscourseReferent(dr.toString(),false,false)); + } + return dr_set; } -} - -Set<DRS_Condition> Condition_List() : -{ - DRS_Condition condition= null; - Set<DRS_Condition> conditions = null; -} -{ - condition=Condition() ("," conditions=Condition_List())? 
- { - if (conditions == null) - { - conditions = new HashSet<DRS_Condition>(); - } - conditions.add(condition); - return conditions; - } -} - -DRS_Condition Condition() : -{ - List<DiscourseReferent> dr_list; - Token dr1; +} + +Set<DRS_Condition> Condition_List() : +{ + DRS_Condition condition= null; + Set<DRS_Condition> conditions = null; +} +{ + condition=Condition() ("," conditions=Condition_List())? + { + if (conditions == null) + { + conditions = new HashSet<DRS_Condition>(); + } + conditions.add(condition); + return conditions; + } +} + +DRS_Condition Condition() : +{ + List<DiscourseReferent> dr_list; + Token dr1; Token dr2; - Token dr; - Token predicate; - Token quantifier; - DRS drs1; - DRS drs2; -} + Token dr; + Token predicate; + Token quantifier; + DRS drs1; + DRS drs2; +} { - predicate=<WORD> "(" dr_list=DR_List() ")" - { - Simple_DRS_Condition condition; - - condition = new Simple_DRS_Condition(); - condition.setPredicate(predicate.toString()); - condition.setArguments(dr_list); - return condition; - } - + predicate=<WORD> "(" dr_list=DR_List() ")" + { + Simple_DRS_Condition condition; + + condition = new Simple_DRS_Condition(); + condition.setPredicate(predicate.toString()); + condition.setArguments(dr_list); + return condition; + } + | dr1 = dr() "=" dr2 = dr() @@ -352,54 +352,54 @@ condition.addArgument(new DiscourseReferent(dr2.toString())); return condition; } - - | - - "NOT" drs1=DRS() - { - Negated_DRS drs = new Negated_DRS(); - drs.setDRS(drs1); - return drs; - } - - | - + + | + + "NOT" drs1=DRS() + { + Negated_DRS drs = new Negated_DRS(); + drs.setDRS(drs1); + return drs; + } + + | + drs1=DRS() (quantifier=<EVERY> | quantifier=<SOME> | quantifier=<AFEW> | quantifier=<MOST> | quantifier=<THEMOST> | quantifier=<THELEAST> | - quantifier=<HOWMANY> | quantifier=<MANY> | quantifier=<NO>) dr=dr() drs2=DRS() - { - Complex_DRS_Condition drs; - drs = new Complex_DRS_Condition(); - drs.setRestrictor(drs1); - drs.setScope(drs2); + quantifier=<HOWMANY> | 
quantifier=<MANY> | quantifier=<NO>) dr=dr() drs2=DRS() + { + Complex_DRS_Condition drs; + drs = new Complex_DRS_Condition(); + drs.setRestrictor(drs1); + drs.setScope(drs2); drs.setReferent(new DiscourseReferent(dr.toString())); - + if (quantifier.toString().equals("EVERY")) {drs.setQuantifier(DRS_Quantifier.EVERY);} - if (quantifier.toString().equals("SOME")) {drs.setQuantifier(DRS_Quantifier.SOME);} + if (quantifier.toString().equals("SOME")) {drs.setQuantifier(DRS_Quantifier.SOME);} if (quantifier.toString().equals("MOST")) {drs.setQuantifier(DRS_Quantifier.MOST);} if (quantifier.toString().equals("THEMOST")) {drs.setQuantifier(DRS_Quantifier.THEMOST);} - if (quantifier.toString().equals("THELEAST")) {drs.setQuantifier(DRS_Quantifier.THELEAST);} - if (quantifier.toString().equals("AFEW")) {drs.setQuantifier(DRS_Quantifier.FEW);} + if (quantifier.toString().equals("THELEAST")) {drs.setQuantifier(DRS_Quantifier.THELEAST);} + if (quantifier.toString().equals("AFEW")) {drs.setQuantifier(DRS_Quantifier.FEW);} if (quantifier.toString().equals("MANY")) {drs.setQuantifier(DRS_Quantifier.MANY);} if (quantifier.toString().equals("HOWMANY")) {drs.setQuantifier(DRS_Quantifier.HOWMANY);} - if (quantifier.toString().equals("NO")) {drs.setQuantifier(DRS_Quantifier.NO);} - - return drs; - - } -} - + if (quantifier.toString().equals("NO")) {drs.setQuantifier(DRS_Quantifier.NO);} + + return drs; + + } +} + /** DR_List*/ List<DiscourseReferent> DR_List() : -{ - Token dr; - List<DiscourseReferent> dr_list=null; +{ + Token dr; + List<DiscourseReferent> dr_list=null; } -{ - dr = dr() ("," dr_list=DR_List())? - { - if (dr_list == null) - { - dr_list= new ArrayList<DiscourseReferent>(); +{ + dr = dr() ("," dr_list=DR_List())? 
+ { + if (dr_list == null) + { + dr_list= new ArrayList<DiscourseReferent>(); } if (dr.toString().startsWith("?")) { @@ -410,9 +410,9 @@ } else { dr_list.add(0,new DiscourseReferent(dr.toString(),false,false)); - } + } - return dr_list; + return dr_list; } } @@ -445,15 +445,15 @@ { ref = <WORD> "/" type = <WORD> "/" (words = Word_List())? { - if (words == null) - { + if (words == null) + { words = new ArrayList<String>(); } if (type.toString().equals("CLASS")) { slottype = SlotType.CLASS; } - else if (type.toString().equals("RESOURCE")) { slottype = SlotType.RESOURCE; } + else if (type.toString().equals("RESOURCE")) { slottype = SlotType.RESOURCE; } else if (type.toString().equals("PROPERTY")) { slottype = SlotType.PROPERTY; } else if (type.toString().equals("SYMPROPERTY")) { slottype = SlotType.SYMPROPERTY; } - else { slottype = SlotType.UNSPEC; } + else { slottype = SlotType.UNSPEC; } return new Slot(ref.toString(),slottype,words); } @@ -476,8 +476,8 @@ return words; } } - + TOKEN: {<EVERY: "EVERY">} TOKEN: {<MOST: "MOST">} @@ -498,7 +498,7 @@ TOKEN: {<LABEL: "l"(["0"-"9"])+>} -TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":"])+>} +TOKEN: {<WORD: (["a"-"z","A"-"Z","_",".","#",":","0"-"9"])+>} TOKEN: {<DR: (["?","!"])?(["a"-"z","A"-"Z","0"-"9","."])+>} Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2012-05-06 21:35:52 UTC (rev 3693) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicSlotBuilder.java 2012-05-07 11:56:11 UTC (rev 3694) @@ -11,7 +11,7 @@ private String[] noun = {"NN","NNS","NNP","NNPS","NPREP","JJNN","JJNPREP"}; private String[] adjective = {"JJ","JJR","JJS","JJH"}; - private String[] verb = 
{"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP","WHEN","WHERE"}; + private String[] verb = {"VB","VBD","VBG","VBN","VBP","VBZ","PASSIVE","PASSPART","VPASS","VPASSIN","GERUNDIN","VPREP","WHEN","WHENPREP","WHERE"}; private String[] preps = {"IN","TO"}; public BasicSlotBuilder() { @@ -206,11 +206,16 @@ result.add(vEntry); } else if (pos.equals("VB")) { - String[] whEntry = {token, + String[] whEntry1 = {token, "(S DP[obj] (VP DP[subj] V:'" + token + "'))", "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; - result.add(whEntry); + String[] whEntry2 = {token, + "(S DP[subj] (VP V:'" + token + "' DP[obj] ))", + "<x,l1,t,[ l1:[|], l4:[ p | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + slot + "]>" + + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; + result.add(whEntry1); + result.add(whEntry2); } else if (pos.equals("VBG") || pos.equals("VBN")) { String[] gerEntry = {token, @@ -225,11 +230,27 @@ } else if (pos.equals("WHEN")) { slot = "SLOT_" + token + "/PROPERTY/" + token + "_date"; - String[] whenEntry = {token, + String[] whenEntry1 = {token, "(S DP[subj] (VP V:'" + token + "'))", "<x,l1,t,[ l1:[ ?y,p | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + slot + " ]>"}; - result.add(whenEntry); + String[] whenEntry2 = {token, + "(S DP[subj] (VP V:'" + token + "' DP[obj]))", + "<x,l1,t,[ l1:[ ?y,p | SLOT_" + token + "(x,z,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,z,obj,<<e,t>,t>)],[ l2=l1,l3=l1 ],[ " + slot + " ]>"}; + result.add(whenEntry1); + result.add(whenEntry2); } + else if (pos.equals("WHENPREP")) { + 
System.out.println(" >>>> " + token); // DEBUG + slot = "SLOT_" + token + "/PROPERTY/" + token + "_date"; + String[] whenprepEntry1 = {token, + "(S DP[subj] (VP V:'" + token + "' DP[pobj]))", + "<x,l1,t,[ l1:[ ?y,p | SLOT_" + token + "(x,z,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,z,pobj,<<e,t>,t>)],[ l2=l1,l3=l1 ],[ " + slot + " ]>"}; + String[] whenprepEntry2 = {token, + "(S DP[subj] (VP V:'" + token + "' NP[pobj]))", + "<x,l1,t,[ l1:[ ?y,p,z | SLOT_" + token + "(x,z,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,z,pobj,<e,t>)],[ l2=l1,l3=l1 ],[ " + slot + " ]>"}; + result.add(whenprepEntry1); + result.add(whenprepEntry2); + } else if (pos.equals("WHERE")) { slot = "SLOT_" + token + "/PROPERTY/" + token + "_place"; String[] whereEntry = {token, @@ -245,10 +266,18 @@ slot = "SLOT_" + token + "/PROPERTY/" + token; /* ADJECTIVE */ if (pos.equals("JJ")) { - String[] adjEntry = {token, + String[] adjEntry1 = {token, "(NP ADJ:'" + token.toLowerCase() + "' NP*)", - "<x,l1,<e,t>,[ l1:[ j | SLOT_" + token + "(x,j) ] ],[],[],["+slot+"]>"}; - result.add(adjEntry); + "<x,l1,<e,t>,[ l1:[ j | SLOT_" + token + "(x,j) ] ],[],[],["+slot+"]>"}; +// String[] adjEntry2 = {"is .+ " + token, +// "(S DP[subject] (VP V:'is' ADJ:'" + token.toLowerCase() + "'))", +// "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(x) ] ],[(l2,x,subject,<<e,t>,t>)],[l2=l1],["+slot+"]>"}; +// String[] adjEntry3 = {"is .+ " + token, +// "(S (VP V:'is' DP[subject] ADJ:'" + token.toLowerCase() + "'))", +// "<x,l1,<e,t>,[ l1:[ | SLOT_" + token + "(x) ] ],[(l2,x,subject,<<e,t>,t>)],[l2=l1],["+slot+"]>"}; + result.add(adjEntry1); +// result.add(adjEntry2); +// result.add(adjEntry3); } if (pos.equals("JJH")) { String[] howEntry = {"how "+token, Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java 2012-05-06 21:35:52 UTC (rev 
3693) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java 2012-05-07 11:56:11 UTC (rev 3694) @@ -45,7 +45,7 @@ } g = LTAG_Constructor.construct(grammarFiles); - + tagger = new StanfordPartOfSpeechTagger(); // tagger = new ApachePartOfSpeechTagger(); Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex 2012-05-06 21:35:52 UTC (rev 3693) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex 2012-05-07 11:56:11 UTC (rev 3694) @@ -18,8 +18,12 @@ is there || (S V:'is' C:'there' DP[dp]) || <x, l1, t, [ l1:[ | ] ], [ (l2,x,dp,<<e,t>,t>) ], [ l2=l1 ],[]> are there || (S V:'are' C:'there' DP[dp]) || <x, l1, t, [ l1:[ | ] ], [ (l2,x,dp,<<e,t>,t>) ], [ l2=l1 ],[]> + has there been || (S V:'has' C:'there' V:'been' DP[dp]) || <x, l1, t, [ l1:[ | ] ], [ (l2,x,dp,<<e,t>,t>) ], [ l2=l1 ],[]> + have there been || (S V:'have' C:'there' V:'been' DP[dp]) || <x, l1, t, [ l1:[ | ] ], [ (l2,x,dp,<<e,t>,t>) ], [ l2=l1 ],[]> is there || (S DP[dp] (VP V:'is' C:'there')) || <x, l1, t, [ l1:[ | ] ], [ (l2,x,dp,<<e,t>,t>) ], [ l2=l1 ],[]> are there || (S DP[dp] (VP V:'are' C:'there')) || <x, l1, t, [ l1:[ | ] ], [ (l2,x,dp,<<e,t>,t>) ], [ l2=l1 ],[]> + have there been || (S DP[dp] (VP V:'have' C:'there' V:'been')) || <x, l1, t, [ l1:[ | ] ], [ (l2,x,dp,<<e,t>,t>) ], [ l2=l1 ],[]> + has there been || (S DP[dp] (VP V:'has' C:'there' V:'been')) || <x, l1, t, [ l1:[ | ] ], [ (l2,x,dp,<<e,t>,t>) ], [ l2=l1 ],[]> // TO BE: YES/NO QUESTIONS @@ -75,8 +79,9 @@ least || (ADJ DET:'least' ADJ*) || <x,l1,<e,t>,[ l1:[ | minimum(a,x,x) ] ], [],[],[]> - how many || (DET DET:'how' DET:'many') || <x,l1,e, [ l1:[ ?x | ] ], [],[],[]> - how many || (DET DET:'how' DET:'many') || <x,l1,e, [ l1:[ | count(x) ] ], [],[],[]> + how many || (DET DET:'how' DET:'many') || <x,l1,e, [ l1:[ ?x 
| ] ], [],[],[]> + how many || (DET DET:'how' DET:'many') || <x,l1,e, [ l1:[ | count(x) ] ], [],[],[]> + how often || (DP DET:'how' DET:'often') || <x,l1,<<e,t>,t>, [ l1:[ | count(x) ] ], [],[],[]> a || (DET DET:'a') || <x,l1,e, [ l1:[ x |] ], [],[],[]> an || (DET DET:'an') || <x,l1,e, [ l1:[ x |] ], [],[],[]> which || (DET DET:'which') || <x,l1,e, [ l1:[ ?x |] ], [],[],[]> @@ -102,14 +107,18 @@ also || (DP ADV:'also' DP*) || <x,l1,<<e,t>,t>,[ l1:[|] ],[],[],[]> has || (S DP[subject] (VP V:'has' DP[object])) || <x, l1, t, [ l1:[ | ], l2:[ | empty(x,y) ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> - have || (S DP[subject] (VP V:'have' DP[object])) || <x, l1, t, [ l1:[ | ], l2:[ | empty(x,y) ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> - had || (S DP[subject] (VP V:'had' DP[object])) || <x, l1, t, [ l1:[ | ], l2:[ | empty(x,y) ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> + have || (S DP[subject] (VP V:'have' DP[object])) || <x, l1, t, [ l1:[ | ], l2:[ | empty(x,y) ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> + have || (S DP[object] (VP DP[subject] V:'have')) || <x, l1, t, [ l1:[ | ], l2:[ | empty(x,y) ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> + had || (S DP[subject] (VP V:'had' DP[object])) || <x, l1, t, [ l1:[ | ], l2:[ | empty(x,y) ] ], [ (l3,x,subject,<<e,t>,t>), (l4,y,object,<<e,t>,t>) ], [ l3<l1, l4<l1, l2<scope(l3), l2<scope(l4) ],[]> // with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>,[ l1:[| empty(x,y) ] ],[(l2,y,dp,<<e,t>,t>)],[l2=l1],[]> - +// of || (NP NP* (PP P:'of' DP[dp])) || <x,l1,<e,t>,[ l1:[| empty(x,y) ] ],[(l2,y,dp,<<e,t>,t>)],[l2=l1],[]> + people || (NP N:'people') || <x,l1,<e,t>,[ l1:[|] ],[],[],[]> + still || (ADJ ADJ:'still' 
ADJ*) || <x,l1,<e,t>,[l1:[|]],[],[],[]> + // WH WORDS // -------- @@ -172,7 +181,7 @@ eight || (NP NUM:'eight' NP*) || <x,l1,<e,t>,[l1:[x|count(x,8)]],[],[],[]> nine || (NP NUM:'nine' NP*) || <x,l1,<e,t>,[l1:[x|count(x,9)]],[],[],[]> ten || (NP NUM:'ten' NP*) || <x,l1,<e,t>,[l1:[x|count(x,10)]],[],[],[]> - + one || (NUM NUM:'one') || <x,l1,e,[l1:[x|equal(x,1)]],[],[],[]> two || (NUM NUM:'two') || <x,l1,e,[l1:[x|equal(x,2)]],[],[],[]> three || (NUM NUM:'three') || <x,l1,e,[l1:[x|equal(x,3)]],[],[],[]> Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/GoldTagger.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/GoldTagger.java 2012-05-06 21:35:52 UTC (rev 3693) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/GoldTagger.java 2012-05-07 11:56:11 UTC (rev 3694) @@ -25,8 +25,8 @@ public class GoldTagger { - static String GOLD = "src/main/resources/tbsl/evaluation/qald2-dbpedia-train.xml"; - static String OUT = "target/qald2-dbpedia-train-tagged.xml"; + static String GOLD = "/home/christina/Downloads/dbpedia-test-new.xml"; + static String OUT = "/home/christina/Downloads/dbpedia-test-new-tagged.xml"; public static void main(String[] args) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-05-10 13:31:36
|
Revision: 3701 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3701&view=rev Author: lorenz_b Date: 2012-05-10 13:31:25 +0000 (Thu, 10 May 2012) Log Message: ----------- Added preliminary support for FILTERs. Added LGG test for Oxford dataset. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/QueryTree.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/lgg/LGGGeneratorImpl.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/QueryTree.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/QueryTree.java 2012-05-10 07:08:30 UTC (rev 3700) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/QueryTree.java 2012-05-10 13:31:25 UTC (rev 3701) @@ -26,7 +26,9 @@ import org.dllearner.algorithm.qtl.datastructures.impl.QueryTreeImpl; +import com.hp.hpl.jena.datatypes.RDFDatatype; import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.rdf.model.Literal; /** * @@ -129,8 +131,14 @@ String toSPARQLQueryString(boolean filtered); + Query toSPARQLQuery(); + int getTriplePatternCount(); Query toQuery(); + RDFDatatype getDatatype(); + + List<Literal> getLiterals(); + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java 2012-05-10 07:08:30 UTC (rev 3700) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java 2012-05-10 13:31:25 UTC (rev 3701) @@ -22,10 +22,13 @@ import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -33,15 +36,21 @@ import java.util.TreeSet; import java.util.regex.Pattern; +import javax.xml.bind.DatatypeConverter; + import org.dllearner.algorithm.qtl.datastructures.NodeRenderer; import org.dllearner.algorithm.qtl.datastructures.QueryTree; import org.dllearner.algorithm.qtl.filters.Filters; import com.hp.hpl.jena.datatypes.BaseDatatype; +import com.hp.hpl.jena.datatypes.RDFDatatype; +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; import com.hp.hpl.jena.graph.Node; import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.query.Query; import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.sparql.syntax.ElementGroup; import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock; @@ -72,6 +81,8 @@ private boolean isLiteralNode = false; private boolean isResourceNode = false; + private List<Literal> literals = new ArrayList<Literal>(); + public QueryTreeImpl(N userObject) { this.userObject = userObject; @@ -80,7 +91,13 @@ edge2ChildrenMap = new HashMap<String, List<QueryTree<N>>>(); toStringRenderer = new NodeRenderer<N>() { public String render(QueryTree<N> object) { - return object.toString() + "(" + object.getId() + ")"; + String label = object.toString() + "(" + object.getId() + ")"; + if(object.isLiteralNode()){ + if(!object.getLiterals().isEmpty()){ + label += "Values: " + object.getLiterals(); + } + } + return label; } }; } @@ -672,6 +689,10 @@ return true; } + 
@Override + public Query toSPARQLQuery() { + return QueryFactory.create(toSPARQLQueryString(), Syntax.syntaxARQ); + } @Override public String toSPARQLQueryString() { @@ -680,8 +701,12 @@ } cnt = 0; StringBuilder sb = new StringBuilder(); - sb.append("SELECT ?x0 WHERE {\n"); - buildSPARQLQueryString(this, sb, false); + sb.append("SELECT DISTINCT ?x0 WHERE {\n"); + List<String> filters = new ArrayList<String>(); + buildSPARQLQueryString(this, sb, false, filters); + for(String filter : filters){ + sb.append(filter).append("\n"); + } sb.append("}"); return sb.toString(); } @@ -693,16 +718,23 @@ } cnt = 0; StringBuilder sb = new StringBuilder(); - sb.append("SELECT ?x0 WHERE {\n"); - buildSPARQLQueryString(this, sb, filtered); + List<String> filters = new ArrayList<String>(); + sb.append("SELECT DISTINCT ?x0 WHERE {\n"); + buildSPARQLQueryString(this, sb, filtered, filters); + for(String filter : filters){ + sb.append(filter).append("\n"); + } sb.append("}"); return sb.toString(); } - private void buildSPARQLQueryString(QueryTree<N> tree, StringBuilder sb, boolean filtered){ + private void buildSPARQLQueryString(QueryTree<N> tree, StringBuilder sb, boolean filtered, List<String> filters){ Object subject = null; if(tree.getUserObject().equals("?")){ subject = "?x" + cnt++; + if(tree.isLiteralNode() && !tree.getLiterals().isEmpty()){ + filters.add(getFilter(subject.toString(), tree.getLiterals())); + } } else { subject = "<" + tree.getUserObject() + ">"; } @@ -725,12 +757,61 @@ } sb.append(subject).append(" <").append(predicate).append("> ").append(object).append(".\n"); if(!objectIsResource){ - buildSPARQLQueryString(child, sb, filtered); + buildSPARQLQueryString(child, sb, filtered, filters); } } } } + private String getFilter(String varName, List<Literal> literals){ + String filter = "FILTER("; + + Literal min = getMin(literals); + filter += varName + ">=\"" + min.getLexicalForm() + "\"^^<" + min.getDatatypeURI() + ">"; + + filter += " && "; + + Literal max = 
getMax(literals); + filter += varName + "<=\"" + max.getLexicalForm() + "\"^^<" + max.getDatatypeURI() + ">"; + + filter += ")"; + return filter; + } + + private Literal getMin(List<Literal> literals){ + Iterator<Literal> iter = literals.iterator(); + Literal min = iter.next(); + Literal l; + while(iter.hasNext()){ + l = iter.next(); + if(l.getDatatype() == XSDDatatype.XSDinteger){ + min = (l.getInt() < min.getInt()) ? l : min; + } else if(l.getDatatype() == XSDDatatype.XSDdouble){ + min = (l.getDouble() < min.getDouble()) ? l : min; + } else if(l.getDatatype() == XSDDatatype.XSDdate){ + min = (DatatypeConverter.parseDate(l.getLexicalForm()).compareTo(DatatypeConverter.parseDate(min.getLexicalForm())) == -1) ? l : min; + } + } + return min; + } + + private Literal getMax(List<Literal> literals){ + Iterator<Literal> iter = literals.iterator(); + Literal max = iter.next(); + Literal l; + while(iter.hasNext()){ + l = iter.next(); + if(l.getDatatype() == XSDDatatype.XSDinteger){ + max = (l.getInt() > max.getInt()) ? l : max; + } else if(l.getDatatype() == XSDDatatype.XSDdouble){ + max = (l.getDouble() > max.getDouble()) ? l : max; + } else if(l.getDatatype() == XSDDatatype.XSDdate){ + max = (DatatypeConverter.parseDate(l.getLexicalForm()).compareTo(DatatypeConverter.parseDate(max.getLexicalForm())) == 1) ?
l : max; + } + } + return max; + } + public Query toQuery(){ Query query = QueryFactory.make(); query.setQuerySelectType(); @@ -797,5 +878,29 @@ return triples; } + public void addLiteral(Literal l){ + literals.add(l); + } + + public List<Literal> getLiterals() { + return literals; + } + + public void addLiterals(Collection<Literal> literals) { + this.literals.addAll(literals); + } + + public RDFDatatype getDatatype(){ + if(isLiteralNode){ + if(!literals.isEmpty()){ + return literals.get(0).getDatatype(); + } else { + return null; + } + } else { + throw new UnsupportedOperationException("Node is not a literal"); + } + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java 2012-05-10 07:08:30 UTC (rev 3700) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java 2012-05-10 13:31:25 UTC (rev 3701) @@ -35,6 +35,8 @@ import org.dllearner.algorithm.qtl.filters.QuestionBasedStatementFilter; import org.dllearner.algorithm.qtl.filters.ZeroFilter; +import com.hp.hpl.jena.datatypes.RDFDatatype; +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.Property; @@ -60,6 +62,8 @@ private Selector statementSelector = new SimpleSelector(); private com.hp.hpl.jena.util.iterator.Filter<Statement> keepFilter; + private int maxDepth = 3; + public QueryTreeFactoryImpl(){ comparator = new StatementComparator(); predicateFilters = new HashSet<String>(Filters.getAllFilterProperties()); @@ -139,7 +143,8 @@ QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); - fillTree(tree, resource2Statements); + int depth = 0; + fillTree(tree, resource2Statements, depth); tree.setUserObject("?"); return tree; @@ -160,7
+165,8 @@ fillMap(s, model, resource2Statements); QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); - fillTree(tree, resource2Statements); + int depth = 0; + fillTree(tree, resource2Statements, depth); tree.setUserObject("?"); return tree; @@ -178,7 +184,7 @@ resource2Statements.put(st.getSubject().toString(), statements); } statements.add(st); - if(st.getObject().isURIResource() && !resource2Statements.containsKey(st.getObject().asResource().getURI())){ + if((st.getObject().isResource()) && !resource2Statements.containsKey(st.getObject().toString())){ fillMap(st.getObject().asResource(), model, resource2Statements); } } @@ -201,54 +207,73 @@ statements.add(st); } QueryTreeImpl<String> tree = new QueryTreeImpl<String>(s.toString()); - fillTree(tree, resource2Statements); + int depth = 0; + fillTree(tree, resource2Statements, depth); tree.setUserObject("?"); return tree; } - private void fillTree(QueryTreeImpl<String> tree, SortedMap<String, SortedSet<Statement>> resource2Statements){ - tree.setId(nodeId++); - if(resource2Statements.containsKey(tree.getUserObject())){ - QueryTreeImpl<String> subTree; - Property predicate; - RDFNode object; - for(Statement st : resource2Statements.get(tree.getUserObject())){ - predicate = st.getPredicate(); - object = st.getObject(); - if(!predicateFilter.isRelevantResource(predicate.getURI())){ - continue; - } - if(predicateFilters.contains(st.getPredicate().toString())){ - continue; - } - if(object.isLiteral()){ - Literal lit = st.getLiteral(); - String escapedLit = lit.getLexicalForm().replace("\"", "\\\""); - StringBuilder sb = new StringBuilder(); - sb.append("\"").append(escapedLit).append("\""); - if(lit.getDatatypeURI() != null){ - sb.append("^^<").append(lit.getDatatypeURI()).append(">"); + private void fillTree(QueryTreeImpl<String> tree, SortedMap<String, SortedSet<Statement>> resource2Statements, int depth){ + depth++; + tree.setId(nodeId++); + if(resource2Statements.containsKey(tree.getUserObject())){ 
+ QueryTreeImpl<String> subTree; + Property predicate; + RDFNode object; + for(Statement st : resource2Statements.get(tree.getUserObject())){ + predicate = st.getPredicate(); + object = st.getObject(); + if(!predicateFilter.isRelevantResource(predicate.getURI())){ + continue; } - if(!lit.getLanguage().isEmpty()){ - sb.append("@").append(lit.getLanguage()); + if(predicateFilters.contains(st.getPredicate().toString())){ + continue; } - subTree = new QueryTreeImpl<String>(sb.toString()); -// subTree = new QueryTreeImpl<String>(lit.toString()); - subTree.setId(nodeId++); - subTree.setLiteralNode(true); - tree.addChild(subTree, st.getPredicate().toString()); - } else if(objectFilter.isRelevantResource(object.asResource().getURI())){ - if(tree.getUserObjectPathToRoot().size() < 3 && - !tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ - subTree = new QueryTreeImpl<String>(st.getObject().toString()); - subTree.setResourceNode(true); + if(object.isLiteral()){ + Literal lit = st.getLiteral(); + String escapedLit = lit.getLexicalForm().replace("\"", "\\\""); + StringBuilder sb = new StringBuilder(); + sb.append("\"").append(escapedLit).append("\""); + if(lit.getDatatypeURI() != null){ + sb.append("^^<").append(lit.getDatatypeURI()).append(">"); + } + if(!lit.getLanguage().isEmpty()){ + sb.append("@").append(lit.getLanguage()); + } + subTree = new QueryTreeImpl<String>(sb.toString()); +// subTree = new QueryTreeImpl<String>(lit.toString()); + subTree.setId(nodeId++); + subTree.setLiteralNode(true); + if(lit.getDatatype() == XSDDatatype.XSDinteger || lit.getDatatype() == XSDDatatype.XSDdouble || lit.getDatatype() == XSDDatatype.XSDdate){ + subTree.addLiteral(lit); + } tree.addChild(subTree, st.getPredicate().toString()); - fillTree(subTree, resource2Statements); + } else if(objectFilter.isRelevantResource(object.asResource().getURI())){ + if(object.asResource().isAnon()){ + System.out.println(object); + } + 
if(!tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ + subTree = new QueryTreeImpl<String>(st.getObject().toString()); + subTree.setResourceNode(true); + tree.addChild(subTree, st.getPredicate().toString()); + if(depth < maxDepth){ + fillTree(subTree, resource2Statements, depth); + } + + } + } else if(object.isAnon()){ + if(depth < maxDepth && + !tree.getUserObjectPathToRoot().contains(st.getObject().toString())){ + subTree = new QueryTreeImpl<String>(st.getObject().toString()); + subTree.setResourceNode(true); + tree.addChild(subTree, st.getPredicate().toString()); + fillTree(subTree, resource2Statements, depth); + } } } } - } + depth--; } class StatementComparator implements Comparator<Statement>{ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/lgg/LGGGeneratorImpl.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/lgg/LGGGeneratorImpl.java 2012-05-10 07:08:30 UTC (rev 3700) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/lgg/LGGGeneratorImpl.java 2012-05-10 13:31:25 UTC (rev 3701) @@ -29,6 +29,7 @@ import org.dllearner.algorithm.qtl.datastructures.QueryTree; import org.dllearner.algorithm.qtl.datastructures.impl.QueryTreeImpl; +import com.hp.hpl.jena.datatypes.RDFDatatype; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -145,6 +146,15 @@ lgg.setUserObject((N)"?"); } + if(tree1.isLiteralNode() && tree2.isLiteralNode()){ + RDFDatatype d1 = tree1.getDatatype(); + RDFDatatype d2 = tree2.getDatatype(); + if(d1 != null && d2 != null && d1 == d2){ + ((QueryTreeImpl<N>)lgg).addLiterals(((QueryTreeImpl<N>)tree1).getLiterals()); + ((QueryTreeImpl<N>)lgg).addLiterals(((QueryTreeImpl<N>)tree2).getLiterals()); + } + } + Set<QueryTreeImpl<N>> addedChildren; QueryTreeImpl<N> lggChild; for(Object edge : new TreeSet<Object>(tree1.getEdges())){ Modified: 
trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java 2012-05-10 07:08:30 UTC (rev 3700) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java 2012-05-10 13:31:25 UTC (rev 3701) @@ -19,7 +19,11 @@ */ package org.dllearner.algorithm.qtl; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -39,14 +43,23 @@ import org.dllearner.algorithm.qtl.operations.lgg.LGGGeneratorImpl; import org.dllearner.algorithm.qtl.util.ModelGenerator; import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.junit.Assert; import org.junit.Test; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.ResultSetFormatter; import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.sparql.util.ModelUtils; import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDFS; @@ -61,6 +74,64 @@ private static final Logger logger = Logger.getLogger(LGGTest.class); +// @Test + public void testOxfordData(){ + Model model = ModelFactory.createOntologyModel(); + int depth = 3; + try { + model.read(new FileInputStream(new 
File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/ontology.ttl")), null, "TURTLE"); + System.out.println(model.size()); + model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/finders.ttl")), "http://diadem.cs.ox.ac.uk/ontologies/real-estate#", "TURTLE"); + System.out.println(model.size()); +// model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/martinco.ttl")), null, "TURTLE"); +// System.out.println(model.size()); +// model.write(new FileOutputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/all.ttl")), "TURTLE", null); +// model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/all.ttl")), null, "TURTLE"); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + +// for(Statement s : model.listStatements().toList()){ +// System.out.println(s); +// } +// +// ResultSet rs1 = QueryExecutionFactory.create("SELECT * WHERE {?s <http://diadem.cs.ox.ac.uk/ontologies/real-estate#rooms> ?o. 
?o ?p ?o1}", model).execSelect(); +// System.out.println(ResultSetFormatter.asText(rs1)); + + ConciseBoundedDescriptionGenerator cbd = new ConciseBoundedDescriptionGeneratorImpl(model); + QueryTreeFactory<String> qtf = new QueryTreeFactoryImpl(); + + List<String> posExamples = Arrays.asList("http://diadem.cs.ox.ac.uk/ontologies/real-estate#inst004", + "http://diadem.cs.ox.ac.uk/ontologies/real-estate#inst005"); + + List<QueryTree<String>> trees = new ArrayList<QueryTree<String>>(); + + //get the trees for the positive examples of depth 3 + QueryTree<String> tree; + for(String ex : posExamples){ + tree = qtf.getQueryTree(ex, cbd.getConciseBoundedDescription(ex, depth)); + trees.add(tree); + System.out.println(tree.getStringRepresentation()); + } + + //compute the LGG + LGGGenerator<String> lggGen = new LGGGeneratorImpl<String>(); + QueryTree<String> lgg = lggGen.getLGG(trees); + System.out.println("LGG:\n" + lgg.getStringRepresentation()); + Query q = lgg.toSPARQLQuery(); + System.out.println("Query:\n" + q); + + //run the SPARQL query against the data - should be return at least the positive examples + List<String> result = new ArrayList<String>(); + ResultSet rs = QueryExecutionFactory.create(q, model).execSelect(); + while(rs.hasNext()){ + result.add(rs.next().getResource("x0").getURI()); + } + System.out.println(result); + Assert.assertTrue(result.containsAll(posExamples)); + + } + @Test public void testLGGWithDBpediaExample(){ QueryTreeFactory<String> factory = new QueryTreeFactoryImpl(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-09 10:47:05
|
Revision: 3728 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3728&view=rev Author: christinaunger Date: 2012-06-09 10:46:58 +0000 (Sat, 09 Jun 2012) Log Message: ----------- NP/DP conjunction Modified Paths: -------------- trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex 2012-06-07 13:57:47 UTC (rev 3727) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/basic_english.lex 2012-06-09 10:46:58 UTC (rev 3728) @@ -150,7 +150,7 @@ and || (S S* CC:'and' S[s]) || <x,l1,t,[l1:[|]],[(l2,y,s,t)],[l1=l2],[]> and || (DP DP* CC:'and' DP[dp]) || <x,l1,<<e,t>,t>,[l1:[|]],[(l2,y,dp,<<e,t>,t>)],[l1=l2],[]> - and || (NP NP* CC:'and' NP[np]) || <x,l1,<e,t>,[l1:[|]],[(l2,y,np,<e,t>)],[l1=l2],[]> + and || (NP NP* CC:'and' NP[np]) || <x,l1,<e,t>,[l1:[|x=y]],[(l2,y,np,<e,t>)],[l1=l2],[]> and || (VP VP* CC:'and' VP[vp]) || - and || (ADJ ADJ* CC:'and' ADJ[adj]) || - Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-07 13:57:47 UTC (rev 3727) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-09 10:46:58 UTC (rev 3728) @@ -140,7 +140,7 @@ and || (S S* CC:'and' S[s]) || <x,l1,t,[l1:[|]],[(l2,y,s,t)],[l1=l2],[]> and || (DP DP* CC:'and' DP[dp]) || <x,l1,<<e,t>,t>,[l1:[|]],[(l2,y,dp,<<e,t>,t>)],[l1=l2],[]> - and || (NP NP* CC:'and' NP[np]) || <x,l1,<e,t>,[l1:[|]],[(l2,y,np,<e,t>)],[l1=l2],[]> + and || (NP NP* CC:'and' NP[np]) || <x,l1,<e,t>,[l1:[|x=y]],[(l2,y,np,<e,t>)],[l1=l2],[]> and || (VP VP* 
CC:'and' VP[vp]) || - and || (ADJ ADJ* CC:'and' ADJ[adj]) || - Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2012-06-07 13:57:47 UTC (rev 3727) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2012-06-09 10:46:58 UTC (rev 3728) @@ -12,7 +12,7 @@ public class TestFrontend { static String[] GRAMMAR_FILES = {"src/main/resources/lexicon/english.lex"}; - static boolean BASIC_MODE = true; // true for BASIC mode, false for LEIPZIG mode + static boolean BASIC_MODE = false; // true for BASIC mode, false for LEIPZIG mode public static void main(String[] args) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-11 13:07:08
|
Revision: 3731 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3731&view=rev Author: lorenz_b Date: 2012-06-11 13:06:58 +0000 (Mon, 11 Jun 2012) Log Message: ----------- Changed query to get URIs for property label. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-11 12:38:21 UTC (rev 3730) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-11 13:06:58 UTC (rev 3731) @@ -16,8 +16,8 @@ "LIMIT %d OFFSET %d"; super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + -// "?s ?uri ?o.\n" + - "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + + "?s ?uri ?o.\n" + +// "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-11 12:38:21 UTC (rev 3730) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-11 13:06:58 UTC (rev 3731) @@ -41,18 +41,6 @@ } catch (FileNotFoundException e) { e.printStackTrace(); } - System.out.println(model.size()); - String queryStr = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT ?uri WHERE {" + -// "?s ?uri ?o." 
+ - "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.}" + - "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label." + - "FILTER(REGEX(STR(?label), 'bathroom', 'i'))" + - "}" + - "LIMIT 20 OFFSET 0"; - System.out.println( - ResultSetFormatter.asText( - QueryExecutionFactory.create( - QueryFactory.create(queryStr, Syntax.syntaxARQ), model).execSelect())); } @Test @@ -100,11 +88,12 @@ Index resourcesIndex = new SPARQLIndex(endpoint); Index classesIndex = new SPARQLClassesIndex(endpoint); Index propertiesIndex = new SPARQLPropertiesIndex(endpoint); + System.out.println(propertiesIndex.getResources("near")); SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.init(); - String question = "Give me all houses with more than 2 bedrooms and more than 3 bathrooms."; + String question = "Give me all houses near a school."; learner.setQuestion(question); learner.learnSPARQLQueries(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-11 17:06:29
|
Revision: 3732 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3732&view=rev Author: christinaunger Date: 2012-06-11 17:06:18 +0000 (Mon, 11 Jun 2012) Log Message: ----------- [tbsl] differentiated object and datatype properties, and filtered templates inconsistent with those requirements Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java Added Paths: ----------- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-11 13:06:58 UTC (rev 3731) +++ 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/converter/DRS2SPARQL_Converter.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -76,7 +76,7 @@ } template = new Template(new Query()); - // slots = ls; + slots = ls; Query q = convert(drs, new Query(), false); if (q == null) { @@ -127,6 +127,8 @@ } } + for (Slot s : slots) if (s.getAnchor().equals("SLOT_arg")) template.addSlot(s); + Set<SPARQL_Triple> statements = new HashSet<SPARQL_Triple>(); for (DRS_Condition condition : drs.getConditions()) { @@ -242,7 +244,7 @@ } } SPARQL_Property prop = new SPARQL_Property(predicate); - prop.setIsVariable(true); + if (!predicate.contains(":")) prop.setIsVariable(true); boolean literal = false; if (simple.getArguments().size() > 1 && simple.getArguments().get(1).getValue().matches("\\d+")) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -131,12 +131,18 @@ */ try { - TreeNode tree = c.construct("NUM:'" + token + "'"); + TreeNode tree = c.construct("NUM:'" + token + "'"); + TreeNode tree2 = c.construct("(NP NUM:'" + token + "' NP*)"); int gid = grammar.addTree(grammar.size(), new Pair<String,TreeNode>(token,tree), - Collections.singletonList("<x,l1,e,[l1:[ x | equal(x," + token + ")]],[],[],[]>")); + Collections.singletonList("<x,l1,e,[l1:[ x | equal(x," + token + ")]],[],[],[ SLOT_arg/LITERAL/x ]>")); add(parseG, tree, gid-1, localID); localID++; + +// int gid = grammar.addTree(grammar.size(), new Pair<String,TreeNode>(token,tree2), +// Collections.singletonList("<x,l1,<e,t>,[l1:[ | count(x," + token + ")]],[],[],[ SLOT_arg/RESOURCE/x ]>")); +// add(parseG, tree2, gid-1, localID); 
+// localID++; foundCandidates = true; coveredTokens.add(token); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/data/Dude.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -1,11 +1,11 @@ package org.dllearner.algorithm.tbsl.sem.dudes.data; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.dllearner.algorithm.tbsl.sem.drs.DRS; +import org.dllearner.algorithm.tbsl.sem.drs.DiscourseReferent; import org.dllearner.algorithm.tbsl.sem.drs.Simple_DRS_Condition; import org.dllearner.algorithm.tbsl.sem.util.DomType; import org.dllearner.algorithm.tbsl.sem.util.DominanceConstraint; @@ -14,6 +14,7 @@ import org.dllearner.algorithm.tbsl.sem.util.Type; import org.dllearner.algorithm.tbsl.sparql.BasicSlot; import org.dllearner.algorithm.tbsl.sparql.Slot; +import org.dllearner.algorithm.tbsl.sparql.SlotType; public class Dude implements SemanticRepresentation{ @@ -156,9 +157,9 @@ output.components.addAll(input.components); output.dominanceConstraints.addAll(input.dominanceConstraints); output.arguments.remove(argument); - output.arguments.addAll(dude.arguments); - output.slots.addAll(input.slots); - + output.arguments.addAll(dude.arguments); + output.slots.addAll(input.slots); + return output; } @@ -177,7 +178,7 @@ output.components.addAll(input.components); output.dominanceConstraints.addAll(input.dominanceConstraints); output.arguments.addAll(input.arguments); - output.slots.addAll(input.slots); + output.slots.addAll(input.slots); // finally add a constraint to link the main input-component to the bottom 
output-component (with DomType.equal) DominanceConstraint newConstraint = new DominanceConstraint(getBottomLabel(output),input.mainLabel); @@ -359,6 +360,9 @@ } for (Slot slot : slots) { slot.replaceReferent(ref1.replace("?",""),ref2.replace("?","")); + String minus = null; + for (String w : slot.getWords()) if (w.equals(ref1.replace("?",""))) minus = w; + if (minus != null) { slot.getWords().remove(minus); slot.getWords().add(ref2.replace("?","")); } } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -491,8 +491,11 @@ } if (type.toString().equals("CLASS")) { slottype = SlotType.CLASS; } else if (type.toString().equals("RESOURCE")) { slottype = SlotType.RESOURCE; } + else if (type.toString().equals("DATATYPEPROPERTY")) { slottype = SlotType.DATATYPEPROPERTY; } + else if (type.toString().equals("OBJECTPROPERTY")) { slottype = SlotType.OBJECTPROPERTY; } else if (type.toString().equals("PROPERTY")) { slottype = SlotType.PROPERTY; } else if (type.toString().equals("SYMPROPERTY")) { slottype = SlotType.SYMPROPERTY; } + else if (type.toString().equals("LITERAL")) { slottype = SlotType.LITERAL; } else { slottype = SlotType.UNSPEC; } {if (true) return new Slot(ref.toString(),slottype,words);} @@ -827,6 +830,14 @@ finally { jj_save(39, xla); } } + private boolean jj_3R_13() { + if (jj_3R_7()) return true; + Token xsp; + xsp = jj_scanpos; + if (jj_3_36()) jj_scanpos = xsp; + return false; + } + private boolean jj_3_12() { if (jj_scan_token(2)) return true; if (jj_3R_1()) return true; @@ -838,14 +849,6 @@ return false; } - private boolean jj_3R_13() { - if (jj_3R_7()) return 
true; - Token xsp; - xsp = jj_scanpos; - if (jj_3_36()) jj_scanpos = xsp; - return false; - } - private boolean jj_3_11() { if (jj_scan_token(2)) return true; if (jj_3R_3()) return true; @@ -883,20 +886,14 @@ return false; } - private boolean jj_3R_11() { - if (jj_scan_token(LABEL)) return true; - if (jj_scan_token(11)) return true; - return false; - } - private boolean jj_3_40() { if (jj_scan_token(B)) return true; return false; } - private boolean jj_3R_16() { - if (jj_3R_7()) return true; - if (jj_scan_token(14)) return true; + private boolean jj_3R_11() { + if (jj_scan_token(LABEL)) return true; + if (jj_scan_token(11)) return true; return false; } @@ -905,8 +902,9 @@ return false; } - private boolean jj_3_23() { - if (jj_scan_token(MOST)) return true; + private boolean jj_3R_16() { + if (jj_3R_7()) return true; + if (jj_scan_token(14)) return true; return false; } @@ -915,12 +913,6 @@ return false; } - private boolean jj_3_30() { - if (jj_3R_10()) return true; - if (jj_scan_token(6)) return true; - return false; - } - private boolean jj_3R_7() { Token xsp; xsp = jj_scanpos; @@ -931,9 +923,8 @@ return false; } - private boolean jj_3_34() { - if (jj_scan_token(2)) return true; - if (jj_3R_4()) return true; + private boolean jj_3_23() { + if (jj_scan_token(MOST)) return true; return false; } @@ -942,6 +933,12 @@ return false; } + private boolean jj_3_30() { + if (jj_3R_10()) return true; + if (jj_scan_token(6)) return true; + return false; + } + private boolean jj_3R_10() { Token xsp; xsp = jj_scanpos; @@ -952,6 +949,12 @@ return false; } + private boolean jj_3_34() { + if (jj_scan_token(2)) return true; + if (jj_3R_4()) return true; + return false; + } + private boolean jj_3R_3() { if (jj_3R_15()) return true; return false; @@ -1110,6 +1113,12 @@ return false; } + private boolean jj_3_36() { + if (jj_scan_token(15)) return true; + if (jj_3R_13()) return true; + return false; + } + private boolean jj_3_25() { if (jj_scan_token(THELEAST)) return true; return 
false; @@ -1126,12 +1135,6 @@ return false; } - private boolean jj_3_36() { - if (jj_scan_token(15)) return true; - if (jj_3R_13()) return true; - return false; - } - private boolean jj_3_18() { if (jj_scan_token(2)) return true; if (jj_3R_8()) return true; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sem/dudes/reader/DUDE_Parser.jj 2012-06-11 17:06:18 UTC (rev 3732) @@ -451,8 +451,11 @@ } if (type.toString().equals("CLASS")) { slottype = SlotType.CLASS; } else if (type.toString().equals("RESOURCE")) { slottype = SlotType.RESOURCE; } + else if (type.toString().equals("DATATYPEPROPERTY")) { slottype = SlotType.DATATYPEPROPERTY; } + else if (type.toString().equals("OBJECTPROPERTY")) { slottype = SlotType.OBJECTPROPERTY; } else if (type.toString().equals("PROPERTY")) { slottype = SlotType.PROPERTY; } else if (type.toString().equals("SYMPROPERTY")) { slottype = SlotType.SYMPROPERTY; } + else if (type.toString().equals("LITERAL")) { slottype = SlotType.LITERAL; } else { slottype = SlotType.UNSPEC; } return new Slot(ref.toString(),slottype,words); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SlotType.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -1,5 +1,6 @@ package org.dllearner.algorithm.tbsl.sparql; public enum SlotType { - CLASS, PROPERTY, SYMPROPERTY, RESOURCE, LITERAL, UNSPEC + RESOURCE, CLASS, OBJECTPROPERTY, DATATYPEPROPERTY, 
PROPERTY, LITERAL, UNSPEC, + SYMPROPERTY // TODO don't use them anymore } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Template.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -2,7 +2,9 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; public class Template implements Serializable, Comparable<Template>{ @@ -27,6 +29,47 @@ public void addSlot(Slot s) { slots.add(s); } + + public Template checkandrefine() { + + Set<Slot> argslots = new HashSet<Slot>(); + for (Slot slot : slots) if (slot.anchor.equals("SLOT_arg")) argslots.add(slot); + + for (Slot slot : slots) { + // check for clashes + if (slot.type.equals(SlotType.CLASS)) { + for (SPARQL_Triple triple : query.conditions) { + if (triple.property.toString().equals("rdf:type") && triple.value.toString().equals("?"+slot.anchor)) { + for (Slot s : argslots) { + if (s.words.contains(triple.variable.toString().replace("?","")) && s.type.equals(SlotType.LITERAL)) + return null; + } + } + } + } + // refine property if possible + if (slot.type.equals(SlotType.PROPERTY) || slot.type.equals(SlotType.SYMPROPERTY)) { + Set<String> args = new HashSet<String>(); + for (SPARQL_Triple triple : query.conditions) { + if (triple.property.toString().equals("?"+slot.anchor)) + args.add(triple.value.toString()); + } + for (String arg : args) { + for (Slot s : argslots) { + if (s.anchor.equals("SLOT_arg") && s.words.contains(arg.replace("?",""))) { + if (s.type.equals(SlotType.LITERAL)) slot.type = SlotType.DATATYPEPROPERTY; + else if (s.type.equals(SlotType.RESOURCE)) slot.type = SlotType.OBJECTPROPERTY; + } + } + } + } + } + + // finally remove all 
argslots + slots.removeAll(argslots); + + return this; + } public String toString() { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/BasicTemplator.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -39,15 +39,16 @@ public boolean UNTAGGED_INPUT = true; public BasicTemplator() { - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } + + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } - g = LTAG_Constructor.construct(grammarFiles); + g = LTAG_Constructor.construct(grammarFiles); - tagger = new StanfordPartOfSpeechTagger(); -// tagger = new ApachePartOfSpeechTagger(); + tagger = new StanfordPartOfSpeechTagger(); +// tagger = new ApachePartOfSpeechTagger(); p = new Parser(); p.SHOW_GRAMMAR = true; @@ -61,6 +62,14 @@ d2s = new DRS2BasicSPARQL_Converter(); } + public void setGrammarFiles(String[] files) { + GRAMMAR_FILES = files; + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + } + public void setUNTAGGED_INPUT(boolean b) { UNTAGGED_INPUT = b; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -299,15 +299,15 @@ result.add(wasGerEntry); } else if (pos.equals("WHEN")) { - String dateSlot = "SLOT_" + token + "/PROPERTY/" + token +"^" + token + "_date"; - String tokenSlot = "SLOT_" + token + "/PROPERTY/" + token; + String dateSlot = "SLOT_" + token + "/DATATYPEPROPERTY/" + token +"^" + token + "_date"; + String tokenSlot = "SLOT_" + token + "/DATATYPEPROPERTY/" + token; String[] whenEntry1 = {token, "(S DP[subj] (VP V:'" + token + "'))", "<x,l1,t,[ l1:[ ?y | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>)],[ l2=l1 ],[ " + dateSlot + " ]>"}; String[] whenEntry2 = {token, "(S DP[subj] (VP V:'" + token + "' DP[obj]))", "<x,l1,t,[ l1:[|], l4:[ ?z | SLOT_" + token + "(x,y), SLOT_date(x,z) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ]," + - "[" + tokenSlot + ", SLOT_date/PROPERTY/date ]>"}; + "[" + tokenSlot + ", SLOT_date/DATATYPEPROPERTY/date ]>"}; result.add(whenEntry1); result.add(whenEntry2); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -33,7 +33,7 @@ private static final Logger logger = Logger.getLogger(Templator.class); - String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex"}; + String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; private String[] noun = {"NN","NNS","NNP","NNPS","NPREP","JJNN","JJNPREP"}; private String[] adjective = 
{"JJ","JJR","JJS","JJH"}; @@ -67,16 +67,16 @@ } public Templator(final PartOfSpeechTagger tagger, WordNet wordnet) { - this.tagger = tagger; - this.wordnet = wordnet; + this.tagger = tagger; + this.wordnet = wordnet; + + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } + g = LTAG_Constructor.construct(grammarFiles); - g = LTAG_Constructor.construct(grammarFiles); - p = new Parser(); p.SHOW_GRAMMAR = true; p.USE_DPS_AS_INITTREES = true; @@ -87,16 +87,16 @@ } public Templator(boolean b) { - this.tagger = new StanfordPartOfSpeechTagger(); - this.USE_WORDNET = false; - VERBOSE = b; + this.tagger = new StanfordPartOfSpeechTagger(); + this.USE_WORDNET = false; + VERBOSE = b; - List<InputStream> grammarFiles = new ArrayList<InputStream>(); - for(int i = 0; i < GRAMMAR_FILES.length; i++){ - grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); - } + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } - g = LTAG_Constructor.construct(grammarFiles); + g = LTAG_Constructor.construct(grammarFiles); p = new Parser(); p.SHOW_GRAMMAR = false; @@ -119,7 +119,12 @@ VERBOSE = b; } public void setGrammarFiles(String[] gf) { - GRAMMAR_FILES = gf; + GRAMMAR_FILES = gf; + List<InputStream> grammarFiles = new ArrayList<InputStream>(); + for(int i = 0; i < GRAMMAR_FILES.length; i++){ + grammarFiles.add(this.getClass().getClassLoader().getResourceAsStream(GRAMMAR_FILES[i])); + } + g = LTAG_Constructor.construct(grammarFiles); } public 
Set<Template> buildTemplates(String s) { @@ -196,6 +201,7 @@ try { Template temp = d2s.convert(drs,slots); + temp = temp.checkandrefine(); if (temp == null) { continue; } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -7,20 +7,16 @@ public class TemplatorHandler { - static String[] GRAMMAR_FILES; Templator templator; BasicTemplator basictemplator; public TemplatorHandler(String[] files) { templator = new Templator(); basictemplator = new BasicTemplator(); - GRAMMAR_FILES = files; + templator.setGrammarFiles(files); + basictemplator.setGrammarFiles(files); } - public void setGRAMMAR_FILES(String[] g) { - GRAMMAR_FILES = g; - } - public Set<Template> buildTemplates(String s) { return templator.buildTemplates(s); } Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-11 17:06:18 UTC (rev 3732) @@ -75,8 +75,8 @@ least || (ADJ DET:'least' ADJ*) || <x,l1,<e,t>,[ l1:[ | minimum(a,x,x) ] ], [],[],[]> - how many || (DET DET:'how' DET:'many') || <x,l1,e, [ l1:[ ?x | ] ], [],[],[]> - how many || (DET DET:'how' DET:'many') || <x,l1,e, [ l1:[ ?c,x | count(a,x,c) ] ], [],[],[]> + how many || (DET DET:'how' DET:'many') || <x,l1,e, [ l1:[ ?x | ] ], [],[],[ SLOT_arg/DATATYPEPROPERTY/x ]> + how many || (DET DET:'how' DET:'many') || <x,l1,e, [ l1:[ ?c,x | count(a,x,c) ] ], [],[],[ SLOT_arg/OBJECTPROPERTY_CLASS/x ]> a || (DET 
DET:'a') || <x,l1,e, [ l1:[ x |] ], [],[],[]> an || (DET DET:'an') || <x,l1,e, [ l1:[ x |] ], [],[],[]> which || (DET DET:'which') || <x,l1,e, [ l1:[ ?x |] ], [],[],[]> @@ -85,11 +85,11 @@ the least || (DET DET:'the' DET:'least') || <y, l1, e, [ l1:[ | l2:[ y | ] THELEAST y l3:[|] ] ], [], [],[]> // NECESSARY "CHEAT" - highest || (NP ADJ:'highest' NP*) || <x, l1, e, [ l1:[ | maximum(x) ] ], [], [],[]> ;; <x, l1, e, [ l1:[ j | SLOT_high(x,j), maximum(j) ] ],[],[],[ SLOT_high/PROPERTY/height ]> + highest || (NP ADJ:'highest' NP*) || <x, l1, e, [ l1:[ | maximum(x) ] ], [], [],[]> ;; <x, l1, e, [ l1:[ j | SLOT_high(x,j), maximum(j) ] ],[],[],[ SLOT_high/DATATYPEPROPERTY/height ]> // COUNT - more than || (DP DET:'more' DET:'than' NUM[num] NP[np]) || <y,l1,<<e,t>,t>,[ l1:[ y,c | count(y,c), greater(c,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> ;; <y,l1,<<e,t>,t>,[ l1:[ y | greater(y,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> - less than || (DP DET:'less' DET:'than' NUM[num] NP[np]) || <y,l1,<<e,t>,t>,[ l1:[ y,c | count(y,c), less(c,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> ;; <y,l1,<<e,t>,t>,[ l1:[ y | less(y,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[]> + more than || (DP DET:'more' DET:'than' NUM[num] NP[np]) || <y,l1,<<e,t>,t>,[ l1:[ y,c | count(y,c), greater(c,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[ SLOT_arg/RESOURCE/y ]> ;; <y,l1,<<e,t>,t>,[ l1:[ y | greater(y,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[ SLOT_arg/LITERAL/y ]> + less than || (DP DET:'less' DET:'than' NUM[num] NP[np]) || <y,l1,<<e,t>,t>,[ l1:[ y,c | count(y,c), less(c,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[ SLOT_arg/RESOURCE/y ]> ;; <y,l1,<<e,t>,t>,[ l1:[ y | less(y,z) ] ],[(l2,y,np,<e,t>),(l3,z,num,e)],[l2=l1,l3=l1],[ SLOT_arg/LITERAL/y ]> // HOW // how || (DP DET:'how' ADJ[adj]) || <x,l1,<<e,t>,t>,[ l1:[?x,|] ],[ (x,l2,adj,<e,t>) ],[l2=l1],[]> @@ -115,7 +115,7 @@ what || (DP WH:'what') || <x, l1, <<e,t>,t>, [ l1:[ 
?x | ] ], [], [], []> which || (DP WH:'which') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> - how many || (DP WH:'how' ADJ:'many' NP[noun]) || <y, l1, <<e,t>,t>, [ l1:[ | l2:[ y | ] HOWMANY y l3:[|] ] ], [ (l4,y,noun,<e,t>) ], [ l4=l2 ],[]> + how many || (DP WH:'how' ADJ:'many' NP[noun]) || <y, l1, <<e,t>,t>, [ l1:[ | l2:[ y | ] HOWMANY y l3:[|] ] ], [ (l4,y,noun,<e,t>) ], [ l4=l2 ],[ SLOT_arg/RESOURCE/y ]> who || (DP WH:'who') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> whom || (DP WH:'whom') || <x, l1, <<e,t>,t>, [ l1:[ ?x | ] ], [], [], []> when || (S WH:'when' S[s]) || <x, l1, t, [ l1:[ ?x | SLOT_p(y,x) ] ], [(l2,y,s,t)], [l2=l1], [ SLOT_p/PROPERTY/date ]> @@ -162,24 +162,25 @@ // NUMBERS (1-10) // --------------------- - one || (NP NUM:'one' NP*) || <x,l1,<e,t>,[l1:[x|count(x,1)]],[],[],[]> - two || (NP NUM:'two' NP*) || <x,l1,<e,t>,[l1:[x|count(x,2)]],[],[],[]> - three || (NP NUM:'three' NP*) || <x,l1,<e,t>,[l1:[x|count(x,3)]],[],[],[]> - four || (NP NUM:'four' NP*) || <x,l1,<e,t>,[l1:[x|count(x,4)]],[],[],[]> - five || (NP NUM:'five' NP*) || <x,l1,<e,t>,[l1:[x|count(x,5)]],[],[],[]> - six || (NP NUM:'six' NP*) || <x,l1,<e,t>,[l1:[x|count(x,6)]],[],[],[]> - seven || (NP NUM:'seven' NP*) || <x,l1,<e,t>,[l1:[x|count(x,7)]],[],[],[]> - eight || (NP NUM:'eight' NP*) || <x,l1,<e,t>,[l1:[x|count(x,8)]],[],[],[]> - nine || (NP NUM:'nine' NP*) || <x,l1,<e,t>,[l1:[x|count(x,9)]],[],[],[]> - ten || (NP NUM:'ten' NP*) || <x,l1,<e,t>,[l1:[x|count(x,10)]],[],[],[]> + one || (NP NUM:'one' NP*) || <x,l1,<e,t>,[l1:[x|count(x,1)]],[],[],[ SLOT_arg/RESOURCE/x ]> + two || (NP NUM:'two' NP*) || <x,l1,<e,t>,[l1:[x|count(x,2)]],[],[],[ SLOT_arg/RESOURCE/x ]> + three || (NP NUM:'three' NP*) || <x,l1,<e,t>,[l1:[x|count(x,3)]],[],[],[ SLOT_arg/RESOURCE/x ]> + four || (NP NUM:'four' NP*) || <x,l1,<e,t>,[l1:[x|count(x,4)]],[],[],[ SLOT_arg/RESOURCE/x ]> + five || (NP NUM:'five' NP*) || <x,l1,<e,t>,[l1:[x|count(x,5)]],[],[],[ SLOT_arg/RESOURCE/x ]> + six || (NP NUM:'six' 
NP*) || <x,l1,<e,t>,[l1:[x|count(x,6)]],[],[],[ SLOT_arg/RESOURCE/x ]> + seven || (NP NUM:'seven' NP*) || <x,l1,<e,t>,[l1:[x|count(x,7)]],[],[],[ SLOT_arg/RESOURCE/x ]> + eight || (NP NUM:'eight' NP*) || <x,l1,<e,t>,[l1:[x|count(x,8)]],[],[],[ SLOT_arg/RESOURCE/x ]> + nine || (NP NUM:'nine' NP*) || <x,l1,<e,t>,[l1:[x|count(x,9)]],[],[],[ SLOT_arg/RESOURCE/x ]> + ten || (NP NUM:'ten' NP*) || <x,l1,<e,t>,[l1:[x|count(x,10)]],[],[],[ SLOT_arg/RESOURCE/x ]> - one || (NUM NUM:'one') || <x,l1,e,[l1:[x|equal(x,1)]],[],[],[]> - two || (NUM NUM:'two') || <x,l1,e,[l1:[x|equal(x,2)]],[],[],[]> - three || (NUM NUM:'three') || <x,l1,e,[l1:[x|equal(x,3)]],[],[],[]> - four || (NUM NUM:'four') || <x,l1,e,[l1:[x|equal(x,4)]],[],[],[]> - five || (NUM NUM:'five') || <x,l1,e,[l1:[x|equal(x,5)]],[],[],[]> - six || (NUM NUM:'six') || <x,l1,e,[l1:[x|equal(x,6)]],[],[],[]> - seven || (NUM NUM:'seven') || <x,l1,e,[l1:[x|equal(x,7)]],[],[],[]> - eight || (NUM NUM:'eight') || <x,l1,e,[l1:[x|equal(x,8)]],[],[],[]> - nine || (NUM NUM:'nine') || <x,l1,e,[l1:[x|equal(x,9)]],[],[],[]> - ten || (NUM NUM:'ten') || <x,l1,e,[l1:[x|equal(x,10)]],[],[],[]> \ No newline at end of file + one || (NUM NUM:'one') || <x,l1,e,[l1:[x|equal(x,1)]],[],[],[ SLOT_arg/LITERAL/x ]> + two || (NUM NUM:'two') || <x,l1,e,[l1:[x|equal(x,2)]],[],[],[ SLOT_arg/LITERAL/x ]> + three || (NUM NUM:'three') || <x,l1,e,[l1:[x|equal(x,3)]],[],[],[ SLOT_arg/LITERAL/x ]> + four || (NUM NUM:'four') || <x,l1,e,[l1:[x|equal(x,4)]],[],[],[ SLOT_arg/LITERAL/x ]> + five || (NUM NUM:'five') || <x,l1,e,[l1:[x|equal(x,5)]],[],[],[ SLOT_arg/LITERAL/x ]> + six || (NUM NUM:'six') || <x,l1,e,[l1:[x|equal(x,6)]],[],[],[ SLOT_arg/LITERAL/x ]> + seven || (NUM NUM:'seven') || <x,l1,e,[l1:[x|equal(x,7)]],[],[],[ SLOT_arg/LITERAL/x ]> + eight || (NUM NUM:'eight') || <x,l1,e,[l1:[x|equal(x,8)]],[],[],[ SLOT_arg/LITERAL/x ]> + nine || (NUM NUM:'nine') || <x,l1,e,[l1:[x|equal(x,9)]],[],[],[ SLOT_arg/LITERAL/x ]> + ten || (NUM NUM:'ten') || 
<x,l1,e,[l1:[x|equal(x,10)]],[],[],[ SLOT_arg/LITERAL/x ]> + Copied: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex (from rev 3728, trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex) =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-11 17:06:18 UTC (rev 3732) @@ -0,0 +1,31 @@ + +// PREPOSITIONS + + close to || (NP NP* (PP P:'close' P:'to' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_closeto(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_closeto/OBJECTPROPERTY/near ]> + in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location ]> + since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> + + for . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for more than . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + from . to . 
pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + + with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> + with || (NP NP* (PP P:'with' NUM[num] DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y), count(y,z) ] ], [ (l2,y,dp,<<e,t>,t>),(l3,z,num,e) ], [ l2=l1,l3=l1 ],[ SLOT_arg/RESOURCE/y ]> + with || (NP NP* (PP P:'with' NUM[num] DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y), equals(y,z) ] ], [ (l2,y,dp,<<e,t>,t>),(l3,z,num,e) ], [ l2=l1,l3=l1 ],[ SLOT_arg/LITERAL/y ]> + + +// MONTHS + + january || (DP DP:'january') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,1) ]], [],[],[]> + february || (DP DP:'february') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,2) ]], [],[],[]> + march || (DP DP:'march') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,3) ]], [],[],[]> + april || (DP DP:'april') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,4) ]], [],[],[]> + may || (DP DP:'may') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,5) ]], [],[],[]> + june || (DP DP:'june') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,6) ]], [],[],[]> + july || (DP DP:'july') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,7) ]], [],[],[]> + august || (DP DP:'august') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,8) ]], [],[],[]> + september || (DP DP:'september') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,9) ]], [],[],[]> + october || (DP DP:'october') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,10) ]], [],[],[]> + november || (DP DP:'november') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,11) ]], [],[],[]> + december || (DP DP:'december') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,12) ]], [],[],[]> \ No newline at end of file Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 
=================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2012-06-11 13:06:58 UTC (rev 3731) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2012-06-11 17:06:18 UTC (rev 3732) @@ -11,7 +11,7 @@ public class TestFrontend { - static String[] GRAMMAR_FILES = {"src/main/resources/lexicon/english.lex"}; + static String[] GRAMMAR_FILES = {"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}; static boolean BASIC_MODE = false; // true for BASIC mode, false for LEIPZIG mode public static void main(String[] args) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-13 14:56:49
|
Revision: 3738 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3738&view=rev Author: lorenz_b Date: 2012-06-13 14:56:37 +0000 (Wed, 13 Jun 2012) Log Message: ----------- Integration of manual mappings. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/common/index/MappingBasedIndex.java trunk/components-ext/src/main/resources/tbsl/oxford_class_mappings.txt trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt trunk/components-ext/src/main/resources/tbsl/oxford_resource_mappings.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/TestFrontend.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -14,7 +14,7 @@ public class TestFrontend { // MODE ::= BASIC | LEIPZIG - static String MODE = "BASIC"; + static String MODE = "LEIPZIG"; public static void 
main(String[] args) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -41,6 +41,7 @@ import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; +import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; @@ -96,6 +97,8 @@ private Index datatypePropertiesIndex; private Index objectPropertiesIndex; + private MappingBasedIndex mappingIndex; + private Templator templateGenerator; private Lemmatizer lemmatizer; private PartOfSpeechTagger posTagger; @@ -192,6 +195,10 @@ lemmatizer = new LingPipeLemmatizer(); } + public void setMappingIndex(MappingBasedIndex mappingIndex) { + this.mappingIndex = mappingIndex; + } + /* * Only for Evaluation useful. 
*/ @@ -369,6 +376,7 @@ } } }); + slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); @@ -385,11 +393,13 @@ long startTime = System.currentTimeMillis(); for (Slot slot : t.getSlots()) { - if(!slot2Allocations.containsKey(slot)){ + if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); list.add(submit); - } + } else { + System.out.println("CACHE HIT"); + } } for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { @@ -840,27 +850,42 @@ IndexResultSet rs; for(String word : slot.getWords()){ + rs = new IndexResultSet(); + if(mappingIndex != null){ + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + rs.add(mappingIndex.getClassesWithScores(word)); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + rs.add(mappingIndex.getPropertiesWithScores(word)); + } else if(type == SlotType.DATATYPEPROPERTY){ + rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); + } else if(type == SlotType.OBJECTPROPERTY){ + rs.add(mappingIndex.getObjectPropertiesWithScores(word)); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + rs.add(mappingIndex.getResourcesWithScores(word)); + } + } if(slot.getSlotType() == SlotType.RESOURCE){ - rs = index.getResourcesWithScores(word, 50); + rs.add(index.getResourcesWithScores(word, 50)); } else { if(slot.getSlotType() == SlotType.CLASS){ word = PlingStemmer.stem(word); } - rs = index.getResourcesWithScores(word, 20); + rs.add(index.getResourcesWithScores(word, 20)); } for(IndexResultItem item : rs.getItems()){ double similarity = Similarity.getSimilarity(word, item.getLabel()); - //get the labels of the redirects and compute the highest similarity - 
if(slot.getSlotType() == SlotType.RESOURCE){ - Set<String> labels = getRedirectLabels(item.getUri()); - for(String label : labels){ - double tmp = Similarity.getSimilarity(word, label); - if(tmp > similarity){ - similarity = tmp; - } - } - } +// //get the labels of the redirects and compute the highest similarity +// if(slot.getSlotType() == SlotType.RESOURCE){ +// Set<String> labels = getRedirectLabels(item.getUri()); +// for(String label : labels){ +// double tmp = Similarity.getSimilarity(word, label); +// if(tmp > similarity){ +// similarity = tmp; +// } +// } +// } double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); allocations.add(new Allocation(item.getUri(), prominence, similarity)); } @@ -876,6 +901,11 @@ } + private boolean isDatatypePropeprty(String uri){ + String query = "ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty>}."; + return executeAskQuery(query); + } + /** * @param args * @throws NoTemplateFoundException Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Slot.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -103,7 +103,8 @@ return new Slot(anchor,type,newWords); } - @Override + + /*@Override public int hashCode() { final int prime = 31; int result = 1; @@ -143,7 +144,31 @@ } else if (!words.equals(other.words)) return false; return true; + }*/ + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Slot other = (Slot) obj; + if(other.type == type && other.token == token){ + return true; + } + return false; } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = 
prime * result + ((token == null) ? 0 : token.hashCode()); + result = prime * result + ((type == null) ? 0 : type.hashCode()); + return result; + } + } Added: trunk/components-ext/src/main/java/org/dllearner/common/index/MappingBasedIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/MappingBasedIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/MappingBasedIndex.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -0,0 +1,211 @@ +package org.dllearner.common.index; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +public class MappingBasedIndex { + + private Map<String, List<String>> classUri2TokensMap = new HashMap<String, List<String>>(); + private Map<String, List<String>> resourceUri2TokensMap = new HashMap<String, List<String>>(); + private Map<String, List<String>> datatypePropertyUri2TokensMap = new HashMap<String, List<String>>(); + private Map<String, List<String>> objectPropertyUri2TokensMap = new HashMap<String, List<String>>(); + + public MappingBasedIndex(String classMappingsFile, String resourceMappingsFile, + String dataPropertyMappingsFile, String objectPropertyMappingsFile) { + BufferedReader br = null; + String line = null; + try { + //load class mappings + if(classMappingsFile != null){ + br = new BufferedReader(new FileReader(new File(classMappingsFile))); + while((line = br.readLine()) != null){ + int split = line.indexOf("|"); + //get the URI + String uri = line.substring(0, split); + //get the list of tokens + List<String> tokens = new ArrayList<String>(); + String tokenString = line.substring(split + 1); + String[] tokenArray = tokenString.split(","); + for(String token : tokenArray){ + 
tokens.add(token.trim()); + } + + classUri2TokensMap.put(uri, tokens); + } + } + + //load resource mappings + if(resourceMappingsFile != null){ + br = new BufferedReader(new FileReader(new File(resourceMappingsFile))); + while((line = br.readLine()) != null){ + int split = line.indexOf("|"); + //get the URI + String uri = line.substring(0, split); + //get the list of tokens + List<String> tokens = new ArrayList<String>(); + String tokenString = line.substring(split + 1); + String[] tokenArray = tokenString.split(","); + for(String token : tokenArray){ + tokens.add(token.trim()); + } + + resourceUri2TokensMap.put(uri, tokens); + } + } + + //load object property mappings + if(objectPropertyMappingsFile != null){ + br = new BufferedReader(new FileReader(new File(objectPropertyMappingsFile))); + while((line = br.readLine()) != null){ + int split = line.indexOf("|"); + //get the URI + String uri = line.substring(0, split); + //get the list of tokens + List<String> tokens = new ArrayList<String>(); + String tokenString = line.substring(split + 1); + String[] tokenArray = tokenString.split(","); + for(String token : tokenArray){ + tokens.add(token.trim()); + } + + objectPropertyUri2TokensMap.put(uri, tokens); + } + } + + //load datatype property mappings + if(dataPropertyMappingsFile != null){ + br = new BufferedReader(new FileReader(new File(dataPropertyMappingsFile))); + while((line = br.readLine()) != null){ + int split = line.indexOf("|"); + //get the URI + String uri = line.substring(0, split); + //get the list of tokens + List<String> tokens = new ArrayList<String>(); + String tokenString = line.substring(split + 1); + String[] tokenArray = tokenString.split(","); + for(String token : tokenArray){ + tokens.add(token.trim()); + } + + datatypePropertyUri2TokensMap.put(uri, tokens); + } + } + + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public List<String> getClasses(String token){ + 
List<String> uris = new ArrayList<String>(); + for(Entry<String, List<String>> entry : classUri2TokensMap.entrySet()){ + if(entry.getValue().contains(token)){ + uris.add(entry.getKey()); + } + } + return uris; + } + + public List<String> getResources(String token){ + List<String> uris = new ArrayList<String>(); + for(Entry<String, List<String>> entry : resourceUri2TokensMap.entrySet()){ + if(entry.getValue().contains(token)){ + uris.add(entry.getKey()); + } + } + return uris; + } + + public List<String> getObjectProperties(String token){ + List<String> uris = new ArrayList<String>(); + for(Entry<String, List<String>> entry : objectPropertyUri2TokensMap.entrySet()){ + if(entry.getValue().contains(token)){ + uris.add(entry.getKey()); + } + } + return uris; + } + + public List<String> getDatatypeProperties(String token){ + List<String> uris = new ArrayList<String>(); + for(Entry<String, List<String>> entry : datatypePropertyUri2TokensMap.entrySet()){ + if(entry.getValue().contains(token)){ + uris.add(entry.getKey()); + } + } + return uris; + } + + public List<String> getProperties(String token){ + List<String> uris = new ArrayList<String>(); + uris.addAll(getObjectProperties(token)); + uris.addAll(getDatatypeProperties(token)); + return uris; + } + + public IndexResultSet getClassesWithScores(String token){ + IndexResultSet rs = new IndexResultSet(); + for(String uri : getClasses(token)){ + rs.addItem(new IndexResultItem(uri, token, 1f)); + } + return rs; + } + + public IndexResultSet getResourcesWithScores(String token){ + IndexResultSet rs = new IndexResultSet(); + for(String uri : getResources(token)){ + rs.addItem(new IndexResultItem(uri, token, 1f)); + } + return rs; + } + + public IndexResultSet getObjectPropertiesWithScores(String token){ + IndexResultSet rs = new IndexResultSet(); + for(String uri : getObjectProperties(token)){ + rs.addItem(new IndexResultItem(uri, token, 1f)); + } + return rs; + } + + public IndexResultSet 
getDatatypePropertiesWithScores(String token){ + IndexResultSet rs = new IndexResultSet(); + for(String uri : getDatatypeProperties(token)){ + rs.addItem(new IndexResultItem(uri, token, 1f)); + } + return rs; + } + + public IndexResultSet getPropertiesWithScores(String token){ + IndexResultSet rs = new IndexResultSet(); + for(String uri : getProperties(token)){ + rs.addItem(new IndexResultItem(uri, token, 1f)); + } + return rs; + } + + public Boolean isDataProperty(String uri){ + if(datatypePropertyUri2TokensMap.containsKey(uri)) { + return true; + } else if(objectPropertyUri2TokensMap.containsKey(uri)){ + return false; + } + return null; + } + + public static void main(String[] args) { + MappingBasedIndex index = new MappingBasedIndex(MappingBasedIndex.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), null, null, null); + System.out.println(index.getClasses("flat")); + } + +} Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -1,5 +1,6 @@ package org.dllearner.common.index; +import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import com.hp.hpl.jena.rdf.model.Model; @@ -8,23 +9,20 @@ public SPARQLClassesIndex(SparqlEndpoint endpoint) { super(endpoint); - - super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + - "?s a ?uri.\n" + - "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + - "FILTER(REGEX(STR(?label), '%s'))}\n" + - "LIMIT %d OFFSET %d"; - - super.queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + - "?s a ?uri.\n" + - "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - 
"FILTER(REGEX(STR(?label), '%s'))}\n" + - "LIMIT %d OFFSET %d"; + init(); } + public SPARQLClassesIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + super(endpoint, cache); + init(); + } + public SPARQLClassesIndex(Model model) { super(model); - + init(); + } + + private void init(){ super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + "?s a ?uri.\n" + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -60,6 +60,10 @@ this.cache = cache; } + public void setCache(ExtractionDBCache cache) { + this.cache = cache; + } + @Override public List<String> getResources(String searchTerm) { return getResources(searchTerm, DEFAULT_LIMIT); Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -1,5 +1,6 @@ package org.dllearner.common.index; +import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import com.hp.hpl.jena.rdf.model.Model; @@ -11,6 +12,11 @@ init(); } + public SPARQLPropertiesIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + super(endpoint, cache); + init(); + } + public SPARQLPropertiesIndex(Model model) { super(model); init(); @@ -28,7 +34,7 @@ "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; - 
super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT ?uri ?label WHERE {\n" + "?s ?uri ?o.\n" + // "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-13 14:56:37 UTC (rev 3738) @@ -2,7 +2,7 @@ // PREPOSITIONS close to || (NP NP* (PP P:'close' P:'to' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_closeto(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_closeto/OBJECTPROPERTY/near ]> - in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code ]> + in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> for . 
pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> Added: trunk/components-ext/src/main/resources/tbsl/oxford_class_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_class_mappings.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/oxford_class_mappings.txt 2012-06-13 14:56:37 UTC (rev 3738) @@ -0,0 +1 @@ +http://diadem.cs.ox.ac.uk/ontologies/real-estate#House|house, houses, flat, flats, appartement, appartements \ No newline at end of file Added: trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-06-13 14:56:37 UTC (rev 3738) @@ -0,0 +1,2 @@ +http://www.w3.org/2006/vcard/ns#street-address|address, location, postal code +http://www.w3.org/2006/vcard/ns#locality|address, location \ No newline at end of file Added: trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt =================================================================== Added: trunk/components-ext/src/main/resources/tbsl/oxford_resource_mappings.txt =================================================================== Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -0,0 +1,78 @@ +package org.dllearner.algorithm.tbsl; + +import java.io.BufferedReader; +import 
java.io.File; +import java.io.FileReader; +import java.net.URL; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; +import org.dllearner.common.index.Index; +import org.dllearner.common.index.MappingBasedIndex; +import org.dllearner.common.index.SPARQLClassesIndex; +import org.dllearner.common.index.SPARQLIndex; +import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; + +public class OxfordEvaluation { + + private static final String QUERIES_FILE = "/home/lorenz/evaluation.txt"; + + public static void main(String[] args) throws Exception{ + SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + ExtractionDBCache cache = new ExtractionDBCache("cache"); + + SPARQLIndex resourcesIndex = new SPARQLIndex(endpoint, cache); + SPARQLIndex classesIndex = new SPARQLClassesIndex(endpoint, cache); + SPARQLIndex propertiesIndex = new SPARQLPropertiesIndex(endpoint, cache); + MappingBasedIndex mappingIndex= new MappingBasedIndex( + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath()); + + SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); + learner.setMappingIndex(mappingIndex); + learner.init(); + + int learnedQuestions = 0; + Map<String, String> question2QueryMap = new HashMap<String, 
String>(); + + BufferedReader br = new BufferedReader(new FileReader(new File(QUERIES_FILE))); + + int questionNr = 0; + String question = null; + while((question = br.readLine()) != null){ + question = question.replace("question:", "").trim(); + if(question.isEmpty()) continue; + if(!question.toLowerCase().contains("Give me all") && Character.isLowerCase(question.charAt(0))){ + question = "Give me all " + question; + } + System.out.println("########################################################"); + questionNr++; + System.out.println(question); + try { + learner.setQuestion(question); + learner.learnSPARQLQueries(); + String learnedQuery = learner.getBestSPARQLQuery(); + if(learnedQuery != null){ + question2QueryMap.put(question, learnedQuery); + learnedQuestions++; + } + } catch (Exception e) { + e.printStackTrace(); + } + } + System.out.println("Generated SPARQL queries for " + learnedQuestions + " questions."); + for(Entry<String, String> entry : question2QueryMap.entrySet()){ + System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); + System.out.println(entry.getKey()); + System.out.println(entry.getValue()); + } + } + +} Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-13 10:31:55 UTC (rev 3737) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-13 14:56:37 UTC (rev 3738) @@ -44,6 +44,7 @@ model.read(new FileInputStream(f), null, "TURTLE"); } catch (Exception e) { System.err.println("Parsing failed."); + e.printStackTrace(); } } } @@ -83,7 +84,7 @@ SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(model, resourcesIndex, classesIndex, propertiesIndex); learner.init(); - String question = "Give me all houses with more than 2 bedrooms."; + String question = "Give me all houses with 
more than 3 bathrooms and more than 2 bedrooms."; learner.setQuestion(question); learner.learnSPARQLQueries(); @@ -104,6 +105,7 @@ String question = "Give me all houses near a school."; question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; + question = "Give me all houses with large garden and equipped kitchen"; learner.setQuestion(question); learner.learnSPARQLQueries(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-14 14:55:06
|
Revision: 3740 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3740&view=rev Author: lorenz_b Date: 2012-06-14 14:54:54 +0000 (Thu, 14 Jun 2012) Log Message: ----------- Tried some to answer some more questions. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoClassesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoDatatypePropertiesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoObjectPropertiesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoPropertiesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoResourcesIndex.java trunk/components-ext/src/main/resources/tbsl/evaluation.txt Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 
2012-06-14 14:54:54 UTC (rev 3740) @@ -34,9 +34,11 @@ import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Term; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; +import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -51,6 +53,9 @@ import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; +import org.dllearner.common.index.VirtuosoObjectPropertiesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblem; import org.dllearner.core.SparqlQueryLearningAlgorithm; @@ -150,8 +155,13 @@ setOptions(options); if(propertiesIndex instanceof SPARQLPropertiesIndex){ - datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + 
} } else { datatypePropertiesIndex = propertiesIndex; objectPropertiesIndex = propertiesIndex; @@ -186,8 +196,13 @@ setOptions(options); if(propertiesIndex instanceof SPARQLPropertiesIndex){ - datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); - objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } } else { datatypePropertiesIndex = propertiesIndex; objectPropertiesIndex = propertiesIndex; @@ -419,7 +434,9 @@ } } + executor.shutdown(); + /*for(Slot slot : t.getSlots()){ allocations = slot2Allocations2.get(slot); if(allocations == null){ @@ -559,9 +576,30 @@ } }*/ - if(!drop){ - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case + for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ + SPARQL_Value object = triple.getValue(); + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + SPARQL_Property predicate = triple.getProperty(); + if(!predicate.isVariable()){//only consider triple where predicate is URI + String predicateURI = predicate.getName().replace("<", "").replace(">", ""); + if(isDatatypeProperty(predicateURI)){//if data property + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); + } else { + 
q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } + } + } else { + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + } WeightedQuery w = new WeightedQuery(q); double newScore = query.getScore() + a.getScore(); w.setScore(newScore); @@ -581,14 +619,50 @@ for(WeightedQuery query : queries){ Query q = query.getQuery(); for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - new SPARQL_Term(objectVar), "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - + SPARQL_Value object = triple.getValue(); + if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position + SPARQL_Property predicate = triple.getProperty(); + if(!predicate.isVariable()){//only consider triple where predicate is URI + String predicateURI = predicate.getName().replace("<", "").replace(">", ""); + if(isDatatypeProperty(predicateURI)){//if data property + String objectVar = triple.getValue().getName(); + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + new SPARQL_Term(objectVar), "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); + } + } + } } } + } else if(slot.getSlotType() == SlotType.CLASS){ + String token = slot.getWords().get(0); + if(slot.getToken().contains("house")){ + String regexToken = token.replace("houses", "").replace("house", "").trim(); + try { + Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); + SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); + if(alloc != null && !alloc.isEmpty()){ + String uri = alloc.first().getUri(); + for(WeightedQuery query : queries){ + Query q = query.getQuery(); + for(SPARQL_Triple triple : 
q.getTriplesWithVar(slot.getAnchor())){ + SPARQL_Term subject = triple.getVariable(); + SPARQL_Term object = new SPARQL_Term("desc"); + object.setIsVariable(true); + object.setIsURI(false); + q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); + q.addFilter(new SPARQL_Filter(new SPARQL_Pair( + object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); + } + q.replaceVarWithURI(slot.getAnchor(), uri); + + } + } + } catch (Exception e) { + e.printStackTrace(); + } + } } @@ -733,7 +807,10 @@ if(queryType == SPARQL_QueryType.SELECT){ for(String query : queries){ logger.info("Testing query:\n" + query); - ResultSet rs = executeSelect(query); + com.hp.hpl.jena.query.Query q = QueryFactory.create(query, Syntax.syntaxARQ); + q.setLimit(1); + ResultSet rs = executeSelect(q.toString());//executeSelect(query); + List<String> results = new ArrayList<String>(); QuerySolution qs; String projectionVar; @@ -927,9 +1004,16 @@ } - private boolean isDatatypePropeprty(String uri){ - String query = "ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty>}."; - return executeAskQuery(query); + private boolean isDatatypeProperty(String uri){ + Boolean isDatatypeProperty = null; + if(mappingIndex != null){ + isDatatypeProperty = mappingIndex.isDataProperty(uri); + } + if(isDatatypeProperty == null){ + String query = String.format("ASK {<%s> a <http://www.w3.org/2002/07/owl#DatatypeProperty> .}", uri); + isDatatypeProperty = executeAskQuery(query); + } + return isDatatypeProperty; } /** Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -378,6 +378,8 @@ 
object.setIsVariable(false); if(object instanceof SPARQL_Term){ ((SPARQL_Term) object).setIsURI(true); + } else if(object instanceof SPARQL_Property){ + ((SPARQL_Property) object).setIsVariable(false); } } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Pair.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -6,12 +6,12 @@ { private static final long serialVersionUID = -1255754209857823420L; - public SPARQL_Term a; + public SPARQL_Value a; public Object b; public SPARQL_PairType type; - public SPARQL_Pair(SPARQL_Term a, Object b, SPARQL_PairType type) + public SPARQL_Pair(SPARQL_Value a, Object b, SPARQL_PairType type) { super(); this.a = a; @@ -19,7 +19,7 @@ this.type = type; } - public SPARQL_Pair(SPARQL_Term a, SPARQL_PairType type) + public SPARQL_Pair(SPARQL_Value a, SPARQL_PairType type) { super(); this.a = a; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/SPARQL_Term.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -103,7 +103,7 @@ if (isString()) { return name.replaceAll("_"," "); } - else if (isURI) { + else if (isURI || !isVariable()) { return name; } else return "?"+name.toLowerCase(); Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 
2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -127,7 +127,7 @@ return irs; } - private ResultSet executeSelect(String query){//System.out.println(query); + private ResultSet executeSelect(String query){System.out.println(query); ResultSet rs; if(model == null){ if(cache == null){ Added: trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoClassesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoClassesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoClassesIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,38 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class VirtuosoClassesIndex extends SPARQLIndex{ + + public VirtuosoClassesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public VirtuosoClassesIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + super(endpoint, cache); + init(); + } + + public VirtuosoClassesIndex(Model model) { + super(model); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + + "?s a ?uri.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + + "?s a ?uri.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + } +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoDatatypePropertiesIndex.java =================================================================== --- 
trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoDatatypePropertiesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoDatatypePropertiesIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,41 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class VirtuosoDatatypePropertiesIndex extends SPARQLPropertiesIndex{ + + public VirtuosoDatatypePropertiesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public VirtuosoDatatypePropertiesIndex(Model model) { + super(model); + init(); + } + + public VirtuosoDatatypePropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT ?uri WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:DatatypeProperty.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:DatatypeProperty.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + } + + +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoObjectPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoObjectPropertiesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoObjectPropertiesIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,42 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class VirtuosoObjectPropertiesIndex extends SPARQLPropertiesIndex{ + + public 
VirtuosoObjectPropertiesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public VirtuosoObjectPropertiesIndex(Model model) { + super(model); + init(); + } + + public VirtuosoObjectPropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT ?uri WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:ObjectProperty.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label." + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:ObjectProperty.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + } + + + +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoPropertiesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoPropertiesIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,46 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class VirtuosoPropertiesIndex extends SPARQLIndex{ + + public VirtuosoPropertiesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public VirtuosoPropertiesIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + super(endpoint, cache); + init(); + } + + public VirtuosoPropertiesIndex(Model model) { + super(model); + init(); + } + + public VirtuosoPropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri 
<http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT ?uri ?label WHERE {\n" + + "?s ?uri ?o.\n" + +// "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + } + + +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoResourcesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoResourcesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/VirtuosoResourcesIndex.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,43 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class VirtuosoResourcesIndex extends SPARQLIndex{ + + public VirtuosoResourcesIndex(SparqlEndpoint endpoint) { + this(endpoint, null); + } + + public VirtuosoResourcesIndex(Model model) { + super(model); + init(); + } + + public VirtuosoResourcesIndex(VirtuosoResourcesIndex index) { + super(index); + } + + public VirtuosoResourcesIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + super(endpoint, cache); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + + "?uri a ?type.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + + "?uri a ?type.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label.\n" + + "?label bif:contains '\"%s\"'}\n" + + "LIMIT %d OFFSET %d"; + } + + +} Added: 
trunk/components-ext/src/main/resources/tbsl/evaluation.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/evaluation.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/evaluation.txt 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1,149 @@ +question: houses in Headington + +question: houses in Abingdon with more than 2 bedrooms + +question: houses with garden in Wheatley + +question: detached houses in Oxford + +question: Victorian houses in Oxfordshire + +question: Edwardian house in Oxfordshire for less than 1000000 + +question: houses with double garage + +question: houses with large garden and equipped kitchen + +question: houses with more than 1 reception room + +question: houses in Didcot furnished to a high standard + +question: houses with conservatory room and less than 900000 pounds + +question: detached bungalows in Oxfordshire + +question: houses in Old Marston + +question: family houses with more than 2 bathrooms and more than 4 bedrooms + +question: houses close to Iffley Sport Centre + +question: houses in Oxford close to the train station + +question: houses in Summertown for less than 400000 pounds + +question: two floors houses in East Oxford + +question: brand new houses in Oxford for less than 500000 pounds + +question: houses close to Brookes University + +question: houses in Jericho area + +question: house close to Headington hospitals + +question: modern houses with gas central heating + +question: houses with electric heating + +question: houses less than 500000 within area OX16 + +question: houses close to an Italian restaurant + +question: houses at walking distance from a pharmacy + +question: houses at walking distance from Tesco or Sainsburys shops + +question: houses nearby Sheldonian Theatre + +question: houses with underfloor heating + +question: houses with wood floor + +question: houses close to The King's Arms pub + +question: houses with garden large at 
least 2 acres + +question: houses with many reception rooms + +question: houses built around 1950 + +question: houses with balcony + +question: houses with double glazed windows + +question: houses far from city centre + +question: 2 bedroom houses near oxford train station + +question: 4 bedroom detached houses in oxford + +question: studio apartments in summertown, Oxford + +question: freehold houses with 2 bedrooms and a living room in banbury + +question: houses in Oxford city centre with at most 2 bedrooms + +question: houses with garage within minutes of Oxford schools and in a quiet road + +question: victorian town houses in north Oxford + +question: terrace houses with west facing garden + +question: modernised end terrace houses with private parking + +question: three bedroom houses with open fireplace + +question: houses available from June 15th. + +question: houses on rawlinson road + +question: flats near supermarket + +question: flats with bill included + + + +question: give me flats in central Oxford with at least one bedroom below 1000 GBP a month? + +question: Give me all 2 bedroom flats in walking distance from the computer science departement! + +question: Give me all houses with 3 bedrooms or more, close to the train station with good shopping opportunities. + +question: find a property with 2 bedrooms close to some park. + +question: Give me all flats at roughly 1300 GBP the month, equally close to the computer science department and Christ Church College. + +question: Give me all flats in the area around Cowley Road with 2 bedrooms. + +question: Give me all furnished places with one bedroom close to the Radcliffe Camera. + +question: Give me all unfurnished houses with at least 2 bedrooms in Summertown. + +question: Give me all furnished flats with one bedroom for smokers. + +question: Give me all flats with parking in central Oxford. + +question: Give me all cheap places in Cowley. + +question: Give me all representative houses in Summertown. 
+ +question: find a property for sale, with 2 bedrooms, parking, close to shops. + +question: Give me all flats with a garden, one bedroom, in walking distance to the computer science departement + +question: Give me all places offered close to the train station? + +question: Give me all retirement houses for sale near Oxford. + +question: Give me all houses that I can BBQ. + +question: Give me all flats which are far from the river. + +question: Give me all flats which are close to three bars. + +question: What is the average price of furnished 1 bedroom apartments in Heddington? + +question: How many flats are offered in central Oxford below 1000 GBP a month? + +question: Which area in Oxford has the cheapest offers close to the Keble college [or some other landmark]? Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-14 14:54:54 UTC (rev 3740) @@ -2,7 +2,7 @@ // PREPOSITIONS close to || (NP NP* (PP P:'close' P:'to' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_closeto(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_closeto/OBJECTPROPERTY/near ]> - in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address ]> + in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> for . 
pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> Modified: trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt 2012-06-14 14:54:54 UTC (rev 3740) @@ -0,0 +1 @@ +http://dbpedia.org/property/near|close, near \ No newline at end of file Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -1,20 +1,21 @@ package org.dllearner.algorithm.tbsl; import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; +import java.io.FileWriter; import java.net.URL; import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.Map.Entry; import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; -import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SPARQLClassesIndex; import org.dllearner.common.index.SPARQLIndex; -import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.common.index.VirtuosoClassesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; +import org.dllearner.common.index.VirtuosoResourcesIndex; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; @@ 
-26,14 +27,15 @@ SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); ExtractionDBCache cache = new ExtractionDBCache("cache"); - SPARQLIndex resourcesIndex = new SPARQLIndex(endpoint, cache); - SPARQLIndex classesIndex = new SPARQLClassesIndex(endpoint, cache); - SPARQLIndex propertiesIndex = new SPARQLPropertiesIndex(endpoint, cache); + SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); + SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); MappingBasedIndex mappingIndex= new MappingBasedIndex( OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), - OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath(), - OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath()); + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + ); SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.setMappingIndex(mappingIndex); @@ -42,11 +44,14 @@ int learnedQuestions = 0; Map<String, String> question2QueryMap = new HashMap<String, String>(); - BufferedReader br = new BufferedReader(new FileReader(new File(QUERIES_FILE))); + BufferedReader in = new BufferedReader(new FileReader(new File(QUERIES_FILE))); + BufferedWriter out = new BufferedWriter(new FileWriter(new File("log/oxford_eval.txt"))); int questionNr = 0; + int errorCnt = 0; + int noQueryCnt = 0; String question = 
null; - while((question = br.readLine()) != null){ + while((question = in.readLine()) != null){ question = question.replace("question:", "").trim(); if(question.isEmpty()) continue; if(!question.toLowerCase().contains("Give me all") && Character.isLowerCase(question.charAt(0))){ @@ -62,17 +67,27 @@ if(learnedQuery != null){ question2QueryMap.put(question, learnedQuery); learnedQuestions++; + out.write("****************************************\n" + question + "\n" + learnedQuery + "\n****************************************"); + } else { + noQueryCnt++; + out.write("****************************************\n" + question + "\nNO QUERY WITH NON-EMPTY RESULTSET FOUND\n****************************************"); } } catch (Exception e) { e.printStackTrace(); + errorCnt++; + out.write("****************************************\n" + question + "\nERROR: " + e.getClass() + "\n****************************************"); } + out.flush(); } - System.out.println("Generated SPARQL queries for " + learnedQuestions + " questions."); - for(Entry<String, String> entry : question2QueryMap.entrySet()){ - System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); - System.out.println(entry.getKey()); - System.out.println(entry.getValue()); - } + out.write("################################\n"); + out.write("Questions with answer: " + learnedQuestions + "\n"); + out.write("Questions with no answer (and no error): " + noQueryCnt + "\n"); + out.write("Questions with error: " + errorCnt + "\n"); + + in.close(); + out.close(); + + } } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-13 20:51:06 UTC (rev 3739) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-14 14:54:54 UTC (rev 3740) @@ -10,10 +10,15 @@ import 
org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; import org.dllearner.common.index.Index; +import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLClassesIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.SPARQLPropertiesIndex; +import org.dllearner.common.index.VirtuosoClassesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; +import org.dllearner.common.index.VirtuosoResourcesIndex; +import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.junit.Test; @@ -34,24 +39,24 @@ protected void setUp() throws Exception { super.setUp(); endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); - model = ModelFactory.createOntologyModel(); - File dir = new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/data"); - try { - for(File f : dir.listFiles()){ - if(f.isFile()){ - System.out.println("Loading file " + f.getName()); - try { - model.read(new FileInputStream(f), null, "TURTLE"); - } catch (Exception e) { - System.err.println("Parsing failed."); - e.printStackTrace(); - } - } - } - model.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/ontology.ttl")), null, "TURTLE"); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } +// model = ModelFactory.createOntologyModel(); +// File dir = new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/data"); +// try { +// for(File f : dir.listFiles()){ +// if(f.isFile()){ +// System.out.println("Loading file " + f.getName()); +// try { +// model.read(new FileInputStream(f), null, "TURTLE"); +// } catch (Exception e) { +// System.err.println("Parsing failed."); +// e.printStackTrace(); +// } +// } +// } +// model.read(new 
FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/examples/ontology.ttl")), null, "TURTLE"); +// } catch (FileNotFoundException e) { +// e.printStackTrace(); +// } } @Test @@ -95,17 +100,24 @@ @Test public void testOxfordRemote() throws Exception{ + ExtractionDBCache cache = new ExtractionDBCache("cache"); + SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); + SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); + MappingBasedIndex mappingIndex= new MappingBasedIndex( + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + ); - Index resourcesIndex = new SPARQLIndex(endpoint); - Index classesIndex = new SPARQLClassesIndex(endpoint); - Index propertiesIndex = new SPARQLPropertiesIndex(endpoint); - SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); + learner.setMappingIndex(mappingIndex); learner.init(); String question = "Give me all houses near a school."; question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; - question = "Give me all houses with large garden and equipped kitchen"; + question = "Give me all Victorian houses in Oxfordshire"; learner.setQuestion(question); learner.learnSPARQLQueries(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-15 14:01:27
|
Revision: 3747 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3747&view=rev Author: lorenz_b Date: 2012-06-15 14:01:20 +0000 (Fri, 15 Jun 2012) Log Message: ----------- Updates for oxford eval. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Added Paths: ----------- trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt Removed Paths: ------------- trunk/components-ext/src/main/resources/tbsl/evaluation.txt Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-14 18:41:30 UTC (rev 3746) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-15 14:01:20 UTC (rev 3747) @@ -123,10 +123,12 @@ private Map<Template, Collection<? extends Query>> template2Queries; private Map<Slot, List<String>> slot2URI; - private Set<WeightedQuery> generatedQueries; + private SortedSet<WeightedQuery> generatedQueries; private SPARQLReasoner reasoner; + private String currentlyExecutedQuery; + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } @@ -277,6 +279,7 @@ learnedSPARQLQueries = new HashMap<String, Object>(); template2Queries = new HashMap<Template, Collection<? 
extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); + currentlyExecutedQuery = null; } public void learnSPARQLQueries() throws NoTemplateFoundException{ @@ -317,12 +320,12 @@ } - public Set<WeightedQuery> getGeneratedQueries() { + public SortedSet<WeightedQuery> getGeneratedQueries() { return generatedQueries; } - public Set<WeightedQuery> getGeneratedQueries(int topN) { - Set<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); + public SortedSet<WeightedQuery> getGeneratedQueries(int topN) { + SortedSet<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); int max = Math.min(topN, generatedQueries.size()); for(WeightedQuery wQ : generatedQueries){ topNQueries.add(wQ); @@ -382,7 +385,7 @@ } - private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ logger.info("Generating SPARQL query candidates..."); Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @@ -399,7 +402,7 @@ slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); - Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); + SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); Set<Allocation> allocations; @@ -864,6 +867,7 @@ } private boolean executeAskQuery(String query){ + currentlyExecutedQuery = query; QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), query); for(String uri : endpoint.getDefaultGraphURIs()){ qe.addDefaultGraph(uri); @@ -873,6 +877,7 @@ } private ResultSet executeSelect(String query) { + currentlyExecutedQuery = query; ResultSet rs; if (model == null) { if (cache == null) { @@ -890,6 +895,9 @@ return rs; } + public String getCurrentlyExecutedQuery() { + return currentlyExecutedQuery; + } public int getLearnedPosition() { if(learnedPos >= 0){ Deleted: trunk/components-ext/src/main/resources/tbsl/evaluation.txt 
=================================================================== --- trunk/components-ext/src/main/resources/tbsl/evaluation.txt 2012-06-14 18:41:30 UTC (rev 3746) +++ trunk/components-ext/src/main/resources/tbsl/evaluation.txt 2012-06-15 14:01:20 UTC (rev 3747) @@ -1,149 +0,0 @@ -question: houses in Headington - -question: houses in Abingdon with more than 2 bedrooms - -question: houses with garden in Wheatley - -question: detached houses in Oxford - -question: Victorian houses in Oxfordshire - -question: Edwardian house in Oxfordshire for less than 1000000 - -question: houses with double garage - -question: houses with large garden and equipped kitchen - -question: houses with more than 1 reception room - -question: houses in Didcot furnished to a high standard - -question: houses with conservatory room and less than 900000 pounds - -question: detached bungalows in Oxfordshire - -question: houses in Old Marston - -question: family houses with more than 2 bathrooms and more than 4 bedrooms - -question: houses close to Iffley Sport Centre - -question: houses in Oxford close to the train station - -question: houses in Summertown for less than 400000 pounds - -question: two floors houses in East Oxford - -question: brand new houses in Oxford for less than 500000 pounds - -question: houses close to Brookes University - -question: houses in Jericho area - -question: house close to Headington hospitals - -question: modern houses with gas central heating - -question: houses with electric heating - -question: houses less than 500000 within area OX16 - -question: houses close to an Italian restaurant - -question: houses at walking distance from a pharmacy - -question: houses at walking distance from Tesco or Sainsburys shops - -question: houses nearby Sheldonian Theatre - -question: houses with underfloor heating - -question: houses with wood floor - -question: houses close to The King's Arms pub - -question: houses with garden large at least 2 acres - -question: houses 
with many reception rooms - -question: houses built around 1950 - -question: houses with balcony - -question: houses with double glazed windows - -question: houses far from city centre - -question: 2 bedroom houses near oxford train station - -question: 4 bedroom detached houses in oxford - -question: studio apartments in summertown, Oxford - -question: freehold houses with 2 bedrooms and a living room in banbury - -question: houses in Oxford city centre with at most 2 bedrooms - -question: houses with garage within minutes of Oxford schools and in a quiet road - -question: victorian town houses in north Oxford - -question: terrace houses with west facing garden - -question: modernised end terrace houses with private parking - -question: three bedroom houses with open fireplace - -question: houses available from June 15th. - -question: houses on rawlinson road - -question: flats near supermarket - -question: flats with bill included - - - -question: give me flats in central Oxford with at least one bedroom below 1000 GBP a month? - -question: Give me all 2 bedroom flats in walking distance from the computer science departement! - -question: Give me all houses with 3 bedrooms or more, close to the train station with good shopping opportunities. - -question: find a property with 2 bedrooms close to some park. - -question: Give me all flats at roughly 1300 GBP the month, equally close to the computer science department and Christ Church College. - -question: Give me all flats in the area around Cowley Road with 2 bedrooms. - -question: Give me all furnished places with one bedroom close to the Radcliffe Camera. - -question: Give me all unfurnished houses with at least 2 bedrooms in Summertown. - -question: Give me all furnished flats with one bedroom for smokers. - -question: Give me all flats with parking in central Oxford. - -question: Give me all cheap places in Cowley. - -question: Give me all representative houses in Summertown. 
- -question: find a property for sale, with 2 bedrooms, parking, close to shops. - -question: Give me all flats with a garden, one bedroom, in walking distance to the computer science departement - -question: Give me all places offered close to the train station? - -question: Give me all retirement houses for sale near Oxford. - -question: Give me all houses that I can BBQ. - -question: Give me all flats which are far from the river. - -question: Give me all flats which are close to three bars. - -question: What is the average price of furnished 1 bedroom apartments in Heddington? - -question: How many flats are offered in central Oxford below 1000 GBP a month? - -question: Which area in Oxford has the cheapest offers close to the Keble college [or some other landmark]? Added: trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt 2012-06-15 14:01:20 UTC (rev 3747) @@ -0,0 +1,79 @@ +question: Give me all houses close to The Ashmolean Museum, with more than one bedroom + +question: Give me all houses in Botley Road + +question: Give me all houses in Littlemore + +question: Give me all houses that cost between 600000 and 800000 pounds + +question: Give me all houses with 3 bedrooms in Florence Park Road + +question: Give me all houses with front garden and rear garden + +//question: Give me all houses with 2 bathrooms, 5 bedrooms and at least 1 reception + +question: Give me all houses in Oxford, close to Railway Station + +question: Give me all houses with ample parking + +question: Give me all house with electric central heating system + +question: Give me all houses with three en-suites near Oxford + +question: Give me all houses with countryside views + +question: Give me all houses with farmland 
views + +question: Give me all houses nearby River Thames + +question: Give me all houses having one utility room or cloakroom + +question: Give me all houses in Oxfordshire with fireplaces + +question: Give me all houses with open plan kitchen near Oxford + +question: Give me all houses with walled garden near Oxford + +question: Give me all houses with river views + +question: Give me all houses with gated entrance or in a gated community + +question: Give me all recently refurbished houses with lift (access) + +question: Give me all houses with balcony and vaulted ceiling + +question: Give me all houses that are a maisonette and on Gloucester Green + +question: Give me all houses in a corner or end-of-terrace plot + +question: Give me all houses with 3 bedrooms, but cheaper than 150000 GBP + +question: Give me all houses with at least 2 reception rooms and a garden + +question: Give me all houses in/with a courtyard + +question: Give me all houses with fireplace and a garden that is not communal + +question: Give me all houses with parking but not Victorian + +//question: Give me all houses with 2-5 bedrooms, a balcony, and a price less than 450000 GBP + +question: Give me all houses with 3 bathrooms, but not en-suite + +question: Give me all houses in a retirement complex + +question: Give me all houses with double glazing and central heating + +question: Give me all houses with “no upper chain” (means immediately available) + +question: Give me all houses listed Grade I or Grade II (means “under protection for age …”) + +question: Give me all houses that have recently replumbed + +question: Give me all houses in Witney or Wolvercote + +question: Give me all houses with more than 2 bedrooms, but not in Marston + +question: Give me all houses in Banbury Road + +question: Give me all houses in the area of (John) Radcliffe hospital \ No newline at end of file Copied: trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt (from rev 3746, 
trunk/components-ext/src/main/resources/tbsl/evaluation.txt) =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt 2012-06-15 14:01:20 UTC (rev 3747) @@ -0,0 +1,149 @@ +question: houses in Headington + +question: houses in Abingdon with more than 2 bedrooms + +question: houses with garden in Wheatley + +question: detached houses in Oxford + +question: Victorian houses in Oxfordshire + +question: Edwardian house in Oxfordshire for less than 1000000 + +question: houses with double garage + +question: houses with large garden and equipped kitchen + +question: houses with more than 1 reception room + +question: houses in Didcot furnished to a high standard + +question: houses with conservatory room and less than 900000 pounds + +question: detached bungalows in Oxfordshire + +question: houses in Old Marston + +question: family houses with more than 2 bathrooms and more than 4 bedrooms + +question: houses close to Iffley Sport Centre + +question: houses in Oxford close to the train station + +question: houses in Summertown for less than 400000 pounds + +question: two floors houses in East Oxford + +question: brand new houses in Oxford for less than 500000 pounds + +question: houses close to Brookes University + +question: houses in Jericho area + +question: house close to Headington hospitals + +question: modern houses with gas central heating + +question: houses with electric heating + +question: houses less than 500000 within area OX16 + +question: houses close to an Italian restaurant + +question: houses at walking distance from a pharmacy + +question: houses at walking distance from Tesco or Sainsburys shops + +question: houses nearby Sheldonian Theatre + +question: houses with underfloor heating + +question: houses with wood floor + +question: houses close to The King's Arms pub + +question: houses 
with garden large at least 2 acres + +question: houses with many reception rooms + +question: houses built around 1950 + +question: houses with balcony + +question: houses with double glazed windows + +question: houses far from city centre + +question: 2 bedroom houses near oxford train station + +question: 4 bedroom detached houses in oxford + +question: studio apartments in summertown, Oxford + +question: freehold houses with 2 bedrooms and a living room in banbury + +question: houses in Oxford city centre with at most 2 bedrooms + +question: houses with garage within minutes of Oxford schools and in a quiet road + +question: victorian town houses in north Oxford + +question: terrace houses with west facing garden + +question: modernised end terrace houses with private parking + +question: three bedroom houses with open fireplace + +question: houses available from June 15th. + +question: houses on rawlinson road + +question: flats near supermarket + +question: flats with bill included + + + +question: give me all flats in central Oxford with at least one bedroom below 1000 GBP a month. + +question: Give me all 2 bedroom flats in walking distance from the computer science departement! + +//question: Give me all houses with 3 bedrooms or more, close to the train station with good shopping opportunities. + +question: find a property with 2 bedrooms close to some park. + +//question: Give me all flats at roughly 1300 GBP the month, equally close to the computer science department and Christ Church College. + +question: Give me all flats in the area around Cowley Road with 2 bedrooms. + +question: Give me all furnished places with one bedroom close to the Radcliffe Camera. + +question: Give me all unfurnished houses with at least 2 bedrooms in Summertown. + +question: Give me all furnished flats with one bedroom for smokers. + +question: Give me all flats with parking in central Oxford. + +question: Give me all cheap places in Cowley. 
+ +question: Give me all representative houses in Summertown. + +//question: find a property for sale, with 2 bedrooms, parking, close to shops. + +//question: Give me all flats with a garden, one bedroom, in walking distance to the computer science departement + +question: Give me all places offered close to the train station? + +question: Give me all retirement houses for sale near Oxford. + +question: Give me all houses that I can BBQ. + +question: Give me all flats which are far from the river. + +question: Give me all flats which are close to three bars. + +question: What is the average price of furnished 1 bedroom apartments in Heddington? + +question: How many flats are offered in central Oxford below 1000 GBP a month? + +question: Which area in Oxford has the cheapest offers close to the Keble college [or some other landmark]? Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-14 18:41:30 UTC (rev 3746) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-15 14:01:20 UTC (rev 3747) @@ -10,6 +10,7 @@ import java.util.HashMap; import java.util.Map; +import org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SPARQLIndex; @@ -19,13 +20,20 @@ import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; +import com.hp.hpl.jena.query.QueryParseException; +import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; + public class OxfordEvaluation { - private static final String QUERIES_FILE = OxfordEvaluation.class.getClassLoader().getResource("tbsl/evaluation.txt").getPath(); + private static final String 
QUERIES_FILE1 = OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_eval_queries.txt").getPath(); + private static final String QUERIES_FILE2 = OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_eval_additional_queries.txt").getPath(); + private static final String LOG_DIRECTORY = "log/oxford/"; + private static final String LOG_FILE = "evaluation.txt"; public static void main(String[] args) throws Exception{ SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); ExtractionDBCache cache = new ExtractionDBCache("cache"); + new File(LOG_DIRECTORY).mkdirs(); SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); @@ -44,45 +52,69 @@ int learnedQuestions = 0; Map<String, String> question2QueryMap = new HashMap<String, String>(); - BufferedReader in = new BufferedReader(new FileReader(new File(QUERIES_FILE))); - BufferedWriter out = new BufferedWriter(new FileWriter(new File("log/oxford_eval.txt"))); + BufferedReader in = new BufferedReader(new FileReader(new File(QUERIES_FILE2))); + BufferedWriter out = new BufferedWriter(new FileWriter(new File(LOG_DIRECTORY + LOG_FILE), false)); - int questionNr = 0; + int questionCnt = 0; int errorCnt = 0; - int noQueryCnt = 0; + int noTemplateFoundCnt = 0; + int noQueryWithNonEmptyResultSetCnt = 0; String question = null; while((question = in.readLine()) != null){ question = question.replace("question:", "").trim(); - if(question.isEmpty()) continue; + if(question.isEmpty() || question.startsWith("//")) continue; if(!question.toLowerCase().contains("Give me all") && Character.isLowerCase(question.charAt(0))){ question = "Give me all " + question; } System.out.println("########################################################"); - questionNr++; + questionCnt++; System.out.println(question); try { + 
out.write("****************************************\n"); + out.write("QUESTION: " + question + "\n"); learner.setQuestion(question); learner.learnSPARQLQueries(); String learnedQuery = learner.getBestSPARQLQuery(); if(learnedQuery != null){ question2QueryMap.put(question, learnedQuery); learnedQuestions++; - out.write("****************************************\n" + question + "\n" + learnedQuery + "\n****************************************"); + out.write("ANSWER FOUND: YES\n"); + out.write(learnedQuery + "\n"); } else { - noQueryCnt++; - out.write("****************************************\n" + question + "\nNO QUERY WITH NON-EMPTY RESULTSET FOUND\n****************************************"); + noQueryWithNonEmptyResultSetCnt++; + out.write("ANSWER FOUND: NO\n"); + out.write("REASON: NO SPARQL QUERY WITH NON-EMPTY RESULTSET FOUND\n"); + out.write("SPARQL QUERY WITH HIGHEST SCORE TESTED:\n" + learner.getGeneratedQueries().first()); + } } catch (Exception e) { e.printStackTrace(); + out.write("ANSWER FOUND: NO\n"); + if(e instanceof NoTemplateFoundException){ + noTemplateFoundCnt++; + out.write("REASON: NO TEMPLATE FOUND"); + } else { + errorCnt++; + out.write("REASON: ERROR OCCURED (" + e.getClass() + ")\n"); + if(e instanceof QueryExceptionHTTP || e instanceof QueryParseException){ + out.write("\nLast tested SPARQL query: " + learner.getCurrentlyExecutedQuery()); + } + } + } catch (Error e){ + e.printStackTrace(); + out.write("ANSWER FOUND: NO\n"); errorCnt++; - out.write("****************************************\n" + question + "\nERROR: " + e.getClass() + "\n****************************************"); + out.write("REASON: ERROR OCCURED (" + e.getClass() + ")\n"); } + out.write("\n****************************************"); out.flush(); } - out.write("################################\n"); - out.write("Questions with answer: " + learnedQuestions + "\n"); - out.write("Questions with no answer (and no error): " + noQueryCnt + "\n"); - out.write("Questions with error: " 
+ errorCnt + "\n"); + out.write("\n\n###################SUMMARY################\n"); + out.write("Questions tested:\t" + questionCnt + "\n"); + out.write("Questions with answer:\t" + learnedQuestions + "\n"); + out.write("Questions with no answer (and no error):\t" + noQueryWithNonEmptyResultSetCnt + "\n"); + out.write("Questions with no templates:\t" + noTemplateFoundCnt + "\n"); + out.write("Questions with other errors:\t" + errorCnt + "\n"); in.close(); out.close(); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-14 18:41:30 UTC (rev 3746) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-15 14:01:20 UTC (rev 3747) @@ -118,6 +118,8 @@ String question = "Give me all houses near a school."; question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; question = "Give me all Victorian houses in Oxfordshire"; + question = "Give me all Edwardian house in Oxfordshire for less than 1000000."; +// question = "Give me all family houses with more than 2 bathrooms and more than 4 bedrooms"; learner.setQuestion(question); learner.learnSPARQLQueries(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-06-15 16:39:51
|
Revision: 3751 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3751&view=rev Author: christinaunger Date: 2012-06-15 16:39:44 +0000 (Fri, 15 Jun 2012) Log Message: ----------- [tbsl] final changes Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2012-06-15 16:39:44 UTC (rev 3751) @@ -54,7 +54,7 @@ replacements.addAll(Arrays.asList(genericReplacements)); replacements.addAll(Arrays.asList(hackReplacements)); - s = s.replaceAll(",\\s"," and "); + s = s.replaceAll(",\\s"," and ").replaceAll(" and but "," but "); for (int i = 0; i < replacements.size(); i += 2) { s = s.replaceAll(replacements.get(i), replacements.get(i + 1)); } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/SlotBuilder.java 2012-06-15 16:39:44 UTC 
(rev 3751) @@ -287,6 +287,10 @@ "<x,l1,t,[ l1:[|], l4:[ | SLOT_" + token + "(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[" + symslot + "]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(x,y) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>" + " ;; <x,l1,t,[ l1:[|], l4:[ | empty(y,x) ] ],[(l2,x,subj,<<e,t>,t>),(l3,y,obj,<<e,t>,t>)],[ l2<l1,l3<l1,l4<scope(l2),l4<scope(l3) ],[]>"}; + String[] adjEntry = {token, + "(NP ADJ:'"+token+"' NP*)", + "<x,l1,<e,t>,[ l1:[ | SLOT_description(x,y), regex(y,'"+ token +"')] ],[],[],[ SLOT_description/DATATYPEPROPERTY/description ]>"}; + result.add(adjEntry); result.add(vEntry); } else if (pos.equals("VB")) { Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english.lex 2012-06-15 16:39:44 UTC (rev 3751) @@ -45,6 +45,7 @@ show || (S (VP V:'show' DP[object])) || <x,l1,t,[ l1:[ ?x | x=y ] ],[ (l2,y,object,<<e,t>,t>) ],[ l1=l2 ],[]> list me || (S (VP V:'list' (DP N:'me') DP[object])) || <x,l1,t,[ l1:[ ?x | x=y ] ],[ (l2,y,object,<<e,t>,t>) ],[ l1=l2 ],[]> list || (S (VP V:'list' DP[object])) || <x,l1,t,[ l1:[ ?x | x=y ] ],[ (l2,y,object,<<e,t>,t>) ],[ l1=l2 ],[]> + find || (S (VP V:'find' DP[object])) || <x,l1,t,[ l1:[ ?x | x=y ] ],[ (l2,y,object,<<e,t>,t>) ],[ l1=l2 ],[]> // DETERMINER @@ -143,6 +144,10 @@ and || (NP NP* CC:'and' NP[np]) || <x,l1,<e,t>,[l1:[|x=y]],[(l2,y,np,<e,t>)],[l1=l2],[]> and || (VP VP* CC:'and' VP[vp]) || - and || (ADJ ADJ* CC:'and' ADJ[adj]) || - + + but || (S S* CC:'but' S[s]) || <x,l1,t,[l1:[|]],[(l2,y,s,t)],[l1=l2],[]> + but || (DP DP* CC:'but' DP[dp]) || <x,l1,<<e,t>,t>,[l1:[|]],[(l2,y,dp,<<e,t>,t>)],[l1=l2],[]> + but || (NP NP* CC:'but' NP[np]) || 
<x,l1,<e,t>,[l1:[|x=y]],[(l2,y,np,<e,t>)],[l1=l2],[]> as well as || (NP NP* CC:'as' CC:'well' CC:'as' NP[np]) || <x,l1,<e,t>,[l1:[|]],[(l2,y,np,<e,t>)],[l1=l2],[]> Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-15 16:39:44 UTC (rev 3751) @@ -13,10 +13,13 @@ for . pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for more than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for less than . pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than . pounds || (NP NP* (PP P:'for' DET:'less' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + cheaper than . 
pounds || (NP NP* (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + below . pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> from . to . pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + between . and . pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> square meters || (DP N:'square' N:'meters') || <x,l1,<<e,t>,t>>, [l1:[ | SLOT_size(x,y) ]], [],[],[SLOT_size/DATATYPEPROPERTY/size ]> Modified: trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/resources/tbsl/oxford_eval_additional_queries.txt 2012-06-15 16:39:44 UTC (rev 3751) @@ -38,7 +38,7 @@ question: Give me all houses with gated entrance or in a gated community -question: Give me all recently refurbished houses with lift (access) +question: Give me all recently refurbished houses with lift question: Give me all houses 
with balcony and vaulted ceiling @@ -46,17 +46,17 @@ question: Give me all houses in a corner or end-of-terrace plot -question: Give me all houses with 3 bedrooms, but cheaper than 150000 GBP +question: Give me all houses with 3 bedrooms, but cheaper than 150000 pounds question: Give me all houses with at least 2 reception rooms and a garden -question: Give me all houses in/with a courtyard +question: Give me all houses with a courtyard question: Give me all houses with fireplace and a garden that is not communal question: Give me all houses with parking but not Victorian -//question: Give me all houses with 2-5 bedrooms, a balcony, and a price less than 450000 GBP +//question: Give me all houses with 2 to 5 bedrooms, a balcony, and a price less than 450000 pounds question: Give me all houses with 3 bathrooms, but not en-suite @@ -64,9 +64,9 @@ question: Give me all houses with double glazing and central heating -question: Give me all houses with “no upper chain” (means immediately available) +question: Give me all houses immediately available -question: Give me all houses listed Grade I or Grade II (means “under protection for age …”) +question: Give me all houses listed Grade I or Grade II question: Give me all houses that have recently replumbed @@ -76,4 +76,4 @@ question: Give me all houses in Banbury Road -question: Give me all houses in the area of (John) Radcliffe hospital \ No newline at end of file +question: Give me all houses in the area of John Radcliffe hospital \ No newline at end of file Modified: trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/main/resources/tbsl/oxford_eval_queries.txt 2012-06-15 16:39:44 UTC (rev 3751) @@ -58,7 +58,7 @@ question: houses close to The King's Arms pub -question: houses with garden large at least 
2 acres +question: houses with a garden that is at least 2 acres big question: houses with many reception rooms @@ -68,13 +68,13 @@ question: houses with double glazed windows -question: 2 bedroom houses near oxford train station +question: 2 bedroom houses near Oxford train station question: 4 bedroom detached houses in Oxford -question: studio apartments in summertown, Oxford +question: studio apartments in Summertown, Oxford -question: freehold houses with 2 bedrooms and a living room in banbury +question: freehold houses with 2 bedrooms and a living room in Banbury question: houses in Oxford city centre with at most 2 bedrooms @@ -94,7 +94,7 @@ question: flats near supermarket -question: give me flats in central Oxford with at least one bedroom below 1000 GBP a month? +question: give me flats in central Oxford with at least one bedroom below 1000 pounds a month? question: find 2 bedroom flats in walking distance from the computer science departement! @@ -122,7 +122,7 @@ question: what is the average price of furnished 1 bedroom apartments in Heddington? -question: how many flats are offered in central Oxford below 1000 GBP a month? +question: how many flats are offered in central Oxford below 1000 pounds a month? 
question: houses close to The Ashmolean Museum, with more than one bedroom @@ -162,7 +162,7 @@ question: houses with gated entrance or in a gated community -question: recently refurbished houses with lift (access) +question: recently refurbished houses with lift question: house with balcony and vaulted ceiling @@ -170,7 +170,7 @@ question: house in a corner or end-of-terrace plot -question: house with 3 bedrooms, but cheaper than 150000 GBP +question: house with 3 bedrooms, but cheaper than 150000 pounds question: house with at least 2 reception rooms and a garden @@ -186,11 +186,11 @@ question: house with double glazing and central heating -question: house with “no upper chain” (means immediately available) +question: house with immediately available -question: house listed Grade I or Grade II (means “under protection for age …”) +question: house listed Grade I or Grade II -question: house that has recently replumbed +question: house that has been recently replumbed question: house in Witney or Wolvercote @@ -198,4 +198,4 @@ question: house in Banbury Road -question: house in the area of (John) Radcliffe hospital +question: house in the area of John Radcliffe hospital Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-15 14:46:46 UTC (rev 3750) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-15 16:39:44 UTC (rev 3751) @@ -63,9 +63,9 @@ while((question = in.readLine()) != null){ question = question.replace("question:", "").trim(); if(question.isEmpty() || question.startsWith("//")) continue; - if(!question.toLowerCase().contains("Give me all") && Character.isLowerCase(question.charAt(0))){ - question = "Give me all " + question; - } + //if(!question.toLowerCase().contains("Give me all") && 
Character.isLowerCase(question.charAt(0))){ + // question = "Give me all " + question; + //} System.out.println("########################################################"); questionCnt++; System.out.println(question); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-18 12:51:56
|
Revision: 3760 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3760&view=rev Author: lorenz_b Date: 2012-06-18 12:51:44 +0000 (Mon, 18 Jun 2012) Log Message: ----------- Added further lexicon entries for Oxford data. Extended eval output. Added some manual mappings. Modified Paths: -------------- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-18 10:50:46 UTC (rev 3759) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-06-18 12:51:44 UTC (rev 3760) @@ -15,21 +15,22 @@ in walking distance from || (PP P:'in' (NP N:'walking' N:'distance' P:'from' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> at walking distance from || (NP NP* (PP P:'at' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> at walking distance from || (PP P:'at' (NP N:'walking' N:'distance' P:'from' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> - + in the area || (NP NP* (PP P:'in' (DP DET:'the' (NP N:'area' DP[dp])))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near]> in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | 
SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> + on || (NP NP* (PP P:'on' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> - for .+ pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for more than .+ pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for less than .+ pounds || (NP NP* (PP P:'for' DET:'less' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - cheaper than .+ pounds || (NP NP* (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - cheaper than .+ pounds || (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - below .+ pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || 
<x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - below .+ pounds || (PP P:'below' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - from .+ to .+ pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> - between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for .+ pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + for more than .+ pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + for less than .+ pounds || (NP NP* (PP P:'for' DET:'less' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[v | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[v | 
SLOT_includes(v,x), SLOT_price(v,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + cheaper than .+ pounds || (NP NP* (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + cheaper than .+ pounds || (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + below .+ pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + below .+ pounds || (PP P:'below' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + from .+ to .+ pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | 
SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> with || (PP P:'with' DP[dp]) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> Modified: trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-06-18 10:50:46 UTC (rev 3759) +++ trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-06-18 12:51:44 UTC (rev 3760) @@ -1,3 +1,5 @@ http://www.w3.org/2006/vcard/ns#street-address|address, location, postal code http://www.w3.org/2006/vcard/ns#locality|address, location -http://purl.org/goodrelations/v1#description|description \ No newline at end of file +http://purl.org/goodrelations/v1#description|description +http://purl.org/goodrelations/v1#hasPrice|has price, price +http://diadem.cs.ox.ac.uk/ontologies/real-estate#receptions|receptions, reception room, reception rooms \ No newline at end of file Modified: trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt 2012-06-18 10:50:46 UTC (rev 3759) +++ trunk/components-ext/src/main/resources/tbsl/oxford_objectproperty_mappings.txt 2012-06-18 12:51:44 UTC (rev 3760) @@ -1 +1 @@ -http://dbpedia.org/property/near|close, near \ No newline at end of file +http://dbpedia.org/property/near|close, near, at walking distance \ No newline at end of file Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 
=================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-18 10:50:46 UTC (rev 3759) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-06-18 12:51:44 UTC (rev 3760) @@ -22,11 +22,12 @@ import com.hp.hpl.jena.query.QueryParseException; import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; +import com.jamonapi.Monitor; +import com.jamonapi.MonitorFactory; public class OxfordEvaluation { - private static final String QUERIES_FILE1 = OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_eval_queries.txt").getPath(); - private static final String QUERIES_FILE2 = OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_eval_additional_queries.txt").getPath(); + private static final String QUERIES_FILE = OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_eval_queries.txt").getPath(); private static final String LOG_DIRECTORY = "log/oxford/"; private static final String LOG_FILE = "evaluation.txt"; @@ -52,8 +53,14 @@ int learnedQuestions = 0; Map<String, String> question2QueryMap = new HashMap<String, String>(); - BufferedReader in = new BufferedReader(new FileReader(new File(QUERIES_FILE1))); + Monitor mon = MonitorFactory.getTimeMonitor("tbsl"); + + BufferedReader in = new BufferedReader(new FileReader(new File(QUERIES_FILE))); BufferedWriter out = new BufferedWriter(new FileWriter(new File(LOG_DIRECTORY + LOG_FILE), false)); + BufferedWriter answerOut = new BufferedWriter(new FileWriter(new File(LOG_DIRECTORY + "questionsWithAnswer.txt"), false)); + BufferedWriter noAnswerOut = new BufferedWriter(new FileWriter(new File(LOG_DIRECTORY + "questionsWithNoAnswer.txt"), false)); + BufferedWriter templatesOut = new BufferedWriter(new FileWriter(new File(LOG_DIRECTORY + "questionsWithTemplate.txt"), false)); + BufferedWriter noTemplatesOut = new BufferedWriter(new FileWriter(new 
File(LOG_DIRECTORY + "questionsWithNoTemplate.txt"), false)); int questionCnt = 0; int errorCnt = 0; @@ -73,26 +80,49 @@ out.write("****************************************\n"); out.write("QUESTION: " + question + "\n"); learner.setQuestion(question); + mon.start(); learner.learnSPARQLQueries(); + mon.stop(); String learnedQuery = learner.getBestSPARQLQuery(); if(learnedQuery != null){ question2QueryMap.put(question, learnedQuery); learnedQuestions++; out.write("ANSWER FOUND: YES\n"); out.write(learnedQuery + "\n"); + + answerOut.write("****************************************\n"); + answerOut.write("QUESTION: " + question + "\n"); + answerOut.write("ANSWER FOUND: YES\n"); + answerOut.write(learnedQuery + "\n"); + answerOut.write("TIME NEEDED: " + mon.getLastValue() + "ms\n"); + answerOut.flush(); + } else { noQueryWithNonEmptyResultSetCnt++; out.write("ANSWER FOUND: NO\n"); out.write("REASON: NO SPARQL QUERY WITH NON-EMPTY RESULTSET FOUND\n"); out.write("SPARQL QUERY WITH HIGHEST SCORE TESTED:\n" + learner.getGeneratedQueries().first()); + noAnswerOut.write("****************************************\n"); + noAnswerOut.write("QUESTION: " + question + "\n"); + noAnswerOut.write("ANSWER FOUND: NO\n"); + noAnswerOut.write("REASON: NO SPARQL QUERY WITH NON-EMPTY RESULTSET FOUND\n"); + noAnswerOut.write("SPARQL QUERY WITH HIGHEST SCORE TESTED:\n" + learner.getGeneratedQueries().first() + "\n"); + noAnswerOut.write("TIME NEEDED: " + mon.getLastValue() + "ms\n"); + noAnswerOut.flush(); + } + templatesOut.write(question + "\n"); + templatesOut.flush(); } catch (Exception e) { + mon.stop(); e.printStackTrace(); out.write("ANSWER FOUND: NO\n"); if(e instanceof NoTemplateFoundException){ noTemplateFoundCnt++; out.write("REASON: NO TEMPLATE FOUND"); + noTemplatesOut.write(question + "\n"); + noTemplatesOut.flush(); } else { errorCnt++; out.write("REASON: ERROR OCCURED (" + e.getClass() + ")\n"); @@ -100,7 +130,9 @@ out.write("\nLast tested SPARQL query: " + 
learner.getCurrentlyExecutedQuery()); } } + } catch (Error e){ + mon.stop(); e.printStackTrace(); out.write("ANSWER FOUND: NO\n"); errorCnt++; @@ -111,6 +143,10 @@ } out.write("\n\n###################SUMMARY################\n"); out.write("Questions tested:\t" + questionCnt + "\n"); + out.write("Overall time:\t" + mon.getTotal() + "ms\n"); + out.write("Avg. time per question:\t" + mon.getAvg() + "ms\n"); + out.write("Longest time:\t" + mon.getMax() + "ms\n"); + out.write("Shortest time:\t" + mon.getMin() + "ms\n"); out.write("Questions with answer:\t" + learnedQuestions + "\n"); out.write("Questions with no answer (and no error):\t" + noQueryWithNonEmptyResultSetCnt + "\n"); out.write("Questions with no templates:\t" + noTemplateFoundCnt + "\n"); @@ -118,6 +154,8 @@ in.close(); out.close(); + templatesOut.close(); + noTemplatesOut.close(); } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-18 10:50:46 UTC (rev 3759) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-06-18 12:51:44 UTC (rev 3760) @@ -118,7 +118,7 @@ String question = "Give me all houses near a school."; question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; question = "Give me all Victorian houses in Oxfordshire"; - question = "Give me all Edwardian house in Oxfordshire for less than 1000000."; + question = "modern houses with gas central heating"; // question = "Give me all family houses with more than 2 bathrooms and more than 4 bedrooms"; learner.setQuestion(question); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-07-02 11:58:36
|
Revision: 3769 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3769&view=rev Author: lorenz_b Date: 2012-07-02 11:58:25 +0000 (Mon, 02 Jul 2012) Log Message: ----------- Removed some unused classes and add option to filter predicates. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/CLI.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/IndexEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/common/index/ModelGenerator.java Removed Paths: ------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/cache/ModelCache.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/util/ModelGenerator.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ModelCreationTest.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java 2012-07-02 11:47:59 UTC (rev 3768) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java 2012-07-02 11:58:25 UTC (rev 3769) @@ -31,7 +31,6 @@ import org.apache.commons.collections15.ListUtils; import org.apache.log4j.Logger; -import org.dllearner.algorithm.qtl.cache.ModelCache; import org.dllearner.algorithm.qtl.cache.QueryTreeCache; import org.dllearner.algorithm.qtl.datastructures.QueryTree; import 
org.dllearner.algorithm.qtl.datastructures.impl.QueryTreeImpl; @@ -42,7 +41,6 @@ import org.dllearner.algorithm.qtl.operations.NBR; import org.dllearner.algorithm.qtl.operations.lgg.LGGGenerator; import org.dllearner.algorithm.qtl.operations.lgg.LGGGeneratorImpl; -import org.dllearner.algorithm.qtl.util.ModelGenerator; import org.dllearner.algorithm.qtl.util.SPARQLEndpointEx; import org.dllearner.core.AbstractComponent; import org.dllearner.core.AbstractLearningProblem; @@ -56,6 +54,9 @@ import org.dllearner.core.options.IntegerConfigOption; import org.dllearner.core.owl.Individual; import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.CachingConciseBoundedDescriptionGenerator; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; @@ -92,8 +93,6 @@ private ExtractionDBCache cache; private QueryTreeCache treeCache; - private ModelGenerator modelGen; - private ModelCache modelCache; private LGGGenerator<String> lggGenerator; private NBR<String> nbr; @@ -106,6 +105,8 @@ private QueryTreeFilter queryTreeFilter; + private ConciseBoundedDescriptionGenerator cbdGenerator; + private int maxExecutionTimeInSeconds = 60; private int maxQueryTreeDepth = 2; @@ -138,9 +139,8 @@ this.cache = cache; treeCache = new QueryTreeCache(); - modelGen = new ModelGenerator(endpoint, endpoint.getPredicateFilters(), cache); - modelCache = new ModelCache(modelGen); - modelCache.setRecursionDepth(maxQueryTreeDepth); + cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(endpoint, cache)); + cbdGenerator.setRecursionDepth(maxQueryTreeDepth); lggGenerator = new LGGGeneratorImpl<String>(); nbr = new NBR<String>(endpoint, cache); @@ -208,7 +208,7 @@ public void setMaxQueryTreeDepth(int maxQueryTreeDepth){ 
this.maxQueryTreeDepth = maxQueryTreeDepth; - modelCache.setRecursionDepth(maxQueryTreeDepth); + cbdGenerator.setRecursionDepth(maxQueryTreeDepth); } public String getSPARQLQuery(){ @@ -218,6 +218,10 @@ return lgg.toSPARQLQueryString(); } + public void setRestrictToNamespaces(List<String> namespaces){ + cbdGenerator.setRestrictToNamespaces(namespaces); + } + private void generatePositiveExampleTrees(){ posExampleTrees.clear(); posExampleTrees.addAll(getQueryTrees(posExamples)); @@ -236,7 +240,7 @@ if(logger.isDebugEnabled()){ logger.debug("Tree for resource " + resource); } - model = modelCache.getModel(resource); + model = cbdGenerator.getConciseBoundedDescription(resource); tree = treeCache.getQueryTree(resource, model); if(logger.isDebugEnabled()){ logger.debug(tree.getStringRepresentation()); @@ -324,9 +328,8 @@ endpoint = endpointKS.getEndpoint(); treeCache = new QueryTreeCache(); - modelGen = new ModelGenerator(endpoint); - modelCache = new ModelCache(modelGen); - modelCache.setRecursionDepth(maxQueryTreeDepth); + cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(endpoint, cache)); + cbdGenerator.setRecursionDepth(maxQueryTreeDepth); lggGenerator = new LGGGeneratorImpl<String>(); nbr = new NBR<String>(endpoint); Deleted: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/cache/ModelCache.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/cache/ModelCache.java 2012-07-02 11:47:59 UTC (rev 3768) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/cache/ModelCache.java 2012-07-02 11:58:25 UTC (rev 3769) @@ -1,46 +0,0 @@ -package org.dllearner.algorithm.qtl.cache; - -import java.util.HashMap; -import java.util.Map; - -import org.dllearner.algorithm.qtl.util.ModelGenerator; -import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; - -import com.hp.hpl.jena.rdf.model.Model; - -public class 
ModelCache { - - private Map<String, Model> cache; - private ModelGenerator modelGen; - - private int recursionDepth = 2; - - - public ModelCache(ModelGenerator modelGen){ - this.modelGen = modelGen; - - cache = new HashMap<String, Model>(); - } - - public Model getModel(String uri){ - Model model = cache.get(uri); - if(model == null){ - model = modelGen.createModel(uri, Strategy.CHUNKS, recursionDepth); - cache.put(uri, model); - } - return cache.get(uri); - } - - public void setRecursionDepth(int recursionDepth){ - this.recursionDepth = recursionDepth; - } - - public void clear(){ - cache.clear(); - } - - public void dispose(){ - cache = null; - } - -} Deleted: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/util/ModelGenerator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/util/ModelGenerator.java 2012-07-02 11:47:59 UTC (rev 3768) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/util/ModelGenerator.java 2012-07-02 11:58:25 UTC (rev 3769) @@ -1,248 +0,0 @@ -package org.dllearner.algorithm.qtl.util; - -import java.io.UnsupportedEncodingException; -import java.net.MalformedURLException; -import java.net.URL; -import java.sql.SQLException; -import java.util.Collections; -import java.util.Iterator; -import java.util.Set; - -import org.apache.log4j.Logger; -import org.dllearner.kb.sparql.ExtractionDBCache; -import org.dllearner.kb.sparql.SparqlEndpoint; - -import com.hp.hpl.jena.query.Query; -import com.hp.hpl.jena.query.QueryFactory; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.rdf.model.Statement; -import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.jamonapi.Monitor; -import com.jamonapi.MonitorFactory; - -public class ModelGenerator { - - private static final Logger logger = Logger.getLogger(ModelGenerator.class); - private Monitor queryMonitor = 
MonitorFactory.getTimeMonitor("SPARQL Query monitor"); - - private SparqlEndpoint endpoint; - private int recursionDepth = 1; - - private static final int CHUNK_SIZE = 1000; - - private ExtractionDBCache cache; - - private Set<String> predicateFilters; - - public enum Strategy{ - INCREMENTALLY, - CHUNKS - } - - public ModelGenerator(SparqlEndpoint endpoint){ - this(endpoint, Collections.<String>emptySet(), null); - } - - public ModelGenerator(SparqlEndpoint endpoint, Set<String> predicateFilters){ - this(endpoint, predicateFilters, null); - } - - public ModelGenerator(SparqlEndpoint endpoint, Set<String> predicateFilters, ExtractionDBCache cache){ - this.endpoint = endpoint; - this.predicateFilters = predicateFilters; - this.cache = cache; - } - - public ModelGenerator(SparqlEndpoint endpoint, ExtractionDBCache cache){ - this(endpoint, Collections.<String>emptySet(), cache); - } - - public ModelGenerator(String endpointURL){ - try { - this.endpoint = new SparqlEndpoint(new URL(endpointURL)); - } catch (MalformedURLException e) { - e.printStackTrace(); - } - } - - public Model createModel(String resource, Strategy strategy, int recursionDepth){ - this.recursionDepth = recursionDepth; - if(strategy == Strategy.INCREMENTALLY){ - return getModelIncrementallyRec(resource, 0); - } else if(strategy == Strategy.CHUNKS){ - return getModelChunked(resource); - } - return ModelFactory.createDefaultModel(); - } - - public void setRecursionDepth(int recursionDepth){ - this.recursionDepth = recursionDepth; - } - - - /** - * A SPARQL CONSTRUCT query is created, to get a RDF graph for the given example with a specific recursion depth. - * @param example The example resource for which a CONSTRUCT query is created. - * @return The JENA ARQ Query object. 
- */ - private String makeConstructQueryOptional(String resource, int limit, int offset, Set<String> predicateFilter){ - StringBuilder sb = new StringBuilder(); - sb.append("CONSTRUCT {\n"); - sb.append("<").append(resource).append("> ").append("?p0 ").append("?o0").append(".\n"); - for(int i = 1; i < recursionDepth; i++){ - sb.append("?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".\n"); - } - sb.append("}\n"); - sb.append("WHERE {\n"); - sb.append("<").append(resource).append("> ").append("?p0 ").append("?o0").append(".\n"); - for(int i = 1; i < recursionDepth; i++){ - sb.append("OPTIONAL{\n"); - sb.append("?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".\n"); - } - for(int i = 1; i < recursionDepth; i++){ - sb.append("}"); - } - - - for(int i = 0; i < recursionDepth; i++){ - for(String predicate : predicateFilter){ - sb.append("FILTER (!REGEX (?p").append(i).append(", \"").append(predicate).append("\"))"); - } - - } - - sb.append("}\n"); -// sb.append("ORDER BY "); -// for(int i = 0; i < recursionDepth; i++){ -// sb.append("?p").append(i).append(" ").append("?o").append(i).append(" "); -// } -// sb.append("\n"); - sb.append("LIMIT ").append(limit).append("\n"); - sb.append("OFFSET ").append(offset); - - Query query = QueryFactory.create(sb.toString()); - - return sb.toString(); - } - - - /** - * A SPARQL CONSTRUCT query is created, to get a RDF graph for the given example. - * @param example The example resource for which a CONSTRUCT query is created. - * @return The JENA ARQ Query object. 
- */ - private String makeConstructQuery(String example, Set<String> predicateFilters){ - - StringBuilder sb = new StringBuilder(); - sb.append("CONSTRUCT {\n"); - sb.append("<").append(example).append("> ").append("?p ").append("?o").append(".\n"); - sb.append("}\n"); - sb.append("WHERE {\n"); - sb.append("<").append(example).append("> ").append("?p ").append("?o").append(".\n"); - - for(String predicate : predicateFilters){ - sb.append("FILTER (!REGEX (?p, \"").append(predicate).append("\"))"); - } - - sb.append("}\n"); - Query query = QueryFactory.create(sb.toString()); - - return sb.toString(); - } - - - - private Model getModelChunked(String resource){ -// logger.debug("Resource: " + resource); - String query = makeConstructQueryOptional(resource, CHUNK_SIZE, 0, predicateFilters); -// logger.debug("Sending SPARQL query ..."); -// logger.debug("Query:\n" + query.toString()); - queryMonitor.start(); - Model all = ModelFactory.createDefaultModel(); - try { - Model model; - if(cache == null){ - model = getModel(query); - } else { - model = cache.executeConstructQuery(endpoint, query); - } -// logger.debug("Got " + model.size() + " new triple in " + queryMonitor.getLastValue() + "ms."); - all.add(model); - queryMonitor.stop(); - int i = 1; - while(model.size() != 0){ - query = makeConstructQueryOptional(resource, CHUNK_SIZE, i * CHUNK_SIZE, predicateFilters); -// logger.debug("Sending SPARQL query ..."); -// logger.debug("Query:\n" + query.toString()); - queryMonitor.start(); - if(cache == null){ - model = getModel(query); - } else { - model = cache.executeConstructQuery(endpoint, query); - } - queryMonitor.stop(); -// logger.debug("Got " + model.size() + " new triple in " + queryMonitor.getLastValue() + "ms."); - all.add(model); - i++; - } - } catch (UnsupportedEncodingException e) { - logger.error(e); - } catch (SQLException e) { - logger.error(e); - } - return all; - } - - private Model getModelIncrementallyRec(String resource, int depth){ - 
logger.debug("Resource: " + resource); - String query = makeConstructQuery(resource, predicateFilters); - logger.debug("Sending SPARQL query ..."); - logger.debug("Query:\n" + query); - queryMonitor.start(); - Model model = null; - try { - if(cache == null){ - model = getModel(query); - } else { - model = cache.executeConstructQuery(endpoint, query); - } - } catch (UnsupportedEncodingException e) { - logger.error(e); - } catch (SQLException e) { - logger.error(e); - } - queryMonitor.stop(); - logger.debug("Got " + model.size() + " new triples in " + queryMonitor.getLastValue() + "ms:"); - Statement st = null; - for(Iterator<Statement> i = model.listStatements();i.hasNext(); st = i.next()){ - logger.debug(st); - } - if(depth < recursionDepth){ - Model tmp = ModelFactory.createDefaultModel(); - for(Iterator<Statement> i = model.listStatements(); i.hasNext();){ - st = i.next(); - if(st.getObject().isURIResource()){ - tmp.add(getModelIncrementallyRec(st.getObject().toString(), depth + 1)); - } - } - model.add(tmp); - } - - return model; - } - - private Model getModel(String query){ - QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(), query); - for (String dgu : endpoint.getDefaultGraphURIs()) { - queryExecution.addDefaultGraph(dgu); - } - for (String ngu : endpoint.getNamedGraphURIs()) { - queryExecution.addNamedGraph(ngu); - } - Model model = queryExecution.execConstruct(); - return model; - } - - -} Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/CLI.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/CLI.java 2012-07-02 11:47:59 UTC (rev 3768) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/CLI.java 2012-07-02 11:58:25 UTC (rev 3769) @@ -7,11 +7,11 @@ import java.net.URL; import java.util.Collections; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; import 
org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; -import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner; -import org.dllearner.algorithm.tbsl.templator.Templator; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; +import org.dllearner.algorithm.tbsl.util.Knowledgebase; +import org.dllearner.common.index.Index; +import org.dllearner.common.index.SOLRIndex; import org.dllearner.kb.sparql.SparqlEndpoint; import org.ini4j.InvalidFileFormatException; @@ -21,11 +21,18 @@ public static void main(String[] args) throws InvalidFileFormatException, FileNotFoundException, IOException { // Logger.getLogger(SPARQLTemplateBasedLearner.class).setLevel(Level.OFF); + SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()); + + SOLRIndex resourcesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_resources"); + resourcesIndex.setPrimarySearchField("label"); +// resourcesIndex.setSortField("pagerank"); + Index classesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_classes"); + Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); + + + Knowledgebase kb = new Knowledgebase(endpoint, "DBpedia Live", "TODO", resourcesIndex, propertiesIndex, classesIndex, null); + SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(kb); - SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); - SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), - Collections.<String>singletonList(""), Collections.<String>emptyList()); - System.out.println("======= TBSL v0.1 ============="); System.out.println("\nType ':q' to quit."); Deleted: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== 
--- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2012-07-02 11:47:59 UTC (rev 3768) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2012-07-02 11:58:25 UTC (rev 3769) @@ -1,1644 +0,0 @@ -package org.dllearner.algorithm.tbsl.learning; - -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -import org.apache.log4j.Logger; -import org.dllearner.algorithm.qtl.util.ModelGenerator; -import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; -import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; -import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; -import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.WordNet; -import org.dllearner.algorithm.tbsl.search.HierarchicalSolrSearch; -import org.dllearner.algorithm.tbsl.search.SolrQueryResultItem; -import org.dllearner.algorithm.tbsl.search.SolrQueryResultSet; -import org.dllearner.algorithm.tbsl.search.SolrSearch; -import org.dllearner.algorithm.tbsl.search.ThresholdSlidingSolrSearch; -import org.dllearner.algorithm.tbsl.sparql.Allocation; -import org.dllearner.algorithm.tbsl.sparql.Query; -import org.dllearner.algorithm.tbsl.sparql.RatedQuery; -import 
org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.Slot; -import org.dllearner.algorithm.tbsl.sparql.SlotType; -import org.dllearner.algorithm.tbsl.sparql.Template; -import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; -import org.dllearner.algorithm.tbsl.templator.Templator; -import org.dllearner.algorithm.tbsl.util.Prefixes; -import org.dllearner.algorithm.tbsl.util.Similarity; -import org.dllearner.algorithm.tbsl.util.SolrQueryResultStringSimilarityComparator; -import org.dllearner.core.ComponentInitException; -import org.dllearner.core.LearningProblem; -import org.dllearner.core.Oracle; -import org.dllearner.core.SparqlQueryLearningAlgorithm; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.kb.SparqlEndpointKS; -import org.dllearner.kb.sparql.ExtractionDBCache; -import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.InvalidFileFormatException; -import org.ini4j.Options; - -import com.hp.hpl.jena.graph.Triple; -import com.hp.hpl.jena.query.QueryExecution; -import com.hp.hpl.jena.query.QueryExecutionFactory; -import com.hp.hpl.jena.query.QueryFactory; -import com.hp.hpl.jena.query.QuerySolution; -import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.sparql.core.Var; -import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.sparql.syntax.Element; -import com.hp.hpl.jena.sparql.syntax.ElementAssign; -import com.hp.hpl.jena.sparql.syntax.ElementBind; -import com.hp.hpl.jena.sparql.syntax.ElementDataset; -import com.hp.hpl.jena.sparql.syntax.ElementExists; -import com.hp.hpl.jena.sparql.syntax.ElementFetch; -import com.hp.hpl.jena.sparql.syntax.ElementFilter; -import 
com.hp.hpl.jena.sparql.syntax.ElementGroup; -import com.hp.hpl.jena.sparql.syntax.ElementMinus; -import com.hp.hpl.jena.sparql.syntax.ElementNamedGraph; -import com.hp.hpl.jena.sparql.syntax.ElementNotExists; -import com.hp.hpl.jena.sparql.syntax.ElementOptional; -import com.hp.hpl.jena.sparql.syntax.ElementPathBlock; -import com.hp.hpl.jena.sparql.syntax.ElementService; -import com.hp.hpl.jena.sparql.syntax.ElementSubQuery; -import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock; -import com.hp.hpl.jena.sparql.syntax.ElementUnion; -import com.hp.hpl.jena.sparql.syntax.ElementVisitor; -import com.hp.hpl.jena.vocabulary.OWL; -import com.hp.hpl.jena.vocabulary.RDF; -import com.hp.hpl.jena.vocabulary.RDFS; -import com.jamonapi.Monitor; -import com.jamonapi.MonitorFactory; - -public class SPARQLTemplateBasedLearner implements SparqlQueryLearningAlgorithm{ - - //for debugging - List<String> exclusions = Arrays.asList(new String[]{"http://dbpedia.org/ontology/GeopoliticalOrganisation", - "http://dbpedia.org/ontology/Non-ProfitOrganisation"}); - - enum Ranking{ - LUCENE, SIMILARITY, NONE - } - - private static final String OPTIONS_FILE = SPARQLTemplateBasedLearner.class.getClassLoader().getResource("tbsl/tbsl.properties").getPath(); - - private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner.class); - private Monitor mon = MonitorFactory.getTimeMonitor("tbsl"); - - private static final int RECURSION_DEPTH = 2; - private static final int MAX_URIS_PER_SLOT = 10; - - private Ranking ranking; - private boolean useRemoteEndpointValidation; - private boolean stopIfQueryResultNotEmpty; - private int maxTestedQueriesPerTemplate = 50; - private int maxQueryExecutionTimeInSeconds; - - private int maxTestedQueries = 200; - - private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); - private ExtractionDBCache cache = new ExtractionDBCache("cache"); - - private SolrSearch resource_index; - private SolrSearch class_index; - private 
SolrSearch property_index; - private SolrSearch boa_pattern_property_index; - private ModelGenerator modelGenenerator; - private Templator templateGenerator; - - private String question; - private int learnedPos = -1; - - private Oracle oracle; - - private Map<String, SolrQueryResultSet> resourcesURICache; - private Map<String, SolrQueryResultSet> classesURICache; - private Map<String, SolrQueryResultSet> propertiesURICache; - - private Map<String, Object> learnedSPARQLQueries; - private Set<Template> templates; - private Collection<Query> sparqlQueryCandidates; - private Map<Template, Collection<? extends Query>> template2Queries; - private Map<Slot, List<String>> slot2URI; - - private Set<WeightedQuery> generatedQueries; - - private Map<String, String> prefixMap; - - private Lemmatizer lemmatizer = new LingPipeLemmatizer();// StanfordLemmatizer(); - - private SPARQLReasoner reasoner; - - public SPARQLTemplateBasedLearner() throws InvalidFileFormatException, FileNotFoundException, IOException{ - this(OPTIONS_FILE); - } - - public SPARQLTemplateBasedLearner(String optionsFile) throws InvalidFileFormatException, FileNotFoundException, IOException{ - this(new Options(new FileInputStream(optionsFile))); - } - - public SPARQLTemplateBasedLearner(Options options){ - this(options, new StanfordPartOfSpeechTagger()); - } - - public SPARQLTemplateBasedLearner(Options options, PartOfSpeechTagger tagger){ - this(options, tagger, new WordNet()); - } - - public SPARQLTemplateBasedLearner(Options options, PartOfSpeechTagger tagger, WordNet wordNet){ - this(options, tagger, wordNet, "cache"); - } - - public SPARQLTemplateBasedLearner(Options options, PartOfSpeechTagger tagger, WordNet wordNet, String cacheDir){ - init(options); - - Set<String> predicateFilters = new HashSet<String>(); - predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); - predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - - prefixMap = Prefixes.getPrefixes(); - - 
modelGenenerator = new ModelGenerator(endpoint, predicateFilters); - - templateGenerator = new Templator(tagger, wordNet); - cache = new ExtractionDBCache(cacheDir); - } - - /* - * Only for Evaluation useful. - */ - public void setUseIdealTagger(boolean value){ - templateGenerator.setUNTAGGED_INPUT(!value); - } - - private void init(Options options){ - String resourcesIndexUrl = options.fetch("solr.resources.url"); - String resourcesIndexSearchField = options.fetch("solr.resources.searchfield"); - resource_index = new ThresholdSlidingSolrSearch(resourcesIndexUrl, resourcesIndexSearchField, "label", 1.0, 0.1); - - String classesIndexUrl = options.fetch("solr.classes.url"); - String classesIndexSearchField = options.fetch("solr.classes.searchfield"); - SolrSearch dbpediaClassIndex = new SolrSearch(classesIndexUrl, classesIndexSearchField, "label"); - - String yagoClassesIndexUrl = options.fetch("solr.yago.classes.url"); - String yagoClassesIndexSearchField = options.fetch("solr.yago.classes.searchfield"); - SolrSearch yagoClassIndex = new SolrSearch(yagoClassesIndexUrl, yagoClassesIndexSearchField); - - class_index = new ThresholdSlidingSolrSearch(dbpediaClassIndex);// new HierarchicalSolrSearch(dbpediaClassIndex, yagoClassIndex); - - String propertiesIndexUrl = options.fetch("solr.properties.url"); - String propertiesIndexSearchField = options.fetch("solr.properties.searchfield"); - SolrSearch labelBasedPropertyIndex = new ThresholdSlidingSolrSearch(propertiesIndexUrl, propertiesIndexSearchField, "label", 1.0, 0.1); - - String boaPatternIndexUrl = options.fetch("solr.boa.properties.url"); - String boaPatternIndexSearchField = options.fetch("solr.boa.properties.searchfield"); - SolrSearch patternBasedPropertyIndex = new SolrSearch(boaPatternIndexUrl, boaPatternIndexSearchField, "nlr-no-var"); - - //first BOA pattern then label based -// property_index = new HierarchicalSolrSearch(patternBasedPropertyIndex, labelBasedPropertyIndex); - - //first label based then BOA 
pattern - property_index = new HierarchicalSolrSearch(labelBasedPropertyIndex, patternBasedPropertyIndex); - - int maxIndexResults = Integer.parseInt(options.fetch("solr.query.limit"), 10); - - maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); - cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); - - ranking = Ranking.valueOf(options.get("learning.ranking", "similarity").toUpperCase()); - useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? true : false; - stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); - maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); - - String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); - wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); - System.setProperty("wordnet.database.dir", wordnetPath); - } - - public void setEndpoint(SparqlEndpoint endpoint){ - this.endpoint = endpoint; - Set<String> predicateFilters = new HashSet<String>(); - predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); - predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - modelGenenerator = new ModelGenerator(endpoint, predicateFilters); - - reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); - reasoner.setCache(cache); - reasoner.prepareSubsumptionHierarchy(); - } - - public void setQuestion(String question){ - this.question = question; - } - - public void setUseRemoteEndpointValidation(boolean useRemoteEndpointValidation){ - this.useRemoteEndpointValidation = useRemoteEndpointValidation; - } - - public int getMaxQueryExecutionTimeInSeconds() { - return maxQueryExecutionTimeInSeconds; - } - - public void setMaxQueryExecutionTimeInSeconds(int maxQueryExecutionTimeInSeconds) { - this.maxQueryExecutionTimeInSeconds = 
maxQueryExecutionTimeInSeconds; - } - - public int getMaxTestedQueriesPerTemplate() { - return maxTestedQueriesPerTemplate; - } - - public void setMaxTestedQueriesPerTemplate(int maxTestedQueriesPerTemplate) { - this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate; - } - - public void setRanking(Ranking ranking) { - this.ranking = ranking; - } - - private void reset(){ - learnedSPARQLQueries = new HashMap<String, Object>(); - resourcesURICache = new HashMap<String, SolrQueryResultSet>(); - classesURICache = new HashMap<String, SolrQueryResultSet>(); - propertiesURICache = new HashMap<String, SolrQueryResultSet>(); - template2Queries = new HashMap<Template, Collection<? extends Query>>(); - slot2URI = new HashMap<Slot, List<String>>(); - } - - public void learnSPARQLQueries() throws NoTemplateFoundException{ - reset(); - //generate SPARQL query templates - logger.info("Generating SPARQL query templates..."); - mon.start(); - templates = templateGenerator.buildTemplates(question); - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - if(templates.isEmpty()){ - throw new NoTemplateFoundException(); - } - logger.info("Templates:"); - for(Template t : templates){ - logger.info(t); - } - -// //generate SPARQL query candidates, but select only a fixed number per template -// template2Queries = getSPARQLQueryCandidates(templates, ranking); -// sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); - - //get the weighted query candidates - generatedQueries = getWeightedSPARQLQueries(templates); - sparqlQueryCandidates = new ArrayList<Query>(); - int i = 0; - for(WeightedQuery wQ : generatedQueries){ - System.out.println(wQ.explain()); - sparqlQueryCandidates.add(wQ.getQuery()); - if(i == maxTestedQueries){ - break; - } - i++; - } - - //test candidates - if(useRemoteEndpointValidation){ //on remote endpoint - validateAgainstRemoteEndpoint(sparqlQueryCandidates); - } else {//on local model - 
validateAgainstLocalModel(sparqlQueryCandidates); - } - - } - - public Set<WeightedQuery> getGeneratedQueries() { - return generatedQueries; - } - - public Set<WeightedQuery> getGeneratedQueries(int topN) { - Set<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); - int max = Math.min(topN, generatedQueries.size()); - for(WeightedQuery wQ : generatedQueries){ - topNQueries.add(wQ); - if(topNQueries.size() == max){ - break; - } - } - return topNQueries; - } - - public List<String> getSPARQLQueries() throws NoTemplateFoundException{ - logger.info("Generating SPARQL query templates..."); - mon.start(); - templates = templateGenerator.buildTemplates(question); - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - if(templates.isEmpty()){ - throw new NoTemplateFoundException(); - } - logger.info("Templates:"); - for(Template t : templates){ - logger.info(t); - } - - //generate SPARQL query candidates - logger.info("Generating SPARQL query candidates..."); - mon.start(); - Map<Template, Collection<? extends Query>> template2Queries = getSPARQLQueryCandidates(templates, ranking); - sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); - - - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - - List<String> queries = new ArrayList<String>(); - for(Query q : sparqlQueryCandidates){ - queries.add(q.toString()); - } - - return queries; - } - - public Set<Template> getTemplates(){ - return templates; - } - - public List<String> getGeneratedSPARQLQueries(){ - List<String> queries = new ArrayList<String>(); - for(Query q : sparqlQueryCandidates){ - queries.add(q.toString()); - } - - return queries; - } - - public Map<Template, Collection<? 
extends Query>> getTemplates2SPARQLQueries(){ - return template2Queries; - } - - public Map<Slot, List<String>> getSlot2URIs(){ - return slot2URI; - } - - private Model getWorkingModel(List<String> resources){ - logger.info("Generating local model..."); - mon.start(); - Model workingModel = ModelFactory.createDefaultModel(); - Model model; - for(String resource : resources){ - model = modelGenenerator.createModel(resource, Strategy.CHUNKS, RECURSION_DEPTH); - workingModel.add(model); - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - logger.info("Local model contains " + workingModel.size() + " triples."); - return workingModel; - } - - private Map<Template,Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates, Ranking ranking){ - switch(ranking){ - case LUCENE: return getSPARQLQueryCandidatesSortedByLucene(templates); - case SIMILARITY: return getSPARQLQueryCandidatesSortedBySimilarity(templates); - case NONE: return getSPARQLQueryCandidates(templates); - default: return null; - } - } - - /* - private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - double alpha = 0.8; - double beta = 1 - alpha; - Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); - - Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - - Set<Allocation> allAllocations; - for(Template t : templates){ - allAllocations = new HashSet<Allocation>(); - - for(Slot slot : t.getSlots()){ - Set<Allocation> allocations = computeAllocation(slot); - allAllocations.addAll(allocations); - slot2Allocations.put(slot, allocations); - } - - int min = Integer.MAX_VALUE; - int max = Integer.MIN_VALUE; - for(Allocation a : allAllocations){ - if(a.getInDegree() < min){ - min = a.getInDegree(); - } - if(a.getInDegree() > max){ - max = a.getInDegree(); - } - } - for(Allocation a : allAllocations){ - double prominence = a.getInDegree()/(max-min); - a.setProminence(prominence); - - double score = alpha * 
a.getSimilarity() + beta * a.getProminence(); - a.setScore(score); - - } -// System.out.println(allAllocations); - - Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); - Query cleanQuery = t.getQuery(); - queries.add(new WeightedQuery(cleanQuery)); - - Set<WeightedQuery> tmp = new HashSet<WeightedQuery>(); - List<Slot> sortedSlots = new ArrayList<Slot>(); - Set<Slot> classSlots = new HashSet<Slot>(); - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.CLASS){ - sortedSlots.add(slot); - classSlots.add(slot); - } - } - for(Slot slot : t.getSlots()){ - if(!sortedSlots.contains(slot)){ - sortedSlots.add(slot); - } - } - for(Slot slot : sortedSlots){ - if(!slot2Allocations.get(slot).isEmpty()){ - for(Allocation a : slot2Allocations.get(slot)){ - for(WeightedQuery query : queries){ - //check if the query is possible - if(slot.getSlotType() == SlotType.SYMPROPERTY){ - Query reversedQuery = new Query(query.getQuery()); - reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); - - boolean drop = false; - for(SPARQL_Triple triple : reversedQuery.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : reversedQuery.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("RANGES: " + ranges); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = 
true; - } else { - System.out.println("DROPPING: \n" + reversedQuery.toString()); - } - } - } - for(SPARQL_Triple typeTriple : reversedQuery.getRDFTypeTriples(subjectVar)){ -// System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); -// System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("DOMAINS: " + domains); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + reversedQuery.toString()); - } - } - } - } - - if(!drop){ - reversedQuery.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(reversedQuery); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - tmp.add(w); - } - - } - Query q = new Query(query.getQuery()); - - boolean drop = false; - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = 
getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("RANGES: " + ranges); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + q.toString()); - } - } - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ -// System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); -// System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("DOMAINS: " + domains); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + q.toString()); - } - } - } - } - } - - - if(!drop){ - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - tmp.add(w); - } - - - } - } - queries.clear(); - queries.addAll(tmp);System.out.println(tmp); - tmp.clear(); - } - - } - for(WeightedQuery q : queries){ - q.setScore(q.getScore()/t.getSlots().size()); - } - allQueries.addAll(queries); - List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); - qList.add(wQ.getQuery()); - } - template2Queries.put(t, qList); - } - return allQueries; - } - */ - - private void normProminenceValues(Set<Allocation> allocations){ - double min = 0; - double max = 0; - for(Allocation a : allocations){ - if(a.getProminence() < min){ - min = a.getProminence(); - } - 
if(a.getProminence() > max){ - max = a.getProminence(); - } - } - for(Allocation a : allocations){ - double prominence = a.getProminence()/(max-min); - a.setProminence(prominence); - } - } - - private void computeScore(Set<Allocation> allocations){ - double alpha = 0.8; - double beta = 1 - alpha; - - for(Allocation a : allocations){ - double score = alpha * a.getSimilarity() + beta * a.getProminence(); - a.setScore(score); - } - - } - - private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - logger.info("Generating SPARQL query candidates..."); - - Map<Slot, Set<Allocation>> slot2Allocations2 = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { - - @Override - public int compare(Slot o1, Slot o2) { - if(o1.getSlotType() == o2.getSlotType()){ - return o1.getToken().compareTo(o2.getToken()); - } else { - return -1; - } - } - }); - - - Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); - - Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - - Set<Allocation> allocations; - - for(Template t : templates){ - logger.info("Processing template:\n" + t.toString()); - allocations = new TreeSet<Allocation>(); - - ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); - List<Future<SortedSet<Allocation>>> list = new ArrayList<Future<SortedSet<Allocation>>>(); - - for (Slot slot : t.getSlots()) { - Callable<SortedSet<Allocation>> worker = new SlotProcessor(slot); - Future<SortedSet<Allocation>> submit = executor.submit(worker); - list.add(submit); - } - -// for (Future<SortedSet<Allocation>> future : list) { -// try { -// future.get(); -// } catch (InterruptedException e) { -// e.printStackTrace(); -// } catch (ExecutionException e) { -// e.printStackTrace(); -// } -// } - - /*for(Slot slot : t.getSlots()){ - allocations = slot2Allocations2.get(slot); - if(allocations == null){ - allocations = computeAllocations(slot, 10); - slot2Allocations2.put(slot, allocations); - } - 
slot2Allocations.put(slot, allocations); - - //for tests add the property URI with http://dbpedia.org/property/ namespace - //TODO should be replaced by usage of a separate SOLR index - Set<Allocation> tmp = new HashSet<Allocation>(); - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(Allocation a : allocations){ - String uri = "http://dbpedia.org/property/" + a.getUri().substring(a.getUri().lastIndexOf("/")+1); - Allocation newA = new Allocation(uri, a.getSimilarity(), a.getProminence()); - newA.setScore(a.getScore()-0.000001); - tmp.add(newA); - } - } - allocations.addAll(tmp); - }*/ - - - Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); - Query cleanQuery = t.getQuery(); - queries.add(new WeightedQuery(cleanQuery)); - - Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); - List<Slot> sortedSlots = new ArrayList<Slot>(); - Set<Slot> classSlots = new HashSet<Slot>(); - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.CLASS){ - sortedSlots.add(slot); - classSlots.add(slot); - } - } - for(Slot slot : t.getSlots()){ - if(!sortedSlots.contains(slot)){ - sortedSlots.add(slot); - } - } - //add for each SYMPROPERTY Slot the reversed query - for(Slot slot : sortedSlots){ - for(WeightedQuery wQ : queries){ - if(slot.getSlotType() == SlotType.SYMPROPERTY){ - Query reversedQuery = new Query(wQ.getQuery()); - reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); - tmp.add(new WeightedQuery(reversedQuery)); - } - tmp.add(wQ); - } - queries.clear(); - queries.addAll(tmp); - tmp.clear(); - } - - for(Slot slot : sortedSlots){ - if(!slot2Allocations.get(slot).isEmpty()){ - for(Allocation a : slot2Allocations.get(slot)){ - for(WeightedQuery query : queries){ - Query q = new Query(query.getQuery()); - - boolean drop = false;/* - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : 
q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - if(isObjectProperty(a.getUri())){ - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); - } - allRanges.addAll(ranges); - allRanges.remove("http://www.w3.org/2002/07/owl#Thing"); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeURI); -// if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")){ -// System.out.println("RANGES: " + allRanges); -// System.out.println("TYPES: " + allTypes); -// } - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; -// if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer") && q.toString().contains("/Software>")){ -// System.out.println("RANGES: " + allRanges); -// System.out.println("TYPES: " + allTypes); -// System.out.println("DROPPING: \n" + q.toString()); -// } - } else { - - } - } - } else { - drop = true; - } - - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ -// System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); -// System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); - } - allDomains.addAll(domains); - allDomains.remove("http://www.w3.org/2002/07/owl#Thing"); - String typeURI = 
typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")){ -// System.out.println("DOMAINS: " + allDomains); -// System.out.println("TYPES: " + allTypes); -// } - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; -// System.out.println("DROPPING: \n" + q.toString()); - } else { - - } - } - } - } - }*/ - - - if(!drop){ - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - w.addAllocations(query.getAllocations()); - w.addAllocation(a); - tmp.add(w); - } - - - } - } - queries.clear(); - queries.addAll(tmp);//System.out.println(tmp); - tmp.clear(); - } - - } - for(WeightedQuery q : queries){ - q.setScore(q.getScore()/t.getSlots().size()); - } - allQueries.addAll(queries); - List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); - qList.add(wQ.getQuery()); - } - template2Queries.put(t, qList); - } - logger.info("...done in "); - return allQueries; - } - -/* - * for(SPARQL_Triple triple : t.getQuery().getTriplesWithVar(slot.getAnchor())){System.out.println(triple); - for(SPARQL_Triple typeTriple : t.getQuery().getRDFTypeTriples(triple.getVariable().getName())){ - System.out.println(typeTriple); - for(Allocation a : allocations){ - Set<String> domains = getDomains(a.getUri()); - System.out.println(a); - System.out.println(domains); - for(Slot s : classSlots){ - if(s.getAnchor().equals(triple.getVariable().getName())){ - for(Allocation all : slot2Allocations.get(s)){ - if(!domains.contains(all.getUri())){ - System.out.println("DROP " + a); - } - } - } - } - } - - - } - */ - - private 
SortedSet<Allocation> computeAllocations(Slot slot){ - SortedSet<Allocation> allocations = new TreeSet<Allocation>(); - - SolrSearch index = getIndexBySlotType(slot); - - SolrQueryResultSet rs; - for(String word : slot.getWords()){ - if(slot.getSlotType() == SlotType.RESOURCE){ - rs = index.getResourcesWithScores(word, 250); - } else { - rs = index.getResourcesWithScores(word, 20); - } - - - //debugging -// for(Iterator<SolrQueryResultItem> iter = rs.getItems().iterator();iter.hasNext();){ -// SolrQueryResultItem item = iter.next(); -// if(exclusions.contains(item.getUri())){ -// iter.remove(); -// } -// } - - for(SolrQueryResultItem item : rs.getItems()){ - double similarity = Similarity.getSimilarity(word, item.getLabel()); - //get the labels of the redirects and compute the highest similarity - if(slot.getSlotType() == SlotType.RESOURCE){ - Set<String> labels = getRedirectLabels(item.getUri()); - for(String label : labels){ - double tmp = Similarity.getSimilarity(word, label); - if(tmp > similarity){ - similarity = tmp; - } - } - } - double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); - allocations.add(new Allocation(item.getUri(), prominence, similarity)); - } - - } - - normProminenceValues(allocations); - - computeScore(allocations); - return new TreeSet<Allocation>(allocations); - } - - private Set<Allocation> computeAllocations(Slot slot, int limit){ - logger.info("Computing allocations for " + slot); - SortedSet<Allocation> allocations = computeAllocations(slot); - - if(allocations.isEmpty()){ - logger.info("...done."); - return allocations; - } - - ArrayList<Allocation> l = new ArrayList<Allocation>(allocations); - Collections.sort(l, new Comparator<Allocation>() { - - @Override - public int compare(Allocation o1, Allocation o2) { - double dif = o1.getScore() - o2.getScore(); - if(dif < 0){ - return 1; - } else if(dif > 0){ - return -1; - } else { - return o1.getUri().compareTo(o2.getUri()); - } - } - }); - logger.info("...done."); - 
return new TreeSet<Allocation>(l.subList(0, Math.min(limit, allocations.size()))); - } - - private Set<String> getRedirectLabels(String uri){ - Set<String> labels = new HashSet<String>(); - String query = String.format("SELECT ?label WHERE {?s <http://dbpedia.org/ontology/wikiPageRedirects> <%s>. ?s <%s> ?label.}", uri, RDFS.label.getURI()); - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - QuerySolution qs; - while(rs.hasNext()){ - qs = rs.next(); - labels.add(qs.getLiteral("label").getLexicalForm()); - - } - return labels; - } - - private double getProminenceValue(String uri, SlotType type){ - int cnt = 1; - String query = null; - if(type == SlotType.CLASS){ - query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; - } - query = String.format(query, uri); - - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - QuerySolution qs; - String projectionVar; - while(rs.hasNext()){ - qs = rs.next(); - projectionVar = qs.varNames().next(); - cnt = qs.get(projectionVar).asLiteral().getInt(); - } -// if(cnt == 0){ -// return 0; -// } -// return Math.log(cnt); - return cnt; - } - - - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - Set<Query> queries = new HashSet<Query>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? 
extends Query>>(); - for(Template template : templates){ - queries = new HashSet<Query>(); - queries.add(template.getQuery()); - template2Queries.put(template, queries); - for(Slot slot : template.getSlots()){ - Set<Query> tmp = new HashSet<Query>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(Query query : queries){ - Query newQuery = new Query(query); - newQuery.replaceVarWithURI(var, item.getUri()); - tmp.add(newQuery); - } - } - if(!words.isEmpty()){ - queries.clear(); - queries.addAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return template2Queries; - } - - private Map<String, Float> getCandidateRatedSPARQLQueries(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - Map<String, Float> query2Score = new HashMap<String, Float>(); - - Query query; - for(Template template : templates){ - query = template.getQuery(); - query2Score.put(query.toString(), Float.valueOf(0)); - for(Slot slot : template.getSlots()){ - Map<String, Float> tmp = new HashMap<String, Float>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(Entry<String, Float> entry2 : query2Score.entrySet()){ - tmp.put(entry2.getKey().replace("?" + var, "<" + item.getUri() + ">"), item.getScore() + entry2.getValue()); - } - } - if(!words.isEmpty()){ - query2Score.clear(); - query2Score.putAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return query2Score; - } - - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidatesSortedByLucene(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - SortedSet<RatedQuery> ratedQueries = new TreeSet<RatedQuery>(); - Map<Template, Collection<? 
extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - - Query query; - for(Template template : templates){ - query = template.getQuery(); - ratedQueries = new TreeSet<RatedQuery>(); - ratedQueries.add(new RatedQuery(query, 0)); - template2Queries.put(template, ratedQueries); - for(Slot slot : template.getSlots()){ - Set<RatedQuery> tmp = new HashSet<RatedQuery>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(RatedQuery rQ : ratedQueries){ - RatedQuery newRQ = new RatedQuery(rQ, rQ.getScore()); - newRQ.replaceVarWithURI(var, item.getUri()); - newRQ.setScore(newRQ.getScore() + item.getScore()); - tmp.add(newRQ); - } - } - if(!words.isEmpty()){ - ratedQueries.clear(); - ratedQueries.addAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return template2Queries; - } - - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidatesSortedBySimilarity(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - List<Query> queries = new ArrayList<Query>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - List<String> uriCandidates; - for(Template template : templates){ - queries = new ArrayList<Query>(); - queries.add(template.getQuery()); - template2Queries.put(template, queries); - for(Slot slot : template.getSlots()){ - List<Query> tmp = new ArrayList<Query>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - SPARQL_Prefix prefix = null; - uriCandidates = getCandidateURIsSortedBySimilarity(slot); - for(S... [truncated message content] |
From: <lor...@us...> - 2012-07-03 14:16:39
|
Revision: 3770 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3770&view=rev Author: lorenz_b Date: 2012-07-03 14:16:28 +0000 (Tue, 03 Jul 2012) Log Message: ----------- Some changes to get relevant keywords for a question. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -24,6 +24,7 @@ import java.util.concurrent.Future; import org.apache.log4j.Logger; +import org.dllearner.algorithm.tbsl.ltag.parser.GrammarFilter; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; @@ -143,6 +144,8 @@ private PopularityMap popularityMap; + private Set<String> relevantKeywords; + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ this(endpoint, resourcesIndex, classesIndex, propertiesIndex, new StanfordPartOfSpeechTagger()); } @@ -349,6 +352,7 @@ learnedSPARQLQueries = new HashMap<String, Object>(); 
template2Queries = new HashMap<Template, Collection<? extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); + relevantKeywords = new HashSet<String>(); currentlyExecutedQuery = null; // templateMon.reset(); @@ -367,8 +371,10 @@ } templateMon.stop(); logger.info("Done in " + templateMon.getLastValue() + "ms."); + relevantKeywords.addAll(templateGenerator.getUnknownWords()); if(templates.isEmpty()){ throw new NoTemplateFoundException(); + } logger.info("Templates:"); for(Template t : templates){ @@ -463,13 +469,7 @@ } public Set<String> getRelevantKeywords(){ - Set<String> keywords = new HashSet<String>(); - for(Template t : templates){ - for (Slot slot : t.getSlots()) { - keywords.add(slot.getWords().get(0)); - } - } - return keywords; + return relevantKeywords; } private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -21,18 +21,20 @@ * grammar contains the .+ wildcard the input n-gram "a b x y c" matches the * anchor "a b .+ c". 
*/ -class GrammarFilter { +public class GrammarFilter { private static final Logger logger = Logger.getLogger(GrammarFilter.class); final static String[] NAMED_Strings = {"named", "called"}; // DISAM - static List<Integer> usedInts = new ArrayList<Integer>(); - static ArrayList<String> doubles = new ArrayList<String>(); + private List<Integer> usedInts = new ArrayList<Integer>(); + private List<String> doubles = new ArrayList<String>(); public static boolean VERBOSE = true; - static ParseGrammar filter(String taggedinput,LTAGLexicon grammar,List<Integer> temps, String mode) { + private List<String> unknownWords; + + public ParseGrammar filter(String taggedinput,LTAGLexicon grammar,List<Integer> temps, String mode) { // DISAM: CLEAR usedInts = new ArrayList<Integer>(); @@ -208,7 +210,7 @@ } } - List<String> unknownWords = new ArrayList<String>(); + unknownWords = new ArrayList<String>(); for (String t : unknownTokens) { String[] tParts = t.split(" "); for (String s : tParts) { @@ -280,8 +282,12 @@ return parseG; } + + public List<String> getUnknownWords(){ + return unknownWords; + } - private static List<Pair<String,String>> checkForNamedString(String token) { + private List<Pair<String,String>> checkForNamedString(String token) { String[] split; if (token.contains(" ")) { @@ -366,7 +372,7 @@ return result; } - private static int createFresh() { + private int createFresh() { int fresh = 0; for (int i = 0; usedInts.contains(i); i++) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -32,6 +32,8 @@ private List<Dude> dudes = new ArrayList<Dude>(); private ParseGrammar parseGrammar = null; private 
List<Integer> temporaryEntries = new ArrayList<Integer>(); + + private GrammarFilter grammarFilter = new GrammarFilter(); @SuppressWarnings("unchecked") private final Class[] operations = { Scanner.class, MoveDotDown.class, @@ -65,7 +67,7 @@ * times, a tree for each token is added. Both trees need to have * different treeIDs for the parser to work correctly. */ - parseGrammar = GrammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); + parseGrammar = grammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); String inputNoTags = ""; for (String s : taggeduserinput.split(" ")) { @@ -97,6 +99,10 @@ } + public List<String> getUnknownWords(){ + return grammarFilter.getUnknownWords(); + } + public List<DerivationTree> parseMultiThreaded(String taggeduserinput, LTAGLexicon grammar) { derivationTrees.clear(); @@ -112,7 +118,7 @@ * times, a tree for each token is added. Both trees need to have * different treeIDs for the parser to work correctly. */ - parseGrammar = GrammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); + parseGrammar = grammarFilter.filter(taggeduserinput,grammar,temporaryEntries,MODE); String inputNoTags = ""; for (String s : taggeduserinput.split(" ")) { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -44,7 +44,7 @@ LTAGLexicon g; LTAG_Lexicon_Constructor LTAG_Constructor = new LTAG_Lexicon_Constructor(); - Parser p; + Parser parser; Preprocessor pp; WordNet wordnet; @@ -83,11 +83,11 @@ g = LTAG_Constructor.construct(grammarFiles); - p = new Parser(); - p.SHOW_GRAMMAR = true; - p.USE_DPS_AS_INITTREES = true; - p.CONSTRUCT_SEMANTICS = true; - 
p.MODE = "LEIPZIG"; + parser = new Parser(); + parser.SHOW_GRAMMAR = true; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; pp = new Preprocessor(USE_NER); } @@ -104,11 +104,11 @@ g = LTAG_Constructor.construct(grammarFiles); - p = new Parser(); - p.SHOW_GRAMMAR = true; - p.USE_DPS_AS_INITTREES = true; - p.CONSTRUCT_SEMANTICS = true; - p.MODE = "LEIPZIG"; + parser = new Parser(); + parser.SHOW_GRAMMAR = true; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; pp = new Preprocessor(USE_NER); } @@ -125,12 +125,12 @@ g = LTAG_Constructor.construct(grammarFiles); - p = new Parser(); - p.SHOW_GRAMMAR = false; - p.VERBOSE = b; - p.USE_DPS_AS_INITTREES = true; - p.CONSTRUCT_SEMANTICS = true; - p.MODE = "LEIPZIG"; + parser = new Parser(); + parser.SHOW_GRAMMAR = false; + parser.VERBOSE = b; + parser.USE_DPS_AS_INITTREES = true; + parser.CONSTRUCT_SEMANTICS = true; + parser.MODE = "LEIPZIG"; pp = new Preprocessor(USE_NER); pp.setVERBOSE(b); @@ -180,16 +180,16 @@ newtagged = pp.condense(newtagged); if (VERBOSE) logger.trace("Preprocessed: " + newtagged); - p.parse(newtagged,g); + parser.parse(newtagged,g); - if (p.getDerivationTrees().isEmpty()) { - p.clear(g,p.getTemps()); + if (parser.getDerivationTrees().isEmpty()) { + parser.clear(g,parser.getTemps()); clearAgain = false; if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); } else { try { - p.buildDerivedTrees(g); + parser.buildDerivedTrees(g); } catch (ParseException e) { if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); } @@ -205,7 +205,7 @@ Set<DRS> drses = new HashSet<DRS>(); Set<Template> templates = new HashSet<Template>(); - for (Dude dude : p.getDudes()) { + for (Dude dude : parser.getDudes()) { UDRS udrs = d2u.convert(dude); if (udrs != null) { @@ -295,7 +295,7 @@ } if (clearAgain) { - p.clear(g,p.getTemps()); + 
parser.clear(g,parser.getTemps()); } // System.gc(); @@ -326,16 +326,16 @@ newtagged = pp.condense(newtagged); if (VERBOSE) logger.trace("Preprocessed: " + newtagged); - p.parseMultiThreaded(newtagged,g); + parser.parseMultiThreaded(newtagged,g); - if (p.getDerivationTrees().isEmpty()) { - p.clear(g,p.getTemps()); + if (parser.getDerivationTrees().isEmpty()) { + parser.clear(g,parser.getTemps()); clearAgain = false; if (VERBOSE) logger.error("[Templator.java] '" + s + "' could not be parsed."); } else { try { - p.buildDerivedTreesMultiThreaded(g); + parser.buildDerivedTreesMultiThreaded(g); } catch (ParseException e) { if (VERBOSE) logger.error("[Templator.java] ParseException at '" + e.getMessage() + "'", e); } @@ -358,7 +358,7 @@ // threadPool.shutdown(); // while(!threadPool.isTerminated()){} - for (Dude dude : p.getDudes()) { + for (Dude dude : parser.getDudes()) { UDRS udrs = d2u.convert(dude); if (udrs != null) { @@ -451,7 +451,7 @@ if (clearAgain) { - p.clear(g,p.getTemps()); + parser.clear(g,parser.getTemps()); } // System.gc(); @@ -462,6 +462,10 @@ return taggedInput; } + public List<String> getUnknownWords(){ + return parser.getUnknownWords(); + } + private List<String> getLemmatizedWords(List<String> words){ List<String> stemmed = new ArrayList<String>(); for(String word : words){ Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -49,6 +49,7 @@ SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.setMappingIndex(mappingIndex); learner.init(); + learner.setGrammarFiles(new 
String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); int learnedQuestions = 0; Map<String, String> question2QueryMap = new HashMap<String, String>(); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-07-02 11:58:25 UTC (rev 3769) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java 2012-07-03 14:16:28 UTC (rev 3770) @@ -114,11 +114,12 @@ SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.setMappingIndex(mappingIndex); learner.init(); + learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); String question = "Give me all houses near a school."; question = "Give me all houses with more than 3 bathrooms and more than 2 bedrooms."; question = "Give me all Victorian houses in Oxfordshire"; - question = "houses with more than 3 bedrooms"; + question = "Edwardian houses close to supermarket for less than 1,000,000 in Oxfordshire"; // question = "Give me all family houses with more than 2 bathrooms and more than 4 bedrooms"; learner.setQuestion(question); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-07-20 14:01:35
|
Revision: 3802 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3802&view=rev Author: kirdie Date: 2012-07-20 14:01:29 +0000 (Fri, 20 Jul 2012) Log Message: ----------- continued on Junit test (not finished). Added Paths: ----------- trunk/components-ext/src/main/resources/test/ trunk/components-ext/src/main/resources/test/dbpedia_class_mappings.txt trunk/components-ext/src/main/resources/test/dbpedia_dataproperty_mappings.txt trunk/components-ext/src/main/resources/test/dbpedia_objectproperty_mappings.txt trunk/components-ext/src/main/resources/test/dbpedia_resource_mappings.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Added: trunk/components-ext/src/main/resources/test/dbpedia_class_mappings.txt =================================================================== Added: trunk/components-ext/src/main/resources/test/dbpedia_dataproperty_mappings.txt =================================================================== Added: trunk/components-ext/src/main/resources/test/dbpedia_objectproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/test/dbpedia_objectproperty_mappings.txt (rev 0) +++ trunk/components-ext/src/main/resources/test/dbpedia_objectproperty_mappings.txt 2012-07-20 14:01:29 UTC (rev 3802) @@ -0,0 +1,6 @@ +http://dbpedia.org/ontology/writer|written by, written +http://dbpedia.org/ontology/author|written by, written, author, author of +http://dbpedia.org/ontology/director|directed by, directed, director +http://dbpedia.org/ontology/producer|produced by, produced, producer +http://dbpedia.org/ontology/birthPlace|birth place, birth, born in, born +http://dbpedia.org/ontology/deathPlace|death place, death, died in, died \ No newline at end of file Added: trunk/components-ext/src/main/resources/test/dbpedia_resource_mappings.txt 
=================================================================== Added: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java (rev 0) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-20 14:01:29 UTC (rev 3802) @@ -0,0 +1,196 @@ +package org.dllearner.algorithm.tbsl.learning; + +import static org.junit.Assert.fail; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Collections; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.Map.Entry; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import org.apache.log4j.Logger; +import org.dllearner.algorithm.tbsl.Evaluation; +import org.dllearner.algorithm.tbsl.util.Knowledgebase; +import org.dllearner.common.index.Index; +import org.dllearner.common.index.MappingBasedIndex; +import org.dllearner.common.index.SOLRIndex; +import org.dllearner.common.index.SPARQLIndex; +import org.dllearner.common.index.VirtuosoClassesIndex; +import org.dllearner.common.index.VirtuosoPropertiesIndex; +import org.dllearner.common.index.VirtuosoResourcesIndex; +import org.dllearner.core.ComponentInitException; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.junit.Before; +import org.junit.Test; +import org.w3c.dom.DOMException; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; 
+import com.hp.hpl.jena.query.ResultSet; + +/** @author konrad * */ +public class SPARQLTemplateBasedLearner3Test +{ + private static Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); + + private SPARQLTemplateBasedLearner3 oxfordLearner; + private SPARQLTemplateBasedLearner3 dbpediaLiveLearner; + + private ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); + private ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); + + SparqlEndpoint dbpediaLiveEndpoint; + SparqlEndpoint oxfordEndpoint; + + private SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); + private SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); + private SortedMap<Integer, Object> id2Answer = new TreeMap<Integer, Object>(); + + private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} + private ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} + + private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) throws MalformedURLException + { + SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + + SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); + SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); + MappingBasedIndex mappingIndex= new MappingBasedIndex( + this.getClass().getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + this.getClass().getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + this.getClass().getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + 
this.getClass().getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + ); + + Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); + return kb; + } + + private Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) throws MalformedURLException + { + SOLRIndex resourcesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_resources"); + resourcesIndex.setPrimarySearchField("label"); + // resourcesIndex.setSortField("pagerank"); + Index classesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_classes"); + Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); + + MappingBasedIndex mappingIndex= new MappingBasedIndex( + this.getClass().getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), + this.getClass().getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(), + this.getClass().getClassLoader().getResource("test/dbpedia_dataproperty_mappings.txt").getPath(), + this.getClass().getClassLoader().getResource("test/dbpedia_objectproperty_mappings.txt").getPath() + ); + + Knowledgebase kb = new Knowledgebase(dbpediaLiveEndpoint, "DBpedia Live", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); + return kb; + } + + @Before + public void setup() throws MalformedURLException + { + dbpediaLiveEndpoint = new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()); + dbpediaLiveLearner = new SPARQLTemplateBasedLearner3(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); + +// oxfordEndpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); +// oxfordLearner = new SPARQLTemplateBasedLearner3(createOxfordKnowledgebase(oxfordCache)); + 
} + + private void readQueries(File file) + { + logger.info("Reading file containing queries and answers..."); + try { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilder db = dbf.newDocumentBuilder(); + Document doc = db.parse(file); + doc.getDocumentElement().normalize(); + NodeList questionNodes = doc.getElementsByTagName("question"); + int id; + String question; + String query; + Set<String> answers; + + for(int i = 0; i < questionNodes.getLength(); i++){ + Element questionNode = (Element) questionNodes.item(i); + //read question ID + id = Integer.valueOf(questionNode.getAttribute("id")); + //Read question + question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); + //Read SPARQL query + query = ((Element)questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0).getNodeValue().trim(); +// //Read answers +// answers = new HashSet<String>(); +// NodeList aswersNodes = questionNode.getElementsByTagName("answer"); +// for(int j = 0; j < aswersNodes.getLength(); j++){ +// Element answerNode = (Element) aswersNodes.item(j); +// answers.add(((Element)answerNode.getElementsByTagName("uri").item(0)).getChildNodes().item(0).getNodeValue().trim()); +// } + + id2Question.put(id, question); + id2Query.put(id, query); +// question2Answers.put(question, answers); + + } + } catch (DOMException e) { + e.printStackTrace(); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + } catch (SAXException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } +// StringBuilder sb = new StringBuilder(); +// for(Entry<Integer, String> e : id2Question.entrySet()){ +// sb.append(e.getKey()+ ": " + extractSentence(e.getValue()) + "\n"); +// } +// try { +// BufferedWriter out = new BufferedWriter(new FileWriter("questions.txt")); +// out.write(sb.toString()); +// out.close(); +// } +// catch (IOException e) +// { +// 
System.out.println("Exception "); +// +// } + logger.info("Done."); + } + + @Test public void testDBpedia() throws NoTemplateFoundException, ComponentInitException + { + // get question and answer from file + readQueries(new File(getClass().getClassLoader().getResource("/tbsl/evaluation/qald2-dbpedia-train.xml").getPath())); + dbpediaLiveLearner.init(); + + dbpediaLiveLearner.setQuestion("houses with more than 2 bedrooms"); + dbpediaLiveLearner.learnSPARQLQueries(); + String learnedQuery = oxfordLearner.getBestSPARQLQuery(); + + //fail("Not yet implemented"); + } + + +// @Test public void test() throws NoTemplateFoundException, ComponentInitException +// { +// // get question and answer from file +// +// oxfordLearner.init(); +// oxfordLearner.setQuestion("houses with more than 2 bedrooms"); +// oxfordLearner.learnSPARQLQueries(); +// String learnedQuery = oxfordLearner.getBestSPARQLQuery(); +// +// //fail("Not yet implemented"); +// } + +} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-07-25 14:46:22
|
Revision: 3805 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3805&view=rev Author: kirdie Date: 2012-07-25 14:46:12 +0000 (Wed, 25 Jul 2012) Log Message: ----------- updating old reference test data complete. reading reference data complete. generating test data from sparql endpoints nearly complete. still to do: save first run of test data and compare further test runs with it, letting the test pass if all correctly learned queries from the first run still work. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-25 10:33:43 UTC (rev 3804) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-25 14:46:12 UTC (rev 3805) @@ -95,6 +95,7 @@ public class SPARQLTemplateBasedLearner3 implements SparqlQueryLearningAlgorithm{ + // TODO: is it possible to use this learner concurrently? and if not would it be easy to implement it or at least a copy constructor? 
enum Mode{ BEST_QUERY, BEST_NON_EMPTY_QUERY Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-25 10:33:43 UTC (rev 3804) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-25 14:46:12 UTC (rev 3805) @@ -1,21 +1,35 @@ package org.dllearner.algorithm.tbsl.learning; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.Collections; import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import org.apache.log4j.FileAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; +import org.apache.log4j.SimpleLayout; import org.dllearner.algorithm.tbsl.util.Knowledgebase; import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; @@ -37,18 +51,169 @@ import org.xml.sax.SAXException; import com.hp.hpl.jena.query.QuerySolution; 
import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -/** @author konrad * */ +/** Tests TBSL against the qald2 benchmark test data with the DBpedia endpoint. + * The qald2 endpoint is not used because it may not always be available. + * To speed up the process at first the test file is read and an updated copy of it is saved that + * excludes the questions where the reference query does not return a nonempty list of resources. + * This could be questions which return literals, ask queries, queries which have no results in the DBpedia endpoint + * and queries that cause errors. This updated test file contains the reference answers as well and is only created once. + * Because there are multiple queries that are not all valid at first, further test runs are compared against the first run. + * The updated test data and the test runs are saved in the cache folder in the same format as the original test data + * (an xml with the tags question, query and answer). + * A test fails if it generates queries that fail for questions which worked in the first test run. + * Because the logging in the dl-learner is so verbose (TODO: rewrite all prints to logging statements), the + * logging output is also written to the file log/#classname.
+ * @author Konrad Höffner + * **/ public class SPARQLTemplateBasedLearner3Test { + @Test public void testDBpedia() throws ParserConfigurationException, SAXException, IOException, TransformerException + {test(new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()),"http://live.dbpedia.org/sparql");} + //@Test public void testOxford() {test(new File(""),"");} + + public void test(File file, String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException + { + String dir = "cache/"+getClass().getSimpleName()+"/"; + new File(dir).mkdirs(); + File updatedFile=new File(dir+"updated_"+file.getName()); + if(!updatedFile.exists()) {generateUpdatedFile(file,updatedFile,endpoint);} + + QueryTestData savedTestData = readQueries(updatedFile); + QueryTestData newTestData = generateQueries(updatedFile); + Diff QueryTestDataDiff = diffTestQueries(savedTestData,newTestData); + } + + /** + * @param savedTestData + * @param newTestData + * @return + */ + private Diff diffTestQueries(QueryTestData savedTestData, QueryTestData newTestData) + { + // TODO Auto-generated method stub + return null; + } + + private class Diff + { + + } + + /** + * @param updatedFile + * @return + */ + private QueryTestData generateQueries(File updatedFile) + { + // TODO Auto-generated method stub + return null; + } + + /** + * @param file + * @param updatedFile + * @throws ParserConfigurationException + * @throws IOException + * @throws SAXException + * @throws TransformerException + */ + private void generateUpdatedFile(File originalFile, File updatedFile,String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException + { + logger.info(String.format("Updating question file \"%s\" by removing questions without nonempty resource list answer and adding answers.\n" + + " Saving the result to file \"%s\"",originalFile.getPath(),updatedFile.getPath())); + DocumentBuilderFactory dbf = 
DocumentBuilderFactory.newInstance(); + + DocumentBuilder db = dbf.newDocumentBuilder(); + Document doc = db.parse(originalFile); + + doc.getDocumentElement().normalize(); + NodeList questionNodes = doc.getElementsByTagName("question"); + List<Element> questionElementsToDelete = new LinkedList<Element>(); + int id; + String question; + String query; + // Set<String> answers; + + for(int i = 0; i < questionNodes.getLength(); i++) + { + Element questionNode = (Element) questionNodes.item(i); + //keep the id to aid comparison between original and updated files + id = Integer.valueOf(questionNode.getAttribute("id")); + //Read question + + question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); + //Read SPARQL query + query = ((Element)questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0).getNodeValue().trim(); + // //Read answers + // answers = new HashSet<String>(); + // NodeList aswersNodes = questionNode.getElementsByTagName("answer"); + // for(int j = 0; j < aswersNodes.getLength(); j++){ + // Element answerNode = (Element) aswersNodes.item(j); + // answers.add(((Element)answerNode.getElementsByTagName("uri").item(0)).getChildNodes().item(0).getNodeValue().trim()); + // } + + if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 
1,2,5,7) + { + Set<String> uris = getUris(endpoint, query); + if(!uris.isEmpty()) + { + // remove reference answers of the benchmark because they are obtained from an other endpoint + Element existingAnswersElement = (Element)questionNode.getElementsByTagName("answers").item(0); // there is at most one "answers"-element + if(existingAnswersElement!=null) {questionNode.removeChild(existingAnswersElement);} + + Element answersElement = doc.createElement("answers"); + questionNode.appendChild(answersElement); + for(String uri:uris) + { + Element answerElement = doc.createElement("answer"); + answerElement.setTextContent(uri); + answersElement.appendChild(answerElement); + } + System.out.print('.'); + continue; + } + } + // no answers gotten, mark for deletion + questionElementsToDelete.add(questionNode); + System.out.print('x'); + } + for(Element element: questionElementsToDelete) {doc.getDocumentElement().removeChild(element);} + + TransformerFactory tFactory = + TransformerFactory.newInstance(); + Transformer transformer = tFactory.newTransformer(); + + DOMSource source = new DOMSource(doc); + StreamResult result = new StreamResult(new FileOutputStream(updatedFile)); + transformer.transform(source, result); + +// catch (DOMException e) { +// e.printStackTrace(); +// } catch (ParserConfigurationException e) { +// e.printStackTrace(); +// } catch (SAXException e) { +// e.printStackTrace(); +// } catch (IOException e) { +// e.printStackTrace(); +// } + } + + + int correctMatches = 0; + int numberOfNoTemplateFoundExceptions = 0; + int numberOfOtherExceptions = 0; + // int successfullTestThreadRuns = 0; + /** */ private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; private static Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); - private SPARQLTemplateBasedLearner3 oxfordLearner; - private SPARQLTemplateBasedLearner3 dbpediaLiveLearner; + // private SPARQLTemplateBasedLearner3 oxfordLearner; + // private 
SPARQLTemplateBasedLearner3 dbpediaLiveLearner; private ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); private ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); @@ -96,18 +261,25 @@ return kb; } - @Before - public void setup() throws MalformedURLException - { - logger.setLevel(Level.ALL); - // oxfordEndpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); - // oxfordLearner = new SPARQLTemplateBasedLearner3(createOxfordKnowledgebase(oxfordCache)); - } + @Before + public void setup() throws IOException + { + logger.setLevel(Level.ALL); // TODO: remove when finishing implementation of this class + logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); + // oxfordEndpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + // oxfordLearner = new SPARQLTemplateBasedLearner3(createOxfordKnowledgebase(oxfordCache)); + } - private void readQueries(final File file,final SortedMap<Integer, String> id2Question,final SortedMap<Integer, String> id2Query) + private class QueryTestData { - id2Question.clear(); - id2Query.clear(); + public SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); + public SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); + public SortedMap<Integer, Set<String>> id2Answers = new TreeMap<Integer, Set<String>>(); + } + + private QueryTestData readQueries(final File file) + { + QueryTestData testData = new QueryTestData(); logger.info("Reading file containing queries and answers..."); try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); @@ -116,11 +288,12 @@ doc.getDocumentElement().normalize(); NodeList questionNodes = doc.getElementsByTagName("question"); int id; - String question; - String 
query; - Set<String> answers; - for(int i = 0; i < questionNodes.getLength(); i++){ + for(int i = 0; i < questionNodes.getLength(); i++) + { + String question; + String query; + Set<String> answers = new HashSet<String>(); Element questionNode = (Element) questionNodes.item(i); //read question ID id = Integer.valueOf(questionNode.getAttribute("id")); @@ -138,8 +311,19 @@ if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 1,2,5,7) { - id2Question.put(id, question); - id2Query.put(id, query); + testData.id2Question.put(id, question); + testData.id2Query.put(id, query); + Element answersElement = (Element) questionNode.getElementsByTagName("answers").item(0); + if(answersElement!=null) + { + NodeList answerElements = answersElement.getElementsByTagName("answer"); + for(int j=0; j<answerElements.getLength();j++) + { + String answer = ((Element)answerElements.item(j)).getTextContent(); + answers.add(answer); + } + testData.id2Answers.put(id, answers); + } } // question2Answers.put(question, answers); @@ -168,83 +352,180 @@ // // } logger.info("Done."); + return testData; } - private Set<String> getURIs(String endpoint, String query) + private Set<String> getUris(String endpoint, String query) { + if(!query.contains("SELECT")&&!query.contains("select")) {return Collections.<String>emptySet();} // abort when not a select query Set<String> uris = new HashSet<String>(); QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query); ResultSet rs = qe.execSelect(); - while(rs.hasNext()) - { - QuerySolution qs = rs.nextSolution(); - String uri = qs.getResource("?uri").getURI(); - uris.add(uri); - } + String variable = "?uri"; + resultsetloop: + while(rs.hasNext()) + { + QuerySolution qs = rs.nextSolution(); + RDFNode node = qs.get(variable); + if(node!=null&&node.isResource()) + { + String uri=node.asResource().getURI(); + uris.add(uri); + } + else // there is no variable "uri" + { + // try to guess 
the correct variable by using the first one which is assigned to a resource + for(Iterator<String> it = qs.varNames();it.hasNext();) + { + String varName = it.next(); + RDFNode node2 = qs.get(varName); + if(node2.isResource()) + { + variable = "?"+varName; + String uri=node2.asResource().getURI(); + uris.add(uri); + continue resultsetloop; + } + } + return Collections.<String>emptySet(); // we didn't a resource for the first query solution - give up and don't look in the others + } + } return uris; } - @Test public void testDBpedia() throws NoTemplateFoundException, ComponentInitException, MalformedURLException + private class TestQueryThread implements Runnable { - SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); - SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); - SortedMap<Integer, Object> id2Answer = new TreeMap<Integer, Object>(); + private String question; + private String referenceQuery; + public TestQueryThread(String question, String referenceQuery) { - String s = "tbsl/evaluation/qald2-dbpedia-train.xml"; - URL url = getClass().getClassLoader().getResource(s); - assertFalse("resource not found: "+s,url==null); - readQueries(new File(url.getPath()),id2Question,id2Query); + this.question=question; + this.referenceQuery=referenceQuery; } - assertTrue("no questions loaded",id2Question.size()>0); - logger.info(id2Question.size()+" questions loaded."); - assertTrue(String.format("no number of questions (%n) != number of queries (%n).",id2Question.size(),id2Query.size()), - id2Question.size()==id2Query.size()); + // String referenceQuery = id2Query.get(i); + // String question = id2Question.get(i); + @Override public void run() + { - // get question and answer from file - dbpediaLiveEndpoint = new SparqlEndpoint(new URL(DBPEDIA_LIVE_ENDPOINT_URL_STRING), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()); - - dbpediaLiveLearner = new 
SPARQLTemplateBasedLearner3(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); - dbpediaLiveLearner.init(); - - // TODO: use thread pools - for(int i: id2Query.keySet()) - { - if(i>3) break; // TODO: remove - String question = id2Question.get(i); logger.trace("question: "+question); - String referenceQuery = id2Query.get(i); + // TODO: check for query isomorphism and leave out result comparison if possible // TODO: only load the reference answers once and permanently cache them somehow (file, ehcache, serialization, ...) // get the answers for the gold standard query logger.trace("reference query: "+referenceQuery); - - Set<String> referenceURIs = getURIs(DBPEDIA_LIVE_ENDPOINT_URL_STRING,referenceQuery); - - // learn query - dbpediaLiveLearner.setQuestion(question); - dbpediaLiveLearner.learnSPARQLQueries(); - String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); - - logger.trace(learnedQuery); - - Set<String> learnedURIs = getURIs(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); - - logger.trace(referenceURIs); - logger.trace(learnedURIs); - assertTrue(referenceURIs.equals(learnedURIs)); + + try + { + Set<String> referenceURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,referenceQuery); + + // learn query + SPARQLTemplateBasedLearner3 dbpediaLiveLearner = new SPARQLTemplateBasedLearner3(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); + dbpediaLiveLearner.init(); + dbpediaLiveLearner.setQuestion(question); + dbpediaLiveLearner.learnSPARQLQueries(); + String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); + + logger.trace(learnedQuery); + + Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); + + logger.trace("referenced uris: "+referenceURIs); + logger.trace("learned uris: "+learnedURIs); + + boolean correctMatch = referenceURIs.equals(learnedURIs); + logger.trace(correctMatch?"matches":"doesn't match"); + if(correctMatch) {synchronized(this) {correctMatches++;}} + } + catch(NoTemplateFoundException e) + { + 
synchronized(this) {numberOfNoTemplateFoundExceptions++;} + logger.warn(String.format("no template found for question \"%s\"",question)); + } + catch(Exception e) + { + synchronized(this) {numberOfOtherExceptions++;} + logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); + e.printStackTrace(); + // maybe the exception has corrupted the learner? better create a new one + // + } // get the answers for the learned query // compare gold standard query and learned query answers } - -// dbpediaLiveLearner.setQuestion("houses with more than 2 bedrooms"); -// dbpediaLiveLearner.learnSPARQLQueries(); -// String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); -// logger.trace(learnedQuery); - //fail("Not yet implemented"); } + private void updateFile(File originalFile, File updatedFile, String endpoint) + { + + + } + +// private void test(File file) throws MalformedURLException, InterruptedException +// { +// SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); +// SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); +// SortedMap<Integer, Set<String>> id2Answers = new TreeMap<Integer, Set<String>>(); +// +// { +// // URL url = getClass().getClassLoader().getResource(s); +// // assertFalse("resource not found: "+s,url==null); +// // readQueries(new File(url.getPath()),id2Question,id2Query); +// readQueries(file); +// } +// assertTrue("no questions loaded",id2Question.size()>0); +// logger.info(id2Question.size()+" questions loaded."); +// assertTrue(String.format("number of questions (%d) != number of queries (%d).",id2Question.size(),id2Query.size()), +// id2Question.size()==id2Query.size()); +// +// // get question and answer from file +// dbpediaLiveEndpoint = new SparqlEndpoint(new URL(DBPEDIA_LIVE_ENDPOINT_URL_STRING), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()); +// +// // dbpediaLiveLearner = new 
SPARQLTemplateBasedLearner3(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); +// // dbpediaLiveLearner.init(); +// +// // TODO: use thread pools +// ExecutorService service = Executors.newFixedThreadPool(10); +// for(int i: id2Query.keySet()) +// { +// Runnable r = new TestQueryThread(id2Question.get(i),id2Query.get(i)); +// service.execute(r); +// } +// boolean timeout =!service.awaitTermination(600, TimeUnit.SECONDS); +// +// logger.info(timeout?"timeout":"finished all threads"); +// if(numberOfNoTemplateFoundExceptions>0) {logger.warn(numberOfNoTemplateFoundExceptions+" NoTemplateFoundExceptions");} +// if(numberOfOtherExceptions>0) {logger.error(numberOfOtherExceptions+" other exceptions");} +// assertTrue(String.format("only %d/%d correct answers",correctMatches,id2Query.size()),correctMatches==id2Query.size()); +// +// // dbpediaLiveLearner.setQuestion("houses with more than 2 bedrooms"); +// // dbpediaLiveLearner.learnSPARQLQueries(); +// // String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); +// // logger.trace(learnedQuery); +// //fail("Not yet implemented"); +// } + +// @Test public void testDBpediaX() throws NoTemplateFoundException, ComponentInitException, MalformedURLException, InterruptedException +// { +// // original file - qald benchmark xml +// // updated file - delete questions giving no nonempty list of resources (literals, ask query, no result or error) +// final String originalDirName = "tbsl/evaluation"; +// final String updatedDirName = "cache"; +// final File processedDir = new File(updatedDirName); +// +// if(!processedDir.exists()) {processedDir.mkdirs();} +// +// final String originalFilename = "qald2-dbpedia-train.xml"; +// final String updatedFilename = "processed_"+originalFilename; +// final File originalFile = new File(originalDirName+'/'+originalFilename); +// final File updatedFile = new File(updatedDirName+'/'+updatedFilename); +// +// if(!updatedFile.exists()) 
{updateFile(originalFile,updatedFile,DBPEDIA_LIVE_ENDPOINT_URL_STRING);} +// +// test(updatedFile); +// +// } // @Test public void test() throws NoTemplateFoundException, ComponentInitException // { // // get question and answer from file @@ -256,5 +537,4 @@ // // //fail("Not yet implemented"); // } - } \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-07-26 11:01:32
|
Revision: 3806 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3806&view=rev Author: kirdie Date: 2012-07-26 11:01:22 +0000 (Thu, 26 Jul 2012) Log Message: ----------- more work on junit test. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java trunk/components-ext/src/main/resources/log4j.properties trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-25 14:46:12 UTC (rev 3805) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-26 11:01:22 UTC (rev 3806) @@ -691,8 +691,7 @@ allTypes.add(type); if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - System.err.println("DROPPING: \n" + q.toString()); + drop = true; } else { } @@ -1258,7 +1257,7 @@ SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); learner.init(); - String question = "Give me all books written by Dan Brown"; + String question = "What is the highest mountain?"; learner.setQuestion(question); learner.learnSPARQLQueries(); Modified: 
trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-25 14:46:12 UTC (rev 3805) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3.java 2012-07-26 11:01:22 UTC (rev 3806) @@ -1023,10 +1023,7 @@ learner.learnSPARQLQueries(); System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); - System.out.println(learner.getLearnedPosition()); - + System.out.println(learner.getLearnedPosition()); } - - -} +} \ No newline at end of file Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-07-25 14:46:12 UTC (rev 3805) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Parser.java 2012-07-26 11:01:22 UTC (rev 3806) @@ -79,11 +79,11 @@ if (SHOW_GRAMMAR) { - logger.trace(parseGrammar); + logger.debug(parseGrammar); } if (SHOW_LEXICAL_COVERAGE) { - logger.trace("# OF TREES FOUND: " + parseGrammar.size()); - logger.trace("# OF INPUT TOKENS: " + n); + logger.debug("# OF TREES FOUND: " + parseGrammar.size()); + logger.debug("# OF INPUT TOKENS: " + n); } List<Pair<TreeNode, Short>> initTrees = parseGrammar.getInitTrees(); @@ -94,7 +94,7 @@ internalParse(parseGrammar.getDPInitTrees(), n); } - if (VERBOSE) logger.trace("Constructed " + derivationTrees.size() + " derivation trees.\n"); + if (VERBOSE) logger.debug("Constructed " + derivationTrees.size() + " derivation trees.\n"); return derivationTrees; } @@ -130,11 +130,11 @@ if 
(SHOW_GRAMMAR) { - logger.trace(parseGrammar); + logger.debug(parseGrammar); } if (SHOW_LEXICAL_COVERAGE) { - logger.trace("# OF TREES FOUND: " + parseGrammar.size()); - logger.trace("# OF INPUT TOKENS: " + n); + logger.debug("# OF TREES FOUND: " + parseGrammar.size()); + logger.debug("# OF INPUT TOKENS: " + n); } List<Pair<TreeNode, Short>> initTrees = parseGrammar.getInitTrees(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-07-25 14:46:12 UTC (rev 3805) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/Templator.java 2012-07-26 11:01:22 UTC (rev 3806) @@ -382,11 +382,10 @@ try { Template temp = d2s.convert(drs,slots); + if (temp == null) {continue;} temp = temp.checkandrefine(); - if (temp == null) { - continue; - } + if (USE_WORDNET) { // find WordNet synonyms List<String> newwords; String word; Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-07-25 14:46:12 UTC (rev 3805) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-07-26 11:01:22 UTC (rev 3806) @@ -116,8 +116,7 @@ } else { solrString += queryString; } - } - System.out.println(solrString); + } SolrQuery query = new SolrQuery(solrString); query.setRows(limit); query.setStart(offset); Modified: trunk/components-ext/src/main/resources/log4j.properties =================================================================== --- trunk/components-ext/src/main/resources/log4j.properties 2012-07-25 14:46:12 UTC (rev 3805) +++ trunk/components-ext/src/main/resources/log4j.properties 2012-07-26 11:01:22 UTC (rev 3806) @@ -40,8 +40,8 @@ ####TBSL 
log4j.category.org.dllearner.algorithm.tbsl=INFO -log4j.category.org.dllearner.algorithm.tbsl.ltag.parser=TRACE -log4j.category.org.dllearner.algorithm.tbsl.templator=TRACE +log4j.category.org.dllearner.algorithm.tbsl.ltag.parser=WARN +log4j.category.org.dllearner.algorithm.tbsl.templator=WARN ####SOLR log4j.category.org.apache.solr.level = OFF Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-25 14:46:12 UTC (rev 3805) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-26 11:01:22 UTC (rev 3806) @@ -1,6 +1,7 @@ package org.dllearner.algorithm.tbsl.learning; import static org.junit.Assert.assertTrue; +import org.ini4j.Options; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -26,10 +27,16 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import opennlp.tools.postag.POSTagger; import org.apache.log4j.FileAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.SimpleLayout; +import org.dllearner.algorithm.tbsl.ltag.parser.Parser; +import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.WordNet; +import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Knowledgebase; import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; @@ -44,6 +51,7 @@ import org.dllearner.kb.sparql.SparqlQuery; import org.junit.Before; import org.junit.Test; +import org.openjena.atlas.logging.Log; import 
org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -69,12 +77,12 @@ * @author Konrad Höffner * **/ public class SPARQLTemplateBasedLearner3Test -{ - @Test public void testDBpedia() throws ParserConfigurationException, SAXException, IOException, TransformerException +{ + @Test public void testDBpedia() throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException {test(new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()),"http://live.dbpedia.org/sparql");} //@Test public void testOxford() {test(new File(""),"");} - public void test(File file, String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException + public void test(File file, String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { String dir = "cache/"+getClass().getSimpleName()+"/"; new File(dir).mkdirs(); @@ -82,34 +90,80 @@ if(!updatedFile.exists()) {generateUpdatedFile(file,updatedFile,endpoint);} QueryTestData savedTestData = readQueries(updatedFile); - QueryTestData newTestData = generateQueries(updatedFile); - Diff QueryTestDataDiff = diffTestQueries(savedTestData,newTestData); + QueryTestData newTestData = generateTestData(savedTestData.id2Question); + Diff QueryTestDataDiff = diffTestData(savedTestData,newTestData); } - + /** * @param savedTestData * @param newTestData * @return */ - private Diff diffTestQueries(QueryTestData savedTestData, QueryTestData newTestData) + private Diff diffTestData(QueryTestData d, QueryTestData e) { +// if(d.id2Question.size()!=e.id2Question.size()) + {logger.info("comparing test data D against E. 
number of questions: "+d.id2Question.size()+" vs "+e.id2Question.size());} + + Set<Integer> dMinusE = new HashSet<Integer>(d.id2Question.keySet()); + dMinusE.removeAll(e.id2Question.keySet()); + if(!dMinusE.isEmpty()) logger.info("questions D/E: "+dMinusE+" ("+dMinusE.size()+" elements)"); + + Set<Integer> eMinusD = new HashSet<Integer>(e.id2Question.keySet()); + eMinusD.removeAll(d.id2Question.keySet()); + if(!eMinusD.isEmpty()) logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); + + Set<Integer> intersection = new HashSet<Integer>(d.id2Question.keySet()); + intersection.retainAll(e.id2Question.keySet()); + + if(!eMinusD.isEmpty()) logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); + + // TODO Auto-generated method stub return null; } private class Diff { - + } - - /** - * @param updatedFile - * @return + + /** + * @return the test data containing those of the given questions for which queries were found and the results of the queries */ - private QueryTestData generateQueries(File updatedFile) + private QueryTestData generateTestData(SortedMap<Integer, String> id2Question) throws MalformedURLException, ComponentInitException { + QueryTestData testData = new QueryTestData(); + // -- only create the learner parameters once to save time -- + PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); + WordNet wordnet = new WordNet(); + Options options = new Options(); + // ---------------------------------------------------------- + int successes = 0; + for(int i:id2Question.keySet()) + { + String question = id2Question.get(i); + logger.debug("generating query for question \""+question+"\", id "+i); + long start = System.currentTimeMillis(); + SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(dbpediaLiveKnowledgebase,posTagger,wordnet,options); + + dbpediaLiveLearner.init(); + dbpediaLiveLearner.setQuestion(question); + + try{dbpediaLiveLearner.learnSPARQLQueries();} + 
catch(NoTemplateFoundException e) {continue;} + catch(Exception e) {logger.error("Error processing question "+question,e);continue;} + successes++; + testData.id2Question.put(i, question); + String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); + testData.id2Query.put(i, learnedQuery); + + long end = System.currentTimeMillis(); + logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); + + } + logger.info(String.format("Successfully learned queries for %d of %d questions.",successes,id2Question.size())); // TODO Auto-generated method stub - return null; + return testData; } /** @@ -124,81 +178,81 @@ { logger.info(String.format("Updating question file \"%s\" by removing questions without nonempty resource list answer and adding answers.\n" + " Saving the result to file \"%s\"",originalFile.getPath(),updatedFile.getPath())); - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - - DocumentBuilder db = dbf.newDocumentBuilder(); - Document doc = db.parse(originalFile); + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - doc.getDocumentElement().normalize(); - NodeList questionNodes = doc.getElementsByTagName("question"); - List<Element> questionElementsToDelete = new LinkedList<Element>(); - int id; - String question; - String query; - // Set<String> answers; + DocumentBuilder db = dbf.newDocumentBuilder(); + Document doc = db.parse(originalFile); - for(int i = 0; i < questionNodes.getLength(); i++) - { - Element questionNode = (Element) questionNodes.item(i); - //keep the id to aid comparison between original and updated files - id = Integer.valueOf(questionNode.getAttribute("id")); - //Read question + doc.getDocumentElement().normalize(); + NodeList questionNodes = doc.getElementsByTagName("question"); + List<Element> questionElementsToDelete = new LinkedList<Element>(); + int id; + String question; + String query; + // Set<String> answers; - question = 
((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); - //Read SPARQL query - query = ((Element)questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0).getNodeValue().trim(); - // //Read answers - // answers = new HashSet<String>(); - // NodeList aswersNodes = questionNode.getElementsByTagName("answer"); - // for(int j = 0; j < aswersNodes.getLength(); j++){ - // Element answerNode = (Element) aswersNodes.item(j); - // answers.add(((Element)answerNode.getElementsByTagName("uri").item(0)).getChildNodes().item(0).getNodeValue().trim()); - // } + for(int i = 0; i < questionNodes.getLength(); i++) + { + Element questionNode = (Element) questionNodes.item(i); + //keep the id to aid comparison between original and updated files + id = Integer.valueOf(questionNode.getAttribute("id")); + //Read question - if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 1,2,5,7) + question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); + //Read SPARQL query + query = ((Element)questionNode.getElementsByTagName("query").item(0)).getChildNodes().item(0).getNodeValue().trim(); + // //Read answers + // answers = new HashSet<String>(); + // NodeList aswersNodes = questionNode.getElementsByTagName("answer"); + // for(int j = 0; j < aswersNodes.getLength(); j++){ + // Element answerNode = (Element) aswersNodes.item(j); + // answers.add(((Element)answerNode.getElementsByTagName("uri").item(0)).getChildNodes().item(0).getNodeValue().trim()); + // } + + if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 
1,2,5,7) + { + Set<String> uris = getUris(endpoint, query); + if(!uris.isEmpty()) { - Set<String> uris = getUris(endpoint, query); - if(!uris.isEmpty()) - { - // remove reference answers of the benchmark because they are obtained from an other endpoint - Element existingAnswersElement = (Element)questionNode.getElementsByTagName("answers").item(0); // there is at most one "answers"-element - if(existingAnswersElement!=null) {questionNode.removeChild(existingAnswersElement);} + // remove reference answers of the benchmark because they are obtained from an other endpoint + Element existingAnswersElement = (Element)questionNode.getElementsByTagName("answers").item(0); // there is at most one "answers"-element + if(existingAnswersElement!=null) {questionNode.removeChild(existingAnswersElement);} - Element answersElement = doc.createElement("answers"); - questionNode.appendChild(answersElement); - for(String uri:uris) - { - Element answerElement = doc.createElement("answer"); - answerElement.setTextContent(uri); - answersElement.appendChild(answerElement); - } - System.out.print('.'); - continue; - } + Element answersElement = doc.createElement("answers"); + questionNode.appendChild(answersElement); + for(String uri:uris) + { + Element answerElement = doc.createElement("answer"); + answerElement.setTextContent(uri); + answersElement.appendChild(answerElement); + } + System.out.print('.'); + continue; } - // no answers gotten, mark for deletion - questionElementsToDelete.add(questionNode); - System.out.print('x'); } - for(Element element: questionElementsToDelete) {doc.getDocumentElement().removeChild(element);} + // no answers gotten, mark for deletion + questionElementsToDelete.add(questionNode); + System.out.print('x'); + } + for(Element element: questionElementsToDelete) {doc.getDocumentElement().removeChild(element);} - TransformerFactory tFactory = - TransformerFactory.newInstance(); - Transformer transformer = tFactory.newTransformer(); + TransformerFactory 
tFactory = + TransformerFactory.newInstance(); + Transformer transformer = tFactory.newTransformer(); - DOMSource source = new DOMSource(doc); - StreamResult result = new StreamResult(new FileOutputStream(updatedFile)); - transformer.transform(source, result); - -// catch (DOMException e) { -// e.printStackTrace(); -// } catch (ParserConfigurationException e) { -// e.printStackTrace(); -// } catch (SAXException e) { -// e.printStackTrace(); -// } catch (IOException e) { -// e.printStackTrace(); -// } + DOMSource source = new DOMSource(doc); + StreamResult result = new StreamResult(new FileOutputStream(updatedFile)); + transformer.transform(source, result); + + // catch (DOMException e) { + // e.printStackTrace(); + // } catch (ParserConfigurationException e) { + // e.printStackTrace(); + // } catch (SAXException e) { + // e.printStackTrace(); + // } catch (IOException e) { + // e.printStackTrace(); + // } } @@ -212,37 +266,53 @@ private static Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); - // private SPARQLTemplateBasedLearner3 oxfordLearner; - // private SPARQLTemplateBasedLearner3 dbpediaLiveLearner; + // private SPARQLTemplateBasedLearner2 oxfordLearner; + // private SPARQLTemplateBasedLearner2 dbpediaLiveLearner; private ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); private ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); - SparqlEndpoint dbpediaLiveEndpoint; - SparqlEndpoint oxfordEndpoint; + private Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); + + static SparqlEndpoint dbpediaLiveEndpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); + //static SparqlEndpoint oxfordEndpoint; private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} - private ResultSet executeOxfordSelect(String query){return 
SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} +// private ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} - private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) throws MalformedURLException - { - SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); +// @Test public void benchmarkCreateOxfordKnowledgeBase() +// { +// long start = System.currentTimeMillis(); +// for(int i=0;i<1000;i++) +// { +// createOxfordKnowledgebase(oxfordCache); +// } +// long end = System.currentTimeMillis(); +// long diff = end-start; +// System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); +// } - SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); - SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); - SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); - MappingBasedIndex mappingIndex= new MappingBasedIndex( - this.getClass().getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), - this.getClass().getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), - this.getClass().getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), - this.getClass().getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() - ); +// private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) +// { +// URL url; +// try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} +// SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); +// +// SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); +// 
SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); +// SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); +// MappingBasedIndex mappingIndex= new MappingBasedIndex( +// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), +// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), +// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), +// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() +// ); +// +// Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); +// return kb; +// } - Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); - return kb; - } - - private Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) throws MalformedURLException + private Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) { SOLRIndex resourcesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_resources"); resourcesIndex.setPrimarySearchField("label"); @@ -251,10 +321,10 @@ Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); MappingBasedIndex mappingIndex= new MappingBasedIndex( - this.getClass().getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), - this.getClass().getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(), - this.getClass().getClassLoader().getResource("test/dbpedia_dataproperty_mappings.txt").getPath(), - this.getClass().getClassLoader().getResource("test/dbpedia_objectproperty_mappings.txt").getPath() + 
SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_class_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_resource_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_dataproperty_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("test/dbpedia_objectproperty_mappings.txt").getPath() ); Knowledgebase kb = new Knowledgebase(dbpediaLiveEndpoint, "DBpedia Live", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); @@ -264,10 +334,14 @@ @Before public void setup() throws IOException { + Logger.getRootLogger().setLevel(Level.WARN); + Logger.getLogger(Templator.class).setLevel(Level.WARN); + Logger.getLogger(Parser.class).setLevel(Level.WARN); + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); logger.setLevel(Level.ALL); // TODO: remove when finishing implementation of this class logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); // oxfordEndpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); - // oxfordLearner = new SPARQLTemplateBasedLearner3(createOxfordKnowledgebase(oxfordCache)); + // oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache)); } private class QueryTestData @@ -276,7 +350,7 @@ public SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); public SortedMap<Integer, Set<String>> id2Answers = new TreeMap<Integer, Set<String>>(); } - + private QueryTestData readQueries(final File file) { QueryTestData testData = new QueryTestData(); @@ -420,7 +494,7 @@ Set<String> referenceURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,referenceQuery); // learn query - SPARQLTemplateBasedLearner3 dbpediaLiveLearner = new 
SPARQLTemplateBasedLearner3(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); + SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); dbpediaLiveLearner.init(); dbpediaLiveLearner.setQuestion(question); dbpediaLiveLearner.learnSPARQLQueries(); @@ -462,70 +536,70 @@ } -// private void test(File file) throws MalformedURLException, InterruptedException -// { -// SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); -// SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); -// SortedMap<Integer, Set<String>> id2Answers = new TreeMap<Integer, Set<String>>(); -// -// { -// // URL url = getClass().getClassLoader().getResource(s); -// // assertFalse("resource not found: "+s,url==null); -// // readQueries(new File(url.getPath()),id2Question,id2Query); -// readQueries(file); -// } -// assertTrue("no questions loaded",id2Question.size()>0); -// logger.info(id2Question.size()+" questions loaded."); -// assertTrue(String.format("number of questions (%d) != number of queries (%d).",id2Question.size(),id2Query.size()), -// id2Question.size()==id2Query.size()); -// -// // get question and answer from file -// dbpediaLiveEndpoint = new SparqlEndpoint(new URL(DBPEDIA_LIVE_ENDPOINT_URL_STRING), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()); -// -// // dbpediaLiveLearner = new SPARQLTemplateBasedLearner3(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); -// // dbpediaLiveLearner.init(); -// -// // TODO: use thread pools -// ExecutorService service = Executors.newFixedThreadPool(10); -// for(int i: id2Query.keySet()) -// { -// Runnable r = new TestQueryThread(id2Question.get(i),id2Query.get(i)); -// service.execute(r); -// } -// boolean timeout =!service.awaitTermination(600, TimeUnit.SECONDS); -// -// logger.info(timeout?"timeout":"finished all threads"); -// if(numberOfNoTemplateFoundExceptions>0) 
{logger.warn(numberOfNoTemplateFoundExceptions+" NoTemplateFoundExceptions");} -// if(numberOfOtherExceptions>0) {logger.error(numberOfOtherExceptions+" other exceptions");} -// assertTrue(String.format("only %d/%d correct answers",correctMatches,id2Query.size()),correctMatches==id2Query.size()); -// -// // dbpediaLiveLearner.setQuestion("houses with more than 2 bedrooms"); -// // dbpediaLiveLearner.learnSPARQLQueries(); -// // String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); -// // logger.trace(learnedQuery); -// //fail("Not yet implemented"); -// } + // private void test(File file) throws MalformedURLException, InterruptedException + // { + // SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); + // SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); + // SortedMap<Integer, Set<String>> id2Answers = new TreeMap<Integer, Set<String>>(); + // + // { + // // URL url = getClass().getClassLoader().getResource(s); + // // assertFalse("resource not found: "+s,url==null); + // // readQueries(new File(url.getPath()),id2Question,id2Query); + // readQueries(file); + // } + // assertTrue("no questions loaded",id2Question.size()>0); + // logger.info(id2Question.size()+" questions loaded."); + // assertTrue(String.format("number of questions (%d) != number of queries (%d).",id2Question.size(),id2Query.size()), + // id2Question.size()==id2Query.size()); + // + // // get question and answer from file + // dbpediaLiveEndpoint = new SparqlEndpoint(new URL(DBPEDIA_LIVE_ENDPOINT_URL_STRING), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()); + // + // // dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); + // // dbpediaLiveLearner.init(); + // + // // TODO: use thread pools + // ExecutorService service = Executors.newFixedThreadPool(10); + // for(int i: id2Query.keySet()) + // { + // Runnable r = new 
TestQueryThread(id2Question.get(i),id2Query.get(i)); + // service.execute(r); + // } + // boolean timeout =!service.awaitTermination(600, TimeUnit.SECONDS); + // + // logger.info(timeout?"timeout":"finished all threads"); + // if(numberOfNoTemplateFoundExceptions>0) {logger.warn(numberOfNoTemplateFoundExceptions+" NoTemplateFoundExceptions");} + // if(numberOfOtherExceptions>0) {logger.error(numberOfOtherExceptions+" other exceptions");} + // assertTrue(String.format("only %d/%d correct answers",correctMatches,id2Query.size()),correctMatches==id2Query.size()); + // + // // dbpediaLiveLearner.setQuestion("houses with more than 2 bedrooms"); + // // dbpediaLiveLearner.learnSPARQLQueries(); + // // String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); + // // logger.trace(learnedQuery); + // //fail("Not yet implemented"); + // } -// @Test public void testDBpediaX() throws NoTemplateFoundException, ComponentInitException, MalformedURLException, InterruptedException -// { -// // original file - qald benchmark xml -// // updated file - delete questions giving no nonempty list of resources (literals, ask query, no result or error) -// final String originalDirName = "tbsl/evaluation"; -// final String updatedDirName = "cache"; -// final File processedDir = new File(updatedDirName); -// -// if(!processedDir.exists()) {processedDir.mkdirs();} -// -// final String originalFilename = "qald2-dbpedia-train.xml"; -// final String updatedFilename = "processed_"+originalFilename; -// final File originalFile = new File(originalDirName+'/'+originalFilename); -// final File updatedFile = new File(updatedDirName+'/'+updatedFilename); -// -// if(!updatedFile.exists()) {updateFile(originalFile,updatedFile,DBPEDIA_LIVE_ENDPOINT_URL_STRING);} -// -// test(updatedFile); -// -// } + // @Test public void testDBpediaX() throws NoTemplateFoundException, ComponentInitException, MalformedURLException, InterruptedException + // { + // // original file - qald benchmark xml + // // updated 
file - delete questions giving no nonempty list of resources (literals, ask query, no result or error) + // final String originalDirName = "tbsl/evaluation"; + // final String updatedDirName = "cache"; + // final File processedDir = new File(updatedDirName); + // + // if(!processedDir.exists()) {processedDir.mkdirs();} + // + // final String originalFilename = "qald2-dbpedia-train.xml"; + // final String updatedFilename = "processed_"+originalFilename; + // final File originalFile = new File(originalDirName+'/'+originalFilename); + // final File updatedFile = new File(updatedDirName+'/'+updatedFilename); + // + // if(!updatedFile.exists()) {updateFile(originalFile,updatedFile,DBPEDIA_LIVE_ENDPOINT_URL_STRING);} + // + // test(updatedFile); + // + // } // @Test public void test() throws NoTemplateFoundException, ComponentInitException // { // // get question and answer from file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-07-26 14:21:26
|
Revision: 3807 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3807&view=rev Author: kirdie Date: 2012-07-26 14:21:14 +0000 (Thu, 26 Jul 2012) Log Message: ----------- test nearly finished. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/MainInterface.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/MainInterface.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/MainInterface.java 2012-07-26 11:01:22 UTC (rev 3806) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/exploration/exploration_main/MainInterface.java 2012-07-26 14:21:14 UTC (rev 3807) @@ -32,41 +32,41 @@ public class MainInterface { //private static int anzahlAbgeschickterQueries = 10; - + private static ArrayList<Template> global_template_list=new ArrayList<Template>(); private static BasicTemplator btemplator_global; private static SQLiteIndex myindex_global; private static WordNet wordnet_global; private static StanfordLemmatizer lemmatiser_global; private static String type_global=""; - + public static ArrayList<String> startQuestioning(String question,BasicTemplator btemplator,SQLiteIndex myindex, WordNet wordnet,StanfordLemmatizer lemmatiser) throws ClassNotFoundException, SQLException, IOException{ - + /* * true, if you have to push a button to get to the next module * false, goes through all */ boolean wait = false; //Setting.setThresholdSelect(0.5); - + if(Setting.isWaitModus())wait=true; - + TemplateBuilder templateObject = new TemplateBuilder(btemplator, myindex); ArrayList<Template> template_list = new 
ArrayList<Template>(); - + /* * Array List with the answers from the queries */ ArrayList<String> answers = new ArrayList<String>(); - - + + /* * generate Templates! */ template_list=templateObject.createTemplates(question); - + answers = singleSteps(myindex, wordnet, lemmatiser, wait, template_list); - + return answers; } @@ -85,17 +85,17 @@ private static ArrayList<String> singleSteps(SQLiteIndex myindex, WordNet wordnet, StanfordLemmatizer lemmatiser, boolean wait, ArrayList<Template> template_list) - throws IOException { - + throws IOException { + ArrayList<String> answers = new ArrayList<String>(); /* * generate Queries and test the first Time */ ArrayList<QueryPair> qp = new ArrayList<QueryPair>(); - + //generate QueryPair String Question=""; - + //TODO: parallel here? for(Template t : template_list){ Question=t.getQuestion(); @@ -113,7 +113,7 @@ if(!contain)qp.add(p); } } - + //sort QueryPairs qp=LinearSort.doSort(qp); qp=HeuristicSort.doHeuristicSort(qp, Question); @@ -124,7 +124,7 @@ for(QueryPair q : qp){ System.out.println(q.getQuery()+" rank:"+q.getRank()); } - + int anzahl=1; boolean go_on = true; for(QueryPair q : qp){ @@ -139,7 +139,7 @@ //if(qp.size()<3)go_on=true; System.out.println("Got Answer from Server with this Query: "+ q.getQuery()); if(Setting.isTagging()) write_ResourcePropertyInformation(q.getResource(),q.getPropertyName(),q.getProperty()); - + //printSingleQuery(q.getQuery(),Question); //go_on=true; boolean contains_uri=false; @@ -165,7 +165,7 @@ else answers.add(s); } } - + } } //if(checkAnswer(answer_tmp))answers.addAll(answer_tmp); @@ -173,26 +173,26 @@ } anzahl+=1; } - + System.out.println("\n Answer from Server: \n"); for(String answer:answers){ System.out.println(answer); } if(wait)DebugMode.waitForButton(); - - + + if(answers.isEmpty()&&Setting.getModuleStep()>=2){ - + answers.clear(); //Setting.setLevenstheinMin(0.65); //Setting.setAnzahlAbgeschickterQueries(10); answers.addAll(doStart(myindex, wordnet, lemmatiser, 
template_list,"LEVENSTHEIN","neu")); if(wait)DebugMode.waitForButton(); } - + if(answers.isEmpty()&&Setting.getModuleStep()>=3){ - + answers.clear(); //Setting.setAnzahlAbgeschickterQueries(10); answers.addAll(doStart(myindex, wordnet, lemmatiser, template_list,"WORDNET","neu")); @@ -201,31 +201,31 @@ if(answers.isEmpty()&&Setting.getModuleStep()>=4){ - + answers.clear(); //Setting.setAnzahlAbgeschickterQueries(10); //Setting.setThresholdSelect(0.2); answers.addAll(doStart(myindex, wordnet, lemmatiser, template_list,"RELATE","neu")); if(wait)DebugMode.waitForButton(); } - - + + if(answers.isEmpty()&&Setting.getModuleStep()>=5){ System.out.println("NO Answer from Server =>Start Query Manipulation"); answers.clear(); answers.addAll(stufe5(myindex,wordnet,lemmatiser,wait,template_list)); if(wait)DebugMode.waitForButton(); } - - - - - - + + + + + + /* * return answers! */ - + return answers; } @@ -234,13 +234,13 @@ - - - - + + + + private static ArrayList<String> doStart(SQLiteIndex myindex, WordNet wordnet, StanfordLemmatizer lemmatiser, ArrayList<Template> template_list, String type, String test) { ArrayList<String> answers = new ArrayList<String>(); @@ -248,9 +248,9 @@ boolean special=false; int anzahl; boolean go_on; - + System.out.println("No answer from direkt match, start "+type+"Modul"); - + /*ArrayList<Thread> thread_list = new ArrayList<Thread>(); ThreadGroup group = new ThreadGroup("QA-Threads"); int anzahl_thread=0; @@ -260,7 +260,7 @@ wordnet_global=wordnet; lemmatiser_global=lemmatiser; type_global=type; - + for(Template t : template_list){ final int anzahl_thread_new=anzahl_thread; @@ -270,11 +270,11 @@ { String blub=do_something(anzahl_thread_new); }; - + thread_list.add(t1); t1.start(); - - + + } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -286,14 +286,14 @@ e.printStackTrace(); } anzahl_thread+=1; - + } - */ - + */ + /* * NOw wait until all are finished */ - + /*for(int i =0; i<thread_list.size();i++){ try { 
thread_list.get(i).join(); @@ -302,42 +302,42 @@ e.printStackTrace(); } }*/ - - + + for(Template t : template_list){ try{ - - ArrayList<ArrayList<Hypothesis>> hypothesenSetList = new ArrayList<ArrayList<Hypothesis>>(); - - - for(ArrayList<Hypothesis> l_h : t.getHypothesen()){ - ArrayList<ArrayList<Hypothesis>> generated_hypothesis = new ArrayList<ArrayList<Hypothesis>>(); - generated_hypothesis= IterationModule.new_iteration(t.getElm(),l_h,t.getCondition(),type,myindex,wordnet,lemmatiser); - for(ArrayList<Hypothesis> h_t : generated_hypothesis){ - ArrayList<Hypothesis> new_hypothesen_set = new ArrayList<Hypothesis>(); - for(Hypothesis bla : h_t){ - new_hypothesen_set.add(bla); - } - hypothesenSetList.add(new_hypothesen_set); + + ArrayList<ArrayList<Hypothesis>> hypothesenSetList = new ArrayList<ArrayList<Hypothesis>>(); + + + for(ArrayList<Hypothesis> l_h : t.getHypothesen()){ + ArrayList<ArrayList<Hypothesis>> generated_hypothesis = new ArrayList<ArrayList<Hypothesis>>(); + generated_hypothesis= IterationModule.new_iteration(t.getElm(),l_h,t.getCondition(),type,myindex,wordnet,lemmatiser); + for(ArrayList<Hypothesis> h_t : generated_hypothesis){ + ArrayList<Hypothesis> new_hypothesen_set = new ArrayList<Hypothesis>(); + for(Hypothesis bla : h_t){ + new_hypothesen_set.add(bla); } - - //hypothesenSetList.addAll(blub); + hypothesenSetList.add(new_hypothesen_set); } - if(type.contains("WORDNET"))t.setHypothesenWordnet(hypothesenSetList); - if(type.contains("LEVENSTHEIN"))t.setHypothesenLevensthein(hypothesenSetList); - if(type.contains("RELATE"))t.setHypothesenRelate(hypothesenSetList); - + + //hypothesenSetList.addAll(blub); } - + if(type.contains("WORDNET"))t.setHypothesenWordnet(hypothesenSetList); + if(type.contains("LEVENSTHEIN"))t.setHypothesenLevensthein(hypothesenSetList); + if(type.contains("RELATE"))t.setHypothesenRelate(hypothesenSetList); + + } + //} catch (Exception e){ - + } - + } - - + + /* * Generate Queries and test queries */ @@ -359,12 +359,12 @@ 
if(!contain&&checkQuery(p.getQuery()))qp.add(p); } } - + //sort QueryPairs qp=LinearSort.doSort(qp); qp=HeuristicSort.doHeuristicSort(qp, Question); //printQueries(qp, type, Question); - + System.out.println("Following Querries were created:"); for(QueryPair q : qp){ System.out.println(q.getQuery()+" rank:"+q.getRank()); @@ -385,7 +385,7 @@ //else go_on=false; //go_on=true; go_on=false; - + System.out.println("Got Answer from Server with this Query: "+ q.getQuery()); if(Setting.isTagging()) write_ResourcePropertyInformation(q.getResource(),q.getPropertyName(),q.getProperty()); //printSingleQuery(q.getQuery(),Question); @@ -400,8 +400,8 @@ */ if(Question.toLowerCase().contains("which")) go_on=false; if(Question.toLowerCase().contains("who")) go_on=false; - - + + boolean contains_uri=false; for(String s : answer_tmp){ if(s.contains("http")){ @@ -413,8 +413,8 @@ for(String answer:answer_tmp){ System.out.println(answer); }*/ - - + + for(String s : answer_tmp){ if(checkAnswer(s)){ boolean double_result = false; @@ -437,7 +437,7 @@ //if(checkAnswer(answer_tmp))answers.addAll(answer_tmp); } } - + else if(q.getRank()>Setting.getThresholdAsk()&go_on &q.getQuery().contains("ASK")){ ArrayList<String> answer_tmp = new ArrayList<String>(); answer_tmp=ServerUtil.requestAnswerFromServer(q.getQuery()); @@ -455,8 +455,8 @@ go_on=true; } } - - + + boolean contains_uri=false; for(String s : answer_tmp){ if(s.contains("http")){ @@ -464,8 +464,8 @@ break; } } - - + + for(String s : answer_tmp){ if(checkAnswer(s)){ boolean double_result = false; @@ -501,12 +501,12 @@ for(String answer:answers){ System.out.println(answer); } - + return answers; } - - - + + + private static ArrayList<String> filterAnswer(ArrayList<String> answers, String Question){ if(Question.toLowerCase().contains("who")){ boolean contains_only_uri=true; @@ -521,29 +521,29 @@ new_answer.add(s); } } - + return new_answer; } else{ return answers; } } - - + + return answers; } private static boolean checkAnswer(String 
answer){ if(answer.contains("File:")||answer.contains(".png")||answer.contains("upload.wikimedia.org")||answer.contains("dbpedia.org/datatype/")||answer.contains("http://www.w3.org/2001/XMLSchema")||answer.contains("flickerwrappr/photos/")) return false; else return true; - + } - + private static boolean checkQuery(String query){ if(query.contains("wikiPageWiki")||query.contains("wikiPageExternal")||query.contains("wikiPageRedirects")|| query.contains("thumbnail")||query.contains("wikiPage")) return false; else return true; - + } - + private static void printQueries(ArrayList<QueryPair> qp, String type, String Question){ /*String dateiname="/home/swalter/Dokumente/Auswertung/CreatedQuery"+Setting.getLevenstheinMin()+".txt"; String result_string =""; @@ -558,7 +558,7 @@ catch (IOException e) { System.err.println("Error: " + e); } - + File file = new File(dateiname); BufferedWriter bw = null; try { @@ -571,18 +571,18 @@ querylist="\n Modul: "+type+"\nfor Question: "+ Question+"\n"; int anzahl= 0; /* - * write only the first 10 queries: - */ - /* for(QueryPair q : qp){ + * write only the first 10 queries: + */ + /* for(QueryPair q : qp){ if(anzahl<10){ querylist+=q.getQuery()+" "+q.getRank()+"\n"; anzahl+=1; } - + } - - + + try { bw.write(result_string+querylist); } catch (IOException e) { @@ -602,9 +602,9 @@ e.printStackTrace(); }*/ } - - - + + + private static void printSingleQuery(String query,String Question){ /*String dateiname="/home/swalter/Dokumente/Auswertung/WorkingQuery"+Setting.getLevenstheinMin()+".txt"; String result_string =""; @@ -619,7 +619,7 @@ catch (IOException e) { System.err.println("Error: " + e); } - + File file = new File(dateiname); BufferedWriter bw = null; try { @@ -628,8 +628,8 @@ // TODO Auto-generated catch block e2.printStackTrace(); } - + try { bw.write(result_string+Question+" "+query+"\n"); } catch (IOException e) { @@ -649,11 +649,11 @@ e.printStackTrace(); }*/ } - - - + + + private static ArrayList<String> stufe5(SQLiteIndex 
myindex, WordNet wordnet,StanfordLemmatizer lemmatiser, boolean wait,ArrayList<Template> template_list){ - + ArrayList<Template> new_template_list=new ArrayList<Template>(); ArrayList<String> answers=new ArrayList<String>(); /* @@ -668,7 +668,7 @@ if(condition.get(1).toLowerCase().equals("isa")) go_on=true; System.out.println("go_on:"+go_on); if(go_on){ - + String resource_variable=condition.get(0); String class_variable=condition.get(2); Hypothesis resource_h = null; @@ -686,11 +686,11 @@ class_h=h; } } - + } System.out.println("go_on_resource:"+go_on_resource); if(go_on_resource){ - + /* * manipulate Class variable to make a property from it */ @@ -703,82 +703,82 @@ small_h_list.add(resource_h); small_h_list.add(class_h); new_hypothesen_list.add(small_h_list); - + ArrayList<String> condition_new = new ArrayList<String>(); condition_new.add("?x"); condition_new.add("?y"); condition_new.add("?z"); - + ArrayList<ArrayList<String>> new_c_list = new ArrayList<ArrayList<String>>(); new_c_list.add(condition_new); - + Template new_Template = new Template(new_c_list, t.getQueryType(), "","" , "?z", "", "", t.getQuestion()); - + new_Template.setHypothesen(new_hypothesen_list); Elements elm = new Elements(new_Template.getCondition(),new_Template.getHypothesen()); - if(elm.isElementEmty()==false){ - //elm.printAll(); - new_Template.setElm(elm); - new_template_list.add(new_Template); - } - - Template template_reverse_conditions = new Template(new_Template.getCondition(),new_Template.getQueryType(), new_Template.getHaving(), new_Template.getFilter(), new_Template.getSelectTerm(), new_Template.getOrderBy(), new_Template.getLimit(), new_Template.getQuestion()); - template_reverse_conditions.setHypothesen(new_hypothesen_list); - - ArrayList<ArrayList<String>> condition_template_reverse_conditions = template_reverse_conditions.getCondition(); - ArrayList<ArrayList<String>> condition_reverse_new= new ArrayList<ArrayList<String>>(); + if(elm.isElementEmty()==false){ + 
//elm.printAll(); + new_Template.setElm(elm); + new_template_list.add(new_Template); + } + Template template_reverse_conditions = new Template(new_Template.getCondition(),new_Template.getQueryType(), new_Template.getHaving(), new_Template.getFilter(), new_Template.getSelectTerm(), new_Template.getOrderBy(), new_Template.getLimit(), new_Template.getQuestion()); + template_reverse_conditions.setHypothesen(new_hypothesen_list); - for (ArrayList<String> x : condition_template_reverse_conditions){ - ArrayList<String> new_list = new ArrayList<String>(); - new_list.add(x.get(2)); - new_list.add(x.get(1)); - new_list.add(x.get(0)); - condition_reverse_new.add(new_list); - } - - - template_reverse_conditions.setCondition(condition_reverse_new); - - Elements elm_reverse = new Elements(template_reverse_conditions.getCondition(),template_reverse_conditions.getHypothesen()); - if(elm_reverse.isElementEmty()==false){ - //elm.printAll(); - template_reverse_conditions.setElm(elm_reverse); - new_template_list.add(template_reverse_conditions); - } - - - + ArrayList<ArrayList<String>> condition_template_reverse_conditions = template_reverse_conditions.getCondition(); + ArrayList<ArrayList<String>> condition_reverse_new= new ArrayList<ArrayList<String>>(); + + + for (ArrayList<String> x : condition_template_reverse_conditions){ + ArrayList<String> new_list = new ArrayList<String>(); + new_list.add(x.get(2)); + new_list.add(x.get(1)); + new_list.add(x.get(0)); + condition_reverse_new.add(new_list); + } + + + template_reverse_conditions.setCondition(condition_reverse_new); + + Elements elm_reverse = new Elements(template_reverse_conditions.getCondition(),template_reverse_conditions.getHypothesen()); + if(elm_reverse.isElementEmty()==false){ + //elm.printAll(); + template_reverse_conditions.setElm(elm_reverse); + new_template_list.add(template_reverse_conditions); + } + + + } - - - - + + + + } } - - - - + + + + /* * only if condition.size==2 */ if(t.getCondition().size()==2){ 
System.out.println("Yeah, found two Conditions!"); - + /* * now look if one have the [isa][resource] or [resource][isa] case */ ArrayList<String> condition1=new ArrayList<String>(); ArrayList<String> condition2=new ArrayList<String>(); - + condition1=t.getCondition().get(0); condition2=t.getCondition().get(1); System.out.println("condition1:"+condition1); System.out.println("condition2:"+condition2); - + boolean go_on=false; - + if(condition1.get(1).toLowerCase().contains("isa")&&!condition2.get(1).toLowerCase().contains("isa")){ String resource1_variable=condition2.get(0); String resource2_variable=condition2.get(2); @@ -788,17 +788,17 @@ if(h.getType().toLowerCase().contains("resource")) go_on=true; } } - + } - + /*if(condition2.get(0).contains("resource/")||condition2.get(2).contains("resource/")){ go_on=true; } else go_on=false;*/ } - + else if(condition2.get(1).toLowerCase().contains("isa")){ - + String resource1_variable=condition1.get(0); String resource2_variable=condition1.get(2); for(ArrayList<Hypothesis> h_l :t.getHypothesen()){ @@ -807,10 +807,10 @@ if(h.getType().toLowerCase().contains("resource")) go_on=true; } } - + } - - + + /* * in the conditions there is for sure no resource!!! 
*/ @@ -820,28 +820,28 @@ else go_on=false;*/ } else go_on=false; - - + + System.out.println("Go_on:"+go_on); if(go_on==true){ - + /* * use now only the conditions WITHOUT the class */ ArrayList<ArrayList<Hypothesis>> new_hypothesen_list = new ArrayList<ArrayList<Hypothesis>>(); - + String resource_variable=null; for(ArrayList<Hypothesis> h_l :t.getHypothesen()){ ArrayList<Hypothesis> t_h_l = new ArrayList<Hypothesis>(); - + for(Hypothesis h : h_l){ if(!h.getType().toLowerCase().contains("isa"))t_h_l.add(h); if(h.getType().toLowerCase().contains("resource"))resource_variable=h.getVariable(); } - + if(t_h_l.size()>0) new_hypothesen_list.add(t_h_l); } - + /* * New HypothesenList */ @@ -855,12 +855,12 @@ ArrayList<String> new_condition= new ArrayList<String>(); if(!condition1.get(1).toLowerCase().contains("isa")) new_condition=condition1; else new_condition=condition2; - + String new_SelectTerm=null; - + if(new_condition.get(0).contains(resource_variable)) new_SelectTerm=new_condition.get(2); else new_SelectTerm=new_condition.get(0); - + ArrayList<ArrayList<String>> new_c_list = new ArrayList<ArrayList<String>>(); new_c_list.add(new_condition); /* @@ -874,19 +874,19 @@ new_Template.setElm(t.getElm()); new_template_list.add(new_Template); //new_Template.printAll(); - + } - - + + } - + if(t.getCondition().size()>=30){ ArrayList<ArrayList<Hypothesis>> new_hypothesen_list = new ArrayList<ArrayList<Hypothesis>>(); for(ArrayList<Hypothesis> h_l :t.getHypothesen()){ /* * if greater 2, than it means, there are at least 3 propertys/resources or whatever */ - + /* * Resource ?x * Property ?y @@ -940,56 +940,56 @@ list_two.add(h_r); list_two.add(h_p2); new_hypothesen_list.add(list_two); - + } } } - + ArrayList<ArrayList<String>> condition_new=new ArrayList<ArrayList<String>>(); ArrayList<String> con = new ArrayList<String>(); con.add("?x"); con.add("?y"); con.add("?z"); condition_new.add(con); - + ArrayList<ArrayList<String>> condition_new_r=new ArrayList<ArrayList<String>>(); 
ArrayList<String> con_r = new ArrayList<String>(); con_r.add("?z"); con_r.add("?y"); con_r.add("?x"); condition_new_r.add(con_r); - - - + + + Template template_new = new Template(condition_new,"SELECT", t.getHaving(), t.getFilter(), "?z", t.getOrderBy(), t.getLimit(), t.getQuestion()); template_new.setHypothesen(new_hypothesen_list); template_new.setElm(t.getElm()); - + Template template_new_r = new Template(condition_new_r,"SELECT", t.getHaving(), t.getFilter(), "?z", t.getOrderBy(), t.getLimit(), t.getQuestion()); template_new_r.setHypothesen(new_hypothesen_list); template_new_r.setElm(t.getElm()); - + Elements elm = new Elements(template_new.getCondition(),template_new.getHypothesen()); - if(elm.isElementEmty()==false){ - //elm.printAll(); - template_new.setElm(elm); - new_template_list.add(template_new); - } - - Elements elm_r = new Elements(template_new.getCondition(),template_new.getHypothesen()); - if(elm.isElementEmty()==false){ - //elm.printAll(); - template_new_r.setElm(elm_r); - new_template_list.add(template_new_r); - } - - - + if(elm.isElementEmty()==false){ + //elm.printAll(); + template_new.setElm(elm); + new_template_list.add(template_new); + } + + Elements elm_r = new Elements(template_new.getCondition(),template_new.getHypothesen()); + if(elm.isElementEmty()==false){ + //elm.printAll(); + template_new_r.setElm(elm_r); + new_template_list.add(template_new_r); + } + + + //new_template_list.add(template_new); //new_template_list.add(template_new_r); } } - + /* * if there are new templates, start rescursive call; */ @@ -1003,18 +1003,18 @@ return answers; } } - + return answers; } - - + + private static String do_something(int number) throws SQLException, JWNLException, IOException{ //String str_number=Thread.currentThread().getName(); //System.out.println("ThreadName: "+str_number); //int number= Integer.parseInt(str_number); ArrayList<ArrayList<Hypothesis>> hypothesenSetList = new ArrayList<ArrayList<Hypothesis>>(); - - + + for(ArrayList<Hypothesis> 
l_h : global_template_list.get(number).getHypothesen()){ ArrayList<ArrayList<Hypothesis>> generated_hypothesis = new ArrayList<ArrayList<Hypothesis>>(); generated_hypothesis= IterationModule.new_iteration(global_template_list.get(number).getElm(),l_h,global_template_list.get(number).getCondition(),type_global,myindex_global,wordnet_global,lemmatiser_global); @@ -1025,61 +1025,40 @@ } hypothesenSetList.add(new_hypothesen_set); } - + //hypothesenSetList.addAll(blub); } if(type_global.contains("WORDNET"))global_template_list.get(number).setHypothesenWordnet(hypothesenSetList); if(type_global.contains("LEVENSTHEIN"))global_template_list.get(number).setHypothesenLevensthein(hypothesenSetList); if(type_global.contains("RELATE"))global_template_list.get(number).setHypothesenRelate(hypothesenSetList); return "DONE"; - + } - + private static void write_ResourcePropertyInformation(String Resource, String PropertyName, String Property){ String dateiname="/home/swalter/Dokumente/Auswertung/ResourcePropertyRelation.txt"; String result_string =""; //Open the file for reading - try { - BufferedReader br = new BufferedReader(new FileReader(dateiname)); - String thisLine; - while ((thisLine = br.readLine()) != null) { // while loop begins here - result_string+=thisLine+"\n"; - } // end while - } // end try - catch (IOException e) { - System.err.println("Error: " + e); - } - - - - File file = new File(dateiname); - BufferedWriter bw = null; try { - bw = new BufferedWriter(new FileWriter(file)); - } catch (IOException e2) { - // TODO Auto-generated catch block - e2.printStackTrace(); + BufferedReader br = new BufferedReader(new FileReader(dateiname)); + String thisLine; + while ((thisLine = br.readLine()) != null) { // while loop begins here + result_string+=thisLine+"\n"; + } // end while + } // end try + catch (IOException e) { + System.err.println("Error: " + e); } - - - try { - bw.write(result_string+Resource+"::"+PropertyName+"::"+Property+"\n"); - } catch (IOException e) { - // 
TODO Auto-generated catch block - e.printStackTrace(); - } - try { - bw.flush(); - } catch (IOException e1) { - // TODO Auto-generated catch block - e1.printStackTrace(); - } - try { - bw.close(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + File file = new File(dateiname); + BufferedWriter bw = null; + try + { + bw = new BufferedWriter(new FileWriter(file)); + bw.write(result_string+Resource+"::"+PropertyName+"::"+Property+"\n"); + bw.flush(); + bw.close(); + } + catch (IOException e) {e.printStackTrace();} } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java 2012-07-26 11:01:22 UTC (rev 3806) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/agreement/Unification.java 2012-07-26 14:21:14 UTC (rev 3807) @@ -58,7 +58,7 @@ private static boolean unify(LexicalSelection a, LexicalSelection b) { if (a == null && b == null) { return true; - } else if (a.equals(b)) { + } else if (a!=null&&a.equals(b)) { return true; } return false; Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-26 11:01:22 UTC (rev 3806) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-26 14:21:14 UTC (rev 3807) @@ -1,12 +1,14 @@ package org.dllearner.algorithm.tbsl.learning; -import static org.junit.Assert.assertTrue; -import org.ini4j.Options; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import 
java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; import java.net.MalformedURLException; -import java.net.URL; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; @@ -15,19 +17,15 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import opennlp.tools.postag.POSTagger; +import net.sf.oval.constraint.AssertTrue; import org.apache.log4j.FileAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -41,17 +39,14 @@ import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; -import org.dllearner.common.index.SPARQLIndex; -import org.dllearner.common.index.VirtuosoClassesIndex; -import org.dllearner.common.index.VirtuosoPropertiesIndex; -import org.dllearner.common.index.VirtuosoResourcesIndex; import org.dllearner.core.ComponentInitException; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; +import org.ini4j.Options; import org.junit.Before; import org.junit.Test; -import org.openjena.atlas.logging.Log; +import static org.junit.Assert.*; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -68,6 +63,7 @@ * only contains the questions where the reference query does 
not return a nonempty list of resources. * This could be questions which return literals, ask queries, queries which have no results in the DBpedia endpoint * and queries that cause errors. This updated test file contains the reference answers as well and is only created once. + * The answers in the updated query could be out of date as well, so if the answers don't match they are newly queried from the reference query. * Because there are multiple queries that are not all valid at first, further test runs are compared against the first run. * The updated test data and the test runs are saved in the cache folder in the same format as the original test data * (an xml with the tags question, query and answer). @@ -78,20 +74,42 @@ * **/ public class SPARQLTemplateBasedLearner3Test { - @Test public void testDBpedia() throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + @Test public void testDBpedia() throws Exception {test(new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()),"http://live.dbpedia.org/sparql");} //@Test public void testOxford() {test(new File(""),"");} - public void test(File file, String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + public void test(final File referenceXML,final String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { String dir = "cache/"+getClass().getSimpleName()+"/"; new File(dir).mkdirs(); - File updatedFile=new File(dir+"updated_"+file.getName()); - if(!updatedFile.exists()) {generateUpdatedFile(file,updatedFile,endpoint);} + File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); + if(!updatedReferenceXML.exists()) {generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint);} - 
QueryTestData savedTestData = readQueries(updatedFile); - QueryTestData newTestData = generateTestData(savedTestData.id2Question); - Diff QueryTestDataDiff = diffTestData(savedTestData,newTestData); + logger.debug("Reading updated reference test data"); + QueryTestData referenceTestData = readQueries(updatedReferenceXML); + QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question); + + logger.info("Comparing updated reference test data a with learned test data b:"); + Diff queryTestDataDiff = diffTestData(referenceTestData,learnedTestData); + logger.info(queryTestDataDiff); + + logger.info("Comparing learned test data with old learned test data"); + + try{ + QueryTestData oldLearnedTestData = QueryTestData.read(); + Diff queryTestDataDiff2 = diffTestData(oldLearnedTestData,learnedTestData); + logger.info(queryTestDataDiff); +// assertFalse("the following queries did not return an answer in the current learned test data: "+queryTestDataDiff2.aMinusB, +// queryTestDataDiff2.aMinusB.isEmpty()); + assertFalse("the following queries had different answers: "+queryTestDataDiff2.differentAnswers, + queryTestDataDiff2.differentAnswers.isEmpty()); + + } + catch(IOException e) + { + logger.info("Old test data not loadable, creating it and exiting."); + learnedTestData.write(); + } } /** @@ -99,32 +117,47 @@ * @param newTestData * @return */ - private Diff diffTestData(QueryTestData d, QueryTestData e) + private static Diff diffTestData(QueryTestData a, QueryTestData b) { -// if(d.id2Question.size()!=e.id2Question.size()) - {logger.info("comparing test data D against E. 
number of questions: "+d.id2Question.size()+" vs "+e.id2Question.size());} - - Set<Integer> dMinusE = new HashSet<Integer>(d.id2Question.keySet()); - dMinusE.removeAll(e.id2Question.keySet()); - if(!dMinusE.isEmpty()) logger.info("questions D/E: "+dMinusE+" ("+dMinusE.size()+" elements)"); - - Set<Integer> eMinusD = new HashSet<Integer>(e.id2Question.keySet()); - eMinusD.removeAll(d.id2Question.keySet()); - if(!eMinusD.isEmpty()) logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); - - Set<Integer> intersection = new HashSet<Integer>(d.id2Question.keySet()); - intersection.retainAll(e.id2Question.keySet()); - - if(!eMinusD.isEmpty()) logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); - - + // if(d.id2Question.size()!=e.id2Question.size()) + {logger.info("comparing test data a against b. number of questions: "+a.id2Question.size()+" vs "+b.id2Question.size());} + Diff diff = new Diff(); + diff.aMinusB.addAll(a.id2Question.keySet()); + diff.aMinusB.removeAll(b.id2Question.keySet()); + + diff.bMinusA.addAll(b.id2Question.keySet()); + diff.bMinusA.removeAll(a.id2Question.keySet()); + + diff.intersection.addAll(a.id2Question.keySet()); + diff.intersection.retainAll(b.id2Question.keySet()); + + for(int i: diff.intersection) + { + if(a.id2Answers.containsKey(i)&&!a.id2Answers.get(i).equals(b.id2Answers.get(i))) {diff.differentAnswers.add(i);} + } + // if(!eMinusD.isEmpty()) logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); + + // TODO Auto-generated method stub - return null; + return diff; } - private class Diff + public static class Diff { + final Set<Integer> aMinusB = new HashSet<Integer>(); + final Set<Integer> bMinusA = new HashSet<Integer>(); + final Set<Integer> intersection = new HashSet<Integer>(); + final Set<Integer> differentAnswers = new HashSet<Integer>(); + @Override public String toString() + { + StringBuilder sb = new StringBuilder(); + if(!aMinusB.isEmpty()) sb.append("questions a/b: 
"+aMinusB+" ("+aMinusB.size()+" elements)\n"); + if(!bMinusA.isEmpty()) sb.append("questions b/a: "+bMinusA+" ("+bMinusA.size()+" elements)\n"); + if(!intersection.isEmpty()) sb.append("questions intersection: "+intersection+" ("+intersection.size()+" elements)\n"); + if(!differentAnswers.isEmpty()) sb.append("questions with different answers: "+differentAnswers+" ("+differentAnswers.size()+" elements)\n"); + return sb.substring(0, sb.length()-2); // remove last \n + } } /** @@ -148,7 +181,7 @@ dbpediaLiveLearner.init(); dbpediaLiveLearner.setQuestion(question); - + try{dbpediaLiveLearner.learnSPARQLQueries();} catch(NoTemplateFoundException e) {continue;} catch(Exception e) {logger.error("Error processing question "+question,e);continue;} @@ -156,7 +189,9 @@ testData.id2Question.put(i, question); String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); testData.id2Query.put(i, learnedQuery); - + // generate answers + // getUris(endpoint, learnedQuery); + long end = System.currentTimeMillis(); logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); @@ -174,7 +209,7 @@ * @throws SAXException * @throws TransformerException */ - private void generateUpdatedFile(File originalFile, File updatedFile,String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException + private void generateUpdatedXML(File originalFile, File updatedFile,String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException { logger.info(String.format("Updating question file \"%s\" by removing questions without nonempty resource list answer and adding answers.\n" + " Saving the result to file \"%s\"",originalFile.getPath(),updatedFile.getPath())); @@ -262,55 +297,55 @@ // int successfullTestThreadRuns = 0; /** */ - private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = "http://live.dbpedia.org/sparql"; + private static final String DBPEDIA_LIVE_ENDPOINT_URL_STRING = 
"http://live.dbpedia.org/sparql"; - private static Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); + private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner3Test.class); // private SPARQLTemplateBasedLearner2 oxfordLearner; // private SPARQLTemplateBasedLearner2 dbpediaLiveLearner; - private ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); - private ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); + private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); + private final ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); - private Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); - - static SparqlEndpoint dbpediaLiveEndpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); + private final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); + + static final SparqlEndpoint dbpediaLiveEndpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); //static SparqlEndpoint oxfordEndpoint; private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} -// private ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} + // private ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} -// @Test public void benchmarkCreateOxfordKnowledgeBase() -// { -// long start = System.currentTimeMillis(); -// for(int i=0;i<1000;i++) -// { -// createOxfordKnowledgebase(oxfordCache); -// } -// long end = System.currentTimeMillis(); -// long diff = end-start; -// System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); -// } + // @Test public void benchmarkCreateOxfordKnowledgeBase() + // { + // long 
start = System.currentTimeMillis(); + // for(int i=0;i<1000;i++) + // { + // createOxfordKnowledgebase(oxfordCache); + // } + // long end = System.currentTimeMillis(); + // long diff = end-start; + // System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); + // } -// private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) -// { -// URL url; -// try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} -// SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); -// -// SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); -// SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); -// SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); -// MappingBasedIndex mappingIndex= new MappingBasedIndex( -// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), -// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), -// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), -// SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() -// ); -// -// Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); -// return kb; -// } + // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) + // { + // URL url; + // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} + // SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + // + // SPARQLIndex resourcesIndex = new 
VirtuosoResourcesIndex(endpoint, cache); + // SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + // SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); + // MappingBasedIndex mappingIndex= new MappingBasedIndex( + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + // ); + // + // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); + // return kb; + // } private Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) { @@ -344,11 +379,35 @@ // oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache)); } - private class QueryTestData + private static class QueryTestData implements Serializable { public SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); public SortedMap<Integer, Set<String>> id2Answers = new TreeMap<Integer, Set<String>>(); + + private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); + + public void write() + { + try + { + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(persistancePath))); + oos.writeObject(this); + oos.close(); + } catch(IOException e) {throw new RuntimeException(e);} + } + + public static QueryTestData read() throws FileNotFoundException, IOException + { + try + { + ObjectInputStream ois = new ObjectInputStream(new 
FileInputStream(new File(persistancePath))); + QueryTestData testData = (QueryTestData) ois.readObject(); + ois.close(); + return testData; + } + catch (ClassNotFoundException e){throw new RuntimeException(e);} + } } private QueryTestData readQueries(final File file) @@ -467,69 +526,69 @@ return uris; } - private class TestQueryThread implements Runnable - { - private String question; - private String referenceQuery; + // private class TestQueryThread implements Runnable + // { + // private String question; + // private String referenceQuery; + // + // public TestQueryThread(String question, String referenceQuery) + // { + // this.question=question; + // this.referenceQuery=referenceQuery; + // } + // // String referenceQuery = id2Query.get(i); + // // String question = id2Question.get(i); + // @Override public void run() + // { + // + // logger.trace("question: "+question); + // + // // TODO: check for query isomorphism and leave out result comparison if possible + // // TODO: only load the reference answers once and permanently cache them somehow (file, ehcache, serialization, ...) 
+ // // get the answers for the gold standard query + // logger.trace("reference query: "+referenceQuery); + // + // try + // { + // Set<String> referenceURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,referenceQuery); + // + // // learn query + // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); + // dbpediaLiveLearner.init(); + // dbpediaLiveLearner.setQuestion(question); + // dbpediaLiveLearner.learnSPARQLQueries(); + // String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); + // + // logger.trace(learnedQuery); + // + // Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); + // + // logger.trace("referenced uris: "+referenceURIs); + // logger.trace("learned uris: "+learnedURIs); + // + // boolean correctMatch = referenceURIs.equals(learnedURIs); + // logger.trace(correctMatch?"matches":"doesn't match"); + //// if(correctMatch) {synchronized(this) {correctMatches++;}} + // } + // catch(NoTemplateFoundException e) + // { + // synchronized(this) {numberOfNoTemplateFoundExceptions++;} + // logger.warn(String.format("no template found for question \"%s\"",question)); + // } + // catch(Exception e) + // { + // synchronized(this) {numberOfOtherExceptions++;} + // logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); + // e.printStackTrace(); + // // maybe the exception has corrupted the learner? 
better create a new one + // // + // } + // // get the answers for the learned query + // // compare gold standard query and learned query answers + // } + // + // } - public TestQueryThread(String question, String referenceQuery) - { - this.question=question; - this.referenceQuery=referenceQuery; - } - // String referenceQuery = id2Query.get(i); - // String question = id2Question.get(i); - @Override public void run() - { - - logger.trace("question: "+question); - - // TODO: check for query isomorphism and leave out result comparison if possible - // TODO: only load the reference answers once and permanently cache them somehow (file, ehcache, serialization, ...) - // get the answers for the gold standard query - logger.trace("reference query: "+referenceQuery); - - try - { - Set<String> referenceURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,referenceQuery); - - // learn query - SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); - dbpediaLiveLearner.init(); - dbpediaLiveLearner.setQuestion(question); - dbpediaLiveLearner.learnSPARQLQueries(); - String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); - - logger.trace(learnedQuery); - - Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); - - logger.trace("referenced uris: "+referenceURIs); - logger.trace("learned uris: "+learnedURIs); - - boolean correctMatch = referenceURIs.equals(learnedURIs); - logger.trace(correctMatch?"matches":"doesn't match"); - if(correctMatch) {synchronized(this) {correctMatches++;}} - } - catch(NoTemplateFoundException e) - { - synchronized(this) {numberOfNoTemplateFoundExceptions++;} - logger.warn(String.format("no template found for question \"%s\"",question)); - } - catch(Exception e) - { - synchronized(this) {numberOfOtherExceptions++;} - logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); - e.printStackTrace(); - // maybe 
the exception has corrupted the learner? better create a new one - // - } - // get the answers for the learned query - // compare gold standard query and learned query answers - } - - } - private void updateFile(File originalFile, File updatedFile, String endpoint) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-07-31 16:14:16
|
Revision: 3812 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3812&view=rev Author: kirdie Date: 2012-07-31 16:14:04 +0000 (Tue, 31 Jul 2012) Log Message: ----------- SPARQLTemplateBasedLearner3Test now also saves an evaluation history and creates an html file that displays it. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java 2012-07-31 10:36:11 UTC (rev 3811) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/nlp/SynchronizedStanfordPartOfSpeechTagger.java 2012-07-31 16:14:04 UTC (rev 3812) @@ -1,10 +1,6 @@ package org.dllearner.algorithm.tbsl.nlp; -public class SynchronizedStanfordPartOfSpeechTagger extends StanfordPartOfSpeechTagger { - - @Override - public synchronized String tag(String sentence) { - return super.tag(sentence); - } - -} +public class SynchronizedStanfordPartOfSpeechTagger extends StanfordPartOfSpeechTagger +{ + @Override public synchronized String tag(String sentence) {return super.tag(sentence);} +} \ No newline at end of file Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-31 10:36:11 UTC (rev 3811) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-07-31 16:14:04 UTC (rev 3812) @@ -7,16 +7,27 
@@ import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.io.PrintWriter; import java.io.Serializable; import java.net.MalformedURLException; +import java.text.DateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; +import java.util.Date; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Set; import java.util.SortedMap; -import java.util.TreeMap; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -25,14 +36,13 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import net.sf.oval.constraint.AssertTrue; import org.apache.log4j.FileAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.SimpleLayout; import org.dllearner.algorithm.tbsl.ltag.parser.Parser; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.SynchronizedStanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Knowledgebase; @@ -43,10 +53,11 @@ import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; +import org.eclipse.jdt.annotation.NonNull; import org.ini4j.Options; import org.junit.Before; import org.junit.Test; -import static 
org.junit.Assert.*; +import org.openjena.atlas.logging.Log; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -56,6 +67,7 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; /** Tests TSBL against the qald2 benchmark test data with the DBpedia endpoint. * The qald2 endpoint is not used because it may not always be available. @@ -74,42 +86,174 @@ * **/ public class SPARQLTemplateBasedLearner3Test { + private static final File evaluationFolder = new File("log/evaluation"); + @Test public void testDBpedia() throws Exception - {test(new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()),"http://live.dbpedia.org/sparql");} + {test("QALD 2 Benchmark", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()),"http://live.dbpedia.org/sparql");} //@Test public void testOxford() {test(new File(""),"");} - public void test(final File referenceXML,final String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + public void test(String title, final File referenceXML,final String endpoint) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { String dir = "cache/"+getClass().getSimpleName()+"/"; new File(dir).mkdirs(); File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); - if(!updatedReferenceXML.exists()) {generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint);} + if(!updatedReferenceXML.exists()) + { + logger.info("Generating updated reference for "+title); + generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint); + } - logger.debug("Reading updated reference test data"); 
QueryTestData referenceTestData = readQueries(updatedReferenceXML); - QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question); + logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); - logger.info("Comparing updated reference test data a with learned test data b:"); - Diff queryTestDataDiff = diffTestData(referenceTestData,learnedTestData); - logger.info(queryTestDataDiff); + QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, endpoint).generateAnswers(endpoint); + Evaluation evaluation = evaluate(referenceTestData, learnedTestData); + logger.info(evaluation); + evaluation.write(); + generateHTML(); + /* { + logger.info("Comparing updated reference test data with learned test data:"); + Diff queryTestDataDiff = diffTestData(referenceTestData,learnedTestData); + logger.info(queryTestDataDiff); + } + logger.info("Comparing learned test data with old learned test data"); - logger.info("Comparing learned test data with old learned test data"); + try{ + QueryTestData oldLearnedTestData = QueryTestData.read(); + Diff queryTestDataDiff2 = diffTestData(oldLearnedTestData,learnedTestData); + logger.info(queryTestDataDiff2); + // assertFalse("the following queries did not return an answer in the current learned test data: "+queryTestDataDiff2.aMinusB, + // queryTestDataDiff2.aMinusB.isEmpty()); + assertTrue("the following queries had different answers: "+queryTestDataDiff2.differentAnswers, + queryTestDataDiff2.differentAnswers.isEmpty()); - try{ - QueryTestData oldLearnedTestData = QueryTestData.read(); - Diff queryTestDataDiff2 = diffTestData(oldLearnedTestData,learnedTestData); - logger.info(queryTestDataDiff); -// assertFalse("the following queries did not return an answer in the current learned test data: "+queryTestDataDiff2.aMinusB, -// queryTestDataDiff2.aMinusB.isEmpty()); - assertFalse("the following queries had different answers: "+queryTestDataDiff2.differentAnswers, - 
queryTestDataDiff2.differentAnswers.isEmpty()); - + } + catch(IOException e) + { + logger.info("Old test data not loadable, creating it and exiting."); + } + learnedTestData.write();*/ + } + + /** evaluates a data set against a reference. + * @param reference the test data assumed to be correct. needs to contain the answers for all queries. + * @param suspect the test data to compare with the reference. + * if a query for a question does not match and the answers are not provided or don't match as well then the question is marked as incorrectly answered.*/ + private static Evaluation evaluate(QueryTestData reference, QueryTestData suspect) + { + // Diff d = diffTestData(reference,testData); + Evaluation evaluation = new Evaluation(); + evaluation.numberOfQuestions = reference.id2Question.keySet().size(); + + for(int i: reference.id2Question.keySet()) + { + String question = reference.id2Question.get(i); + if(!suspect.id2Query.containsKey(i)) + { + evaluation.unansweredQuestions.add(question); + continue; + } + evaluation.numberOfAnsweredQuestions++; + + String referenceQuery = reference.id2Query.get(i); + String suspectQuery = suspect.id2Query.get(i); + // reference is required to contain answers for every key so we shouldn't get NPEs here (even though it could be the empty set but that shouldn't happen because only questions with nonempty answers are included in the updated reference) + if(referenceQuery.equals(suspectQuery)||reference.id2Answers.get(i).equals(suspect.id2Answers.get(i))) + { + evaluation.correctlyAnsweredQuestions.add(question); + evaluation.numberOfCorrectAnswers++; + } + else + { + evaluation.incorrectlyAnsweredQuestions.add(question); + logger.debug("learned queries differing: "+referenceQuery+"\n"+suspectQuery); + logger.debug("learned answers differing: "+reference.id2Answers.get(i)+"\n"+suspect.id2Answers.get(i)); + } } - catch(IOException e) + return evaluation; + } + + static class Evaluation implements Serializable + { + private static 
final long serialVersionUID = 1L; + int numberOfQuestions = 0; + int numberOfAnsweredQuestions = 0; + int numberOfCorrectAnswers = 0; + double precision = 0; + double recall = 0; + final Set<String> unansweredQuestions = new HashSet<String>(); + final Set<String> incorrectlyAnsweredQuestions = new HashSet<String>(); + final Set<String> correctlyAnsweredQuestions = new HashSet<String>(); + + void computePrecisionAndRecall() // we have at maximum one answer set per question { - logger.info("Old test data not loadable, creating it and exiting."); - learnedTestData.write(); + precision = numberOfCorrectAnswers / numberOfAnsweredQuestions; + recall = numberOfCorrectAnswers / numberOfQuestions; } + + @Override public String toString() + { + StringBuffer sb = new StringBuffer(); + sb.append(numberOfAnsweredQuestions+" of "+numberOfQuestions+" questions answered, "); + sb.append(numberOfCorrectAnswers+" correct answers."); + sb.append("precision: "+precision+", recall: "+recall+"\n"); + sb.append("Detailed List: "); + sb.append(toHTML()); + return sb.toString(); + } + + public String toHTML() + { + StringBuffer sb = new StringBuffer(); + sb.append(htmlDetailsList("Unanswered Questions",unansweredQuestions)); + sb.append(htmlDetailsList("Wrongly Answered Questions",incorrectlyAnsweredQuestions)); + sb.append(htmlDetailsList("Correctly Answered Questions",correctlyAnsweredQuestions)); + return sb.toString(); + } + + public static String htmlDetailsList(/*@NonNull*/ String summary,/*@NonNull*/ Collection<String> elements) + { + if(elements.isEmpty()) {return "<p>"+summary+": none</p>";} + + StringBuffer sb = new StringBuffer(); + sb.append("<p><details>\n<summary>"+summary+"</summary>\n<ul>"); + for(String element: elements) + sb.append("<li>"+element+"</li>"); + sb.append("</ul>\n</details></p>"); + return sb.toString(); + } + + public synchronized void write() + { + evaluationFolder.mkdirs(); + try + { + ObjectOutputStream oos = new ObjectOutputStream(new 
FileOutputStream(new File(evaluationFolder,String.valueOf(System.currentTimeMillis())))); + oos.writeObject(this); + oos.close(); + } catch(IOException e) {throw new RuntimeException(e);} + } + + public static SortedMap<Long,Evaluation> read() + { + SortedMap<Long,Evaluation> evaluations = new ConcurrentSkipListMap<Long,Evaluation>(); + evaluationFolder.mkdirs(); + File[] files = evaluationFolder.listFiles(); + for(int i=0;i<files.length;i++) {evaluations.put(Long.valueOf(files[i].getName()),read(files[i]));} + return evaluations; + } + + private static Evaluation read(File file) + { + try + { + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(file)); + Evaluation evaluation = (Evaluation) ois.readObject(); + ois.close(); + return evaluation; + } + catch (Exception e){throw new RuntimeException(e);} + } } /** @@ -117,23 +261,37 @@ * @param newTestData * @return */ - private static Diff diffTestData(QueryTestData a, QueryTestData b) + private static Diff diffTestData(QueryTestData reference, QueryTestData newData) { // if(d.id2Question.size()!=e.id2Question.size()) - {logger.info("comparing test data a against b. number of questions: "+a.id2Question.size()+" vs "+b.id2Question.size());} + // logger.info("comparing test data a against b. 
number of questions: "+reference.id2Question.size()+" vs "+newData.id2Question.size()); + // if(reference.id2Question.size()!=newData.id2Question.size()) + // { + // logger.info("questions a: "+reference.id2Question.keySet()); + // logger.info("questions b: "+newData.id2Question.keySet()); + // } Diff diff = new Diff(); - diff.aMinusB.addAll(a.id2Question.keySet()); - diff.aMinusB.removeAll(b.id2Question.keySet()); + diff.aMinusB.addAll(reference.id2Question.keySet()); + diff.aMinusB.removeAll(newData.id2Question.keySet()); - diff.bMinusA.addAll(b.id2Question.keySet()); - diff.bMinusA.removeAll(a.id2Question.keySet()); + diff.bMinusA.addAll(newData.id2Question.keySet()); + diff.bMinusA.removeAll(reference.id2Question.keySet()); - diff.intersection.addAll(a.id2Question.keySet()); - diff.intersection.retainAll(b.id2Question.keySet()); + diff.intersection.addAll(reference.id2Question.keySet()); + diff.intersection.retainAll(newData.id2Question.keySet()); for(int i: diff.intersection) { - if(a.id2Answers.containsKey(i)&&!a.id2Answers.get(i).equals(b.id2Answers.get(i))) {diff.differentAnswers.add(i);} + // the questions are the same - we don't care about the answer + if(reference.id2Question.get(i).equals(newData.id2Question.get(i))) + + if(reference.id2Answers.containsKey(i)&&!reference.id2Answers.get(i).equals(newData.id2Answers.get(i))) + { + // logger.info("different answers:"); + // logger.info("a: "+reference.id2Answers.get(i)); + // logger.info("b: "+newData.id2Answers.get(i)); + diff.differentAnswers.add(i); + } } // if(!eMinusD.isEmpty()) logger.info("questions E/D: "+eMinusD+" ("+eMinusD.size()+" elements)"); @@ -155,49 +313,57 @@ if(!aMinusB.isEmpty()) sb.append("questions a/b: "+aMinusB+" ("+aMinusB.size()+" elements)\n"); if(!bMinusA.isEmpty()) sb.append("questions b/a: "+bMinusA+" ("+bMinusA.size()+" elements)\n"); if(!intersection.isEmpty()) sb.append("questions intersection: "+intersection+" ("+intersection.size()+" elements)\n"); - 
if(!differentAnswers.isEmpty()) sb.append("questions with different answers: "+differentAnswers+" ("+differentAnswers.size()+" elements)\n"); - return sb.substring(0, sb.length()-2); // remove last \n + if(!differentAnswers.isEmpty()) {sb.append("questions with different answers: "+differentAnswers+" ("+differentAnswers.size()+" elements)\n");} + else {sb.append("all answers are equal\n");} + return sb.substring(0, sb.length()-1); // remove last \n } } /** * @return the test data containing those of the given questions for which queries were found and the results of the queries */ - private QueryTestData generateTestData(SortedMap<Integer, String> id2Question) throws MalformedURLException, ComponentInitException + private QueryTestData generateTestData(SortedMap<Integer, String> id2Question,String endpoint) throws MalformedURLException, ComponentInitException { QueryTestData testData = new QueryTestData(); // -- only create the learner parameters once to save time -- - PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); - WordNet wordnet = new WordNet(); - Options options = new Options(); + // PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); + // WordNet wordnet = new WordNet(); + // Options options = new Options(); // ---------------------------------------------------------- - int successes = 0; - for(int i:id2Question.keySet()) - { - String question = id2Question.get(i); - logger.debug("generating query for question \""+question+"\", id "+i); - long start = System.currentTimeMillis(); - SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(dbpediaLiveKnowledgebase,posTagger,wordnet,options); + // int successes = 0; - dbpediaLiveLearner.init(); - dbpediaLiveLearner.setQuestion(question); + List<Callable<Object>> todo = new ArrayList<Callable<Object>>(id2Question.size()); + ExecutorService service = Executors.newFixedThreadPool(10); - try{dbpediaLiveLearner.learnSPARQLQueries();} - 
catch(NoTemplateFoundException e) {continue;} - catch(Exception e) {logger.error("Error processing question "+question,e);continue;} - successes++; - testData.id2Question.put(i, question); - String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); - testData.id2Query.put(i, learnedQuery); - // generate answers - // getUris(endpoint, learnedQuery); + for(int i: id2Question.keySet()) + {todo.add(Executors.callable(new LearnQueryRunnable(id2Question.get(i),i,endpoint, testData)));} - long end = System.currentTimeMillis(); - logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); + try{service.invokeAll(todo);} catch (InterruptedException e) {throw new RuntimeException(e);} + // logger.debug("generating query for question \""+question+"\", id "+i); + // long start = System.currentTimeMillis(); + // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(dbpediaLiveKnowledgebase,posTagger,wordnet,options); + // // dbpediaLiveLearner.setUseIdealTagger(true); // TODO: use this or not? 
+ // dbpediaLiveLearner.init(); + // dbpediaLiveLearner.setQuestion(question); + // + // try{dbpediaLiveLearner.learnSPARQLQueries();} + // catch(NoTemplateFoundException e) {continue;} + // catch(NullPointerException e) {continue;} + //catch(Exception e) {logger.error("Error processing question """+question,e);continue;} + // successes++; + // String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); + // if(learnedQuery==null) {continue;} + // + // testData.id2Question.put(i, question); + // testData.id2Query.put(i, learnedQuery); + // try {testData.id2Answers.put(i,getUris(endpoint, learnedQuery));} + // catch(Exception e) {logger.warn("Error with learned query "+learnedQuery+" for question "+question+" at endpoint "+endpoint+": "+e.getLocalizedMessage());} - } - logger.info(String.format("Successfully learned queries for %d of %d questions.",successes,id2Question.size())); - // TODO Auto-generated method stub + long end = System.currentTimeMillis(); + // logger.debug(String.format("Generated query \"%s\" after %d ms", learnedQuery,end-start)); + + + // logger.info(String.format("Learned queries for %d of %d questions.",successes,id2Question.size())); return testData; } @@ -305,7 +471,7 @@ // private SPARQLTemplateBasedLearner2 dbpediaLiveLearner; private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); - private final ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); + private final static ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); private final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); @@ -313,41 +479,9 @@ //static SparqlEndpoint oxfordEndpoint; private ResultSet executeDBpediaLiveSelect(String query){return SparqlQuery.convertJSONtoResultSet(dbpediaLiveCache.executeSelectQuery(dbpediaLiveEndpoint, query));} - // private ResultSet executeOxfordSelect(String query){return 
SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} - // @Test public void benchmarkCreateOxfordKnowledgeBase() - // { - // long start = System.currentTimeMillis(); - // for(int i=0;i<1000;i++) - // { - // createOxfordKnowledgebase(oxfordCache); - // } - // long end = System.currentTimeMillis(); - // long diff = end-start; - // System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); - // } - // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) - // { - // URL url; - // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} - // SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); - // - // SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); - // SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); - // SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); - // MappingBasedIndex mappingIndex= new MappingBasedIndex( - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() - // ); - // - // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); - // return kb; - // } - - private Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) + private static Knowledgebase createDBpediaLiveKnowledgebase(ExtractionDBCache cache) { SOLRIndex resourcesIndex = new 
SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_resources"); resourcesIndex.setPrimarySearchField("label"); @@ -372,22 +506,24 @@ Logger.getRootLogger().setLevel(Level.WARN); Logger.getLogger(Templator.class).setLevel(Level.WARN); Logger.getLogger(Parser.class).setLevel(Level.WARN); - Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); - logger.setLevel(Level.ALL); // TODO: remove when finishing implementation of this class + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.WARN); + // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); + logger.setLevel(Level.INFO); // TODO: remove when finishing implementation of this class logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); + // oxfordEndpoint = new SparqlEndpoint(new URL("http://lgd.aksw.org:8900/sparql"), Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); // oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache)); } private static class QueryTestData implements Serializable { - public SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); - public SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); - public SortedMap<Integer, Set<String>> id2Answers = new TreeMap<Integer, Set<String>>(); + public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); + public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); + public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); - public void write() + public synchronized void write() { try { @@ -408,12 +544,18 @@ } catch (ClassNotFoundException e){throw new RuntimeException(e);} } + + 
public QueryTestData generateAnswers(String endpoint) + { + if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} + for(int i:id2Query.keySet()) {id2Answers.put(i, getUris(endpoint, id2Query.get(i)));} + return this; + } } private QueryTestData readQueries(final File file) { QueryTestData testData = new QueryTestData(); - logger.info("Reading file containing queries and answers..."); try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); @@ -430,6 +572,7 @@ Element questionNode = (Element) questionNodes.item(i); //read question ID id = Integer.valueOf(questionNode.getAttribute("id")); + if(id>5) continue; // TODO: remove //Read question question = ((Element)questionNode.getElementsByTagName("string").item(0)).getChildNodes().item(0).getNodeValue().trim(); //Read SPARQL query @@ -445,7 +588,7 @@ if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 
1,2,5,7) { testData.id2Question.put(id, question); - testData.id2Query.put(id, query); + testData.id2Query.put(id, query); Element answersElement = (Element) questionNode.getElementsByTagName("answers").item(0); if(answersElement!=null) { @@ -484,16 +627,23 @@ // System.out.println("Exception "); // // } - logger.info("Done."); return testData; } - private Set<String> getUris(String endpoint, String query) - { + protected static Set<String> getUris(final String endpoint, final String query) + { + if(query==null) {throw new AssertionError("query is null");} + if(endpoint==null) {throw new AssertionError("endpoint is null");} if(!query.contains("SELECT")&&!query.contains("select")) {return Collections.<String>emptySet();} // abort when not a select query Set<String> uris = new HashSet<String>(); QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query); - ResultSet rs = qe.execSelect(); + ResultSet rs; + try{rs = qe.execSelect();} + catch(QueryExceptionHTTP e) + { + logger.error("Error getting uris for query "+query+" at endpoint "+endpoint,e); + return Collections.<String>emptySet(); + } String variable = "?uri"; resultsetloop: while(rs.hasNext()) @@ -520,81 +670,161 @@ continue resultsetloop; } } - return Collections.<String>emptySet(); // we didn't a resource for the first query solution - give up and don't look in the others + if(uris.isEmpty()) {return Collections.<String>emptySet();} // we didn't a resource for the first query solution - give up and don't look in the others } } return uris; } - // private class TestQueryThread implements Runnable + + // private ResultSet executeOxfordSelect(String query){return SparqlQuery.convertJSONtoResultSet(oxfordCache.executeSelectQuery(oxfordEndpoint, query));} + + // @Test public void benchmarkCreateOxfordKnowledgeBase() // { - // private String question; - // private String referenceQuery; - // - // public TestQueryThread(String question, String referenceQuery) + // long start = 
System.currentTimeMillis(); + // for(int i=0;i<1000;i++) // { - // this.question=question; - // this.referenceQuery=referenceQuery; + // createOxfordKnowledgebase(oxfordCache); // } - // // String referenceQuery = id2Query.get(i); - // // String question = id2Question.get(i); - // @Override public void run() - // { + // long end = System.currentTimeMillis(); + // long diff = end-start; + // System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); + // } + + // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) + // { + // URL url; + // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} + // SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); // - // logger.trace("question: "+question); + // SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); + // SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + // SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); + // MappingBasedIndex mappingIndex= new MappingBasedIndex( + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + // ); // - // // TODO: check for query isomorphism and leave out result comparison if possible - // // TODO: only load the reference answers once and permanently cache them somehow (file, ehcache, serialization, ...) 
- // // get the answers for the gold standard query - // logger.trace("reference query: "+referenceQuery); - // - // try - // { - // Set<String> referenceURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,referenceQuery); - // - // // learn query - // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); - // dbpediaLiveLearner.init(); - // dbpediaLiveLearner.setQuestion(question); - // dbpediaLiveLearner.learnSPARQLQueries(); - // String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); - // - // logger.trace(learnedQuery); - // - // Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); - // - // logger.trace("referenced uris: "+referenceURIs); - // logger.trace("learned uris: "+learnedURIs); - // - // boolean correctMatch = referenceURIs.equals(learnedURIs); - // logger.trace(correctMatch?"matches":"doesn't match"); - //// if(correctMatch) {synchronized(this) {correctMatches++;}} - // } - // catch(NoTemplateFoundException e) - // { - // synchronized(this) {numberOfNoTemplateFoundExceptions++;} - // logger.warn(String.format("no template found for question \"%s\"",question)); - // } - // catch(Exception e) - // { - // synchronized(this) {numberOfOtherExceptions++;} - // logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); - // e.printStackTrace(); - // // maybe the exception has corrupted the learner? 
better create a new one - // // - // } - // // get the answers for the learned query - // // compare gold standard query and learned query answers - // } - // + // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); + // return kb; // } - - private void updateFile(File originalFile, File updatedFile, String endpoint) + private static class LearnQueryRunnable implements Runnable { + private final String question; + private final String endpoint; + private final int id; + private final QueryTestData testData; + static private final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger(); + static private final WordNet wordnet = new WordNet(); + static private final Options options = new Options(); + public LearnQueryRunnable(String question, int id,String endpoint, QueryTestData testData) + { + this.question=question; + this.id=id; + this.endpoint=endpoint; + this.testData=testData; + } + + @Override public void run() + { + logger.trace("learning question: "+question); + try + { + // learn query + SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache),posTagger,wordnet,options); + // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); + dbpediaLiveLearner.init(); + dbpediaLiveLearner.setQuestion(question); + dbpediaLiveLearner.learnSPARQLQueries(); + String learnedQuery = dbpediaLiveLearner.getBestSPARQLQuery(); + if(learnedQuery!=null&&!learnedQuery.isEmpty()) + { + testData.id2Question.put(id, question); + testData.id2Query.put(id, learnedQuery); + } + logger.trace("learned query for question "+question+": "+learnedQuery); + + // Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); + } + catch(NoTemplateFoundException e) + { + logger.warn(String.format("no template found 
for question \"%s\"",question)); + } + catch(Exception e) + { + logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); + e.printStackTrace(); + } + } } + + /** Generates the HTML string content for one of the 3 colored bars which represent the correctly, incorrectly and unanswered question. + * Also creates and links to a file which contains the questions.*/ + private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal) + { + final StringBuilder sb = new StringBuilder(); + sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+"'>"); + sb.append("<div style='float:left;width:"+100.0*questions.size()/numberOfQuestionsTotal+"%;height:1em;background-color:"+color+";'></div>"); + sb.append("</a>"); + // link.getParentFile().mkdirs(); + try + { + PrintWriter out = new PrintWriter(link); + for(String question: questions) {out.println(question);} + out.close(); + } + catch (Exception e){throw new RuntimeException(e);} + + return sb.toString(); + } + + static void generateHTML() + { + StringBuilder sb = new StringBuilder(); + sb.append("<html><body><table style='width:100%'>"); + SortedMap<Long,Evaluation> evaluations = Evaluation.read(); + SortedSet<Long> timestampsDescending = new TreeSet<Long>(Collections.reverseOrder()); + timestampsDescending.addAll(evaluations.keySet()); + for(long timestamp: timestampsDescending) + { + try + { + File folder = new File("log/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+"/"+timestamp); + folder.mkdirs(); + Evaluation e = evaluations.get(timestamp); + sb.append("<tr><td style='white-space: nowrap'>"); + Date date = new Date(timestamp); + sb.append(DateFormat.getInstance().format(date)); + sb.append("</td><td width='100%'>"); + sb.append("<div style='width:100%;height:1em;border:solid 1px;'>"); + sb.append(createColoredColumn(new 
File(folder,"correctly_answered.txt"), "Correctly Answered Questions", "green", e.correctlyAnsweredQuestions, e.numberOfQuestions)); + sb.append(createColoredColumn(new File(folder,"incorrectly_answered.txt"), "Incorrectly Answered Questions", "orange", e.incorrectlyAnsweredQuestions, e.numberOfQuestions)); + sb.append(createColoredColumn(new File(folder,"unanswered.txt"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions)); + sb.append("<span style='width:1000px;'></span>"); + sb.append("</td></tr>"); + } catch(Exception e) {logger.warn("error with evaluation from timestamp "+timestamp,e);} + } + + sb.append("</table></body></html>"); + try + { + PrintWriter out = new PrintWriter("log/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+".html"); + out.println(sb.toString()); + out.close(); + } + catch (Exception e){throw new RuntimeException(e);} + } + // private void updateFile(File originalFile, File updatedFile, String endpoint) + // { + // + // + // } + // private void test(File file) throws MalformedURLException, InterruptedException // { // SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-08-09 15:13:08
|
Revision: 3821 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3821&view=rev Author: kirdie Date: 2012-08-09 15:12:57 +0000 (Thu, 09 Aug 2012) Log Message: ----------- corrected a faulty reference query in the qald2 dbpedia train benchmark. also extended the junit test for sparqltemplatedbasedlearner2. Modified Paths: -------------- trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train.xml trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml =================================================================== (Binary files differ) Modified: trunk/components-ext/src/main/resources/tbsl/evaluation/qald2-dbpedia-train.xml =================================================================== (Binary files differ) Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-09 12:47:29 UTC (rev 3820) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-09 15:12:57 UTC (rev 3821) @@ -11,7 +11,6 @@ import java.io.Serializable; import java.net.MalformedURLException; import java.text.DateFormat; -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Date; @@ -26,11 +25,12 @@ import java.util.Stack; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import 
java.util.concurrent.Future; -import java.util.concurrent.FutureTask; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -59,6 +59,7 @@ import org.ini4j.Options; import org.junit.Before; import org.junit.Test; +import org.junit.*; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -84,34 +85,40 @@ * logging output is also wrote to the file log/#classname. * @author Konrad Höffner * **/ + +// problem mit "In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/NN" public class SPARQLTemplateBasedLearner3Test { - private static final File evaluationFolder = new File("log/evaluation"); + private static final File evaluationFolder = new File("cache/evaluation"); @Test public void testDBpedia() throws Exception - {test("QALD 2 Benchmark", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train.xml").getFile()), + {test("QALD 2 Benchmark", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), SparqlEndpoint.getEndpointDBpediaLiveAKSW(),dbpediaLiveCache);} //@Test public void testOxford() {test(new File(""),"");} public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { -// String dir = "cache/"+getClass().getSimpleName()+"/"; -// -// new File(dir).mkdirs(); -// File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); -// if(!updatedReferenceXML.exists()) -// { -// logger.info("Generating updated reference for "+title); -// generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); -// } -// -// 
QueryTestData referenceTestData = readQueries(updatedReferenceXML); -// logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); -// -// QueryTestData learnedTestData = generateTestData(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); -// Evaluation evaluation = evaluate(referenceTestData, learnedTestData); -// logger.info(evaluation); -// evaluation.write(); + final boolean EVALUATE = true; + if(EVALUATE) + { + String dir = "cache/"+getClass().getSimpleName()+"/"; + + new File(dir).mkdirs(); + File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); + if(!updatedReferenceXML.exists()) + { + logger.info("Generating updated reference for "+title); + generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); + } + + QueryTestData referenceTestData = readQueries(updatedReferenceXML); + logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); + + QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase).generateAnswers(endpoint,cache); + Evaluation evaluation = evaluate(referenceTestData, learnedTestData); + logger.info(evaluation); + evaluation.write(); + } generateHTML(); // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} @@ -146,7 +153,7 @@ private static Evaluation evaluate(QueryTestData reference, QueryTestData suspect) { // Diff d = diffTestData(reference,testData); - Evaluation evaluation = new Evaluation(suspect); + Evaluation evaluation = new Evaluation(suspect,reference); evaluation.numberOfQuestions = reference.id2Question.keySet().size(); for(int i: reference.id2Question.keySet()) @@ -170,8 +177,8 @@ else { evaluation.incorrectlyAnsweredQuestions.add(question); - logger.debug("learned queries differing: "+referenceQuery+"\n"+suspectQuery); - logger.debug("learned answers differing: 
"+reference.id2Answers.get(i)+"\n"+suspect.id2Answers.get(i)); + logger.debug("learned queries differing. reference query:\n"+referenceQuery+"\nsuspect query:\n"+suspectQuery); + logger.debug("learned answers differing: reference answers:\n"+reference.id2Answers.get(i)+"\nsuspect answers:\n"+suspect.id2Answers.get(i)); } } return evaluation; @@ -179,8 +186,9 @@ static class Evaluation implements Serializable { - private static final long serialVersionUID = 2L; + private static final long serialVersionUID = 4L; final QueryTestData testData; + final QueryTestData referenceData; int numberOfQuestions = 0; int numberOfAnsweredQuestions = 0; int numberOfCorrectAnswers = 0; @@ -188,9 +196,9 @@ double recall = 0; final Set<String> unansweredQuestions = new HashSet<String>(); final Set<String> incorrectlyAnsweredQuestions = new HashSet<String>(); - final Set<String> correctlyAnsweredQuestions = new HashSet<String>(); + final Set<String> correctlyAnsweredQuestions = new HashSet<String>(); - public Evaluation(QueryTestData testData) {this.testData = testData;} + public Evaluation(QueryTestData testData,QueryTestData referenceData) {this.testData = testData;this.referenceData = referenceData;} void computePrecisionAndRecall() // we have at maximum one answer set per question { @@ -336,10 +344,12 @@ } } + enum LearnStatus {OK, TIMEOUT,EXCEPTION,NO_TEMPLATE_FOUND,QUERY_RESULT_EMPTY, NO_QUERY_LEARNED} + /** * @return the test data containing those of the given questions for which queries were found and the results of the queries */ - private QueryTestData generateTestData(SortedMap<Integer, String> id2Question,Knowledgebase kb) throws MalformedURLException, ComponentInitException + private QueryTestData generateTestDataMultiThreaded(SortedMap<Integer, String> id2Question,Knowledgebase kb) throws MalformedURLException, ComponentInitException { QueryTestData testData = new QueryTestData(); // -- only create the learner parameters once to save time -- @@ -349,25 +359,42 @@ // 
---------------------------------------------------------- // int successes = 0; - // List<Callable<Object>> todo = new ArrayList<Callable<Object>>(id2Question.size()); - List<FutureTask> todo = new ArrayList<FutureTask>(id2Question.size()); + // List<Callable<Object>> todo = new ArrayList<Callable<Object>>(id2Question.size()); + Map<Integer,Future<LearnStatus>> futures = new HashMap<Integer,Future<LearnStatus>>(); + + // List<FutureTask> todo = new ArrayList<FutureTask>(id2Question.size()); ExecutorService service = Executors.newFixedThreadPool(10); for(int i: id2Question.keySet()) { - Callable c = Executors.callable(new LearnQueryRunnable(id2Question.get(i),i, testData,kb)); - FutureTask task = new FutureTask(c); - todo.add(task); + futures.put(i,service.submit(new LearnQueryCallable(id2Question.get(i),i, testData,kb))); } - List<Future> futures = new LinkedList<Future>(); - for(FutureTask task : todo) + for(int i: id2Question.keySet()) { - futures.add(service.submit(task)); - } - for(Future future:futures) try {future.get(30, TimeUnit.SECONDS);} catch (Exception e) {logger.warn("Timeout while generating test data.");} + String question = id2Question.get(i); + try + { + testData.id2LearnStatus.put(i,futures.get(i).get(30, TimeUnit.SECONDS)); + } + catch (InterruptedException e) + { + // logger.warn("Timeout while generating test data for question "+id2Question.get(i)+"."); + // testData.id2LearnStatus.put(i, LearnStatus.TIMEOUT); + throw new RuntimeException("question= "+question,e); + } + catch (ExecutionException e) + { + throw new RuntimeException("question="+question,e); + } + catch (TimeoutException e) + { + logger.warn("Timeout while generating test data for question "+question+"."); + testData.id2LearnStatus.put(i, LearnStatus.TIMEOUT); + } + } service.shutdown(); -// try{service.awaitTermination(10, TimeUnit.MINUTES);} catch (InterruptedException e) {throw new RuntimeException("Timeout while generating test data.");} - + // 
try{service.awaitTermination(10, TimeUnit.MINUTES);} catch (InterruptedException e) {throw new RuntimeException("Timeout while generating test data.");} + // try{service.invokeAll(todo);} catch (InterruptedException e) {throw new RuntimeException(e);} // logger.debug("generating query for question \""+question+"\", id "+i); // long start = System.currentTimeMillis(); @@ -552,6 +579,7 @@ public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, Set<String>> id2Answers = new ConcurrentSkipListMap<Integer, Set<String>>(); + public SortedMap<Integer, LearnStatus> id2LearnStatus = new ConcurrentSkipListMap<Integer, LearnStatus>(); private static final String persistancePath = "cache/"+SPARQLTemplateBasedLearner3Test.class.getSimpleName()+'/'+QueryTestData.class.getSimpleName(); @@ -580,7 +608,13 @@ public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache) { if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} - for(int i:id2Query.keySet()) {id2Answers.put(i, getUris(endpoint, id2Query.get(i),cache));} + for(int i:id2Query.keySet()) + { + Set<String> uris = getUris(endpoint, id2Query.get(i),cache); + id2Answers.put(i, uris); // empty answer set better transfers intended meaning and doesn't cause NPEs in html generation :-) + if(!uris.isEmpty()) {/*id2Answers.put(i, uris);*/} + else {id2LearnStatus.put(i, LearnStatus.QUERY_RESULT_EMPTY);} + } return this; } } @@ -743,7 +777,7 @@ // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); // return kb; // } - private static class LearnQueryRunnable implements Runnable + private static class LearnQueryCallable implements Callable<LearnStatus> { private final String question; // private final String endpoint; @@ -756,7 +790,7 @@ 
static private final Options options = new Options(); - public LearnQueryRunnable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase) + public LearnQueryCallable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase) { this.question=question; this.id=id; @@ -764,7 +798,7 @@ this.testData=testData; } - @Override public void run() + @Override public LearnStatus call() { logger.trace("learning question: "+question); try @@ -772,16 +806,19 @@ // learn query // TODO: change to knowledgebase parameter SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache),posTagger,wordnet,options); + // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); learner.init(); + learner.setUseIdealTagger(true); learner.setQuestion(question); learner.learnSPARQLQueries(); - String learnedQuery = learner.getBestSPARQLQuery(); + String learnedQuery = learner.getBestSPARQLQuery(); + testData.id2Question.put(id, question); if(learnedQuery!=null&&!learnedQuery.isEmpty()) - { - testData.id2Question.put(id, question); + { testData.id2Query.put(id, learnedQuery); } + else {return LearnStatus.NO_QUERY_LEARNED;} logger.trace("learned query for question "+question+": "+learnedQuery); // Set<String> learnedURIs = getUris(DBPEDIA_LIVE_ENDPOINT_URL_STRING,learnedQuery); @@ -789,12 +826,15 @@ catch(NoTemplateFoundException e) { logger.warn(String.format("no template found for question \"%s\"",question)); + return LearnStatus.NO_TEMPLATE_FOUND; } catch(Exception e) { logger.error(String.format("Exception for question \"%s\": %s",question,e.getLocalizedMessage())); - e.printStackTrace(); - } + e.printStackTrace(); + return LearnStatus.EXCEPTION; + } + return LearnStatus.OK; } } @@ -813,7 +853,7 @@ { Set<String> removedStrings = new HashSet<String>(from); removedStrings.removeAll(to); - sb.append("<ul>"); + sb.append("<ul 
class='removed'>"); for(String removed: removedStrings) {sb.append("<li>"+removed+"</li>\n");} sb.append("</ul>\n"); @@ -821,11 +861,27 @@ return sb.toString(); } - private static String escapePre(String s) {return s.replace("<", "<").replace(">", "&rt;");} - + private static String escapePre(String s) {return s.replace("<", "<").replace(">", ">");} + + private static String getAnswerHTMLList(String[] answers) + { + StringBuilder sbAnswers = new StringBuilder(); + final int MAX = 10; + for(int i=0;i<answers.length;i++) + { + if(i>=MAX) + { + sbAnswers.append("["+(answers.length-i+1)+" more...]"); + break; + } + sbAnswers.append("<li><a href='"+answers[i]+"'>"+answers[i].replace("http://dbpedia.org/resource/","dbpedia:")+"</a></li>\n"); + } + return sbAnswers.toString(); + } + /** Generates the HTML string content for one of the 3 colored bars which represent the correctly, incorrectly and unanswered question. * Also creates and links to a file which contains the questions.*/ - private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean htmlAndIncludeQueriesAndAnswers, Evaluation evaluation) + private static String createColoredColumn(/*@NonNull*/ File link,/*@NonNull*/ String title,/*@NonNull*/ String color,/*@NonNull*/ Collection<String> questions, int numberOfQuestionsTotal, boolean queriesAvailable, Evaluation evaluation) { final StringBuilder sb = new StringBuilder(); sb.append("<a href='"+link.getAbsolutePath()+"' title='"+title+"'>"); @@ -836,34 +892,38 @@ try { PrintWriter out = new PrintWriter(link); - Map<String,Integer> question2Id = new HashMap<String,Integer>(); - for(Integer i: evaluation.testData.id2Question.keySet()) {question2Id.put(evaluation.testData.id2Question.get(i),i);} - if(htmlAndIncludeQueriesAndAnswers) + final Map<String,Integer> question2Id = new HashMap<String,Integer>(); + // only the reference data 
contains entries for questions without answers + for(Integer i: evaluation.referenceData.id2Question.keySet()) {question2Id.put(evaluation.referenceData.id2Question.get(i),i);} + out.println("<!DOCTYPE html><html>\n<head><title>"+title+"</title></head>\n<body>\n<table border='1'>"); + if(queriesAvailable) { - out.println("<html>\n<head><title>"+title+"</title></head>\n<body>\n<table border='1'>"); - out.println("<tr><th>Question</th><th>Query</th><th>Answers</th></tr>"); + out.println("<tr><th>Question</th><th>Learned Query</th><th>Reference Query</th><th>Learned Answers</th><th>Reference Answers</th></tr>"); for(String question: questions) { - Integer id = question2Id.get(question); - String[] answers = evaluation.testData.id2Answers.get(id).toArray(new String[0]); - StringBuilder sb2 = new StringBuilder(); - final int MAX = 10; - for(int i=0;i<answers.length;i++) - { - if(i>=MAX) - { - sb2.append("["+(answers.length-i+1)+" more...]"); - break; - } - sb2.append("<li><a href='"+answers[i]+"'>"+answers[i].replace("http://dbpedia.org/resource/","dbpedia:")+"</a></li>\n"); - } - out.println("<tr><td>"+question+"</td><td><code><pre>"+escapePre(evaluation.testData.id2Query.get(id))+"</pre></code></td><td><ul><code><pre>"+escapePre(sb2.toString())+"</pre></code></ul></td></tr>"); - } - - out.println("</table>\n</body>\n</html>"); + Integer id = question2Id.get(question); + if(evaluation.testData.id2Answers.get(id)==null) {System.err.println(question);continue;} + out.println( + "<tr><td>"+question+"</td>"+ + "<td><code><pre>"+escapePre(evaluation.testData.id2Query.get(id))+"</pre></code></td>"+ + "<td><code><pre>"+escapePre(evaluation.referenceData.id2Query.get(id))+"</pre></code></td>"+ + "<td><ul>"+getAnswerHTMLList(evaluation.testData.id2Answers.get(id).toArray(new String[0]))+"</ul></td>"+ + "<td><ul>"+getAnswerHTMLList(evaluation.referenceData.id2Answers.get(id).toArray(new String[0]))+"</ul></td></tr>"); + } } else - {for(String question: questions) 
{out.println(question);}} - out.close(); + { + out.println("<tr><th>Question</th><th>Error Type</th></tr>"); + for(String question: questions) + { + Integer id = question2Id.get(question); + if(id==null) {System.err.println(question);continue;} + out.println( + "<tr><td>"+question+"</td>"+ + "<td>"+evaluation.testData.id2LearnStatus.get(id)+"</td></tr>"); + } + } + out.println("</table>\n</body>\n</html>"); + out.close(); } catch (Exception e){throw new RuntimeException(e);} @@ -885,7 +945,7 @@ out.println("</style></head>"); out.println("<body>"); - out.println(diffHTML("Correctly Answered Questions", from.correctlyAnsweredQuestions, to.correctlyAnsweredQuestions)); + out.println(diffHTML("Correctly Answered Questions (precision and recall = 1)", from.correctlyAnsweredQuestions, to.correctlyAnsweredQuestions)); out.println(diffHTML("Incorrectly Answered Questions", from.incorrectlyAnsweredQuestions, to.incorrectlyAnsweredQuestions)); out.println(diffHTML("Unanswered Questions", from.unansweredQuestions, to.unansweredQuestions)); out.println("</body>\n</html>"); @@ -928,7 +988,7 @@ sb2.append("<div style='width:100%;height:1em;border:solid 1px;'>"); sb2.append(createColoredColumn(new File(folder,"correctly_answered.html"), "Correctly Answered Questions", "green", e.correctlyAnsweredQuestions, e.numberOfQuestions,true,e)); sb2.append(createColoredColumn(new File(folder,"incorrectly_answered.html"), "Incorrectly Answered Questions", "orange", e.incorrectlyAnsweredQuestions, e.numberOfQuestions,true,e)); - sb2.append(createColoredColumn(new File(folder,"unanswered.txt"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions,false,e)); + sb2.append(createColoredColumn(new File(folder,"unanswered.html"), "Unanswered Questions", "red", e.unansweredQuestions, e.numberOfQuestions,false,e)); sb2.append("<span style='width:1000px;'></span>"); sb2.append("</td></tr>\n"); last = e; This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site. |
From: <ki...@us...> - 2012-08-22 13:15:57
|
Revision: 3826 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3826&view=rev Author: kirdie Date: 2012-08-22 13:15:47 +0000 (Wed, 22 Aug 2012) Log Message: ----------- added oxford test but there are still errors thrown by the learner (lorenz please look at this). Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-13 06:15:24 UTC (rev 3825) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-22 13:15:47 UTC (rev 3826) @@ -22,7 +22,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; - import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; @@ -56,6 +55,7 @@ import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; +import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; import org.dllearner.common.index.SPARQLPropertiesIndex; import org.dllearner.common.index.VirtuosoDatatypePropertiesIndex; @@ -243,6 +243,12 @@ this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); } + public SPARQLTemplateBasedLearner2(Model model, MappingBasedIndex mappingBasedIndex, PartOfSpeechTagger posTagger) + { + this(model, new SPARQLIndex(model),new 
SPARQLIndex(model),new SPARQLIndex(model),posTagger); + setMappingIndex(mappingBasedIndex); + } + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ this.model = model; this.resourcesIndex = resourcesIndex; Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-13 06:15:24 UTC (rev 3825) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-22 13:15:47 UTC (rev 3826) @@ -1,17 +1,21 @@ package org.dllearner.algorithm.tbsl.learning; import static org.junit.Assert.fail; +import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.PrintWriter; import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; +import java.net.URL; import java.net.URLDecoder; import java.text.DateFormat; import java.util.Collection; @@ -26,6 +30,7 @@ import java.util.Set; import java.util.SortedMap; import java.util.Stack; +import java.util.TreeMap; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ExecutionException; @@ -56,6 +61,10 @@ import org.dllearner.common.index.Index; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SOLRIndex; +import org.dllearner.common.index.SPARQLClassesIndex; +import 
org.dllearner.common.index.SPARQLIndex; +import org.dllearner.common.index.VirtuosoClassesIndex; +import org.dllearner.common.index.VirtuosoResourcesIndex; import org.dllearner.core.ComponentInitException; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; @@ -70,6 +79,8 @@ import cern.colt.Arrays; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; @@ -91,30 +102,106 @@ // problem mit "In/IN which/WDT films/NNS did/VBD Julia/NNP Roberts/NNP as/RB well/RB as/IN Richard/NNP Gere/NNP play/NN" public class SPARQLTemplateBasedLearner3Test -{ - private static final boolean PRETAGGED = true; - +{ private static final File evaluationFolder = new File("cache/evaluation"); + private static final boolean DBPEDIA_PRETAGGED = true; + private static final boolean OXFORD_PRETAGGED = false; /*@Test*/ public void testDBpedia() throws Exception {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);} - //@Test public void testOxford() {test(new File(""),"");} - @Test public void justTestTheLastWorkingOnesDBpedia() throws Exception + @Test public void testOxford() throws IOException + { + Model m = loadOxfordModel(); + List<String> questions = new LinkedList<String>(); + BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt")))); + int j=0; + for(String line=in.readLine();line!=null;) + { + j++; + if(j>5) break; + if(!line.isEmpty()) {questions.add(line.replace("question: ", ""));} + } + in.close(); + SortedMap<Integer,String> id2Question = new TreeMap<Integer, String>(); + 
Iterator<String> it = questions.iterator(); + for(int i=0;i<questions.size();i++) {id2Question.put(i, it.next());} + MappingBasedIndex mappingIndex= new MappingBasedIndex( + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + ); + generateTestDataMultiThreaded(id2Question, null,m,mappingIndex,OXFORD_PRETAGGED); + } + + private Model loadOxfordModel() + { + // load it into a model because we can and it's faster and doesn't rely on endpoint availability + // the files are located in the paper svn under question-answering-iswc-2012/data + // ls *ttl | xargs -I @ echo \"@\", + final String[] rdf = { + "abbeys-sales-triple.ttl", + "andrewsonline-sales-triple.ttl", + "anker-sales-triple.ttl", + "bairstoweves-sales-triple.ttl", + "ballards-sales-triple.ttl", + "breckon-sales-triple.ttl", + "buckellandballard-sales-triple.ttl", + "carterjonas-sales.ttl", + "churchgribben-salse-triple.ttl", + "findaproperty-sales-triple.ttl", + "johnwood-sales-triple.ttl", + "martinco-letting-triples.ttl", + "scottfraser-letting-triples.ttl", + "scottfraser-sales-triples.ttl", + "scottsymonds-sales-triple.ttl", + "scrivenerandreinger-sales-triple.ttl", + "sequencehome-sales-triple.ttl", + "teampro-sales.ttl", + "thomasmerrifield-sales-triples.ttl", + "wwagency-letting-triple_with-XSD.ttl", + "wwagency-sales-triple_with-XSD.ttl", + // ls links/*ttl | xargs -I @ echo \"@\", + "links/allNear.ttl", + "links/all_walking_distance.ttl", + "links/lgd_data.ttl", + // ls schema/* | xargs -I @ echo \"@\", + "schema/goodRelations.owl", + "schema/LGD-Dump-110406-Ontology.nt", + "schema/ontology.ttl", + 
"schema/vCard.owl" + }; + Model m = ModelFactory.createDefaultModel(); + for(final String s:rdf) + { + // see http://jena.apache.org/documentation/javadoc/jena/com/hp/hpl/jena/rdf/model/Model.html#read%28java.io.InputStream,%20java.lang.String,%20java.lang.String%29 + String ending = s.substring(s.lastIndexOf('.')+1, s.length()); + String type = (ending.equals("ttl")||ending.equals("nt"))?"N3":ending.equals("owl")?"RDF/XML":String.valueOf(Integer.valueOf("filetype "+ending+" not handled.")); + // switch(type) {case "ttl":type="TURTLE";break;case "owl":type="RDF/XML";break;default:throw new RuntimeException("filetype "+ending+" not handled.");} // no Java 1.7 :-( + try{m.read(getClass().getClassLoader().getResourceAsStream("oxford/"+s),null, type);} + catch(RuntimeException e) {throw new RuntimeException("Could not read into model: "+s,e);} + } + // test("Oxford evaluation questions", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), + // SparqlEndpoint.getEndpointDBpediaLiveAKSW(),dbpediaLiveCache); + return m; + } + + /*@Test*/ public void justTestTheLastWorkingOnesDBpedia() throws Exception { SortedMap<Long,Evaluation> evaluations; - + if((evaluations=Evaluation.read()).isEmpty()) { testDBpedia(); evaluations=Evaluation.read(); } - + Evaluation latestEvaluation = evaluations.get(evaluations.lastKey()); for(String question: latestEvaluation.correctlyAnsweredQuestions) { - LearnStatus status = new LearnQueryCallable(question, 0,new QueryTestData() , dbpediaLiveKnowledgebase).call(); + LearnStatus status = new LearnQueryCallable(question, 0,new QueryTestData() , dbpediaLiveKnowledgebase,DBPEDIA_PRETAGGED).call(); if(status.type!=LearnStatus.Type.OK) {fail("Failed with question \""+question+"\", query status: "+status);} } } @@ -165,7 +252,7 @@ logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); - QueryTestData 
learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase); + QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase,null,null,DBPEDIA_PRETAGGED); long endLearning = System.currentTimeMillis(); logger.info("finished learning after "+(endLearning-startLearning)/1000.0+"s"); learnedTestData.generateAnswers(endpoint,cache); @@ -414,10 +501,16 @@ } // enum LearnStatus {OK, TIMEOUT,EXCEPTION,NO_TEMPLATE_FOUND,QUERY_RESULT_EMPTY, NO_QUERY_LEARNED;} - /** - * @return the test data containing those of the given questions for which queries were found and the results of the queries + /** + * @param id2Question + * @param kb either the kb or both the model and the index can be null. if the kb is null the model and index are used, else the kb is used. + * @param model can be null if the kb is not null + * @param index can be null if the kb is not null + * @return the test data containing those of the given questions for which queries were found and the results of the queries + * @throws MalformedURLException + * @throws ComponentInitException */ - private QueryTestData generateTestDataMultiThreaded(SortedMap<Integer, String> id2Question,Knowledgebase kb) throws MalformedURLException, ComponentInitException + private QueryTestData generateTestDataMultiThreaded(SortedMap<Integer, String> id2Question,Knowledgebase kb,Model model, MappingBasedIndex index,boolean pretagged) { QueryTestData testData = new QueryTestData(); // -- only create the learner parameters once to save time -- @@ -435,7 +528,8 @@ for(int i: id2Question.keySet()) {//if(i != 78)continue; - futures.put(i,service.submit(new LearnQueryCallable(id2Question.get(i),i, testData,kb))); + if(kb!=null) {futures.put(i,service.submit(new LearnQueryCallable(id2Question.get(i),i, testData,kb,pretagged)));} + else {futures.put(i,service.submit(new LearnQueryCallable(id2Question.get(i),i, 
testData,model,index,pretagged)));} } for(int i: id2Question.keySet()) {//if(i != 78)continue; @@ -688,6 +782,8 @@ } return this; } + + } private QueryTestData readQueries(final File file) @@ -840,58 +936,74 @@ // System.out.println(diff+" millis as a whole, "+diff/1000.0+" millis per run"); // } - // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) - // { - // URL url; - // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} - // SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + // private Knowledgebase createOxfordKnowledgebase(ExtractionDBCache cache) + // { + // URL url; + // try{url = new URL("http://lgd.aksw.org:8900/sparql");} catch(Exception e) {throw new RuntimeException(e);} + // SparqlEndpoint endpoint = new SparqlEndpoint(url, Collections.singletonList("http://diadem.cs.ox.ac.uk"), Collections.<String>emptyList()); + // + // SPARQLIndex resourcesIndex = new VirtuosoResourcesIndex(endpoint, cache); + // SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); + // SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); + // MappingBasedIndex mappingIndex= new MappingBasedIndex( + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + // ); + // + // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); // - // SPARQLIndex resourcesIndex = new 
VirtuosoResourcesIndex(endpoint, cache); - // SPARQLIndex classesIndex = new VirtuosoClassesIndex(endpoint, cache); - // SPARQLIndex propertiesIndex = new VirtuosoPropertiesIndex(endpoint, cache); - // MappingBasedIndex mappingIndex= new MappingBasedIndex( - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), - // SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() - // ); - // - // Knowledgebase kb = new Knowledgebase(oxfordEndpoint, "Oxford - Real estate", "TODO", resourcesIndex, propertiesIndex, classesIndex, mappingIndex); - // return kb; - // } + // return kb; + // } private static class LearnQueryCallable implements Callable<LearnStatus> { private final String question; // private final String endpoint; private final int id; private final QueryTestData testData; - private final Knowledgebase knowledgeBase; - static private final PartOfSpeechTagger posTagger = PRETAGGED? 
null: new SynchronizedStanfordPartOfSpeechTagger(); + static private class POSTaggerHolder + {static public final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger();} + static private final WordNet wordnet = new WordNet(); static private final Options options = new Options(); + private final boolean pretagged; + private final SPARQLTemplateBasedLearner2 learner; + public LearnQueryCallable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase,boolean pretagged) + { + this.question=question; + this.id=id; + this.testData=testData; + this.pretagged=pretagged; + learner = new SPARQLTemplateBasedLearner2(knowledgeBase,pretagged?null:POSTaggerHolder.posTagger,wordnet,options); + } - public LearnQueryCallable(String question, int id, QueryTestData testData, Knowledgebase knowledgeBase) + public LearnQueryCallable(String question, int id, QueryTestData testData, Model model,MappingBasedIndex index,boolean pretagged) { this.question=question; this.id=id; - this.knowledgeBase=knowledgeBase; this.testData=testData; + this.pretagged=pretagged; + MappingBasedIndex mappingIndex= new MappingBasedIndex( + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), + SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() + ); + + learner = new SPARQLTemplateBasedLearner2(model,mappingIndex,pretagged?null:POSTaggerHolder.posTagger); } + @Override public LearnStatus call() { logger.trace("learning question: "+question); try { // learn query - // TODO: change to knowledgebase parameter - SPARQLTemplateBasedLearner3 learner = new 
SPARQLTemplateBasedLearner3(createDBpediaLiveKnowledgebase(dbpediaLiveCache),posTagger,wordnet,options); - // SPARQLTemplateBasedLearner2 dbpediaLiveLearner = new SPARQLTemplateBasedLearner2(createDBpediaLiveKnowledgebase(dbpediaLiveCache)); - learner.init(); - learner.setUseIdealTagger(true); learner.setQuestion(question); learner.learnSPARQLQueries(); String learnedQuery = learner.getBestSPARQLQuery(); @@ -1170,4 +1282,4 @@ // } private static ResultSet executeSelect(SparqlEndpoint endpoint, String query, ExtractionDBCache cache){return SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query));} -} \ No newline at end of file +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |