From: <ki...@us...> - 2012-08-23 14:35:44
|
Revision: 3835 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3835&view=rev Author: kirdie Date: 2012-08-23 14:35:33 +0000 (Thu, 23 Aug 2012) Log Message: ----------- more work on the oxford test. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-23 13:19:03 UTC (rev 3834) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-23 14:35:33 UTC (rev 3835) @@ -911,6 +911,7 @@ // return 0; // } // return Math.log(cnt); + if(Double.isNaN(popularity)) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} return popularity; } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-08-23 13:19:03 UTC (rev 3834) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-08-23 14:35:33 UTC (rev 3835) @@ -33,6 +33,7 @@ import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import com.hp.hpl.jena.rdf.model.Model; class QueryTestData implements Serializable { @@ -68,12 +69,12 @@ catch (ClassNotFoundException e){throw new RuntimeException(e);} } - public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache) + public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache,Model model) { if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} for(int i:id2Query.keySet()) { - Set<String> uris = SPARQLTemplateBasedLearner3Test.getUris(endpoint, id2Query.get(i),cache); + Set<String> uris = SPARQLTemplateBasedLearner3Test.getUris(endpoint, id2Query.get(i),cache,model); id2Answers.put(i, uris); // empty answer set better transfers intended meaning and doesn't cause NPEs in html generation :-) if(!uris.isEmpty()) {/*id2Answers.put(i, uris);*/} else {id2LearnStatus.put(i, LearnStatus.QUERY_RESULT_EMPTY);} Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-23 13:19:03 UTC (rev 3834) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-23 14:35:33 UTC (rev 3835) @@ -50,6 +50,7 @@ import org.apache.log4j.SimpleLayout; import org.dllearner.algorithm.tbsl.ltag.parser.Parser; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.SynchronizedStanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.templator.Templator; @@ -70,8 +71,11 @@ import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import cern.colt.Arrays; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.RDFNode; @@ -104,17 +108,25 @@ {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);} + /*@Test*/ public void testOxford() throws Exception + { + } + @Test public void generateXMLOxford() throws IOException { - Model m = loadOxfordModel(); + boolean ADD_POS_TAGS = false; + PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); + Model model = loadOxfordModel(); List<String> questions = new LinkedList<String>(); BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt")))); int j=0; for(String line;(line=in.readLine())!=null;) { j++; - if(j>1) break; - if(!line.isEmpty()) {questions.add(line.replace("question: ", ""));} + // if(j>5) break; // TODO: remove later + String question = line.replace("question: ", "").trim(); + if(ADD_POS_TAGS&&!OXFORD_PRETAGGED) {question = posTagger.tag(question);} + if(!line.trim().isEmpty()) {questions.add(question);} } in.close(); SortedMap<Integer,String> id2Question = new TreeMap<Integer, String>(); @@ -126,7 +138,8 @@ SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,m,mappingIndex,OXFORD_PRETAGGED); + QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,model,mappingIndex,ADD_POS_TAGS||OXFORD_PRETAGGED); + testData.generateAnswers(null, null, model); testData.writeQaldXml(new File("log/test.xml")); } @@ -175,7 +188,7 @@ String type = (ending.equals("ttl")||ending.equals("nt"))?"TURTLE":ending.equals("owl")?"RDF/XML":String.valueOf(Integer.valueOf("filetype "+ending+" not handled.")); // switch(type) {case "ttl":type="TURTLE";break;case "owl":type="RDF/XML";break;default:throw new RuntimeException("filetype "+ending+" not handled.");} // no Java 1.7 :-( try{ -// m.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/data/"+s)), null, type);}catch (FileNotFoundException e) {} + // m.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/data/"+s)), null, type);}catch (FileNotFoundException e) {} m.read(getClass().getClassLoader().getResourceAsStream("oxford/"+s),null, type);} catch(RuntimeException e) {throw new RuntimeException("Could not read into model: "+s,e);} } @@ -241,7 +254,7 @@ if(!updatedReferenceXML.exists()) { logger.info("Generating updated reference for "+title); - generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); + generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache,null); } QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML); @@ -251,7 +264,7 @@ QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase,null,null,DBPEDIA_PRETAGGED); long endLearning = System.currentTimeMillis(); logger.info("finished learning after "+(endLearning-startLearning)/1000.0+"s"); - learnedTestData.generateAnswers(endpoint,cache); + learnedTestData.generateAnswers(endpoint,cache,null); long endGeneratingAnswers = System.currentTimeMillis(); logger.info("finished generating answers in "+(endGeneratingAnswers-endLearning)/1000.0+"s"); Evaluation evaluation = evaluate(referenceTestData, learnedTestData); @@ -259,6 +272,11 @@ evaluation.write(); } + private void evaluateAndWrite() + { + + } + /** evaluates a data set against a reference. * @param reference the test data assumed to be correct. needs to contain the answers for all queries. * @param suspect the test data to compare with the reference. @@ -542,7 +560,8 @@ } catch (ExecutionException e) { - throw new RuntimeException("question="+question,e); + testData.id2LearnStatus.put(i, new LearnStatus(LearnStatus.Type.EXCEPTION, e)); + //throw new RuntimeException("question="+question,e); } catch (TimeoutException e) { @@ -590,7 +609,7 @@ * @throws SAXException * @throws TransformerException */ - private void generateUpdatedXML(File originalFile, File updatedFile,SparqlEndpoint endpoint, ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException + private void generateUpdatedXML(File originalFile, File updatedFile,SparqlEndpoint endpoint, ExtractionDBCache cache,Model model) throws ParserConfigurationException, SAXException, IOException, TransformerException { logger.info(String.format("Updating question file \"%s\" by removing questions without nonempty resource list answer and adding answers.\n" + " Saving the result to file \"%s\"",originalFile.getPath(),updatedFile.getPath())); @@ -629,7 +648,7 @@ if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 1,2,5,7) { - Set<String> uris = getUris(endpoint, query,cache); + Set<String> uris = getUris(endpoint, query,cache,model); if(!uris.isEmpty()) { // remove reference answers of the benchmark because they are obtained from an other endpoint @@ -687,7 +706,7 @@ // private SPARQLTemplateBasedLearner2 oxfordLearner; // private SPARQLTemplateBasedLearner2 dbpediaLiveLearner; -// private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); + // private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); private final static ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); private final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); @@ -725,7 +744,7 @@ Logger.getRootLogger().setLevel(Level.WARN); Logger.getLogger(Templator.class).setLevel(Level.WARN); Logger.getLogger(Parser.class).setLevel(Level.WARN); - Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.WARN); // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); logger.setLevel(Level.INFO); // TODO: remove when finishing implementation of this class logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); @@ -734,16 +753,21 @@ // oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache)); } - public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache) + public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache, Model model) { if(query==null) {throw new AssertionError("query is null");} - if(endpoint==null) {throw new AssertionError("endpoint is null");} +// if(endpoint==null) {throw new AssertionError("endpoint is null");} if(!query.contains("SELECT")&&!query.contains("select")) {return Collections.<String>emptySet();} // abort when not a select query Set<String> uris = new HashSet<String>(); // QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query); + ResultSet rs; // try{rs = qe.execSelect();} - try{rs = executeSelect(endpoint, query, cache);} + try + { + if(model!=null) {rs = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model).execSelect();} + else {rs = executeSelect(endpoint, query, cache);} + } catch(QueryExceptionHTTP e) { logger.error("Error getting uris for query "+query+" at endpoint "+endpoint,e); @@ -780,7 +804,7 @@ } return uris; } - + private static String urlDecode(String url){ String decodedURL = null; try { @@ -836,7 +860,7 @@ static private class POSTaggerHolder {static public final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger();} - + static private final WordNet wordnet = new WordNet(); static private final Options options = new Options(); private final SPARQLTemplateBasedLearner2 learner; @@ -848,6 +872,7 @@ this.testData=testData; learner = new SPARQLTemplateBasedLearner2(knowledgeBase,pretagged?null:POSTaggerHolder.posTagger,wordnet,options); try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} + learner.setUseIdealTagger(pretagged); } public LearnQueryCallable(String question, int id, QueryTestData testData, Model model,MappingBasedIndex index,boolean pretagged) @@ -861,16 +886,18 @@ SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - + learner = new SPARQLTemplateBasedLearner2(model,mappingIndex,pretagged?null:POSTaggerHolder.posTagger); try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} + learner.setUseIdealTagger(pretagged); learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); learner.setUseDomainRangeRestriction(false); } @Override public LearnStatus call() - { + { + logger.trace("learning question: "+question); try { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <chr...@us...> - 2012-08-26 12:25:06
|
Revision: 3837 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3837&view=rev Author: christinaunger Date: 2012-08-26 12:24:54 +0000 (Sun, 26 Aug 2012) Log Message: ----------- very minor things... Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java Property Changed: ---------------- trunk/components-ext/src/main/resources/ Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-08-25 17:06:30 UTC (rev 3836) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/GrammarFilter.java 2012-08-26 12:24:54 UTC (rev 3837) @@ -163,7 +163,7 @@ if (token.matches(anchor)) { foundCandidates = true; - coveredTokens.add(token); + coveredTokens.add(anchor.replace(".+","")); // DISAM String[] newTokenParts = new String[tokenParts.length]; @@ -243,7 +243,7 @@ buildSlotFor.add(new Pair<String,String>(word,s.trim().substring(s.indexOf("/")+1))); doubles.add(word); } else { - System.out.println("Oh no, " + s + " has no POS tag!"); + logger.error("Oh no, " + s + " has no POS tag!"); } } if (VERBOSE) logger.trace("build slot for: " + buildSlotFor + "\n"); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java 2012-08-25 17:06:30 UTC (rev 3836) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/templator/TemplatorHandler.java 2012-08-26 12:24:54 UTC (rev 3837) @@ -17,6 +17,10 @@ basictemplator.setGrammarFiles(files); } + public void setVerbose(boolean b) { + templator.setVERBOSE(b); + } + public Set<Template> buildTemplates(String s) { return templator.buildTemplates(s); } Property changes on: trunk/components-ext/src/main/resources ___________________________________________________________________ Added: svn:ignore + .log4j.properties.swp Modified: trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex =================================================================== --- trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-08-25 17:06:30 UTC (rev 3836) +++ trunk/components-ext/src/main/resources/tbsl/lexicon/english_oxford.lex 2012-08-26 12:24:54 UTC (rev 3837) @@ -15,27 +15,32 @@ in walking distance from || (PP P:'in' (NP N:'walking' N:'distance' P:'from' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> at walking distance from || (NP NP* (PP P:'at' (NP N:'walking' N:'distance' P:'from' DP[dp]))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> at walking distance from || (PP P:'at' (NP N:'walking' N:'distance' P:'from' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/at_walking_distance ]> - in the area || (NP NP* (PP P:'in' (DP DET:'the' (NP N:'area' DP[dp])))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near]> + in the area || (NP NP* (PP P:'in' (DP DET:'the' (NP N:'area' DP[dp])))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near]> + in the area of || (NP NP* (PP P:'in' (DP DET:'the' (NP N:'area' (PP P:'of' DP[dp]))))) || <x,l1,<e,t>, [ l1:[ | SLOT_near(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_near/OBJECTPROPERTY/near]> in || (NP NP* (PP P:'in' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> on || (NP NP* (PP P:'on' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_location(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_location/PROPERTY/location^city^postal_code^address^street ]> since || (NP NP* (PP P:'since' DP[dp])) || <x,l1,<e,t>, [ l1:[ | SLOT_since(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_since/PROPERTY/since ]> - for .+ pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - for more than .+ pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - for less than .+ pounds || (NP NP* (PP P:'for' DET:'less' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[v | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[v | SLOT_includes(v,x), SLOT_price(v,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - cheaper than .+ pounds || (NP NP* (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - cheaper than .+ pounds || (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - below .+ pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - below .+ pounds || (PP P:'below' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - from .+ to .+ pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> - between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_includes(v,x), SLOT_price(v,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price, SLOT_includes/OBJECTPROPERTY/includes ]> + for .+ pounds || (NP NP* (PP P:'for' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y) ] ], [ (l2,y,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for more than .+ pounds || (NP NP* (PP P:'for' DET:'more' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than .+ pounds || (NP NP* (PP P:'for' DET:'less' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for less than || (NP NP* (PP P:'for' DET:'less' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + for more than || (NP NP* (PP P:'for' DET:'more' DET:'than' NUM[num])) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greater(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + cheaper than .+ pounds || (NP NP* (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1: [ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + cheaper than .+ pounds || (ADJ ADJ:'cheaper' DET:'than' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + below .+ pounds || (NP NP* (PP P:'below' (NP NUM[num] N:'pounds'))) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + below .+ pounds || (PP P:'below' (NP NUM[num] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), less(y,z) ] ], [ (l2,z,num,e) ], [ l2=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + from .+ to .+ pounds || (NP NP* (PP P:'from' NUM[num1] P:'to' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/price ]> + between .+ and .+ pounds || (NP NP* (PP P:'between' NUM[num1] P:'and' NUM[num2] N:'pounds')) || <x,l1,<e,t>, [ l1:[ | SLOT_price(x,y), greaterorequal(y,n1), lessorequal(y,n2) ] ], [ (l2,n1,num1,e),(l3,n2,num2,e) ], [ l2=l1,l3=l1 ],[ SLOT_price/DATATYPEPROPERTY/prices ]> - with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> - with || (PP P:'with' DP[dp]) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> + with || (NP NP* (PP P:'with' DP[dp])) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ z | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> + with || (PP P:'with' DP[dp]) || <x,l1,<e,t>, [ l1:[ | empty(x,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[]> ;; <x,l1,<e,t>, [ l1:[ z | SLOT_description(x,z), regextoken(z,y) ] ], [ (l2,y,dp,<<e,t>,t>) ], [ l2=l1 ],[ SLOT_description/DATATYPEPROPERTY/description, SLOT_arg/LITERAL/z ]> square meters || (NP N:'square' N:'meters') || <x,l1,<e,t>, [l1:[ | SLOT_size(x,y) ]], [],[],[SLOT_size/DATATYPEPROPERTY/size ]> +// ADJECTIVES + + brand new || (NP ADJ:'brand' ADJ:'new' NP*) || <x,l1,<e,t>, [ l1:[ | ] ], [], [],[]> + // MONTHS january || (DP DP:'january') || <x,l1,<<e,t>,t>, [ l1:[ x | xsd:month(x,1) ]], [],[],[]> Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2012-08-25 17:06:30 UTC (rev 3836) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TestFrontend.java 2012-08-26 12:24:54 UTC (rev 3837) @@ -17,6 +17,7 @@ public static void main(String[] args) { TemplatorHandler handler = new TemplatorHandler(GRAMMAR_FILES); + handler.setVerbose(true); System.out.println("======= SPARQL Templator v0.1 ============="); System.out.print("\nMode: "); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-09-17 12:02:27
|
Revision: 3844 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3844&view=rev Author: kirdie Date: 2012-09-17 12:02:15 +0000 (Mon, 17 Sep 2012) Log Message: ----------- Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Added Paths: ----------- trunk/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -494,6 +494,7 @@ max = a.getProminence(); } } + if(min==max) {return;} for(Allocation a : allocations){ double prominence = a.getProminence()/(max-min); a.setProminence(prominence); @@ -905,13 +906,14 @@ if(popularity == null){ popularity = Integer.valueOf(0); } + System.out.println(popularity); // if(cnt == 0){ // return 0; // } // return Math.log(cnt); - if(Double.isNaN(popularity)) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} + if(popularity!=popularity) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} return popularity; } Added: trunk/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml =================================================================== (Binary files differ) Property changes on: trunk/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml ___________________________________________________________________ Added: svn:mime-type + application/xml Modified: trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-09-17 12:02:15 UTC (rev 3844) @@ -1,4 +1,4 @@ http://www.w3.org/2006/vcard/ns#street-address|address, location, postal code http://purl.org/goodrelations/v1#description|description -http://purl.org/goodrelations/v1#hasPrice|has price, price +http://diadem.cs.ox.ac.uk/ontologies/real-estate#hasPrice|has price, price http://diadem.cs.ox.ac.uk/ontologies/real-estate#receptions|receptions, reception room, reception rooms \ No newline at end of file Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -12,6 +12,8 @@ import org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner3Test; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.VirtuosoClassesIndex; @@ -46,7 +48,10 @@ OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); +// SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); + SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2 + (SPARQLTemplateBasedLearner3Test.loadOxfordModel(),SPARQLTemplateBasedLearner3Test.getOxfordMappingIndex(), new StanfordPartOfSpeechTagger()); + learner.setMappingIndex(mappingIndex); learner.init(); learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -35,7 +35,7 @@ import org.xml.sax.SAXException; import com.hp.hpl.jena.rdf.model.Model; -class QueryTestData implements Serializable +public class QueryTestData implements Serializable { public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -81,7 +81,8 @@ import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; -/** Tests TSBL against the qald2 benchmark test data with the DBpedia endpoint. +/** If you just want to test the standard queries, activate justTestTheLastWorkingOnesDBpedia() and testOxford(). + * Tests TSBL against the qald2 benchmark test data with the DBpedia endpoint. * The qald2 endpoint is not used because it may not always be available. * To speed up the process at first the test file is read and an updated copy of it is saved that * only contains the questions where the reference query does not return a nonempty list of resources. @@ -105,18 +106,66 @@ private static final boolean OXFORD_PRETAGGED = false; /*@Test*/ public void testDBpedia() throws Exception - {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), - SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);} + { + File file = generateTestDataIfNecessary( + new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), + SparqlEndpoint.getEndpointDBpedia(), + dbpediaLiveCache); + test("QALD 2 Benchmark ideally tagged", file,SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache,dbpediaLiveKnowledgebase,null,null); + } - /*@Test*/ public void testOxford() throws Exception + @Test public void testOxford() throws Exception { + File file = new File(getClass().getClassLoader().getResource("tbsl/evaluation/oxford_working_questions.xml").getFile()); + test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex()); } - @Test public void generateXMLOxford() throws IOException +// /*@Test*/ public void testOxford() throws Exception +// { +// Model model = loadOxfordModel(); +// QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml")); +// // answers are not included at least in the first query TODO: check, why +// testData.generateAnswers(null, null, model); +// QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED); +// newTestData.generateAnswers(null, null, model); +// for(int i : testData.id2Question.keySet()) +// { +// logger.info("Comparing answers for question "+testData.id2Question.get(i)); +// String referenceQuery = testData.id2Query.get(i); +// String newQuery = newTestData.id2Query.get(i); +// if(!referenceQuery.equals(newQuery)) +// { +// logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery); +// Collection<String> referenceAnswers = testData.id2Answers.get(i); +// Collection<String> newAnswers = newTestData.id2Answers.get(i); +// if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers); +// } +// } +// } + + /** For debugging one question in particular. + */ + /*@Test*/ public void testSingleQueryOxford() { - boolean ADD_POS_TAGS = false; - PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); + Logger.getLogger(Templator.class).setLevel(Level.DEBUG); + Logger.getLogger(Parser.class).setLevel(Level.DEBUG); + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); + // String question = "houses for less than 900000 pounds"; + String question = "houses/NNS for/IN less/JJR than/IN 900000/CD pounds/NNS"; + //question = new StanfordPartOfSpeechTagger().tag(question); + Model model = loadOxfordModel(); + QueryTestData testData = new QueryTestData(); + new LearnQueryCallable(question, 0, testData, model, getOxfordMappingIndex(), true).call(); + logger.info("learned query: "+testData.id2Query.get(0)); + } + + /*@Test*/ public void generateXMLOxford() throws IOException + { + boolean ADD_POS_TAGS = true; + PartOfSpeechTagger posTagger = null; + if(ADD_POS_TAGS) {posTagger=new StanfordPartOfSpeechTagger();} + Model model = loadOxfordModel(); List<String> questions = new LinkedList<String>(); BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt")))); int j=0; @@ -132,18 +181,23 @@ SortedMap<Integer,String> id2Question = new TreeMap<Integer, String>(); Iterator<String> it = questions.iterator(); for(int i=0;i<questions.size();i++) {id2Question.put(i, it.next());} - MappingBasedIndex mappingIndex= new MappingBasedIndex( + MappingBasedIndex mappingIndex= getOxfordMappingIndex(); + QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,model,mappingIndex,ADD_POS_TAGS||OXFORD_PRETAGGED); + testData.generateAnswers(null, null, model); + testData.writeQaldXml(new File("log/test.xml")); + } + + public static MappingBasedIndex getOxfordMappingIndex() + { + return new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,model,mappingIndex,ADD_POS_TAGS||OXFORD_PRETAGGED); - testData.generateAnswers(null, null, model); - testData.writeQaldXml(new File("log/test.xml")); } - private Model loadOxfordModel() + public static Model loadOxfordModel() { // load it into a model because we can and it's faster and doesn't rely on endpoint availability // the files are located in the paper svn under question-answering-iswc-2012/data @@ -189,7 +243,7 @@ // switch(type) {case "ttl":type="TURTLE";break;case "owl":type="RDF/XML";break;default:throw new RuntimeException("filetype "+ending+" not handled.");} // no Java 1.7 :-( try{ // m.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/data/"+s)), null, type);}catch (FileNotFoundException e) {} - m.read(getClass().getClassLoader().getResourceAsStream("oxford/"+s),null, type);} + m.read(SPARQLTemplateBasedLearner3Test.class.getClassLoader().getResourceAsStream("oxford/"+s),null, type);} catch(RuntimeException e) {throw new RuntimeException("Could not read into model: "+s,e);} } // test("Oxford evaluation questions", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), @@ -215,9 +269,10 @@ } } - public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache,Knowledgebase kb, Model model, MappingBasedIndex index) + throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { - generateTestDataIfNecessaryAndEvaluateAndWrite(title,referenceXML,endpoint,cache); + evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index); generateHTML(title); // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} @@ -245,26 +300,31 @@ learnedTestData.write();*/ } - private void generateTestDataIfNecessaryAndEvaluateAndWrite(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException + private File generateTestDataIfNecessary(final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException { String dir = "cache/"+getClass().getSimpleName()+"/"; - new File(dir).mkdirs(); File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); + if(!updatedReferenceXML.exists()) { - logger.info("Generating updated reference for "+title); + logger.info("Generating updated reference."); generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache,null); } + return updatedReferenceXML; + } + private void evaluateAndWrite(String title,final File updatedReferenceXML, final SparqlEndpoint endpoint,ExtractionDBCache cache, + Knowledgebase kb, Model model, MappingBasedIndex index) + { QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML); logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); - QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase,null,null,DBPEDIA_PRETAGGED); + QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, kb,model,index,DBPEDIA_PRETAGGED); long endLearning = System.currentTimeMillis(); logger.info("finished learning after "+(endLearning-startLearning)/1000.0+"s"); - learnedTestData.generateAnswers(endpoint,cache,null); + learnedTestData.generateAnswers(endpoint,cache,model); long endGeneratingAnswers = System.currentTimeMillis(); logger.info("finished generating answers in "+(endGeneratingAnswers-endLearning)/1000.0+"s"); Evaluation evaluation = evaluate(referenceTestData, learnedTestData); @@ -756,7 +816,7 @@ public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache, Model model) { if(query==null) {throw new AssertionError("query is null");} -// if(endpoint==null) {throw new AssertionError("endpoint is null");} + // if(endpoint==null) {throw new AssertionError("endpoint is null");} if(!query.contains("SELECT")&&!query.contains("select")) {return Collections.<String>emptySet();} // abort when not a select query Set<String> uris = new HashSet<String>(); // QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query); @@ -851,6 +911,8 @@ // // return kb; // } + /** @author konrad + * Learns a query for a question and puts it into the given testData object. * */ private static class LearnQueryCallable implements Callable<LearnStatus> { private final String question; @@ -897,7 +959,7 @@ @Override public LearnStatus call() { - + logger.trace("learning question: "+question); try { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ki...@us...> - 2012-10-30 16:10:51
|
Revision: 3861 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3861&view=rev Author: kirdie Date: 2012-10-30 16:10:36 +0000 (Tue, 30 Oct 2012) Log Message: ----------- corrected the oxford benchmarking. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-10-30 12:53:54 UTC (rev 3860) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-10-30 16:10:36 UTC (rev 3861) @@ -1,27 +1,19 @@ package org.dllearner.algorithm.tbsl.learning; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URL; +import hmm.HiddenMarkovModel; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; +import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; +import org.apache.commons.collections15.MultiMap; import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; @@ -31,13 +23,7 @@ import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.sparql.Allocation; import org.dllearner.algorithm.tbsl.sparql.Query; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Filter; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Pair; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -46,13 +32,10 @@ import org.dllearner.algorithm.tbsl.util.Knowledgebase; import org.dllearner.algorithm.tbsl.util.PopularityMap; import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; -import org.dllearner.algorithm.tbsl.util.Similarity; -import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; import org.dllearner.common.index.MappingBasedIndex; -import org.dllearner.common.index.SOLRIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; @@ -63,19 +46,13 @@ import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblem; import org.dllearner.core.SparqlQueryLearningAlgorithm; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.core.owl.ObjectProperty; -import org.dllearner.core.owl.Thing; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; - import com.hp.hpl.jena.ontology.OntModelSpec; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; @@ -85,10 +62,6 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.sparql.expr.ExprAggregator; -import com.hp.hpl.jena.sparql.expr.ExprVar; -import com.hp.hpl.jena.sparql.expr.aggregate.AggCount; -import com.hp.hpl.jena.sparql.expr.aggregate.Aggregator; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -516,358 +489,88 @@ return relevantKeywords; } - private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - logger.debug("Generating SPARQL query candidates..."); - - Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { - - @Override - public int compare(Slot o1, Slot o2) { - if(o1.getSlotType() == o2.getSlotType()){ - return o1.getToken().compareTo(o2.getToken()); - } else { - return -1; + // just for testing the HMM integration, getWeightedSPARQLQueriesOld is the original one + private SortedSet<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates) + { + // for testing + for(Template template: templates) + { + { + ArrayList<String> keywords = new ArrayList<String>(); + for(Slot slot: template.getSlots()) + { + keywords.add(slot.getWords().get(0)); } + if(template.getSlots().size()!=3) {continue;} + if(!keywords.contains("Mean Hamster Software")) {continue;} + if(!keywords.contains("published")) {continue;} + System.out.println("\"keywords\": "+keywords); } - }); - slot2Allocations = Collections.synchronizedMap(new HashMap<Slot, Set<Allocation>>()); - - - SortedSet<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - - Set<Allocation> allocations; - - for(Template t : templates){ - logger.info("Processing template:\n" + t.toString()); - allocations = new TreeSet<Allocation>(); - boolean containsRegex = t.getQuery().toString().toLowerCase().contains("(regex("); - - ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); - List<Future<Map<Slot, SortedSet<Allocation>>>> list = new ArrayList<Future<Map<Slot, SortedSet<Allocation>>>>(); - - long startTime = System.currentTimeMillis(); - - for (Slot slot : t.getSlots()) { - if(!slot2Allocations.containsKey(slot)){//System.out.println(slot + ": " + slot.hashCode());System.out.println(slot2Allocations); - Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); - Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); - list.add(submit); - } - } - - for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { - try { - Map<Slot, SortedSet<Allocation>> result = future.get(); - Entry<Slot, SortedSet<Allocation>> item = result.entrySet().iterator().next(); - slot2Allocations.put(item.getKey(), item.getValue()); - } catch (InterruptedException e) { - e.printStackTrace(); - } catch (ExecutionException e) { - e.printStackTrace(); + System.out.println(template); + SortedSet<WeightedQuery> queries = new TreeSet<WeightedQuery>(); + Query query = template.getQuery(); + double score = 0; + + Map<List<String>,Collection<String>> segmentToURIs = new HashMap<List<String>,Collection<String>>(); + for(Slot slot: template.getSlots()) + { + List<String> segment = new LinkedList<String>(); + segment.add(slot.getWords().get(0)); // TODO: split it up? + + Set<String> uris = new HashSet<String>(); + + for(IndexResultItem item : getIndexResultItems(slot)) + { + uris.add(item.getUri()); } + segmentToURIs.put(segment,uris); } + HiddenMarkovModel hmm = new HiddenMarkovModel(); + hmm.initialization(); + hmm.startMarkovModel(segmentToURIs,true); + MultiMap<Double,List<String>> paths = hmm.getPaths(); - executor.shutdown(); + // System.out.println(hmm.getPaths()); + // die keywords jetzt in sadeehs algorithmus reinwerfen + // da kommen jetzt pfade raus mit unterschiedlichen wahrscheinlichkeiten + // HiddenMarkovModel HMM = new HiddenMarkovModel(); + // HMM.StartMarkovModel(); + // jetzt die variablen aus der query ersetzen mit den kandidaten + // ranked list der pfade, die die observation sequence generieren - - /*for(Slot slot : t.getSlots()){ - allocations = slot2Allocations2.get(slot); - if(allocations == null){ - allocations = computeAllocations(slot, 10); - slot2Allocations2.put(slot, allocations); - } - slot2Allocations.put(slot, allocations); - - //for tests add the property URI with http://dbpedia.org/property/ namespace - //TODO should be replaced by usage of a separate SOLR index - Set<Allocation> tmp = new HashSet<Allocation>(); - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(Allocation a : allocations){ - String uri = "http://dbpedia.org/property/" + a.getUri().substring(a.getUri().lastIndexOf("/")+1); - Allocation newA = new Allocation(uri, a.getSimilarity(), a.getProminence()); - newA.setScore(a.getScore()-0.000001); - tmp.add(newA); + for(Double d : paths.keySet()) + { + for(List<String> path : paths.get(d)) + { + Query q = new Query(query); + // TODO: which variable stands for which resource? do it randomly now to check if the replacement works and then correct the order later + System.out.println(q.getVariablesAsStringList()); + System.out.println(); + int i = 0; + for(String var : q.getVariablesAsStringList()) + { + q.replaceVarWithURI(var, path.get(i)); + i++; } - } - allocations.addAll(tmp); - }*/ - logger.debug("Time needed: " + (System.currentTimeMillis() - startTime) + "ms"); + System.out.println(q); - Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); - Query cleanQuery = t.getQuery(); - queries.add(new WeightedQuery(cleanQuery)); - Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); - List<Slot> sortedSlots = new ArrayList<Slot>(); - Set<Slot> classSlots = new HashSet<Slot>(); - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.CLASS){ - sortedSlots.add(slot); - classSlots.add(slot); + WeightedQuery wQuery = new WeightedQuery(q, score); + queries.add(wQuery); } } - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY || slot.getSlotType() == SlotType.DATATYPEPROPERTY){ - sortedSlots.add(slot); - } - } - for(Slot slot : t.getSlots()){ - if(!sortedSlots.contains(slot)){ - sortedSlots.add(slot); - } - } - //add for each SYMPROPERTY Slot the reversed query - for(Slot slot : sortedSlots){ - for(WeightedQuery wQ : queries){ - if(slot.getSlotType() == SlotType.SYMPROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY){ - Query reversedQuery = new Query(wQ.getQuery()); - reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); - tmp.add(new WeightedQuery(reversedQuery)); - } - tmp.add(wQ); - } - queries.clear(); - queries.addAll(tmp); - tmp.clear(); - } + //System.exit(0); + return queries; + // >> SLOTS: + // y0: RESOURCE {Mean Hamster Software} + // p0: OBJECTPROPERTY {published,print} + // p1: CLASS {video games} - for(Slot slot : sortedSlots){ - if(!slot2Allocations.get(slot).isEmpty()){ - for(Allocation a : slot2Allocations.get(slot)){ - for(WeightedQuery query : queries){ - Query q = new Query(query.getQuery()); - boolean drop = false; - if(useDomainRangeRestriction){ - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); - // System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ - // System.out.println(typeTriple); - if(true){//reasoner.isObjectProperty(a.getUri())){ - Description range = reasoner.getRange(new ObjectProperty(a.getUri())); - // System.out.println(a); - if(range != null){ - Set<Description> allRanges = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(range instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(range); - allRanges.addAll(superClasses); - } else { - for(Description nc : range.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allRanges.addAll(superClasses); - } - } - allRanges.add(range); - allRanges.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } - } - } else { - drop = true; - } - - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ - Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); - // System.out.println(a); - if(domain != null){ - Set<Description> allDomains = new HashSet<Description>(); - SortedSet<Description> superClasses; - if(domain instanceof NamedClass){ - superClasses = reasoner.getSuperClasses(domain); - allDomains.addAll(superClasses); - } else { - for(Description nc : domain.getChildren()){ - superClasses = reasoner.getSuperClasses(nc); - allDomains.addAll(superClasses); - } - } - allDomains.add(domain); - allDomains.remove(new NamedClass(Thing.instance.getURI())); - - Set<Description> allTypes = new HashSet<Description>(); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Description type = new NamedClass(typeURI); - superClasses = reasoner.getSuperClasses(type); - allTypes.addAll(superClasses); - allTypes.add(type); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - - } - } - } - } - } - } - - if(!drop){ - if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - SPARQL_Value object = triple.getValue(); - if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position - SPARQL_Property predicate = triple.getProperty(); - if(!predicate.isVariable()){//only consider triple where predicate is URI - String predicateURI = predicate.getName().replace("<", "").replace(">", ""); - if(isDatatypeProperty(predicateURI)){//if data property - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - } - } else { - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - } - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - w.addAllocations(query.getAllocations()); - w.addAllocation(a); - tmp.add(w); - } - - - } - } - //lower queries with FILTER-REGEX - if(containsRegex){ - for(WeightedQuery wQ : tmp){ - wQ.setScore(wQ.getScore() - 0.01); - } - } - - queries.clear(); - queries.addAll(tmp);//System.out.println(tmp); - tmp.clear(); - } else {//Add REGEX FILTER if resource slot is empty and predicate is datatype property - if(slot.getSlotType() == SlotType.RESOURCE){ - for(WeightedQuery query : queries){ - Query q = query.getQuery(); - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - SPARQL_Value object = triple.getValue(); - if(object.isVariable() && object.getName().equals(slot.getAnchor())){//only consider triple where SLOT is in object position - SPARQL_Property predicate = triple.getProperty(); - if(!predicate.isVariable()){//only consider triple where predicate is URI - String predicateURI = predicate.getName().replace("<", "").replace(">", ""); - if(isDatatypeProperty(predicateURI)){//if data property - q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - object, "'" + slot.getWords().get(0) + "'", SPARQL_PairType.REGEX))); - } - } - } - } - - } - - } else { - if(slot.getSlotType() == SlotType.SYMPROPERTY){ - for(WeightedQuery wQ : queries){ - List<SPARQL_Triple> triples = wQ.getQuery().getTriplesWithVar(slot.getAnchor()); - for(SPARQL_Triple triple : triples){ - String typeVar; - String resourceURI; - SymPropertyDirection direction; - if(triple.getValue().isVariable()){ - direction = SymPropertyDirection.VAR_RIGHT; - typeVar = triple.getValue().getName(); - resourceURI = triple.getVariable().getName(); - } else { - direction = SymPropertyDirection.VAR_LEFT; - typeVar = triple.getVariable().getName(); - resourceURI = triple.getValue().getName(); - } - resourceURI = resourceURI.replace("<", "").replace(">", ""); - List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); - for(SPARQL_Triple typeTriple : typeTriples){ - String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); - // List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); - // for(Entry<String, Integer> property : mostFrequentProperties){ - // wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); - // wQ.setScore(wQ.getScore() + 0.1); - // } - } - - } - } - } - } - // else if(slot.getSlotType() == SlotType.CLASS){ - // String token = slot.getWords().get(0); - // if(slot.getToken().contains("house")){ - // String regexToken = token.replace("houses", "").replace("house", "").trim(); - // try { - // Map<Slot, SortedSet<Allocation>> ret = new SlotProcessor(new Slot(null, SlotType.CLASS, Collections.singletonList("house"))).call(); - // SortedSet<Allocation> alloc = ret.entrySet().iterator().next().getValue(); - // if(alloc != null && !alloc.isEmpty()){ - // String uri = alloc.first().getUri(); - // for(WeightedQuery query : queries){ - // Query q = query.getQuery(); - // for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - // SPARQL_Term subject = triple.getVariable(); - // SPARQL_Term object = new SPARQL_Term("desc"); - // object.setIsVariable(true); - // object.setIsURI(false); - // q.addCondition(new SPARQL_Triple(subject, new SPARQL_Property("<http://purl.org/goodrelations/v1#description>"), object)); - // q.addFilter(new SPARQL_Filter(new SPARQL_Pair( - // object, "'" + regexToken + "'", SPARQL_PairType.REGEX))); - // } - // q.replaceVarWithURI(slot.getAnchor(), uri); - // - // } - // } - // } catch (Exception e) { - // e.printStackTrace(); - // } - // } - // } - - - } - - } - for (Iterator<WeightedQuery> iterator = queries.iterator(); iterator.hasNext();) { - WeightedQuery wQ = iterator.next(); - if(dropZeroScoredQueries){ - if(wQ.getScore() <= 0){ - iterator.remove(); - } - } else { - if(t.getSlots().size()==0) throw new AssertionError("no slots for query "+wQ); - wQ.setScore(wQ.getScore()/t.getSlots().size()); - } - - } - allQueries.addAll(queries); - List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); - qList.add(wQ.getQuery()); - } - template2Queries.put(t, qList); + // System.out.println(template); } - logger.debug("...done in "); - return allQueries; + // + return null; } private double getProminenceValue(String uri, SlotType type){ @@ -1142,101 +845,51 @@ } - class SlotProcessor implements Callable<Map<Slot, SortedSet<Allocation>>>{ - - private Slot slot; - - public SlotProcessor(Slot slot) { - this.slot = slot; - } - - @Override - public Map<Slot, SortedSet<Allocation>> call() throws Exception { - Map<Slot, SortedSet<Allocation>> result = new HashMap<Slot, SortedSet<Allocation>>(); - result.put(slot, computeAllocations(slot)); - return result; - } - - private SortedSet<Allocation> computeAllocations(Slot slot){ - logger.debug("Computing allocations for slot: " + slot); - SortedSet<Allocation> allocations = new TreeSet<Allocation>(); - - Index index = getIndexBySlotType(slot); - - IndexResultSet rs; - for(String word : slot.getWords()){ - rs = new IndexResultSet(); - if(mappingIndex != null){ - SlotType type = slot.getSlotType(); - if(type == SlotType.CLASS){ - rs.add(mappingIndex.getClassesWithScores(word)); - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - rs.add(mappingIndex.getPropertiesWithScores(word)); - } else if(type == SlotType.DATATYPEPROPERTY){ - rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); - } else if(type == SlotType.OBJECTPROPERTY){ - rs.add(mappingIndex.getObjectPropertiesWithScores(word)); - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - rs.add(mappingIndex.getResourcesWithScores(word)); - } + private Set<IndexResultItem> getIndexResultItems(Slot slot) + { +// List<String> uris = new LinkedList<String>(); + Set<IndexResultItem> indexResultItems = new HashSet<IndexResultItem>(); + + Index index = getIndexBySlotType(slot); + + for(String word : slot.getWords()) + { + IndexResultSet rs = new IndexResultSet(); + if(mappingIndex != null){ + SlotType type = slot.getSlotType(); + if(type == SlotType.CLASS){ + rs.add(mappingIndex.getClassesWithScores(word)); + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + rs.add(mappingIndex.getPropertiesWithScores(word)); + } else if(type == SlotType.DATATYPEPROPERTY){ + rs.add(mappingIndex.getDatatypePropertiesWithScores(word)); + } else if(type == SlotType.OBJECTPROPERTY){ + rs.add(mappingIndex.getObjectPropertiesWithScores(word)); + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + rs.add(mappingIndex.getResourcesWithScores(word)); } - //use the non manual indexes only if mapping based resultset is not empty and option is set - if(!useManualMappingsIfExistOnly || rs.isEmpty()){ - if(slot.getSlotType() == SlotType.RESOURCE){ - rs.add(index.getResourcesWithScores(word, 20)); - } else { - if(slot.getSlotType() == SlotType.CLASS){ - word = PlingStemmer.stem(word); - } - rs.add(index.getResourcesWithScores(word, 20)); + } + //use the non manual indexes only if mapping based resultset is not empty and option is set + if(!useManualMappingsIfExistOnly || rs.isEmpty()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs.add(index.getResourcesWithScores(word, 20)); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); } + rs.add(index.getResourcesWithScores(word, 20)); } - - - for(IndexResultItem item : rs.getItems()){ - double similarity = Similarity.getSimilarity(word, item.getLabel()); - // //get the labels of the redirects and compute the highest similarity - // if(slot.getSlotType() == SlotType.RESOURCE){ - // Set<String> labels = getRedirectLabels(item.getUri()); - // for(String label : labels){ - // double tmp = Similarity.getSimilarity(word, label); - // if(tmp > similarity){ - // similarity = tmp; - // } - // } - // } - double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); - allocations.add(new Allocation(item.getUri(), prominence, similarity)); - } - } - - normProminenceValues(allocations); - - computeScore(allocations); - logger.debug("Found " + allocations.size() + " allocations for slot " + slot); - return new TreeSet<Allocation>(allocations); +// for(IndexResultItem item: rs.getItems()) +// { +// uris.add(item.getUri()); +// } + indexResultItems.addAll(rs.getItems()); } - - private Index getIndexBySlotType(Slot slot){ - Index index = null; - SlotType type = slot.getSlotType(); - if(type == SlotType.CLASS){ - index = classesIndex; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - index = propertiesIndex; - } else if(type == SlotType.DATATYPEPROPERTY){ - index = datatypePropertiesIndex; - } else if(type == SlotType.OBJECTPROPERTY){ - index = objectPropertiesIndex; - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - index = resourcesIndex; - } - return index; - } - + return indexResultItems; } + public String getTaggedInput() { if(templateGenerator==null) {throw new AssertionError("Learner not initialized. Please call init();");} @@ -1255,33 +908,33 @@ return isDatatypeProperty; } - /** - * @param args - * @throws NoTemplateFoundException - * @throws IOException - * @throws FileNotFoundException - * @throws InvalidFileFormatException - */ - public static void main(String[] args) throws Exception { - SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), - Collections.<String>singletonList(""), Collections.<String>emptyList()); - Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); - Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); - Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); + // /** + // * @param args + // * @throws NoTemplateFoundException + // * @throws IOException + // * @throws FileNotFoundException + // * @throws InvalidFileFormatException + // */ + // public static void main(String[] args) throws Exception { + // SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), + // Collections.<String>singletonList(""), Collections.<String>emptyList()); + // Index resourcesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_resources"); + // Index classesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_classes"); + // Index propertiesIndex = new SOLRIndex("http://139.18.2.173:8080/solr/dbpedia_properties"); + // + // SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); + // learner.init(); + // + // String question = "What is the highest mountain?"; + // + // learner.setQuestion(question); + // learner.learnSPARQLQueries(); + // System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); + // System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); + // System.out.println(learner.getLearnedPosition()); + // + // } - SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); - learner.init(); - String question = "What is the highest mountain?"; - learner.setQuestion(question); - learner.learnSPARQLQueries(); - System.out.println("Learned query:\n" + learner.getBestSPARQLQuery()); - System.out.println("Lexical answer type is: " + learner.getTemplates().iterator().next().getLexicalAnswerType()); - System.out.println(learner.getLearnedPosition()); - - } - - - } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-10-30 12:53:54 UTC (rev 3860) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2012-10-30 16:10:36 UTC (rev 3861) @@ -58,7 +58,7 @@ unions = new HashSet<SPARQL_Union>(); } - //copy constructor + /** copy constructor*/ public Query(Query query){ this.qt = query.getQt(); Set<SPARQL_Term> selTerms = new HashSet<SPARQL_Term>(); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-10-30 12:53:54 UTC (rev 3860) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-10-30 16:10:36 UTC (rev 3861) @@ -114,7 +114,7 @@ test("QALD 2 Benchmark ideally tagged", file,SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache,dbpediaLiveKnowledgebase,null,null); } - @Test public void testOxford() throws Exception + /*@Test*/ public void testOxford() throws Exception { File file = new File(getClass().getClassLoader().getResource("tbsl/evaluation/oxford_working_questions.xml").getFile()); test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex()); @@ -160,6 +160,26 @@ logger.info("learned query: "+testData.id2Query.get(0)); } + /** For debugging one question in particular. + */ + @Test public void testSingleQueryDBpedia() + { +// Logger.getLogger(Templator.class).setLevel(Level.DEBUG); +// Logger.getLogger(Parser.class).setLevel(Level.DEBUG); +// Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); + // String question = "houses for less than 900000 pounds"; + String question = "Give/VB me/PRP all/DT video/JJ games/NNS published/VBN by/IN Mean/NNP Hamster/NNP Software/NNP"; +// String question = "give me all video games published by mean hamster software"; +// String question = "Give me all video games published by Mean Hamster Software"; +// question = new StanfordPartOfSpeechTagger().tag(question); +// System.out.println(question); + +// Model model = loadOxfordModel(); + QueryTestData testData = new QueryTestData(); + new LearnQueryCallable(question, 0, testData, dbpediaLiveKnowledgebase, true).call(); + logger.info("learned query: "+testData.id2Query.get(0)); + } + /*@Test*/ public void generateXMLOxford() throws IOException { boolean ADD_POS_TAGS = true; @@ -935,6 +955,7 @@ learner = new SPARQLTemplateBasedLearner2(knowledgeBase,pretagged?null:POSTaggerHolder.posTagger,wordnet,options); try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} learner.setUseIdealTagger(pretagged); + learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex"}); } public LearnQueryCallable(String question, int id, QueryTestData testData, Model model,MappingBasedIndex index,boolean pretagged) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-01-22 09:41:05
|
Revision: 3882 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3882&view=rev Author: lorenz_b Date: 2013-01-22 09:40:58 +0000 (Tue, 22 Jan 2013) Log Message: ----------- Moving QTL algorithm to core module. Removed Paths: ------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/ trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2013-02-25 11:55:12
|
Revision: 3903 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3903&view=rev Author: lorenz_b Date: 2013-02-25 11:55:05 +0000 (Mon, 25 Feb 2013) Log Message: ----------- Removed TBSL algorithm. Removed Paths: ------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/ trunk/components-ext/src/main/java/org/dllearner/common/index/IndexResultItemComparator.java trunk/components-ext/src/test/java/org/dllearner/algorithm/ Deleted: trunk/components-ext/src/main/java/org/dllearner/common/index/IndexResultItemComparator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/IndexResultItemComparator.java 2013-02-25 11:47:56 UTC (rev 3902) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/IndexResultItemComparator.java 2013-02-25 11:55:05 UTC (rev 3903) @@ -1,48 +0,0 @@ -package org.dllearner.common.index; - -import java.util.Comparator; -import java.util.HashMap; -import java.util.Map; - -import org.dllearner.algorithm.tbsl.util.Similarity; - -public class IndexResultItemComparator implements Comparator<IndexResultItem>{ - private String s; - private Map<String, Double> cache; - - public IndexResultItemComparator(String s) { - this.s = s; - cache = new HashMap<String, Double>(); - } - - @Override - public int compare(IndexResultItem item1, IndexResultItem item2) { - - double sim1 = 0; - if(cache.containsKey(item1.getLabel())){ - sim1 = cache.get(item1.getLabel()); - } else { - sim1 = Similarity.getSimilarity(s, item1.getLabel()); - cache.put(item1.getLabel(), sim1); - } - double sim2 = 0; - if(cache.containsKey(item2.getLabel())){ - sim2 = cache.get(item2.getLabel()); - } else { - sim2 = Similarity.getSimilarity(s, item2.getLabel()); - cache.put(item2.getLabel(), sim2); - } - - if(sim1 < sim2){ - return 1; - } else if(sim1 > sim2){ - return -1; - } else { - int val = item1.getLabel().compareTo(item2.getLabel()); - if(val == 0){ - return item1.getUri().compareTo(item2.getUri()); - } - return val; - } - } -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |