From: <ki...@us...> - 2012-09-17 12:02:27
|
Revision: 3844 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3844&view=rev Author: kirdie Date: 2012-09-17 12:02:15 +0000 (Mon, 17 Sep 2012) Log Message: ----------- Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Added Paths: ----------- trunk/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -494,6 +494,7 @@ max = a.getProminence(); } } + if(min==max) {return;} for(Allocation a : allocations){ double prominence = a.getProminence()/(max-min); a.setProminence(prominence); @@ -905,13 +906,14 @@ if(popularity == null){ popularity = Integer.valueOf(0); } + System.out.println(popularity); // if(cnt == 0){ // return 0; // } // return Math.log(cnt); - if(Double.isNaN(popularity)) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} + if(popularity!=popularity) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} return popularity; } Added: trunk/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml =================================================================== (Binary files differ) Property changes on: trunk/components-ext/src/main/resources/tbsl/evaluation/oxford_working_questions.xml ___________________________________________________________________ Added: svn:mime-type + application/xml Modified: trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt =================================================================== --- trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/main/resources/tbsl/oxford_dataproperty_mappings.txt 2012-09-17 12:02:15 UTC (rev 3844) @@ -1,4 +1,4 @@ http://www.w3.org/2006/vcard/ns#street-address|address, location, postal code http://purl.org/goodrelations/v1#description|description -http://purl.org/goodrelations/v1#hasPrice|has price, price +http://diadem.cs.ox.ac.uk/ontologies/real-estate#hasPrice|has price, price http://diadem.cs.ox.ac.uk/ontologies/real-estate#receptions|receptions, reception room, reception rooms \ No newline at end of file Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/OxfordEvaluation.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -12,6 +12,8 @@ import org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner3Test; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.common.index.MappingBasedIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.VirtuosoClassesIndex; @@ -46,7 +48,10 @@ OxfordEvaluation.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); +// SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(endpoint, resourcesIndex, classesIndex, propertiesIndex); + SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2 + (SPARQLTemplateBasedLearner3Test.loadOxfordModel(),SPARQLTemplateBasedLearner3Test.getOxfordMappingIndex(), new StanfordPartOfSpeechTagger()); + learner.setMappingIndex(mappingIndex); learner.init(); learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -35,7 +35,7 @@ import org.xml.sax.SAXException; import com.hp.hpl.jena.rdf.model.Model; -class QueryTestData implements Serializable +public class QueryTestData implements Serializable { public SortedMap<Integer, String> id2Question = new ConcurrentSkipListMap<Integer, String>(); public SortedMap<Integer, String> id2Query = new ConcurrentSkipListMap<Integer, String>(); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-12 22:56:17 UTC (rev 3843) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-09-17 12:02:15 UTC (rev 3844) @@ -81,7 +81,8 @@ import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP; -/** Tests TSBL against the qald2 benchmark test data with the DBpedia endpoint. +/** If you just want to test the standard queries, activate justTestTheLastWorkingOnesDBpedia() and testOxford(). + * Tests TSBL against the qald2 benchmark test data with the DBpedia endpoint. * The qald2 endpoint is not used because it may not always be available. * To speed up the process at first the test file is read and an updated copy of it is saved that * only contains the questions where the reference query does not return a nonempty list of resources. @@ -105,18 +106,66 @@ private static final boolean OXFORD_PRETAGGED = false; /*@Test*/ public void testDBpedia() throws Exception - {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), - SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);} + { + File file = generateTestDataIfNecessary( + new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), + SparqlEndpoint.getEndpointDBpedia(), + dbpediaLiveCache); + test("QALD 2 Benchmark ideally tagged", file,SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache,dbpediaLiveKnowledgebase,null,null); + } - /*@Test*/ public void testOxford() throws Exception + @Test public void testOxford() throws Exception { + File file = new File(getClass().getClassLoader().getResource("tbsl/evaluation/oxford_working_questions.xml").getFile()); + test("Oxford 19 working questions", file,null,null,null,loadOxfordModel(),getOxfordMappingIndex()); } - @Test public void generateXMLOxford() throws IOException +// /*@Test*/ public void testOxford() throws Exception +// { +// Model model = loadOxfordModel(); +// QueryTestData testData = QueryTestData.readQaldXml(new File("log/oxford_working_questions.xml")); +// // answers are not included at least in the first query TODO: check, why +// testData.generateAnswers(null, null, model); +// QueryTestData newTestData = generateTestDataMultiThreaded(testData.id2Question, null, model,getOxfordMappingIndex() , OXFORD_PRETAGGED); +// newTestData.generateAnswers(null, null, model); +// for(int i : testData.id2Question.keySet()) +// { +// logger.info("Comparing answers for question "+testData.id2Question.get(i)); +// String referenceQuery = testData.id2Query.get(i); +// String newQuery = newTestData.id2Query.get(i); +// if(!referenceQuery.equals(newQuery)) +// { +// logger.warn("not equal, reference query: "+referenceQuery+", new query: "+newQuery); +// Collection<String> referenceAnswers = testData.id2Answers.get(i); +// Collection<String> newAnswers = newTestData.id2Answers.get(i); +// if(!referenceAnswers.equals(newAnswers)) fail("not equal, reference answers: "+referenceAnswers+", new answers: "+newAnswers); +// } +// } +// } + + /** For debugging one question in particular. + */ + /*@Test*/ public void testSingleQueryOxford() { - boolean ADD_POS_TAGS = false; - PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); + Logger.getLogger(Templator.class).setLevel(Level.DEBUG); + Logger.getLogger(Parser.class).setLevel(Level.DEBUG); + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.DEBUG); + // String question = "houses for less than 900000 pounds"; + String question = "houses/NNS for/IN less/JJR than/IN 900000/CD pounds/NNS"; + //question = new StanfordPartOfSpeechTagger().tag(question); + Model model = loadOxfordModel(); + QueryTestData testData = new QueryTestData(); + new LearnQueryCallable(question, 0, testData, model, getOxfordMappingIndex(), true).call(); + logger.info("learned query: "+testData.id2Query.get(0)); + } + + /*@Test*/ public void generateXMLOxford() throws IOException + { + boolean ADD_POS_TAGS = true; + PartOfSpeechTagger posTagger = null; + if(ADD_POS_TAGS) {posTagger=new StanfordPartOfSpeechTagger();} + Model model = loadOxfordModel(); List<String> questions = new LinkedList<String>(); BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt")))); int j=0; @@ -132,18 +181,23 @@ SortedMap<Integer,String> id2Question = new TreeMap<Integer, String>(); Iterator<String> it = questions.iterator(); for(int i=0;i<questions.size();i++) {id2Question.put(i, it.next());} - MappingBasedIndex mappingIndex= new MappingBasedIndex( + MappingBasedIndex mappingIndex= getOxfordMappingIndex(); + QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,model,mappingIndex,ADD_POS_TAGS||OXFORD_PRETAGGED); + testData.generateAnswers(null, null, model); + testData.writeQaldXml(new File("log/test.xml")); + } + + public static MappingBasedIndex getOxfordMappingIndex() + { + return new MappingBasedIndex( SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_class_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_resource_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,model,mappingIndex,ADD_POS_TAGS||OXFORD_PRETAGGED); - testData.generateAnswers(null, null, model); - testData.writeQaldXml(new File("log/test.xml")); } - private Model loadOxfordModel() + public static Model loadOxfordModel() { // load it into a model because we can and it's faster and doesn't rely on endpoint availability // the files are located in the paper svn under question-answering-iswc-2012/data @@ -189,7 +243,7 @@ // switch(type) {case "ttl":type="TURTLE";break;case "owl":type="RDF/XML";break;default:throw new RuntimeException("filetype "+ending+" not handled.");} // no Java 1.7 :-( try{ // m.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/data/"+s)), null, type);}catch (FileNotFoundException e) {} - m.read(getClass().getClassLoader().getResourceAsStream("oxford/"+s),null, type);} + m.read(SPARQLTemplateBasedLearner3Test.class.getClassLoader().getResourceAsStream("oxford/"+s),null, type);} catch(RuntimeException e) {throw new RuntimeException("Could not read into model: "+s,e);} } // test("Oxford evaluation questions", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), @@ -215,9 +269,10 @@ } } - public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException + public void test(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache,Knowledgebase kb, Model model, MappingBasedIndex index) + throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException, NoTemplateFoundException { - generateTestDataIfNecessaryAndEvaluateAndWrite(title,referenceXML,endpoint,cache); + evaluateAndWrite(title,referenceXML,endpoint,cache,kb,model,index); generateHTML(title); // if(evaluation.numberOfCorrectAnswers<3) {fail("only " + evaluation.numberOfCorrectAnswers+" correct answers.");} @@ -245,26 +300,31 @@ learnedTestData.write();*/ } - private void generateTestDataIfNecessaryAndEvaluateAndWrite(String title, final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException, ComponentInitException + private File generateTestDataIfNecessary(final File referenceXML,final SparqlEndpoint endpoint,ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException { String dir = "cache/"+getClass().getSimpleName()+"/"; - new File(dir).mkdirs(); File updatedReferenceXML=new File(dir+"updated_"+referenceXML.getName()); + if(!updatedReferenceXML.exists()) { - logger.info("Generating updated reference for "+title); + logger.info("Generating updated reference."); generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache,null); } + return updatedReferenceXML; + } + private void evaluateAndWrite(String title,final File updatedReferenceXML, final SparqlEndpoint endpoint,ExtractionDBCache cache, + Knowledgebase kb, Model model, MappingBasedIndex index) + { QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML); logger.info(title+" subset loaded with "+referenceTestData.id2Question.size()+" questions."); long startLearning = System.currentTimeMillis(); - QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase,null,null,DBPEDIA_PRETAGGED); + QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, kb,model,index,DBPEDIA_PRETAGGED); long endLearning = System.currentTimeMillis(); logger.info("finished learning after "+(endLearning-startLearning)/1000.0+"s"); - learnedTestData.generateAnswers(endpoint,cache,null); + learnedTestData.generateAnswers(endpoint,cache,model); long endGeneratingAnswers = System.currentTimeMillis(); logger.info("finished generating answers in "+(endGeneratingAnswers-endLearning)/1000.0+"s"); Evaluation evaluation = evaluate(referenceTestData, learnedTestData); @@ -756,7 +816,7 @@ public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache, Model model) { if(query==null) {throw new AssertionError("query is null");} -// if(endpoint==null) {throw new AssertionError("endpoint is null");} + // if(endpoint==null) {throw new AssertionError("endpoint is null");} if(!query.contains("SELECT")&&!query.contains("select")) {return Collections.<String>emptySet();} // abort when not a select query Set<String> uris = new HashSet<String>(); // QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query); @@ -851,6 +911,8 @@ // // return kb; // } + /** @author konrad + * Learns a query for a question and puts it into the given testData object. * */ private static class LearnQueryCallable implements Callable<LearnStatus> { private final String question; @@ -897,7 +959,7 @@ @Override public LearnStatus call() { - + logger.trace("learning question: "+question); try { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |