From: <ki...@us...> - 2012-08-23 14:35:44
|
Revision: 3835
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3835&view=rev
Author:   kirdie
Date:     2012-08-23 14:35:33 +0000 (Thu, 23 Aug 2012)

Log Message:
-----------
more work on the oxford test.

Modified Paths:
--------------
    trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java
    trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java
    trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java

Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-23 13:19:03 UTC (rev 3834) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-08-23 14:35:33 UTC (rev 3835) @@ -911,6 +911,7 @@ // return 0; // } // return Math.log(cnt); + if(Double.isNaN(popularity)) {throw new AssertionError("prominence NaN for uri "+uri+", slot type "+type);} return popularity; } Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-08-23 13:19:03 UTC (rev 3834) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/QueryTestData.java 2012-08-23 14:35:33 UTC (rev 3835) @@ -33,6 +33,7 @@ import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import com.hp.hpl.jena.rdf.model.Model; class QueryTestData implements Serializable { @@ -68,12 +69,12 @@ catch (ClassNotFoundException e){throw new RuntimeException(e);} } - public QueryTestData generateAnswers(SparqlEndpoint endpoint, 
ExtractionDBCache cache) + public QueryTestData generateAnswers(SparqlEndpoint endpoint, ExtractionDBCache cache,Model model) { if(!id2Answers.isEmpty()) {throw new AssertionError("Answers already existing.");} for(int i:id2Query.keySet()) { - Set<String> uris = SPARQLTemplateBasedLearner3Test.getUris(endpoint, id2Query.get(i),cache); + Set<String> uris = SPARQLTemplateBasedLearner3Test.getUris(endpoint, id2Query.get(i),cache,model); id2Answers.put(i, uris); // empty answer set better transfers intended meaning and doesn't cause NPEs in html generation :-) if(!uris.isEmpty()) {/*id2Answers.put(i, uris);*/} else {id2LearnStatus.put(i, LearnStatus.QUERY_RESULT_EMPTY);} Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-23 13:19:03 UTC (rev 3834) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-08-23 14:35:33 UTC (rev 3835) @@ -50,6 +50,7 @@ import org.apache.log4j.SimpleLayout; import org.dllearner.algorithm.tbsl.ltag.parser.Parser; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; +import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.SynchronizedStanfordPartOfSpeechTagger; import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.templator.Templator; @@ -70,8 +71,11 @@ import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import cern.colt.Arrays; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.Model; import 
com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.RDFNode; @@ -104,17 +108,25 @@ {test("QALD 2 Benchmark ideally tagged", new File(getClass().getClassLoader().getResource("tbsl/evaluation/qald2-dbpedia-train-tagged(ideal).xml").getFile()), SparqlEndpoint.getEndpointDBpedia(),dbpediaLiveCache);} + /*@Test*/ public void testOxford() throws Exception + { + } + @Test public void generateXMLOxford() throws IOException { - Model m = loadOxfordModel(); + boolean ADD_POS_TAGS = false; + PartOfSpeechTagger posTagger = new StanfordPartOfSpeechTagger(); + Model model = loadOxfordModel(); List<String> questions = new LinkedList<String>(); BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries.txt")))); int j=0; for(String line;(line=in.readLine())!=null;) { j++; - if(j>1) break; - if(!line.isEmpty()) {questions.add(line.replace("question: ", ""));} + // if(j>5) break; // TODO: remove later + String question = line.replace("question: ", "").trim(); + if(ADD_POS_TAGS&&!OXFORD_PRETAGGED) {question = posTagger.tag(question);} + if(!line.trim().isEmpty()) {questions.add(question);} } in.close(); SortedMap<Integer,String> id2Question = new TreeMap<Integer, String>(); @@ -126,7 +138,8 @@ SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,m,mappingIndex,OXFORD_PRETAGGED); + QueryTestData testData = generateTestDataMultiThreaded(id2Question, null,model,mappingIndex,ADD_POS_TAGS||OXFORD_PRETAGGED); + testData.generateAnswers(null, null, model); testData.writeQaldXml(new File("log/test.xml")); } @@ -175,7 +188,7 @@ String type = 
(ending.equals("ttl")||ending.equals("nt"))?"TURTLE":ending.equals("owl")?"RDF/XML":String.valueOf(Integer.valueOf("filetype "+ending+" not handled.")); // switch(type) {case "ttl":type="TURTLE";break;case "owl":type="RDF/XML";break;default:throw new RuntimeException("filetype "+ending+" not handled.");} // no Java 1.7 :-( try{ -// m.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/data/"+s)), null, type);}catch (FileNotFoundException e) {} + // m.read(new FileInputStream(new File("/home/lorenz/arbeit/papers/question-answering-iswc-2012/data/"+s)), null, type);}catch (FileNotFoundException e) {} m.read(getClass().getClassLoader().getResourceAsStream("oxford/"+s),null, type);} catch(RuntimeException e) {throw new RuntimeException("Could not read into model: "+s,e);} } @@ -241,7 +254,7 @@ if(!updatedReferenceXML.exists()) { logger.info("Generating updated reference for "+title); - generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache); + generateUpdatedXML(referenceXML,updatedReferenceXML,endpoint,cache,null); } QueryTestData referenceTestData = QueryTestData.readQaldXml(updatedReferenceXML); @@ -251,7 +264,7 @@ QueryTestData learnedTestData = generateTestDataMultiThreaded(referenceTestData.id2Question, dbpediaLiveKnowledgebase,null,null,DBPEDIA_PRETAGGED); long endLearning = System.currentTimeMillis(); logger.info("finished learning after "+(endLearning-startLearning)/1000.0+"s"); - learnedTestData.generateAnswers(endpoint,cache); + learnedTestData.generateAnswers(endpoint,cache,null); long endGeneratingAnswers = System.currentTimeMillis(); logger.info("finished generating answers in "+(endGeneratingAnswers-endLearning)/1000.0+"s"); Evaluation evaluation = evaluate(referenceTestData, learnedTestData); @@ -259,6 +272,11 @@ evaluation.write(); } + private void evaluateAndWrite() + { + + } + /** evaluates a data set against a reference. * @param reference the test data assumed to be correct. 
needs to contain the answers for all queries. * @param suspect the test data to compare with the reference. @@ -542,7 +560,8 @@ } catch (ExecutionException e) { - throw new RuntimeException("question="+question,e); + testData.id2LearnStatus.put(i, new LearnStatus(LearnStatus.Type.EXCEPTION, e)); + //throw new RuntimeException("question="+question,e); } catch (TimeoutException e) { @@ -590,7 +609,7 @@ * @throws SAXException * @throws TransformerException */ - private void generateUpdatedXML(File originalFile, File updatedFile,SparqlEndpoint endpoint, ExtractionDBCache cache) throws ParserConfigurationException, SAXException, IOException, TransformerException + private void generateUpdatedXML(File originalFile, File updatedFile,SparqlEndpoint endpoint, ExtractionDBCache cache,Model model) throws ParserConfigurationException, SAXException, IOException, TransformerException { logger.info(String.format("Updating question file \"%s\" by removing questions without nonempty resource list answer and adding answers.\n" + " Saving the result to file \"%s\"",originalFile.getPath(),updatedFile.getPath())); @@ -629,7 +648,7 @@ if(!query.equals("OUT OF SCOPE")) // marker in qald benchmark file, will create holes interval of ids (e.g. 
1,2,5,7) { - Set<String> uris = getUris(endpoint, query,cache); + Set<String> uris = getUris(endpoint, query,cache,model); if(!uris.isEmpty()) { // remove reference answers of the benchmark because they are obtained from an other endpoint @@ -687,7 +706,7 @@ // private SPARQLTemplateBasedLearner2 oxfordLearner; // private SPARQLTemplateBasedLearner2 dbpediaLiveLearner; -// private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); + // private final ExtractionDBCache oxfordCache = new ExtractionDBCache("cache"); private final static ExtractionDBCache dbpediaLiveCache = new ExtractionDBCache("cache"); private final Knowledgebase dbpediaLiveKnowledgebase = createDBpediaLiveKnowledgebase(dbpediaLiveCache); @@ -725,7 +744,7 @@ Logger.getRootLogger().setLevel(Level.WARN); Logger.getLogger(Templator.class).setLevel(Level.WARN); Logger.getLogger(Parser.class).setLevel(Level.WARN); - Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); + Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.WARN); // Logger.getLogger(SPARQLTemplateBasedLearner2.class).setLevel(Level.INFO); logger.setLevel(Level.INFO); // TODO: remove when finishing implementation of this class logger.addAppender(new FileAppender(new SimpleLayout(), "log/"+this.getClass().getSimpleName()+".log", false)); @@ -734,16 +753,21 @@ // oxfordLearner = new SPARQLTemplateBasedLearner2(createOxfordKnowledgebase(oxfordCache)); } - public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache) + public static Set<String> getUris(final SparqlEndpoint endpoint, final String query, ExtractionDBCache cache, Model model) { if(query==null) {throw new AssertionError("query is null");} - if(endpoint==null) {throw new AssertionError("endpoint is null");} +// if(endpoint==null) {throw new AssertionError("endpoint is null");} if(!query.contains("SELECT")&&!query.contains("select")) {return Collections.<String>emptySet();} // abort 
when not a select query Set<String> uris = new HashSet<String>(); // QueryEngineHTTP qe = new QueryEngineHTTP(DBPEDIA_LIVE_ENDPOINT_URL_STRING, query); + ResultSet rs; // try{rs = qe.execSelect();} - try{rs = executeSelect(endpoint, query, cache);} + try + { + if(model!=null) {rs = QueryExecutionFactory.create(QueryFactory.create(query, Syntax.syntaxARQ), model).execSelect();} + else {rs = executeSelect(endpoint, query, cache);} + } catch(QueryExceptionHTTP e) { logger.error("Error getting uris for query "+query+" at endpoint "+endpoint,e); @@ -780,7 +804,7 @@ } return uris; } - + private static String urlDecode(String url){ String decodedURL = null; try { @@ -836,7 +860,7 @@ static private class POSTaggerHolder {static public final PartOfSpeechTagger posTagger = new SynchronizedStanfordPartOfSpeechTagger();} - + static private final WordNet wordnet = new WordNet(); static private final Options options = new Options(); private final SPARQLTemplateBasedLearner2 learner; @@ -848,6 +872,7 @@ this.testData=testData; learner = new SPARQLTemplateBasedLearner2(knowledgeBase,pretagged?null:POSTaggerHolder.posTagger,wordnet,options); try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} + learner.setUseIdealTagger(pretagged); } public LearnQueryCallable(String question, int id, QueryTestData testData, Model model,MappingBasedIndex index,boolean pretagged) @@ -861,16 +886,18 @@ SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_dataproperty_mappings.txt").getPath(), SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/oxford_objectproperty_mappings.txt").getPath() ); - + learner = new SPARQLTemplateBasedLearner2(model,mappingIndex,pretagged?null:POSTaggerHolder.posTagger); try {learner.init();} catch (ComponentInitException e) {throw new RuntimeException(e);} + learner.setUseIdealTagger(pretagged); learner.setGrammarFiles(new String[]{"tbsl/lexicon/english.lex","tbsl/lexicon/english_oxford.lex"}); 
learner.setUseDomainRangeRestriction(false); } @Override public LearnStatus call() - { + { + logger.trace("learning question: "+question); try {

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.