From: <lor...@us...> - 2011-06-14 13:31:24
|
Revision: 2872 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2872&view=rev Author: lorenz_b Date: 2011-06-14 13:31:18 +0000 (Tue, 14 Jun 2011) Log Message: ----------- Some changes in Eval script. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/LatexWriter.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-06-14 08:24:56 UTC (rev 2871) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-06-14 13:31:18 UTC (rev 2872) @@ -40,8 +40,10 @@ import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.ResultSetFactory; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.hp.hpl.jena.sparql.vocabulary.FOAF; import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDFS; @@ -92,7 +94,7 @@ private Lemmatizer lemmatizer = new LingPipeLemmatizer();// StanfordLemmatizer(); - private int maxQueryExecutionTimeInSeconds = 10; + private int maxQueryExecutionTimeInSeconds = 20; public SPARQLTemplateBasedLearner(){ @@ -432,19 +434,19 @@ SortedSet<String> tmp; List<String> uris; - //prune the word list with lemmatizer only when slot type is not RESOURCE + //prune the word list with only when slot type is not RESOURCE List<String> words; if(slot.getSlotType() == SlotType.RESOURCE){ words = slot.getWords(); } else { - words = getLemmatizedWords(slot.getWords()); + words = pruneList(slot.getWords());//getLemmatizedWords(slot.getWords()); } for(String word : words){ tmp = new TreeSet<String>(new StringSimilarityComparator(word)); uris = uriCache.get(word); if(uris == null){ - uris = index.getResources("label:\"" + word + "\""); + uris = index.getResources("label:\"" + word + "\"~0.7"); uriCache.put(word, uris); } tmp.addAll(uris); @@ -457,6 +459,27 @@ return sortedURIs; } + private List<String> pruneList(List<String> words){ + List<String> prunedList = new ArrayList<String>(); + for(String w1 : words){ + boolean smallest = true; + for(String w2 : words){ + if(!w1.equals(w2)){ + if(w2.contains(w1)){ + smallest = false; + break; + } + } + } + if(smallest){ + prunedList.add(w1); + } + } + logger.info("Pruned list: " + prunedList); +// return getLemmatizedWords(words); + return prunedList; + } + private List<String> getLemmatizedWords(List<String> words){ logger.info("Pruning word list " + words + "..."); mon.start(); @@ -618,6 +641,7 @@ } private List<String> getResultFromRemoteEndpoint(String query){ + System.out.println(query); List<String> resources = new ArrayList<String>(); try { ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query + " LIMIT 10")); @@ -656,11 +680,11 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Give me all school types."; + String question = "Who are the presidents of the United States?"; // String question = "Give me all films starring Brad Pitt"; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), - Collections.<String>singletonList("http://dbpedia.org"), Collections.<String>emptyList()); + Collections.<String>singletonList("http://live.dbpedia.org"), Collections.<String>emptyList()); learner.setEndpoint(endpoint); learner.setQuestion(question); learner.learnSPARQLQueries(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-06-14 08:24:56 UTC (rev 2871) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-06-14 13:31:18 UTC (rev 2872) @@ -15,7 +15,7 @@ static final String[] genericReplacements = { "\"", "", "'", "", "[!?.,;]", "" }; static final String[] englishReplacements = { "don't", "do not", "doesn't", "does not" }; - static NER ner = new LingPipeNER(false);//not case sensitive best solution? + static NER ner = new LingPipeNER(true);//not case sensitive best solution? public Preprocessor() { } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/LatexWriter.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/LatexWriter.java 2011-06-14 08:24:56 UTC (rev 2871) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/LatexWriter.java 2011-06-14 13:31:18 UTC (rev 2872) @@ -13,19 +13,22 @@ private static String NL = System.getProperty("line.separator"); private static final String PRAEAMBEL_FILE = "tbsl/evaluation/praeambel.tex"; private StringBuilder sb; + private StringBuilder summary; public LatexWriter() { sb = new StringBuilder(); + summary = new StringBuilder(); - loadPraeambel(); + beginSummaryTable(); } - private void loadPraeambel(){ + private String loadPraeambel(){ + StringBuilder praeamble = new StringBuilder(); try { Scanner scanner = new Scanner(new FileInputStream(this.getClass().getClassLoader().getResource(PRAEAMBEL_FILE).getPath())); try { while (scanner.hasNextLine()){ - sb.append(scanner.nextLine() + NL); + praeamble.append(scanner.nextLine() + NL); } } finally{ @@ -34,6 +37,7 @@ } catch (FileNotFoundException e) { e.printStackTrace(); } + return praeamble.toString(); } public void makeTitle(){ @@ -95,11 +99,38 @@ sb.append(text).append("\n"); } + public void beginSummaryTable(){ + summary.append("\\begin{tabular}{| c | l | c | c |}\\hline\n"); + summary.append("id & question & P & R \\\\\\hline\\hline\n"); + } + + public void endSummaryTable(){ + + summary.append("\\end{tabular}\n"); + } + + public void addSummaryTableEntry(int id, String question, double precision, double recall){ + summary.append(id).append(" & ").append(question).append(" & ").append(precision).append(" & ").append(recall).append("\\\\\\hline\n"); + } + public void write(String file){ + endSummaryTable(); + StringBuilder latex = new StringBuilder(); + latex.append(loadPraeambel()); + latex.append("\\begin{document}"); + latex.append("\\maketitle\n"); + latex.append("\\newpage\n"); + latex.append(summary.toString()); + latex.append("\\newpage\n"); + latex.append("\\tableofcontents\n"); + latex.append("\\newpage\n"); + latex.append(sb.toString()); + latex.append("\\end{document}"); + try { Writer output = new BufferedWriter(new FileWriter(file)); try { - output.write( sb.toString() ); + output.write( latex.toString() ); } finally { output.close(); Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-06-14 08:24:56 UTC (rev 2871) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-06-14 13:31:18 UTC (rev 2872) @@ -3,9 +3,12 @@ import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URL; import java.net.URLDecoder; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -116,6 +119,7 @@ String question; Object answer; for(Entry<Integer, String> entry : id2Query.entrySet()){ + if(entry.getKey() != 23)continue; questionId = entry.getKey(); question = entry.getValue(); try { @@ -150,7 +154,7 @@ answer = new HashSet<String>(); if(!query.contains("LIMIT")){ query = query + " LIMIT 200"; - } + }System.out.println(query); ResultSet rs = endpoint.executeSelect(query); String variable; if(rs.getResultVars().size() == 1){ @@ -158,6 +162,7 @@ } else { variable = targetVar; } + QuerySolution qs; RDFNode node; while(rs.hasNext()){ @@ -177,6 +182,12 @@ public void setEndpoint(SparqlEndpoint endpoint){ this.endpoint = endpoint; + try { + stbl.setEndpoint(new org.dllearner.kb.sparql.SparqlEndpoint( + new URL(endpoint.id().substring(endpoint.id().indexOf("_")+1)), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList())); + } catch (MalformedURLException e) { + e.printStackTrace(); + } } public void setUseRemoteValidation(boolean useRemoteValidation){ @@ -188,23 +199,26 @@ int topN2Print = 10; - int questionId; - String question; - String query; - Object answer; + int questionId = -1; + String question = ""; + String targetQuery; + Object targetAnswer; + double precision = -1; + double recall = -1; LatexWriter latex = new LatexWriter(); - latex.beginDocument(); int i = 0; for(Entry<Integer, String> entry : id2Question.entrySet()){ -// if(i++ == 1)break; + if(entry.getKey() != 23)continue; try { questionId = entry.getKey(); question = entry.getValue(); - query = id2Query.get(questionId); - answer = id2Answer.get(questionId); + targetQuery = id2Query.get(questionId); + targetAnswer = id2Answer.get(questionId); + precision = -1; + recall = -1; logger.info("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"); logger.info("QUESTION: " + question + "\n"); - logger.info("TARGET QUERY:\n" + query + "\n"); + logger.info("TARGET QUERY:\n" + targetQuery + "\n"); //write new section for query @@ -213,10 +227,10 @@ latex.beginSubsection("Target"); //write subsubsection for target query latex.beginSubSubsection("Query"); - latex.addListing(query); + latex.addListing(targetQuery); //write subsubsection for target result - latex.beginSubSubsection("Result" + ((answer instanceof Collection<?>) ? "(" + ((Collection)answer).size()+")" : "")); - latex.addText(escapeAnswerString(answer)); + latex.beginSubSubsection("Result" + ((targetAnswer instanceof Collection<?>) ? "(" + ((Collection)targetAnswer).size()+")" : "")); + latex.addText(escapeAnswerString(targetAnswer)); //set the question stbl.setQuestion(question); @@ -269,29 +283,73 @@ k++; } - //write solution subsection if exists if(learnedQuery != null){ latex.beginSubsection("Solution"); latex.beginSubSubsection("Query"); latex.addListing(learnedQuery); latex.beginSubSubsection("Result" + ((learnedAnswer instanceof Collection<?>) ? "(" + ((Collection)learnedAnswer).size()+")" : "")); - latex.addText(escapeAnswerString(learnedAnswer, answer)); + latex.addText(escapeAnswerString(learnedAnswer, targetAnswer)); + precision = computePrecision(targetAnswer, learnedAnswer); + recall = computeRecall(targetAnswer, learnedAnswer); } - + latex.addSummaryTableEntry(questionId, question, precision, recall); } catch (NoTemplateFoundException e) { e.printStackTrace(); logger.error("Template generation failed"); + latex.addSummaryTableEntry(questionId, question, precision, recall); } catch(Exception e){ e.printStackTrace(); logger.error("ERROR"); + latex.addSummaryTableEntry(questionId, question, precision, recall); } } - latex.endDocument(); latex.write("log/evaluation.tex"); } + private double computeRecall(Object targetAnswer, Object answer){ + if(answer == null){ + return -1; + } + double recall = 0; + if(targetAnswer instanceof Collection<?> && answer instanceof Collection<?>){ + Set<String> targetAnswerColl = new HashSet<String>((Collection<? extends String>) targetAnswer); + Set<String> answerColl = new HashSet<String>((Collection<? extends String>) answer); + int targetSize = targetAnswerColl.size(); + targetAnswerColl.retainAll(answerColl); + recall = targetAnswerColl.size() / targetSize; + } else { + if(targetAnswer.equals(answer)){ + recall = 1; + } else { + recall = 0; + } + } + return recall; + } + + private double computePrecision(Object targetAnswer, Object answer){ + if(answer == null){ + return -1; + } + double precision = 0; + if(targetAnswer instanceof Collection<?> && answer instanceof Collection<?>){ + Set<String> targetAnswerColl = new HashSet<String>((Collection<? extends String>) targetAnswer); + Set<String> answerColl = new HashSet<String>((Collection<? extends String>) answer); + int learnedSize = targetAnswerColl.size(); + targetAnswerColl.retainAll(answerColl); + precision = targetAnswerColl.size() / learnedSize; + } else { + if(targetAnswer.equals(answer)){ + precision = 1; + } else { + precision = 0; + } + } + return precision; + } + public void run_without_testing_answer(){ int topN2Print = 25; @@ -304,7 +362,7 @@ latex.beginDocument(); int i = 0; for(Entry<Integer, String> entry : id2Question.entrySet()){ -// if(i++ == 1)break; + if(entry.getKey() != 23)continue; try { questionId = entry.getKey(); question = entry.getValue(); @@ -445,8 +503,7 @@ File file = new File("src/main/resources/tbsl/evaluation/dbpedia-train.xml"); - SparqlEndpoint endpoint = new CachingSparqlEndpoint(new HttpSparqlEndpoint("http://live.dbpedia.org/sparql/", "http://dbpedia.org/sparql"), "cache"); - + SparqlEndpoint endpoint = new CachingSparqlEndpoint(new HttpSparqlEndpoint("http://139.18.2.96:8910/sparql", "http://dbpedia.org"), "cache");System.out.println(endpoint.id()); Evaluation eval = new Evaluation(file); eval.setEndpoint(endpoint); eval.setUseRemoteValidation(true); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |