From: <lor...@us...> - 2011-06-15 08:51:04
|
Revision: 2877 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2877&view=rev Author: lorenz_b Date: 2011-06-15 08:50:57 +0000 (Wed, 15 Jun 2011) Log Message: ----------- Fixed bug in evaluation output. Made evaluation configurable with properties file. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java Added Paths: ----------- trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-06-15 07:04:54 UTC (rev 2876) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-06-15 08:50:57 UTC (rev 2877) @@ -8,7 +8,6 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -40,10 +39,8 @@ import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.query.ResultSetFactory; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.hp.hpl.jena.sparql.vocabulary.FOAF; import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDFS; @@ -66,7 +63,7 @@ private Ranking ranking = Ranking.SIMILARITY; private boolean useRemoteEndpointValidation = true; private boolean stopIfQueryResultNotEmpty = true; - private int maxQueriesPerTemplate = 25; + private int maxTestedQueriesPerTemplate = 25; private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); private ExtractionDBCache cache = new ExtractionDBCache("cache"); @@ -148,6 +145,14 @@ this.maxQueryExecutionTimeInSeconds = maxQueryExecutionTimeInSeconds; } + public int getMaxTestedQueriesPerTemplate() { + return maxTestedQueriesPerTemplate; + } + + public void setMaxTestedQueriesPerTemplate(int maxTestedQueriesPerTemplate) { + this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate; + } + public void setRanking(Ranking ranking) { this.ranking = ranking; } @@ -579,7 +584,7 @@ private List<Query> getNBestQueryCandidatesForTemplates(Map<Template, Collection<? extends Query>> template2Queries){ List<Query> queries = new ArrayList<Query>(); for(Entry<Template, Collection<? extends Query>> entry : template2Queries.entrySet()){ - int max = Math.min(maxQueriesPerTemplate, entry.getValue().size()); + int max = Math.min(maxTestedQueriesPerTemplate, entry.getValue().size()); int i = 0; for(Query q : entry.getValue()){ queries.add(q); @@ -681,11 +686,10 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Give me all actors starring in Batman Begins."; -// String question = "Give me all films starring Brad Pitt"; + String question = "Give me all school types."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); - SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), - Collections.<String>singletonList("http://live.dbpedia.org"), Collections.<String>emptyList()); + SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://greententacle.techfak.uni-bielefeld.de:5171/sparql"), + Collections.<String>singletonList(""), Collections.<String>emptyList()); learner.setEndpoint(endpoint); learner.setQuestion(question); learner.learnSPARQLQueries(); Added: trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties =================================================================== --- trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties (rev 0) +++ trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties 2011-06-15 08:50:57 UTC (rev 2877) @@ -0,0 +1,10 @@ +#endpoint http://live.dbpedia.org/sparql +endpointURL=http://greententacle.techfak.uni-bielefeld.de:5171/sparql +defaultGraphURI= +# validate against remote endpoint if 'true', otherwise load a model and validate against it +useRemoteEndpointValidation=true +# number of tested SPARQL queries per template +maxTestedQueriesPerTemplate=25 +# max execution time for a SPARQL query before canceled +maxQueryExecutionTimeInSeconds=20 + Property changes on: trunk/components-ext/src/main/resources/tbsl/evaluation/evaluation.properties ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java =================================================================== --- trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-06-15 07:04:54 UTC (rev 2876) +++ trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java 2011-06-15 08:50:57 UTC (rev 2877) @@ -1,11 +1,11 @@ package org.dllearner.algorithm.tbsl; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; -import java.net.URLDecoder; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -13,6 +13,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Properties; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; @@ -47,9 +48,8 @@ public class Evaluation{ private static Logger logger = Logger.getLogger(Evaluation.class); + private static String PROPERTIES_PATH = "tbsl/evaluation/evaluation.properties"; - private File evaluationFile; - private SortedMap<Integer, String> id2Question = new TreeMap<Integer, String>(); private SortedMap<Integer, String> id2Query = new TreeMap<Integer, String>(); private SortedMap<Integer, Object> id2Answer = new TreeMap<Integer, Object>(); @@ -58,14 +58,41 @@ private SPARQLTemplateBasedLearner stbl; - public Evaluation(File ... evaluationFiles){ + private int testID = -1; + + public Evaluation(File ... evaluationFiles) throws FileNotFoundException, IOException{ for(File file : evaluationFiles){ readQueries(file); } stbl = new SPARQLTemplateBasedLearner(); + + init(); } - public void init(){ + public void init() throws FileNotFoundException, IOException{ + //load properties for evaluation + Properties props = new Properties(); + props.load(new FileInputStream(this.getClass().getClassLoader().getResource(PROPERTIES_PATH).getPath())); + + String endpointURL = props.getProperty("endpointURL", "http://live.dbpedia.org/sparql"); + String defaultGraphURI = props.getProperty("defaultGraphURI", "http://live.dbpedia.org"); + this.endpoint = new CachingSparqlEndpoint(new HttpSparqlEndpoint(endpointURL, defaultGraphURI), "cache"); + try { + stbl.setEndpoint(new org.dllearner.kb.sparql.SparqlEndpoint( + new URL(endpointURL), Collections.singletonList(defaultGraphURI), Collections.<String>emptyList())); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + + boolean useRemoteEndpointValidation = Boolean.parseBoolean(props.getProperty("useRemoteEndpointValidation", "True")); + stbl.setUseRemoteEndpointValidation(useRemoteEndpointValidation); + + int maxTestedQueriesPerTemplate = Integer.parseInt(props.getProperty("maxTestedQueriesPerTemplate", "25")); + stbl.setMaxTestedQueriesPerTemplate(maxTestedQueriesPerTemplate); + + int maxQueryExecutionTimeInSeconds = Integer.parseInt(props.getProperty("maxQueryExecutionTimeInSeconds", "20")); + stbl.setMaxQueryExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); + loadAnswers(); } @@ -119,7 +146,7 @@ String question; Object answer; for(Entry<Integer, String> entry : id2Query.entrySet()){ - if(entry.getKey() != 23)continue; + if(testID != -1 && entry.getKey() != testID)continue; questionId = entry.getKey(); question = entry.getValue(); try { @@ -153,7 +180,7 @@ } else { answer = new HashSet<String>(); if(!query.contains("LIMIT")){ - query = query + " LIMIT 200"; + query = query + " LIMIT 500"; }System.out.println(query); ResultSet rs = endpoint.executeSelect(query); String variable; @@ -168,12 +195,13 @@ while(rs.hasNext()){ qs = rs.next(); node = qs.get(variable); - if(node.isURIResource()){ - ((HashSet)answer).add(node.asResource().getURI()); - } else if(node.isLiteral()){ - ((HashSet)answer).add(node.asLiteral().getLexicalForm()); + if(node != null){ + if(node.isURIResource()){ + ((HashSet)answer).add(node.asResource().getURI()); + } else if(node.isLiteral()){ + ((HashSet)answer).add(node.asLiteral().getLexicalForm()); + } } - } } logger.debug("Answer: " + answer); @@ -194,7 +222,15 @@ stbl.setUseRemoteEndpointValidation(useRemoteValidation); } + public void setMaxQueryExecutionTimeInSeconds(int maxQueryExecutionTimeInSeconds){ + stbl.setMaxQueryExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); + } + public void setMaxTestedQueriesPerTemplate(int maxTestedQueriesPerTemplate) { + stbl.setMaxTestedQueriesPerTemplate(maxTestedQueriesPerTemplate); + } + + public void run(){ int topN2Print = 10; @@ -208,7 +244,7 @@ LatexWriter latex = new LatexWriter(); int i = 0; for(Entry<Integer, String> entry : id2Question.entrySet()){ - if(entry.getKey() != 23)continue; + if(testID != -1 && entry.getKey() != testID)continue; try { questionId = entry.getKey(); question = entry.getValue(); @@ -308,19 +344,20 @@ latex.write("log/evaluation.tex"); } - private double computeRecall(Object targetAnswer, Object answer){ - if(answer == null){ + private double computeRecall(Object targetAnswer, Object learnedAnswer){ + if(learnedAnswer == null){ return -1; } double recall = 0; - if(targetAnswer instanceof Collection<?> && answer instanceof Collection<?>){ + if(targetAnswer instanceof Collection<?> && learnedAnswer instanceof Collection<?>){ Set<String> targetAnswerColl = new HashSet<String>((Collection<? extends String>) targetAnswer); - Set<String> answerColl = new HashSet<String>((Collection<? extends String>) answer); + Set<String> learnedAnswerColl = new HashSet<String>((Collection<? extends String>) learnedAnswer); int targetSize = targetAnswerColl.size(); - targetAnswerColl.retainAll(answerColl); - recall = targetAnswerColl.size() / targetSize; + targetAnswerColl.retainAll(learnedAnswerColl); + recall = (double)targetAnswerColl.size() / (double)targetSize; + recall = Math.round( recall * 100. ) / 100.; } else { - if(targetAnswer.equals(answer)){ + if(targetAnswer.equals(learnedAnswer)){ recall = 1; } else { recall = 0; @@ -329,19 +366,20 @@ return recall; } - private double computePrecision(Object targetAnswer, Object answer){ - if(answer == null){ + private double computePrecision(Object targetAnswer, Object learnedAnswer){ + if(learnedAnswer == null){ return -1; } double precision = 0; - if(targetAnswer instanceof Collection<?> && answer instanceof Collection<?>){ + if(targetAnswer instanceof Collection<?> && learnedAnswer instanceof Collection<?>){ Set<String> targetAnswerColl = new HashSet<String>((Collection<? extends String>) targetAnswer); - Set<String> answerColl = new HashSet<String>((Collection<? extends String>) answer); - int learnedSize = targetAnswerColl.size(); - targetAnswerColl.retainAll(answerColl); - precision = targetAnswerColl.size() / learnedSize; + Set<String> learnedAnswerColl = new HashSet<String>((Collection<? extends String>) learnedAnswer); + int learnedSize = learnedAnswerColl.size(); + targetAnswerColl.retainAll(learnedAnswerColl); + precision = (double)targetAnswerColl.size() / (double)learnedSize; + precision = Math.round( precision * 100. ) / 100.; } else { - if(targetAnswer.equals(answer)){ + if(targetAnswer.equals(learnedAnswer)){ precision = 1; } else { precision = 0; @@ -362,7 +400,7 @@ latex.beginDocument(); int i = 0; for(Entry<Integer, String> entry : id2Question.entrySet()){ - if(entry.getKey() != 23)continue; + if(entry.getKey() != testID)continue; try { questionId = entry.getKey(); question = entry.getValue(); @@ -501,13 +539,14 @@ fileAppender.setThreshold(Level.INFO); Logger.getRootLogger().addAppender(fileAppender); + if(args.length == 0){ + System.out.println("Usage: Evaluation <file>"); + System.exit(0); + } - File file = new File("src/main/resources/tbsl/evaluation/dbpedia-train.xml"); - SparqlEndpoint endpoint = new CachingSparqlEndpoint(new HttpSparqlEndpoint("http://139.18.2.96:8910/sparql", "http://dbpedia.org"), "cache");System.out.println(endpoint.id()); + File file = new File(Evaluation.class.getClassLoader().getResource(args[0]).getPath()); + Evaluation eval = new Evaluation(file); - eval.setEndpoint(endpoint); - eval.setUseRemoteValidation(true); - eval.init(); eval.run(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |