From: <lor...@us...> - 2011-02-27 18:14:39
|
Revision: 2673 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2673&view=rev Author: lorenz_b Date: 2011-02-27 18:14:32 +0000 (Sun, 27 Feb 2011) Log Message: ----------- Fixed bug. Added 2 more metrics for string comparison (Levenshtein and Jaro-Winkler, in addition to QGrams). Added a condition which selects positive examples from the answer set if the number of positive examples found in the search is lower than the required number of start examples. Added constant SIMILARITY_THRESHOLD in the evaluation script. Modified Paths: -------------- trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/QueryTreeFilterEvaluation.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java 2011-02-27 16:08:41 UTC (rev 2672) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java 2011-02-27 18:14:32 UTC (rev 2673) @@ -5,17 +5,15 @@ import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; -import java.io.StringReader; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Hashtable; import java.util.List; @@ -34,12 +32,6 @@ import org.apache.log4j.Level; import org.apache.log4j.Logger; import
org.apache.log4j.PatternLayout; -import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.StopFilter; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.standard.StandardTokenizer; -import org.apache.lucene.util.Version; -import org.apache.solr.analysis.StopFilterFactory; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; @@ -59,9 +51,7 @@ import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.sparqlquerygenerator.operations.lgg.LGGGeneratorImpl; -import org.dllearner.sparqlquerygenerator.util.ExactMatchFilter; import org.dllearner.sparqlquerygenerator.util.QuestionBasedStatementFilter; -import org.dllearner.sparqlquerygenerator.util.QuestionBasedStatementSelector; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -72,16 +62,11 @@ import de.simba.ner.WordnetQuery; -import edu.stanford.nlp.ling.HasWord; -import edu.stanford.nlp.ling.Sentence; -import edu.stanford.nlp.ling.TaggedWord; -import edu.stanford.nlp.tagger.maxent.MaxentTagger; - public class EvaluationWithNLQueriesScript { private static Logger logger = Logger.getLogger(EvaluationWithNLQueriesScript.class); private static final boolean USE_SYNONYMS = false; - private static final boolean USE_WIKIPEDIA_SEARCH = false; + private static final boolean USE_WIKIPEDIA_SEARCH = true; private static final String SOLR_SERVER_URL = "http://139.18.2.164:8983/solr/dbpediaCore/"; private static final String QUERY_ANSWERS_FILE_PATH = "evaluation/dbpedia-train_cleaned.xml"; @@ -96,7 +81,9 @@ private static final int TOP_K = 20; + private static final double SIMILARITY_THRESHOLD = 0.5; + private Map<String, String> question2query = new Hashtable<String, String>(); private SortedMap<String, Set<String>> question2Answers = new TreeMap<String, 
Set<String>>(); @@ -122,13 +109,30 @@ List<String> predicateFilters = new ArrayList<String>(); predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - exFinder = new ExampleFinder(new SPARQLEndpointEx( - new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), //new URL("http://lod.openlinksw.com/sparql"), - Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()), null, null, predicateFilters), selectCache, constructCache); - schemaIndex = new DBpediaSchemaIndex(SCHEMA_FILE_PATH); + //prefixes and baseURI to improve readability of trees + String baseURI = "http://dbpedia.org/resource/"; + Map<String,String> prefixes = new HashMap<String,String>(); + prefixes.put("dbo","http://dbpedia.org/ontology/"); + prefixes.put("dbprop","http://dbpedia.org/property/"); + prefixes.put("rdfs","http://www.w3.org/2000/01/rdf-schema#"); + prefixes.put("rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#"); + prefixes.put("skos","http://www.w3.org/2004/02/skos/core#"); + prefixes.put("geo","http://www.w3.org/2003/01/geo/wgs84_pos#"); + prefixes.put("georss","http://www.georss.org/georss/"); + prefixes.put("owl","http://www.w3.org/2002/07/owl#"); + prefixes.put("yago","http://dbpedia.org/class/yago/"); + prefixes.put("cyc","http://sw.opencyc.org/concept/"); + prefixes.put("foaf","http://xmlns.com/foaf/0.1/"); + exFinder = new ExampleFinder(new SPARQLEndpointEx(new URL("http://live.dbpedia.org/sparql"), //new URL("http://lod.openlinksw.com/sparql"), + Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList(), null, baseURI, prefixes, predicateFilters), selectCache, constructCache); +// schemaIndex = new DBpediaSchemaIndex(SCHEMA_FILE_PATH); luceneSearch = new LuceneSearch(LUCENE_INDEX_DIRECTORY); luceneSearch.setHitsPerPage(TOP_K); wordNet = new WordnetQuery(WORDNET_DICTIONARY); + + + + } catch (MalformedURLException e) { e.printStackTrace(); } 
@@ -297,7 +301,9 @@ logger.info("Answers (" + answers.size() + "): " + answers); //preprocess question to extract only relevant words and set them as filter for statements relevantWords = getRelevantWords(question); - exFinder.setStatementFilter(new QuestionBasedStatementFilter(new HashSet<String>(relevantWords))); + QuestionBasedStatementFilter filter = new QuestionBasedStatementFilter(new HashSet<String>(relevantWords)); + filter.setThreshold(SIMILARITY_THRESHOLD); + exFinder.setStatementFilter(filter); // exFinder.setStatementSelector(new QuestionBasedStatementSelector(new HashSet<String>(relevantWords))); //expand with synonyms @@ -338,6 +344,17 @@ } } } + //if there are not enough positive examples in search we select some from the answer set which simulates manually addition of user + if(posExamples.size() < NR_OF_POS_START_EXAMPLES_COUNT){ + logger.info("Found only " + posExamples.size() + " positive example(s) in search result. Adding more from the answer set..."); + for(String answer : answers){ + posExamples.add(answer); + if(posExamples.size() == NR_OF_POS_START_EXAMPLES_COUNT){ + break; + } + } + + } if(posExamples.isEmpty()){ logger.warn("Current search returned no positive example in the Top " + TOP_K + ".\n" + "Skipping query..."); Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/QueryTreeFilterEvaluation.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/QueryTreeFilterEvaluation.java 2011-02-27 16:08:41 UTC (rev 2672) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/QueryTreeFilterEvaluation.java 2011-02-27 18:14:32 UTC (rev 2673) @@ -47,8 +47,9 @@ QueryTree<String> tree = treeFactory.getQueryTree(uri, model); System.out.println("Tree without filtering:\n" + tree.getStringRepresentation()); - treeFactory.setStatementSelector(new QuestionBasedStatementSelector(new HashSet<String>(relevantWords))); +// 
treeFactory.setStatementSelector(new QuestionBasedStatementSelector(new HashSet<String>(relevantWords))); treeFactory.setStatementFilter(new QuestionBasedStatementFilter(new HashSet<String>(relevantWords))); + QueryTree<String> filteredTree = treeFactory.getQueryTree(uri, model); System.out.println("Tree with filtering:\n" + filteredTree.getStringRepresentation()); Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java 2011-02-27 16:08:41 UTC (rev 2672) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java 2011-02-27 18:14:32 UTC (rev 2673) @@ -11,6 +11,7 @@ import org.dllearner.autosparql.client.model.Example; import org.dllearner.autosparql.server.exception.TimeOutException; import org.dllearner.autosparql.server.util.SPARQLEndpointEx; +import org.dllearner.autosparql.server.util.TreeHelper; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.sparqlquerygenerator.SPARQLQueryGeneratorCached; @@ -97,7 +98,7 @@ // logger.info("Fetching model for resource: " + resource); model = modelCache.getModel(resource); queryTree = queryTreeCache.getQueryTree(resource, model); - System.out.println(queryTree.getStringRepresentation()); + System.out.println(TreeHelper.getAbbreviatedTreeRepresentation(queryTree, endpoint.getBaseURI(), endpoint.getPrefixes())); posExampleTrees.add(queryTree); } for(String resource : negExamples){ Modified: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java =================================================================== --- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java 2011-02-27 16:08:41 UTC (rev 2672) +++ 
trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java 2011-02-27 18:14:32 UTC (rev 2673) @@ -128,7 +128,7 @@ resource2Statements.put(st.getSubject().toString(), statements); } statements.add(st); - if(st.getObject().isURIResource()){ + if(st.getObject().isURIResource() && !resource2Statements.containsKey(st.getObject().asResource().getURI())){ fillMap(st.getObject().asResource(), model, resource2Statements); } } Modified: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java =================================================================== --- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java 2011-02-27 16:08:41 UTC (rev 2672) +++ trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java 2011-02-27 18:14:32 UTC (rev 2673) @@ -3,6 +3,8 @@ import java.util.Set; import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric; +import uk.ac.shef.wit.simmetrics.similaritymetrics.JaroWinkler; +import uk.ac.shef.wit.simmetrics.similaritymetrics.Levenshtein; import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance; import com.hp.hpl.jena.rdf.model.Statement; @@ -11,14 +13,20 @@ public class QuestionBasedStatementFilter extends Filter<Statement> { private Set<String> questionWords; - private AbstractStringMetric metric; + + private AbstractStringMetric qGramMetric; + private AbstractStringMetric levensteinMetric; + private AbstractStringMetric jaroWinklerMetric; + private double threshold = 0.3; int cnt = 0; public QuestionBasedStatementFilter(Set<String> questionWords){ this.questionWords = questionWords; - metric = new QGramsDistance(); + qGramMetric = new QGramsDistance(); + levensteinMetric = new Levenshtein(); + jaroWinklerMetric = new JaroWinkler(); } @@ -32,7 +40,10 @@ } private boolean areSimiliar(String s1, 
String s2){//cnt++;System.out.println(cnt); - float sim = metric.getSimilarity(s1, s2); + float qSim = qGramMetric.getSimilarity(s1, s2); + float lSim = levensteinMetric.getSimilarity(s1, s2); + float jSim = jaroWinklerMetric.getSimilarity(s1, s2); + float sim = Math.max(Math.max(qSim, lSim), jSim); return sim >= threshold; } @@ -52,5 +63,9 @@ return false; } + + public void setThreshold(double threshold){ + this.threshold = threshold; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |