From: <lor...@us...> - 2011-02-28 17:01:17
|
Revision: 2691 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2691&view=rev Author: lorenz_b Date: 2011-02-28 17:01:10 +0000 (Mon, 28 Feb 2011) Log Message: ----------- Added filter for query trees. Modified Paths: -------------- trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/QueryTreeFilterEvaluation.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java trunk/autosparql/src/main/java/org/dllearner/autosparql/server/search/QuestionProcessor.java trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java Added Paths: ----------- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedQueryTreeFilter.java Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java 2011-02-28 15:48:53 UTC (rev 2690) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java 2011-02-28 17:01:10 UTC (rev 2691) @@ -53,6 +53,7 @@ import org.dllearner.sparqlquerygenerator.datastructures.QueryTree; import org.dllearner.sparqlquerygenerator.operations.lgg.LGGGenerator; import org.dllearner.sparqlquerygenerator.operations.lgg.LGGGeneratorImpl; +import org.dllearner.sparqlquerygenerator.util.QuestionBasedQueryTreeFilter; import org.dllearner.sparqlquerygenerator.util.QuestionBasedStatementFilter; import org.w3c.dom.DOMException; import org.w3c.dom.Document; @@ -84,7 +85,7 @@ private static final int TOP_K = 20; - private static final double SIMILARITY_THRESHOLD = 0.3; + private static final double SIMILARITY_THRESHOLD = 0.4; private Map<String, String> question2query = new Hashtable<String, String>(); @@ -322,7 +323,10 @@ relevantWords = getRelevantWords(question); QuestionBasedStatementFilter filter = new QuestionBasedStatementFilter(new HashSet<String>(relevantWords)); filter.setThreshold(SIMILARITY_THRESHOLD); + QuestionBasedQueryTreeFilter treeFilter = new QuestionBasedQueryTreeFilter(new HashSet<String>(relevantWords)); + treeFilter.setThreshold(SIMILARITY_THRESHOLD); exFinder.setStatementFilter(filter); + exFinder.setQueryTreeFilter(treeFilter); // exFinder.setStatementSelector(new QuestionBasedStatementSelector(new HashSet<String>(relevantWords))); //expand with synonyms Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/QueryTreeFilterEvaluation.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/QueryTreeFilterEvaluation.java 2011-02-28 15:48:53 UTC (rev 2690) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/QueryTreeFilterEvaluation.java 2011-02-28 17:01:10 UTC (rev 2691) @@ -15,19 +15,22 @@ import org.dllearner.sparqlquerygenerator.impl.QueryTreeFactoryImpl; import org.dllearner.sparqlquerygenerator.util.ModelGenerator; import org.dllearner.sparqlquerygenerator.util.ModelGenerator.Strategy; +import org.dllearner.sparqlquerygenerator.util.QuestionBasedQueryTreeFilter; import org.dllearner.sparqlquerygenerator.util.QuestionBasedStatementFilter; import org.dllearner.sparqlquerygenerator.util.QuestionBasedStatementSelector; import com.hp.hpl.jena.rdf.model.Model; public class QueryTreeFilterEvaluation { + + private static double THRESHOLD = 0.4; /** * @param args */ public static void main(String[] args) { - String question = "Give me all actors starring in Batman Begins.";//"Give me all European Capitals!"; - String uri = "http://dbpedia.org/resource/Christian_Bale";//"http://dbpedia.org/resource/Vienna"; + String question = "Give me all soccer clubs in the Premier League.";//"Give me all European Capitals!"; + String uri = "http://dbpedia.org/resource/Fulham_F.C.";//"http://dbpedia.org/resource/Vienna"; System.out.println("Question: \"" + question + "\""); System.out.println("Resource: " + uri); @@ -64,11 +67,16 @@ QueryTree<String> tree = treeFactory.getQueryTree(uri, model); System.out.println("Tree without filtering:\n" + TreeHelper.getAbbreviatedTreeRepresentation(tree, baseURI, prefixes)); -// treeFactory.setStatementSelector(new QuestionBasedStatementSelector(new HashSet<String>(relevantWords))); - treeFactory.setStatementFilter(new QuestionBasedStatementFilter(new HashSet<String>(relevantWords))); + QuestionBasedStatementFilter filter = new QuestionBasedStatementFilter(new HashSet<String>(relevantWords)); + filter.setThreshold(THRESHOLD); + treeFactory.setStatementFilter(filter); QueryTree<String> filteredTree = treeFactory.getQueryTree(uri, model); - System.out.println("Tree with filtering:\n" + TreeHelper.getAbbreviatedTreeRepresentation(filteredTree, baseURI, prefixes)); + System.out.println("Tree with filtering before creation:\n" + TreeHelper.getAbbreviatedTreeRepresentation(filteredTree, baseURI, prefixes)); + +// QuestionBasedQueryTreeFilter treeFilter = new QuestionBasedQueryTreeFilter(new HashSet<String>(relevantWords)); +// filteredTree = treeFilter.getFilteredQueryTree(filteredTree); +// System.out.println("Tree with filtering after creation:\n" + TreeHelper.getAbbreviatedTreeRepresentation(filteredTree, baseURI, prefixes)); } Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java 2011-02-28 15:48:53 UTC (rev 2690) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/ExampleFinder.java 2011-02-28 17:01:10 UTC (rev 2691) @@ -29,6 +29,7 @@ import org.dllearner.sparqlquerygenerator.operations.nbr.strategy.GreedyNBRStrategy; import org.dllearner.sparqlquerygenerator.util.Filter; import org.dllearner.sparqlquerygenerator.util.ModelGenerator; +import org.dllearner.sparqlquerygenerator.util.QuestionBasedQueryTreeFilter; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSetRewindable; @@ -62,6 +63,8 @@ private LGGGenerator<String> lggGen; private NBR<String> nbrGen; + private QuestionBasedQueryTreeFilter treeFilter; + private boolean makeAlwaysNBR = false; private static final int MAX_NBR_COMPUTING_TIME = 100; @@ -94,11 +97,15 @@ posExampleTrees.add(queryTree); } lgg = lggGen.getLGG(posExampleTrees); + if(treeFilter != null){ + lgg = treeFilter.getFilteredQueryTree(lgg); + } currentQuery = lgg.toSPARQLQueryString(); System.out.println("LGG: \n" + TreeHelper.getAbbreviatedTreeRepresentation(lgg, endpoint.getBaseURI(), endpoint.getPrefixes())); return lgg; } + public Example findSimilarExample(List<String> posExamples, List<String> negExamples) throws SPARQLQueryException, TimeOutException{ logger.info("Searching similiar example"); @@ -425,6 +432,9 @@ List<QueryTree<String>> negExamplesTrees){ LGGGenerator<String> lggGen = new LGGGeneratorImpl<String>(); lgg = lggGen.getLGG(posExamplesTrees); + if(treeFilter != null){ + lgg = treeFilter.getFilteredQueryTree(lgg); + } logger.info("LGG(Tree): \n" + TreeHelper.getAbbreviatedTreeRepresentation( lgg, endpoint.getBaseURI(), endpoint.getPrefixes())); logger.info("LGG(Query):\n" + lgg.toSPARQLQueryString()); @@ -542,6 +552,10 @@ nbrGen.setStatementFilter(filter); } + public void setQueryTreeFilter(QuestionBasedQueryTreeFilter filter){ + treeFilter = filter; + } + public void setStatementSelector(Selector selector){ queryTreeCache.setStatementSelector(selector); } Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/server/search/QuestionProcessor.java =================================================================== --- trunk/autosparql/src/main/java/org/dllearner/autosparql/server/search/QuestionProcessor.java 2011-02-28 15:48:53 UTC (rev 2690) +++ trunk/autosparql/src/main/java/org/dllearner/autosparql/server/search/QuestionProcessor.java 2011-02-28 17:01:10 UTC (rev 2691) @@ -56,7 +56,7 @@ List<String> words = new ArrayList<String>(); List<ArrayList<? extends HasWord>> sentences = tagger.tokenizeText(new BufferedReader(new StringReader(question))); for (ArrayList<? extends HasWord> sentence : sentences) { - ArrayList<TaggedWord> tSentence = tagger.tagSentence(sentence); + ArrayList<TaggedWord> tSentence = tagger.tagSentence(sentence);System.out.println(tSentence); String nounPhrase = ""; boolean firstWord = true; for(TaggedWord tWord : tSentence){ @@ -68,7 +68,7 @@ firstWord = false; } //if words belongs to noun phrase treat them as one single term - if(tWord.tag().equals("NNP")){ + if(tWord.tag().equals("NNP") || tWord.tag().startsWith("NN")){ nounPhrase += " " + tWord.word(); } else { if(!nounPhrase.isEmpty()){ Added: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedQueryTreeFilter.java =================================================================== --- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedQueryTreeFilter.java (rev 0) +++ trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedQueryTreeFilter.java 2011-02-28 17:01:10 UTC (rev 2691) @@ -0,0 +1,81 @@ +package org.dllearner.sparqlquerygenerator.util; + +import java.util.Set; + +import org.dllearner.sparqlquerygenerator.datastructures.QueryTree; +import org.dllearner.sparqlquerygenerator.datastructures.impl.QueryTreeImpl; + +import uk.ac.shef.wit.simmetrics.similaritymetrics.AbstractStringMetric; +import uk.ac.shef.wit.simmetrics.similaritymetrics.JaroWinkler; +import uk.ac.shef.wit.simmetrics.similaritymetrics.Levenshtein; +import uk.ac.shef.wit.simmetrics.similaritymetrics.QGramsDistance; + +public class QuestionBasedQueryTreeFilter { + +private Set<String> questionWords; + + private AbstractStringMetric qGramMetric; + private AbstractStringMetric levensteinMetric; + private AbstractStringMetric jaroWinklerMetric; + + private double threshold = 0.4; + + public QuestionBasedQueryTreeFilter(Set<String> questionWords){ + this.questionWords = questionWords; + qGramMetric = new QGramsDistance(); + levensteinMetric = new Levenshtein(); + jaroWinklerMetric = new JaroWinkler(); + } + + public QueryTree<String> getFilteredQueryTree(QueryTree<String> tree){ + QueryTree<String> copy = new QueryTreeImpl<String>(tree); + filterTree(copy); + return copy; + } + + public void setThreshold(double threshold){ + this.threshold = threshold; + } + + private void filterTree(QueryTree<String> tree){ + String edge; + for(QueryTree<String> child : tree.getChildren()){ + if(child.getUserObject().equals("?")){ + edge = (String) tree.getEdge(child); + if(!isSimiliar2QuestionWord(getFragment(edge))){ + child.getParent().removeChild((QueryTreeImpl<String>) child); + } + } else { + filterTree(child); + } + } + } + + private boolean isSimiliar2QuestionWord(String s){ + for(String word : questionWords){ + if(areSimiliar(word, s)){ + return true; + } + } + return false; + } + + private String getFragment(String uri){ + int i = uri.lastIndexOf("#"); + if(i > 0){ + return uri.substring(i+1); + } else { + return uri.substring(uri.lastIndexOf("/")+1); + } + } + + private boolean areSimiliar(String s1, String s2){//cnt++;System.out.println(cnt); + float qSim = qGramMetric.getSimilarity(s1, s2); + float lSim = levensteinMetric.getSimilarity(s1, s2); +// float jSim = jaroWinklerMetric.getSimilarity(s1, s2); + float sim = Math.max(qSim, lSim); +// sim = Math.max(sim, jSim); + return sim >= threshold; + } + +} Modified: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java =================================================================== --- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java 2011-02-28 15:48:53 UTC (rev 2690) +++ trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java 2011-02-28 17:01:10 UTC (rev 2691) @@ -18,7 +18,7 @@ private AbstractStringMetric levensteinMetric; private AbstractStringMetric jaroWinklerMetric; - private double threshold = 0.7; + private double threshold = 0.4; int cnt = 0; @@ -42,7 +42,7 @@ private boolean areSimiliar(String s1, String s2){//cnt++;System.out.println(cnt); float qSim = qGramMetric.getSimilarity(s1, s2); float lSim = levensteinMetric.getSimilarity(s1, s2); - float jSim = jaroWinklerMetric.getSimilarity(s1, s2); +// float jSim = jaroWinklerMetric.getSimilarity(s1, s2); float sim = Math.max(qSim, lSim); // sim = Math.max(sim, jSim); return sim >= threshold; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |