From: <lor...@us...> - 2011-02-26 13:18:18
|
Revision: 2667
          http://dl-learner.svn.sourceforge.net/dl-learner/?rev=2667&view=rev
Author:   lorenz_b
Date:     2011-02-26 13:18:11 +0000 (Sat, 26 Feb 2011)

Log Message:
-----------
Added option to enable synonym expansion in eval script.

Modified Paths:
--------------
    trunk/autosparql/pom.xml
    trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java
    trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java
    trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java

Modified: trunk/autosparql/pom.xml
===================================================================
--- trunk/autosparql/pom.xml 2011-02-26 12:27:36 UTC (rev 2666)
+++ trunk/autosparql/pom.xml 2011-02-26 13:18:11 UTC (rev 2667)
@@ -23,6 +23,10 @@
 <url>http://maven.mse.jhu.edu/m2repository/</url>
 </repository>
 <repository>
+<id>opennlp.sf.net</id>
+<url>http://opennlp.sourceforge.net/maven2</url>
+</repository>
+ <repository>
 <id>Sesame</id>
 <url>http://repo.aduna-software.org/maven2/releases/</url>
 </repository>
@@ -44,6 +48,11 @@
 <artifactId>bliki-core</artifactId>
 <version>3.0.16</version>
 </dependency>
+ <dependency>
+<groupId>opennlp</groupId>
+<artifactId>tools</artifactId>
+<version>1.5.0</version>
+</dependency>
 <!-- GWT dependencies (from central repo) -->
 <dependency>

Modified: trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java
===================================================================
--- trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java 2011-02-26 12:27:36 UTC (rev 2666)
+++ trunk/autosparql/src/main/java/org/dllearner/autosparql/evaluation/EvaluationWithNLQueriesScript.java 2011-02-26 13:18:11 UTC (rev 2667)
@@ -14,6 +14,7 @@
 import java.net.URLConnection;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Hashtable;
@@ -68,6 +69,8 @@
 import com.hp.hpl.jena.query.ResultSet;
+import de.simba.ner.WordnetQuery;
+
 import edu.stanford.nlp.ling.HasWord;
 import edu.stanford.nlp.ling.Sentence;
 import edu.stanford.nlp.ling.TaggedWord;
@@ -81,6 +84,7 @@
 private static final String QUERY_ANSWERS_FILE_PATH = "evaluation/dbpedia-train_cleaned.xml";
 private static final String SCHEMA_FILE_PATH = "evaluation/dbpedia_schema.owl";
 private static final String LUCENE_INDEX_DIRECTORY = "/opt/autosparql/index";
+ private static final String WORDNET_DICTIONARY = "src/main/resources/de/simba/ner/dictionary";
 private static final SparqlEndpoint ENDPOINT = SparqlEndpoint.getEndpointDBpediaLiveAKSW();
 private static final int NR_OF_POS_START_EXAMPLES_COUNT = 3;
@@ -101,6 +105,7 @@
 private DBpediaSchemaIndex schemaIndex;
 private LuceneSearch luceneSearch;
+ private WordnetQuery wordNet;
 private QuestionProcessor qProcessor = new QuestionProcessor();
@@ -118,6 +123,7 @@
 Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()), null, null, predicateFilters), selectCache, constructCache);
 schemaIndex = new DBpediaSchemaIndex(SCHEMA_FILE_PATH);
 luceneSearch = new LuceneSearch(LUCENE_INDEX_DIRECTORY);
+ wordNet = new WordnetQuery(WORDNET_DICTIONARY);
 } catch (MalformedURLException e) {
 e.printStackTrace();
 }
@@ -241,6 +247,14 @@
 return elements;
 }
+ private Set<String> getSynonyms(Collection<String> words){
+ Set<String> synonyms = new HashSet<String>();
+ for(String w : words){
+ synonyms.addAll(wordNet.getSynset(w));
+ }
+ return synonyms;
+ }
+
 private Set<String> getResourcesBySPARQLQuery(String query){
 logger.info("Sending query...");
 long startTime = System.currentTimeMillis();
@@ -271,6 +285,11 @@
 //preprocess question to extract only relevant words and set them as filter for statements
 relevantWords = getRelevantWords(question);
 exFinder.setStatementFilter(new QuestionBasedStatementFilter(new HashSet<String>(relevantWords)));
+ //expand with synonyms
+ if(USE_SYNONYMS){
+ relevantWords.addAll(getSynonyms(relevantWords));
+ logger.info("Extended with synonyms: " + relevantWords);
+ }
 question = "";
 for(String word : relevantWords){
 question += " " + word;

Modified: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java
===================================================================
--- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java 2011-02-26 12:27:36 UTC (rev 2666)
+++ trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/impl/QueryTreeFactoryImpl.java 2011-02-26 13:18:11 UTC (rev 2667)
@@ -99,7 +99,8 @@
 Statement st;
 SortedSet<Statement> statements;
- for(Iterator<Statement> it = model.listStatements(statementFilter); it.hasNext();){
+ Iterator<Statement> it = model.listStatements(statementFilter);
+ while(it.hasNext()){
 st = it.next();
 statements = resource2Statements.get(st.getSubject().toString());
 if(statements == null){

Modified: trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java
===================================================================
--- trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java 2011-02-26 12:27:36 UTC (rev 2666)
+++ trunk/sparql-query-generator/src/main/java/org/dllearner/sparqlquerygenerator/util/QuestionBasedStatementFilter.java 2011-02-26 13:18:11 UTC (rev 2667)
@@ -30,7 +30,7 @@
 String object = null;
 if(s.getObject().isURIResource()){
 object = s.getObject().asResource().getURI();
- object = object.substring(object.lastIndexOf("/"));
+ object = object.substring(object.lastIndexOf("/")+1);
 } else if(s.getObject().isLiteral()){
 object = s.getObject().asLiteral().getLexicalForm();
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.