From: <ki...@us...> - 2012-11-19 12:06:06
|
Revision: 3876 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3876&view=rev Author: kirdie Date: 2012-11-19 12:05:59 +0000 (Mon, 19 Nov 2012) Log Message: ----------- SPARQL indices now ignore case. SPARQLClassesIndex now treats a resource as a class if it is an instance of owl:Class or has at least one instance (some classes are not modelled as owl:Class). Added a test class for the SPARQLClassesIndex. SPARQLTemplateBasedLearner2 now uses the SPARQLClassesIndex for classes instead of the normal SPARQL index. The benchmark results should now be better because this applies to many of the 'near' or 'close to' questions. Modified Paths: -------------- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java Added Paths: ----------- branches/hmm/components-ext/src/test/java/org/dllearner/common/ branches/hmm/components-ext/src/test/java/org/dllearner/common/index/ branches/hmm/components-ext/src/test/java/org/dllearner/common/index/SPARQLClassesIndexTest.java Modified: branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-19 11:15:47 UTC (rev 3875) +++ branches/hmm/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-11-19 12:05:59 UTC (rev 3876) @@ -6,14 +6,12 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import 
java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; @@ -55,6 +53,7 @@ import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; import org.dllearner.common.index.MappingBasedIndex; +import org.dllearner.common.index.SPARQLClassesIndex; import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; import org.dllearner.common.index.SPARQLIndex; import org.dllearner.common.index.SPARQLObjectPropertiesIndex; @@ -65,10 +64,6 @@ import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblem; import org.dllearner.core.SparqlQueryLearningAlgorithm; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.core.owl.ObjectProperty; -import org.dllearner.core.owl.Thing; import org.dllearner.kb.LocalModelBasedSparqlEndpointKS; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; @@ -76,7 +71,6 @@ import org.dllearner.kb.sparql.SparqlQuery; import org.dllearner.reasoning.SPARQLReasoner; import org.ini4j.Options; -import org.openjena.atlas.logging.Log; import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.util.SimpleIRIShortFormProvider; import com.hp.hpl.jena.ontology.OntModelSpec; @@ -255,7 +249,7 @@ public SPARQLTemplateBasedLearner2(Model model, MappingBasedIndex mappingBasedIndex, PartOfSpeechTagger posTagger) { - this(model, new SPARQLIndex(model),new SPARQLIndex(model),new SPARQLIndex(model),posTagger); + this(model, new SPARQLIndex(model),new SPARQLClassesIndex(model),new SPARQLIndex(model),posTagger); setMappingIndex(mappingBasedIndex); } @@ -674,7 +668,9 @@ // get candidates for slot if(!slot2Allocations.containsKey(slot)) { - slot2Allocations.put(slot,new SlotProcessor(slot).computeAllocations(slot)); + 
SortedSet<Allocation> allocations = new SlotProcessor(slot).computeAllocations(slot); + logger.info("allocations for slot "+slot+": "+allocations); + slot2Allocations.put(slot,allocations); } } } @@ -717,6 +713,7 @@ { try { SortedSet<Allocation> result = future.get(); + logger.debug("allocations: "+result); slot2Allocations.put(futureToSlot.get(future), result); } catch (InterruptedException e) {e.printStackTrace();} catch (ExecutionException e) {e.printStackTrace();throw new RuntimeException(e);} } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java =================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java 2012-11-19 11:15:47 UTC (rev 3875) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLClassesIndex.java 2012-11-19 12:05:59 UTC (rev 3876) @@ -2,8 +2,8 @@ import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; - import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.vocabulary.OWL; public class SPARQLClassesIndex extends SPARQLIndex{ @@ -26,13 +26,13 @@ super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + "?s a ?uri.\n" + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s'))}\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; - super.queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + - "?s a ?uri.\n" + + super.queryWithLabelTemplate = "SELECT DISTINCT ?uri ?label WHERE {\n" + + "{?uri a <"+ OWL.Class.getURI() + ">.} UNION {?s a ?uri.}\n"+ "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s'))}\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; } } Modified: branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 
=================================================================== --- branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-11-19 11:15:47 UTC (rev 3875) +++ branches/hmm/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-11-19 12:05:59 UTC (rev 3876) @@ -30,13 +30,13 @@ protected String queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + "?uri a ?type.\n" + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s'))}\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; protected String queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + "?uri a ?type.\n" + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s'))}\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; @@ -108,8 +108,7 @@ public IndexResultSet getResourcesWithScores(String searchTerm, int limit, int offset) { IndexResultSet irs = new IndexResultSet(); - String query = String.format(queryWithLabelTemplate, searchTerm, limit, offset); - + String query = String.format(queryWithLabelTemplate, searchTerm, limit, offset); ResultSet rs = executeSelect(query); QuerySolution qs; Modified: branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-19 11:15:47 UTC (rev 3875) +++ branches/hmm/components-ext/src/test/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner3Test.java 2012-11-19 12:05:59 UTC (rev 3876) @@ -132,8 +132,8 @@ protected static final int QUESTION_OFFSET = 0; protected static final int QUESTION_LIMIT = Integer.MAX_VALUE; - protected static final boolean WHITELIST_ONLY = false; - protected static final Set<Integer> WHITELIST = 
Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {4}))); + protected static final boolean WHITELIST_ONLY = true; + protected static final Set<Integer> WHITELIST = Collections.unmodifiableSet(new HashSet<Integer>(Arrays.asList(new Integer[] {24}))); protected static final boolean GENERATE_HTML_ONLY = false; protected static final int MAX_THREADS = 4; @@ -364,13 +364,15 @@ /**more will be left out of the xml file */ List<String> questions = new LinkedList<String>(); BufferedReader in = new BufferedReader((new InputStreamReader(getClass().getClassLoader().getResourceAsStream("tbsl/oxford_eval_queries_parsed.txt")))); - int count=0; + int count=-1; for(String line;count<(QUESTION_LIMIT+QUESTION_OFFSET)&&(line=in.readLine())!=null;) { + count++; + if(WHITELIST_ONLY&&!WHITELIST.contains(Integer.valueOf(count))) {continue;} logger.info(count+": "+line); - if(count<QUESTION_OFFSET) {count++;continue;} + if(count<QUESTION_OFFSET) {continue;} String question = line.replace("question: ", "").trim(); - if(!line.trim().isEmpty()) {questions.add(question);count++;} + if(!line.trim().isEmpty()) {questions.add(question);} } in.close(); Model model = loadOxfordModel(); Added: branches/hmm/components-ext/src/test/java/org/dllearner/common/index/SPARQLClassesIndexTest.java =================================================================== --- branches/hmm/components-ext/src/test/java/org/dllearner/common/index/SPARQLClassesIndexTest.java (rev 0) +++ branches/hmm/components-ext/src/test/java/org/dllearner/common/index/SPARQLClassesIndexTest.java 2012-11-19 12:05:59 UTC (rev 3876) @@ -0,0 +1,24 @@ +/** **/ +package org.dllearner.common.index; + +import static org.junit.Assert.*; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner3; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner3Test; +import org.junit.Test; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; + +/** @author 
konrad + * */ +public class SPARQLClassesIndexTest +{ + + @Test public void test() + { + Model m = ModelFactory.createDefaultModel(); + m.read(SPARQLTemplateBasedLearner3Test.class.getClassLoader().getResourceAsStream("oxford/schema/LGD-Dump-110406-Ontology.nt"),null, "TURTLE"); + SPARQLClassesIndex index = new SPARQLClassesIndex(m); + assertFalse(index.getResourcesWithScores("pharmacy").getItems().isEmpty()); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |