From: <lor...@us...> - 2012-05-14 13:24:23
|
Revision: 3712 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3712&view=rev Author: lorenz_b Date: 2012-05-14 13:24:12 +0000 (Mon, 14 May 2012) Log Message: ----------- Started integration of classes for non-SOLR indexes to be more flexible in QTL and TBSL. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/common/ trunk/components-ext/src/main/java/org/dllearner/common/index/ trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java 2012-05-14 04:54:18 UTC (rev 3711) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -784,7 +784,7 @@ Literal l; while(iter.hasNext()){ l = iter.next(); - if(l.getDatatype() == XSDDatatype.XSDinteger){ + if(l.getDatatype() == XSDDatatype.XSDinteger || l.getDatatype() == XSDDatatype.XSDint){ min = (l.getInt() < min.getInt()) ? l : min; } else if(l.getDatatype() == XSDDatatype.XSDdouble){ min = (l.getDouble() < min.getDouble()) ? l : min; @@ -801,7 +801,7 @@ Literal l; while(iter.hasNext()){ l = iter.next(); - if(l.getDatatype() == XSDDatatype.XSDinteger){ + if(l.getDatatype() == XSDDatatype.XSDinteger || l.getDatatype() == XSDDatatype.XSDint){ max = (l.getInt() > max.getInt()) ? l : max; } else if(l.getDatatype() == XSDDatatype.XSDdouble){ max = (l.getDouble() > max.getDouble()) ? l : max; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java 2012-05-14 04:54:18 UTC (rev 3711) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -245,7 +245,10 @@ // subTree = new QueryTreeImpl<String>(lit.toString()); subTree.setId(nodeId++); subTree.setLiteralNode(true); - if(lit.getDatatype() == XSDDatatype.XSDinteger || lit.getDatatype() == XSDDatatype.XSDdouble || lit.getDatatype() == XSDDatatype.XSDdate){ + if(lit.getDatatype() == XSDDatatype.XSDinteger + || lit.getDatatype() == XSDDatatype.XSDdouble + || lit.getDatatype() == XSDDatatype.XSDdate + || lit.getDatatype() == XSDDatatype.XSDint){ subTree.addLiteral(lit); } tree.addChild(subTree, st.getPredicate().toString()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java 2012-05-14 04:54:18 UTC (rev 3711) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -35,9 +35,11 @@ import com.hp.hpl.jena.graph.Node; import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.sparql.expr.E_Equals; import com.hp.hpl.jena.sparql.expr.E_LogicalNot; import com.hp.hpl.jena.sparql.expr.ExprVar; @@ -62,6 +64,7 @@ private ExtractionDBCache selectCache; private SparqlEndpoint endpoint; + private Model model; private String query; private int limit; @@ -94,6 +97,12 @@ noSequences = new ArrayList<List<QueryTreeChange>>(); } + public NBR(Model model){ + this.model = model; + + noSequences = new ArrayList<List<QueryTreeChange>>(); + } + public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds){ this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds; } @@ -1385,16 +1394,22 @@ } private ResultSet executeSelectQuery(String query){ - ExtendedQueryEngineHTTP queryExecution = new ExtendedQueryEngineHTTP(endpoint.getURL().toString(), query); - queryExecution.setTimeOut(maxExecutionTimeInSeconds * 1000); - for (String dgu : endpoint.getDefaultGraphURIs()) { - queryExecution.addDefaultGraph(dgu); - } - for (String ngu : endpoint.getNamedGraphURIs()) { - queryExecution.addNamedGraph(ngu); - } - ResultSet resultset = queryExecution.execSelect(); - return resultset; + ResultSet rs; + if(model == null){ + ExtendedQueryEngineHTTP queryExecution = new ExtendedQueryEngineHTTP(endpoint.getURL().toString(), query); + queryExecution.setTimeOut(maxExecutionTimeInSeconds * 1000); + for (String dgu : endpoint.getDefaultGraphURIs()) { + queryExecution.addDefaultGraph(dgu); + } + for (String ngu : endpoint.getNamedGraphURIs()) { + queryExecution.addNamedGraph(ngu); + } + rs = queryExecution.execSelect(); + } else { + rs = QueryExecutionFactory.create(query, model).execSelect(); + } + + return rs; } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2012-05-14 04:54:18 UTC (rev 3711) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -11,6 +11,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -18,6 +19,10 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import org.apache.log4j.Logger; import org.dllearner.algorithm.qtl.util.ModelGenerator; @@ -37,7 +42,6 @@ import org.dllearner.algorithm.tbsl.sparql.RatedQuery; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -60,14 +64,36 @@ import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; +import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.core.Var; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.hp.hpl.jena.sparql.syntax.Element; +import com.hp.hpl.jena.sparql.syntax.ElementAssign; +import com.hp.hpl.jena.sparql.syntax.ElementBind; +import com.hp.hpl.jena.sparql.syntax.ElementDataset; +import com.hp.hpl.jena.sparql.syntax.ElementExists; +import com.hp.hpl.jena.sparql.syntax.ElementFetch; +import com.hp.hpl.jena.sparql.syntax.ElementFilter; +import com.hp.hpl.jena.sparql.syntax.ElementGroup; +import com.hp.hpl.jena.sparql.syntax.ElementMinus; +import com.hp.hpl.jena.sparql.syntax.ElementNamedGraph; +import com.hp.hpl.jena.sparql.syntax.ElementNotExists; +import com.hp.hpl.jena.sparql.syntax.ElementOptional; +import com.hp.hpl.jena.sparql.syntax.ElementPathBlock; +import com.hp.hpl.jena.sparql.syntax.ElementService; +import com.hp.hpl.jena.sparql.syntax.ElementSubQuery; +import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock; +import com.hp.hpl.jena.sparql.syntax.ElementUnion; +import com.hp.hpl.jena.sparql.syntax.ElementVisitor; import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDFS; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -667,10 +693,29 @@ logger.info("Processing template:\n" + t.toString()); allocations = new TreeSet<Allocation>(); - for(Slot slot : t.getSlots()){ + ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); + List<Future<SortedSet<Allocation>>> list = new ArrayList<Future<SortedSet<Allocation>>>(); + + for (Slot slot : t.getSlots()) { + Callable<SortedSet<Allocation>> worker = new SlotProcessor(slot); + Future<SortedSet<Allocation>> submit = executor.submit(worker); + list.add(submit); + } + +// for (Future<SortedSet<Allocation>> future : list) { +// try { +// future.get(); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } catch (ExecutionException e) { +// e.printStackTrace(); +// } +// } + + /*for(Slot slot : t.getSlots()){ allocations = slot2Allocations2.get(slot); if(allocations == null){ - allocations = computeAllocations(slot, 20); + allocations = computeAllocations(slot, 10); slot2Allocations2.put(slot, allocations); } slot2Allocations.put(slot, allocations); @@ -687,7 +732,7 @@ } } allocations.addAll(tmp); - } + }*/ Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); @@ -868,7 +913,7 @@ if(slot.getSlotType() == SlotType.RESOURCE){ rs = index.getResourcesWithScores(word, 250); } else { - rs = index.getResourcesWithScores(word, 30); + rs = index.getResourcesWithScores(word, 20); } @@ -1514,7 +1559,7 @@ * @throws InvalidFileFormatException */ public static void main(String[] args) throws NoTemplateFoundException, InvalidFileFormatException, FileNotFoundException, IOException { -// Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); + // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); // String question = "Who/WP was/VBD the/DT wife/NN of/IN president/NN Lincoln/NNP"; @@ -1578,7 +1623,22 @@ // TODO Auto-generated method stub } + + class SlotProcessor implements Callable<SortedSet<Allocation>>{ + + private Slot slot; + + public SlotProcessor(Slot slot) { + this.slot = slot; + } + @Override + public SortedSet<Allocation> call() throws Exception { + return computeAllocations(slot); + } + + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2012-05-14 04:54:18 UTC (rev 3711) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -165,7 +165,13 @@ lastTotalHits = (int) docList.getNumFound(); for(SolrDocument d : docList){ - items.add(new SolrQueryResultItem((String) d.get(labelField), (String) d.get("uri"), (Float) d.get("score"))); + float score = 0; + if(d.get("score") instanceof ArrayList){ + score = ((Float)((ArrayList)d.get("score")).get(1)); + } else { + score = (Float) d.get("score"); + } + items.add(new SolrQueryResultItem((String) d.get(labelField), (String) d.get("uri"), score)); } } catch (SolrServerException e) { e.printStackTrace(); @@ -183,4 +189,8 @@ this.hitsPerPage = hitsPerPage; } + public static void main(String[] args) { + new SolrSearch("http://139.18.2.173:8080/apache-solr-3.3.0/dbpedia_classes").getResources("Leipzig"); + } + } Added: trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -0,0 +1,9 @@ +package org.dllearner.common.index; + +import java.util.List; + +public interface Index { + List<String> getResources(String queryString); + List<String> getResources(String queryString, int limit); + List<String> getResources(String queryString, int limit, int offset); +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -0,0 +1,61 @@ +package org.dllearner.common.index; + +import java.net.MalformedURLException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.BinaryRequestWriter; +import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.params.ModifiableSolrParams; + +public class SOLRIndex implements Index{ + +private CommonsHttpSolrServer server; + + private static final int DEFAULT_LIMIT = 10; + private static final int DEFAULT_OFFSET = 0; + + public SOLRIndex(String solrServerURL){ + try { + server = new CommonsHttpSolrServer(solrServerURL); + server.setRequestWriter(new BinaryRequestWriter()); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + } + + @Override + public List<String> getResources(String queryString) { + return getResources(queryString, DEFAULT_LIMIT); + } + + @Override + public List<String> getResources(String queryString, int limit) { + return getResources(queryString, limit, DEFAULT_OFFSET); + } + + @Override + public List<String> getResources(String queryString, int limit, int offset) { + List<String> resources = new ArrayList<String>(); + QueryResponse response; + try { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("q", queryString); + params.set("rows", limit); + params.set("start", offset); + response = server.query(params); + SolrDocumentList docList = response.getResults(); + for(SolrDocument d : docList){ + resources.add((String) d.get("uri")); + } + } catch (SolrServerException e) { + e.printStackTrace(); + } + return resources; + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -0,0 +1,90 @@ +package org.dllearner.common.index; + +import java.util.ArrayList; +import java.util.List; + +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; + +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; + +public class SPARQLIndex implements Index{ + + private static final int DEFAULT_LIMIT = 10; + private static final int DEFAULT_OFFSET = 0; + + private SparqlEndpoint endpoint; + private ExtractionDBCache cache; + + private Model model; + + protected String queryTemplate = "SELECT DISTINCT(?uri) WHERE {\n" + + "?uri a ?type.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + + "FILTER(REGEX(STR(?label), '%s'))}\n" + + "LIMIT %d OFFSET %d"; + + + public SPARQLIndex(SparqlEndpoint endpoint) { + this(endpoint, null); + } + + public SPARQLIndex(Model model) { + this.model = model; + } + + public SPARQLIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + this.endpoint = endpoint; + this.cache = cache; + } + + @Override + public List<String> getResources(String queryString) { + return getResources(queryString, DEFAULT_LIMIT); + } + + @Override + public List<String> getResources(String queryString, int limit) { + return getResources(queryString, limit, DEFAULT_OFFSET); + } + + @Override + public List<String> getResources(String queryString, int limit, int offset) { + List<String> resources = new ArrayList<String>(); + + String query = buildResourcesQuery(queryString, limit, offset); + + ResultSet rs; + if(model == null){ + if(cache == null){ + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), queryString); + qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + rs = qe.execSelect(); + } else { + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + } + } else { + rs = QueryExecutionFactory.create(queryString, model).execSelect(); + } + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + RDFNode uriNode = qs.get("uri"); + if(uriNode.isURIResource()){ + resources.add(uriNode.asResource().getURI()); + } + } + return resources; + } + + protected String buildResourcesQuery(String searchTerm, int limit, int offset){ + return String.format(queryTemplate, searchTerm, limit, offset); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-01 09:28:08
|
Revision: 3724 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3724&view=rev Author: lorenz_b Date: 2012-06-01 09:27:59 +0000 (Fri, 01 Jun 2012) Log Message: ----------- Added additional constructor. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-05-22 00:31:06 UTC (rev 3723) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-01 09:27:59 UTC (rev 3724) @@ -140,12 +140,17 @@ } public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(endpoint, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(SparqlEndpoint endpoint, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ this.endpoint = endpoint; this.resourcesIndex = resourcesIndex; this.classesIndex = classesIndex; this.propertiesIndex = propertiesIndex; this.posTagger = posTagger; this.wordNet = wordNet; + this.cache = cache; setOptions(options); } @@ -163,12 +168,17 @@ } public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options){ + this(model, resourcesIndex, classesIndex, propertiesIndex, posTagger, wordNet, options, new ExtractionDBCache("cache")); + } + + public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex, PartOfSpeechTagger posTagger, WordNet wordNet, Options options, ExtractionDBCache cache){ this.model = model; this.resourcesIndex = resourcesIndex; this.classesIndex = classesIndex; this.propertiesIndex = propertiesIndex; this.posTagger = posTagger; this.wordNet = wordNet; + this.cache = cache; setOptions(options); } @@ -1406,31 +1416,6 @@ return rs; } - private List<String> getResultFromRemoteEndpoint(String query){ - List<String> resources = new ArrayList<String>(); - try { - String queryString = query; - if(!query.contains("LIMIT") && !query.contains("ASK")){ - queryString = query + " LIMIT 10"; - } - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, queryString)); - QuerySolution qs; - String projectionVar; - while(rs.hasNext()){ - qs = rs.next(); - projectionVar = qs.varNames().next(); - if(qs.get(projectionVar).isLiteral()){ - resources.add(qs.get(projectionVar).asLiteral().getLexicalForm()); - } else if(qs.get(projectionVar).isURIResource()){ - resources.add(qs.get(projectionVar).asResource().getURI()); - } - - } - } catch (Exception e) {e.printStackTrace(); - logger.error("Query execution failed.", e); - } - return resources; - } public int getLearnedPosition() { if(learnedPos >= 0){ @@ -1438,15 +1423,7 @@ } return learnedPos; } - - - - - - - - @Override public void start() { } Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-05-22 00:31:06 UTC (rev 3723) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-01 09:27:59 UTC (rev 3724) @@ -72,7 +72,7 @@ QuerySolution qs; while(rs.hasNext()){ - qs = rs.next();System.out.println(qs); + qs = rs.next(); RDFNode uriNode = qs.get("uri"); if(uriNode.isURIResource()){ resources.add(uriNode.asResource().getURI()); Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-05-22 00:31:06 UTC (rev 3723) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-01 09:27:59 UTC (rev 3724) @@ -15,8 +15,9 @@ "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; - super.queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + - "?s ?uri ?o.\n" + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + +// "?s ?uri ?o.\n" + + "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; @@ -31,8 +32,9 @@ "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; - super.queryWithLabelTemplate = "SELECT DISTINCT * WHERE {\n" + - "?s ?uri ?o.\n" + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + +// "?s ?uri ?o.\n" + + "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-12 13:19:15
|
Revision: 3734 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3734&view=rev Author: lorenz_b Date: 2012-06-12 13:19:03 +0000 (Tue, 12 Jun 2012) Log Message: ----------- Some modification to deal with new template generation. Added separate index classes for object and datatype properties. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLDatatypePropertiesIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLObjectPropertiesIndex.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-11 19:01:59 UTC (rev 3733) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-12 13:19:03 UTC (rev 3734) @@ -4,7 +4,6 @@ import java.io.IOException; import java.net.URL; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -32,8 +31,6 @@ import org.dllearner.algorithm.tbsl.nlp.WordNet; import org.dllearner.algorithm.tbsl.sparql.Allocation; import org.dllearner.algorithm.tbsl.sparql.Query; -import org.dllearner.algorithm.tbsl.sparql.RatedQuery; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; @@ -43,12 +40,13 @@ import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; -import org.dllearner.common.index.IndexResultItemComparator; import org.dllearner.common.index.IndexResultSet; import org.dllearner.common.index.SOLRIndex; +import org.dllearner.common.index.SPARQLDatatypePropertiesIndex; +import org.dllearner.common.index.SPARQLObjectPropertiesIndex; +import org.dllearner.common.index.SPARQLPropertiesIndex; import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblem; -import org.dllearner.core.Oracle; import org.dllearner.core.SparqlQueryLearningAlgorithm; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; @@ -71,22 +69,14 @@ public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm{ - //for debugging - List<String> exclusions = Arrays.asList(new String[]{"http://dbpedia.org/ontology/GeopoliticalOrganisation", - "http://dbpedia.org/ontology/Non-ProfitOrganisation"}); - enum Ranking{ LUCENE, SIMILARITY, NONE } - private static final String OPTIONS_FILE = SPARQLTemplateBasedLearner2.class.getClassLoader().getResource("tbsl/tbsl.properties").getPath(); private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); private Monitor mon = MonitorFactory.getTimeMonitor("tbsl"); - private static final int MAX_URIS_PER_SLOT = 10; - - private Ranking ranking; private boolean useRemoteEndpointValidation; private boolean stopIfQueryResultNotEmpty; private int maxTestedQueriesPerTemplate = 50; @@ -103,6 +93,9 @@ private Index classesIndex; private Index propertiesIndex; + private Index datatypePropertiesIndex; + private Index objectPropertiesIndex; + private Templator templateGenerator; private Lemmatizer lemmatizer; private PartOfSpeechTagger posTagger; @@ -111,12 +104,6 @@ private String question; private int learnedPos = -1; - private Oracle oracle; - - private Map<String, IndexResultSet> resourcesURICache; - private Map<String, IndexResultSet> classesURICache; - private Map<String, IndexResultSet> propertiesURICache; - private Map<String, Object> learnedSPARQLQueries; private Set<Template> templates; private Collection<Query> sparqlQueryCandidates; @@ -153,6 +140,14 @@ this.cache = cache; setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } } public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ @@ -181,6 +176,14 @@ this.cache = cache; setOptions(options); + + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } } @Override @@ -202,7 +205,6 @@ maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); - ranking = Ranking.valueOf(options.get("learning.ranking", "similarity").toUpperCase()); useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? true : false; stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); @@ -244,15 +246,8 @@ this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate; } - public void setRanking(Ranking ranking) { - this.ranking = ranking; - } - private void reset(){ learnedSPARQLQueries = new HashMap<String, Object>(); - resourcesURICache = new HashMap<String, IndexResultSet>(); - classesURICache = new HashMap<String, IndexResultSet>(); - propertiesURICache = new HashMap<String, IndexResultSet>(); template2Queries = new HashMap<Template, Collection<? extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); } @@ -273,10 +268,6 @@ logger.info(t); } -// //generate SPARQL query candidates, but select only a fixed number per template -// template2Queries = getSPARQLQueryCandidates(templates, ranking); -// sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); - //get the weighted query candidates generatedQueries = getWeightedSPARQLQueries(templates); sparqlQueryCandidates = new ArrayList<Query>(); @@ -315,38 +306,6 @@ return topNQueries; } - public List<String> getSPARQLQueries() throws NoTemplateFoundException{ - logger.info("Generating SPARQL query templates..."); - mon.start(); - templates = templateGenerator.buildTemplates(question); - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - if(templates.isEmpty()){ - throw new NoTemplateFoundException(); - } - logger.info("Templates:"); - for(Template t : templates){ - logger.info(t); - } - - //generate SPARQL query candidates - logger.info("Generating SPARQL query candidates..."); - mon.start(); - Map<Template, Collection<? extends Query>> template2Queries = getSPARQLQueryCandidates(templates, ranking); - sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); - - - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - - List<String> queries = new ArrayList<String>(); - for(Query q : sparqlQueryCandidates){ - queries.add(q.toString()); - } - - return queries; - } - public Set<Template> getTemplates(){ return templates; } @@ -368,228 +327,6 @@ return slot2URI; } - - private Map<Template,Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates, Ranking ranking){ - switch(ranking){ - case LUCENE: return getSPARQLQueryCandidatesSortedByLucene(templates); - case SIMILARITY: return getSPARQLQueryCandidatesSortedBySimilarity(templates); - case NONE: return getSPARQLQueryCandidates(templates); - default: return null; - } - } - - /* - private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - double alpha = 0.8; - double beta = 1 - alpha; - Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); - - Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - - Set<Allocation> allAllocations; - for(Template t : templates){ - allAllocations = new HashSet<Allocation>(); - - for(Slot slot : t.getSlots()){ - Set<Allocation> allocations = computeAllocation(slot); - allAllocations.addAll(allocations); - slot2Allocations.put(slot, allocations); - } - - int min = Integer.MAX_VALUE; - int max = Integer.MIN_VALUE; - for(Allocation a : allAllocations){ - if(a.getInDegree() < min){ - min = a.getInDegree(); - } - if(a.getInDegree() > max){ - max = a.getInDegree(); - } - } - for(Allocation a : allAllocations){ - double prominence = a.getInDegree()/(max-min); - a.setProminence(prominence); - - double score = alpha * a.getSimilarity() + beta * a.getProminence(); - a.setScore(score); - - } -// System.out.println(allAllocations); - - Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); - Query cleanQuery = t.getQuery(); - queries.add(new WeightedQuery(cleanQuery)); - - Set<WeightedQuery> tmp = new HashSet<WeightedQuery>(); - List<Slot> sortedSlots = new ArrayList<Slot>(); - Set<Slot> classSlots = new HashSet<Slot>(); - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.CLASS){ - sortedSlots.add(slot); - classSlots.add(slot); - } - } - for(Slot slot : t.getSlots()){ - if(!sortedSlots.contains(slot)){ - sortedSlots.add(slot); - } - } - for(Slot slot : sortedSlots){ - if(!slot2Allocations.get(slot).isEmpty()){ - for(Allocation a : slot2Allocations.get(slot)){ - for(WeightedQuery query : queries){ - //check if the query is possible - if(slot.getSlotType() == SlotType.SYMPROPERTY){ - Query reversedQuery = new Query(query.getQuery()); - reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); - - boolean drop = false; - for(SPARQL_Triple triple : reversedQuery.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : reversedQuery.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("RANGES: " + ranges); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + reversedQuery.toString()); - } - } - } - for(SPARQL_Triple typeTriple : reversedQuery.getRDFTypeTriples(subjectVar)){ -// System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); -// System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("DOMAINS: " + domains); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + reversedQuery.toString()); - } - } - } - } - - if(!drop){ - reversedQuery.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(reversedQuery); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - tmp.add(w); - } - - } - Query q = new Query(query.getQuery()); - - boolean drop = false; - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("RANGES: " + ranges); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + q.toString()); - } - } - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ -// System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); -// System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("DOMAINS: " + domains); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + q.toString()); - } - } - } - } - } - - - if(!drop){ - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - tmp.add(w); - } - - - } - } - queries.clear(); - queries.addAll(tmp);System.out.println(tmp); - tmp.clear(); - } - - } - for(WeightedQuery q : queries){ - q.setScore(q.getScore()/t.getSlots().size()); - } - allQueries.addAll(queries); - List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); - qList.add(wQ.getQuery()); - } - template2Queries.put(t, qList); - } - return allQueries; - } - */ - private void normProminenceValues(Set<Allocation> allocations){ double min = 0; double max = 0; @@ -621,7 +358,7 @@ private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ logger.info("Generating SPARQL query candidates..."); - Map<Slot, Set<Allocation>> slot2Allocations2 = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { + Map<Slot, Set<Allocation>> slot2Allocations = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { @Override public int compare(Slot o1, Slot o2) { @@ -634,8 +371,6 @@ }); - Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); - Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); Set<Allocation> allocations; @@ -654,7 +389,7 @@ Callable<Map<Slot, SortedSet<Allocation>>> worker = new SlotProcessor(slot); Future<Map<Slot, SortedSet<Allocation>>> submit = executor.submit(worker); list.add(submit); - } + } } for (Future<Map<Slot, SortedSet<Allocation>>> future : list) { @@ -838,106 +573,6 @@ return allQueries; } -/* - * for(SPARQL_Triple triple : t.getQuery().getTriplesWithVar(slot.getAnchor())){System.out.println(triple); - for(SPARQL_Triple typeTriple : t.getQuery().getRDFTypeTriples(triple.getVariable().getName())){ - System.out.println(typeTriple); - for(Allocation a : allocations){ - Set<String> domains = getDomains(a.getUri()); - System.out.println(a); - System.out.println(domains); - for(Slot s : classSlots){ - if(s.getAnchor().equals(triple.getVariable().getName())){ - for(Allocation all : slot2Allocations.get(s)){ - if(!domains.contains(all.getUri())){ - System.out.println("DROP " + a); - } - } - } - } - } - - - } - */ - - private SortedSet<Allocation> computeAllocations(Slot slot){ - SortedSet<Allocation> allocations = new TreeSet<Allocation>(); - - Index index = getIndexBySlotType(slot); - - IndexResultSet rs; - for(String word : slot.getWords()){ - if(slot.getSlotType() == SlotType.RESOURCE){ - rs = index.getResourcesWithScores(word, 250); - } else { - if(slot.getSlotType() == SlotType.CLASS){ - word = PlingStemmer.stem(word); - } - rs = index.getResourcesWithScores(word, 20); - } - - - //debugging -// for(Iterator<SolrQueryResultItem> iter = rs.getItems().iterator();iter.hasNext();){ -// SolrQueryResultItem item = iter.next(); -// if(exclusions.contains(item.getUri())){ -// iter.remove(); -// } -// } - - for(IndexResultItem item : rs.getItems()){ - double similarity = Similarity.getSimilarity(word, item.getLabel()); - //get the labels of the redirects and compute the highest similarity - if(slot.getSlotType() == SlotType.RESOURCE){ - Set<String> labels = getRedirectLabels(item.getUri()); - for(String label : labels){ - double tmp = Similarity.getSimilarity(word, label); - if(tmp > similarity){ - similarity = tmp; - } - } - } - double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); - allocations.add(new Allocation(item.getUri(), prominence, similarity)); - } - - } - - normProminenceValues(allocations); - - computeScore(allocations); - return new TreeSet<Allocation>(allocations); - } - - private Set<Allocation> computeAllocations(Slot slot, int limit){ - logger.info("Computing allocations for " + slot); - SortedSet<Allocation> allocations = computeAllocations(slot); - - if(allocations.isEmpty()){ - logger.info("...done."); - return allocations; - } - - ArrayList<Allocation> l = new ArrayList<Allocation>(allocations); - Collections.sort(l, new Comparator<Allocation>() { - - @Override - public int compare(Allocation o1, Allocation o2) { - double dif = o1.getScore() - o2.getScore(); - if(dif < 0){ - return 1; - } else if(dif > 0){ - return -1; - } else { - return o1.getUri().compareTo(o2.getUri()); - } - } - }); - logger.info("...done."); - return new TreeSet<Allocation>(l.subList(0, Math.min(limit, allocations.size()))); - } - private Set<String> getRedirectLabels(String uri){ Set<String> labels = new HashSet<String>(); String query = String.format("SELECT ?label WHERE {?s <http://dbpedia.org/ontology/wikiPageRedirects> <%s>. ?s <%s> ?label.}", uri, RDFS.label.getURI()); @@ -956,7 +591,8 @@ String query = null; if(type == SlotType.CLASS){ query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY + || type == SlotType.DATATYPEPROPERTY || type == SlotType.OBJECTPROPERTY){ query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; @@ -979,233 +615,7 @@ } - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - Set<Query> queries = new HashSet<Query>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - for(Template template : templates){ - queries = new HashSet<Query>(); - queries.add(template.getQuery()); - template2Queries.put(template, queries); - for(Slot slot : template.getSlots()){ - Set<Query> tmp = new HashSet<Query>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(IndexResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(Query query : queries){ - Query newQuery = new Query(query); - newQuery.replaceVarWithURI(var, item.getUri()); - tmp.add(newQuery); - } - } - if(!words.isEmpty()){ - queries.clear(); - queries.addAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return template2Queries; - } - private Map<String, Float> getCandidateRatedSPARQLQueries(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - Map<String, Float> query2Score = new HashMap<String, Float>(); - - Query query; - for(Template template : templates){ - query = template.getQuery(); - query2Score.put(query.toString(), Float.valueOf(0)); - for(Slot slot : template.getSlots()){ - Map<String, Float> tmp = new HashMap<String, Float>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(IndexResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(Entry<String, Float> entry2 : query2Score.entrySet()){ - tmp.put(entry2.getKey().replace("?" + var, "<" + item.getUri() + ">"), item.getScore() + entry2.getValue()); - } - } - if(!words.isEmpty()){ - query2Score.clear(); - query2Score.putAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return query2Score; - } - - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidatesSortedByLucene(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - SortedSet<RatedQuery> ratedQueries = new TreeSet<RatedQuery>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - - Query query; - for(Template template : templates){ - query = template.getQuery(); - ratedQueries = new TreeSet<RatedQuery>(); - ratedQueries.add(new RatedQuery(query, 0)); - template2Queries.put(template, ratedQueries); - for(Slot slot : template.getSlots()){ - Set<RatedQuery> tmp = new HashSet<RatedQuery>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(IndexResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(RatedQuery rQ : ratedQueries){ - RatedQuery newRQ = new RatedQuery(rQ, rQ.getScore()); - newRQ.replaceVarWithURI(var, item.getUri()); - newRQ.setScore(newRQ.getScore() + item.getScore()); - tmp.add(newRQ); - } - } - if(!words.isEmpty()){ - ratedQueries.clear(); - ratedQueries.addAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return template2Queries; - } - - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidatesSortedBySimilarity(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - List<Query> queries = new ArrayList<Query>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - List<String> uriCandidates; - for(Template template : templates){ - queries = new ArrayList<Query>(); - queries.add(template.getQuery()); - template2Queries.put(template, queries); - for(Slot slot : template.getSlots()){ - List<Query> tmp = new ArrayList<Query>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - SPARQL_Prefix prefix = null; - uriCandidates = getCandidateURIsSortedBySimilarity(slot); - for(String uri : uriCandidates){ -// for(Entry<String, String> uri2prefix : prefixMap.entrySet()){ -// if(uri.startsWith(uri2prefix.getKey())){ -// prefix = new SPARQL_Prefix(uri2prefix.getValue(), uri2prefix.getKey()); -// uri = uri.replace(uri2prefix.getKey(), uri2prefix.getValue() + ":"); -// break; -// } -// } - for(Query query : queries){ - if(slot.getSlotType() == SlotType.SYMPROPERTY){ - Query reversedQuery = new Query(query); - reversedQuery.getTriplesWithVar(var).iterator().next().reverse(); -// logger.info("NORMAL QUERY:\n" + query.toString()); -// logger.info("REVERSED QUERY:\n" + reversedQuery.toString()); - if(prefix != null){ - reversedQuery.addPrefix(prefix); - reversedQuery.replaceVarWithPrefixedURI(var, uri); - } else { - reversedQuery.replaceVarWithURI(var, uri); - } - tmp.add(reversedQuery); - } - Query newQuery = new Query(query); - if(prefix != null){ - newQuery.addPrefix(prefix); - newQuery.replaceVarWithPrefixedURI(var, uri); - } else { - newQuery.replaceVarWithURI(var, uri); - } - tmp.add(newQuery); - } - prefix = null; - } - if(!words.isEmpty() && !uriCandidates.isEmpty()){ - queries.clear(); - queries.addAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return template2Queries; - } - - private IndexResultSet getCandidateURIs(Slot slot, int limit){ - logger.info("Generating candidate URIs for " + slot.getWords() + "..."); - mon.start(); - Index index = null; - if(slot.getSlotType() == SlotType.CLASS){ - index = classesIndex; - } else if(slot.getSlotType() == SlotType.PROPERTY){ - index = propertiesIndex; - } else if(slot.getSlotType() == SlotType.RESOURCE){ - index = resourcesIndex; - } - IndexResultSet rs = new IndexResultSet(); - for(String word : slot.getWords()){ - rs.add(index.getResourcesWithScores(word, limit)); - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return rs; - } - - private List<String> getCandidateURIsSortedBySimilarity(Slot slot){ - logger.info("Generating URI candidates for " + slot.getWords() + "..."); - mon.start(); - List<String> sortedURIs = new ArrayList<String>(); - //get the appropriate index based on slot type - Index index = getIndexBySlotType(slot); - //get the appropriate cache for URIs to avoid redundant queries to index - Map<String, IndexResultSet> uriCache = getCacheBySlotType(slot); - - SortedSet<IndexResultItem> tmp; - IndexResultSet rs; - - //prune the word list only when slot type is not RESOURCE - List<String> words; - if(slot.getSlotType() == SlotType.RESOURCE){ - words = slot.getWords(); - } else { -// words = pruneList(slot.getWords());//getLemmatizedWords(slot.getWords()); - words = pruneList(slot.getWords()); - } - - for(String word : words){ - tmp = new TreeSet<IndexResultItem>(new IndexResultItemComparator(word)); - rs = uriCache.get(word); - - if(rs == null){ - rs = index.getResourcesWithScores(word, 50); - uriCache.put(word, rs); - } - - tmp.addAll(rs.getItems()); - - for(IndexResultItem item : tmp){ - if(!sortedURIs.contains(item.getUri())){ - sortedURIs.add(item.getUri()); - } - if(sortedURIs.size() == MAX_URIS_PER_SLOT){ - break; - } - - } - tmp.clear(); - - } - - slot2URI.put(slot, sortedURIs); - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - logger.info("URIs: " + sortedURIs); - return sortedURIs; - } - private List<String> pruneList(List<String> words){ List<String> prunedList = new ArrayList<String>(); for(String w1 : words){ @@ -1257,65 +667,16 @@ index = classesIndex; } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ index = propertiesIndex; + } else if(type == SlotType.DATATYPEPROPERTY){ + index = datatypePropertiesIndex; + } else if(type == SlotType.OBJECTPROPERTY){ + index = objectPropertiesIndex; } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ index = resourcesIndex; } return index; } - private Map<String, IndexResultSet> getCacheBySlotType(Slot slot){ - Map<String, IndexResultSet> cache = null; - SlotType type = slot.getSlotType(); - if(type == SlotType.CLASS){ - cache = classesURICache; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - cache = propertiesURICache; - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - cache = resourcesURICache; - } - return cache; - } - - private IndexResultSet getCandidateURIsWithScore(Slot slot){ - logger.info("Generating candidate URIs for " + slot.getWords() + "..."); - mon.start(); - Index index = null; - Map<String, Float> uri2Score = new HashMap<String, Float>(); - boolean sorted = false; - if(slot.getSlotType() == SlotType.CLASS){ - index = classesIndex; - } else if(slot.getSlotType() == SlotType.PROPERTY){ - index = propertiesIndex; - } else if(slot.getSlotType() == SlotType.RESOURCE){ - index = resourcesIndex; - sorted = true; - } - IndexResultSet resultSet = new IndexResultSet(); - for(String word : slot.getWords()){ - resultSet.add(index.getResourcesWithScores("label:" + word)); - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - logger.info("Candidate URIs: " + uri2Score.keySet()); - return resultSet; - } - - private List<Query> getNBestQueryCandidatesForTemplates(Map<Template, Collection<? extends Query>> template2Queries){ - List<Query> queries = new ArrayList<Query>(); - for(Entry<Template, Collection<? extends Query>> entry : template2Queries.entrySet()){ - int max = Math.min(maxTestedQueriesPerTemplate, entry.getValue().size()); - int i = 0; - for(Query q : entry.getValue()){ - queries.add(q); - i++; - if(i == max){ - break; - } - } - } - return queries; - } - private void validateAgainstRemoteEndpoint(Collection<? extends Query> queries){ List<String> queryStrings = new ArrayList<String>(); SPARQL_QueryType queryType = SPARQL_QueryType.SELECT; @@ -1471,6 +832,48 @@ return result; } + private SortedSet<Allocation> computeAllocations(Slot slot){ + logger.info("Computing allocations for slot: " + slot); + SortedSet<Allocation> allocations = new TreeSet<Allocation>(); + + Index index = getIndexBySlotType(slot); + + IndexResultSet rs; + for(String word : slot.getWords()){ + if(slot.getSlotType() == SlotType.RESOURCE){ + rs = index.getResourcesWithScores(word, 50); + } else { + if(slot.getSlotType() == SlotType.CLASS){ + word = PlingStemmer.stem(word); + } + rs = index.getResourcesWithScores(word, 20); + } + + for(IndexResultItem item : rs.getItems()){ + double similarity = Similarity.getSimilarity(word, item.getLabel()); + //get the labels of the redirects and compute the highest similarity + if(slot.getSlotType() == SlotType.RESOURCE){ + Set<String> labels = getRedirectLabels(item.getUri()); + for(String label : labels){ + double tmp = Similarity.getSimilarity(word, label); + if(tmp > similarity){ + similarity = tmp; + } + } + } + double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); + allocations.add(new Allocation(item.getUri(), prominence, similarity)); + } + + } + + normProminenceValues(allocations); + + computeScore(allocations); + logger.info("Found " + allocations.size() + " allocations for slot " + slot); + return new TreeSet<Allocation>(allocations); + } + } /** Added: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLDatatypePropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLDatatypePropertiesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLDatatypePropertiesIndex.java 2012-06-12 13:19:03 UTC (rev 3734) @@ -0,0 +1,41 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class SPARQLDatatypePropertiesIndex extends SPARQLPropertiesIndex{ + + public SPARQLDatatypePropertiesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public SPARQLDatatypePropertiesIndex(Model model) { + super(model); + init(); + } + + public SPARQLDatatypePropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT ?uri WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:DatatypeProperty." + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:DatatypeProperty." + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + + "LIMIT %d OFFSET %d"; + } + + +} Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-11 19:01:59 UTC (rev 3733) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-06-12 13:19:03 UTC (rev 3734) @@ -47,6 +47,14 @@ this.model = model; } + public SPARQLIndex(SPARQLIndex index) { + if(index.getModel() != null){ + this.model = index.getModel(); + } else { + this.endpoint = index.getEndpoint(); + } + } + public SPARQLIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { this.endpoint = endpoint; this.cache = cache; @@ -115,7 +123,7 @@ return irs; } - private ResultSet executeSelect(String query){System.out.println(query); + private ResultSet executeSelect(String query){//System.out.println(query); ResultSet rs; if(model == null){ if(cache == null){ @@ -131,4 +139,12 @@ return rs; } + public SparqlEndpoint getEndpoint() { + return endpoint; + } + + public Model getModel() { + return model; + } + } Added: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLObjectPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLObjectPropertiesIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLObjectPropertiesIndex.java 2012-06-12 13:19:03 UTC (rev 3734) @@ -0,0 +1,42 @@ +package org.dllearner.common.index; + +import org.dllearner.kb.sparql.SparqlEndpoint; + +import com.hp.hpl.jena.rdf.model.Model; + +public class SPARQLObjectPropertiesIndex extends SPARQLPropertiesIndex{ + + public SPARQLObjectPropertiesIndex(SparqlEndpoint endpoint) { + super(endpoint); + init(); + } + + public SPARQLObjectPropertiesIndex(Model model) { + super(model); + init(); + } + + public SPARQLObjectPropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT ?uri WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:ObjectProperty." + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + + "LIMIT %d OFFSET %d"; + + super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + + "?s ?uri ?o.\n" + + "?uri a owl:ObjectProperty." + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + + "LIMIT %d OFFSET %d"; + } + + + +} Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-11 19:01:59 UTC (rev 3733) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLPropertiesIndex.java 2012-06-12 13:19:03 UTC (rev 3734) @@ -8,33 +8,29 @@ public SPARQLPropertiesIndex(SparqlEndpoint endpoint) { super(endpoint); - - super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + - "?s ?uri ?o.\n" + - "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + - "LIMIT %d OFFSET %d"; - - super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + - "?s ?uri ?o.\n" + -// "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + - "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + - "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + - "LIMIT %d OFFSET %d"; + init(); } public SPARQLPropertiesIndex(Model model) { super(model); - - super.queryTemplate = "SELECT ?uri WHERE {\n" + + init(); + } + + public SPARQLPropertiesIndex(SPARQLIndex index) { + super(index); + init(); + } + + private void init(){ + super.queryTemplate = "SELECT DISTINCT ?uri WHERE {\n" + "?s ?uri ?o.\n" + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; super.queryWithLabelTemplate = "PREFIX owl:<http://www.w3.org/2002/07/owl#> SELECT DISTINCT * WHERE {\n" + -// "?s ?uri ?o.\n" + - "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + + "?s ?uri ?o.\n" + +// "{?uri a owl:DatatypeProperty.} UNION {?uri a owl:ObjectProperty.} " + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + "FILTER(REGEX(STR(?label), '%s', 'i'))}\n" + "LIMIT %d OFFSET %d"; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-06-26 14:35:07
|
Revision: 3765 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3765&view=rev Author: lorenz_b Date: 2012-06-26 14:34:56 +0000 (Tue, 26 Jun 2012) Log Message: ----------- Some extensions for new TBSL UI. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-25 13:21:58 UTC (rev 3764) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-06-26 14:34:56 UTC (rev 3765) @@ -268,6 +268,19 @@ this.resourcesIndex = knowledgebase.getResourceIndex(); this.classesIndex = knowledgebase.getClassIndex(); this.propertiesIndex = knowledgebase.getPropertyIndex(); + this.mappingIndex = knowledgebase.getMappingIndex(); + if(propertiesIndex instanceof SPARQLPropertiesIndex){ + if(propertiesIndex instanceof VirtuosoPropertiesIndex){ + datatypePropertiesIndex = new VirtuosoDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new VirtuosoObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } else { + datatypePropertiesIndex = new SPARQLDatatypePropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + objectPropertiesIndex = new SPARQLObjectPropertiesIndex((SPARQLPropertiesIndex)propertiesIndex); + } + } else { + datatypePropertiesIndex = propertiesIndex; + objectPropertiesIndex = propertiesIndex; + } } /* @@ -545,7 +558,7 @@ //add for each SYMPROPERTY Slot the reversed query for(Slot slot : sortedSlots){ for(WeightedQuery wQ : queries){ - if(slot.getSlotType() == SlotType.SYMPROPERTY){ + if(slot.getSlotType() == SlotType.SYMPROPERTY || slot.getSlotType() == SlotType.OBJECTPROPERTY){ Query reversedQuery = new Query(wQ.getQuery()); reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); tmp.add(new WeightedQuery(reversedQuery)); Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-06-25 13:21:58 UTC (rev 3764) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-06-26 14:34:56 UTC (rev 3765) @@ -5,6 +5,7 @@ import java.util.List; import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrQuery.ORDER; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.BinaryRequestWriter; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; @@ -20,8 +21,11 @@ private static final int DEFAULT_LIMIT = 10; private static final int DEFAULT_OFFSET = 0; - private String searchField; + private String primarySearchField; + private String secondarySearchField; + private String sortField; + public SOLRIndex(String solrServerURL){ try { server = new CommonsHttpSolrServer(solrServerURL); @@ -31,10 +35,19 @@ } } - public void setSearchField(String searchField) { - this.searchField = searchField; + public void setSearchFields(String primarySearchField, String secondarySearchField){ + this.primarySearchField = primarySearchField; + this.secondarySearchField = secondarySearchField; } + public void setPrimarySearchField(String primarySearchField) { + this.primarySearchField = primarySearchField; + } + + public void setSecondarySearchField(String secondarySearchField) { + this.secondarySearchField = secondarySearchField; + } + @Override public List<String> getResources(String queryString) { return getResources(queryString, DEFAULT_LIMIT); @@ -81,9 +94,16 @@ QueryResponse response; try { - SolrQuery query = new SolrQuery((searchField != null) ? searchField + ":" + queryString : queryString); + String solrString = queryString; + if(primarySearchField != null){ + solrString = primarySearchField + ":" + "\"" + queryString + "\"" + "^2 " + queryString; + } + SolrQuery query = new SolrQuery(solrString); query.setRows(limit); query.setStart(offset); + if(sortField != null){ + query.addSortField(sortField, ORDER.desc); + } query.addField("score"); response = server.query(query); SolrDocumentList docList = response.getResults(); @@ -102,5 +122,9 @@ } return rs; } + + public void setSortField(String sortField){ + this.sortField = sortField; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <lor...@us...> - 2012-07-15 17:50:53
|
Revision: 3792 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3792&view=rev Author: lorenz_b Date: 2012-07-15 17:50:46 +0000 (Sun, 15 Jul 2012) Log Message: ----------- Added more disambiguation. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/UnknownPropertyHelper.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-15 17:49:04 UTC (rev 3791) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java 2012-07-15 17:50:46 UTC (rev 3792) @@ -24,7 +24,6 @@ import java.util.concurrent.Future; import org.apache.log4j.Logger; -import org.dllearner.algorithm.tbsl.ltag.parser.GrammarFilter; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; @@ -38,7 +37,6 @@ import org.dllearner.algorithm.tbsl.sparql.SPARQL_PairType; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Property; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Term; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Value; import org.dllearner.algorithm.tbsl.sparql.Slot; @@ -50,6 +48,8 @@ import org.dllearner.algorithm.tbsl.util.PopularityMap; import org.dllearner.algorithm.tbsl.util.PopularityMap.EntityType; import org.dllearner.algorithm.tbsl.util.Similarity; +import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper; +import org.dllearner.algorithm.tbsl.util.UnknownPropertyHelper.SymPropertyDirection; import org.dllearner.common.index.Index; import org.dllearner.common.index.IndexResultItem; import org.dllearner.common.index.IndexResultSet; @@ -64,6 +64,11 @@ import org.dllearner.core.ComponentInitException; import org.dllearner.core.LearningProblem; import org.dllearner.core.SparqlQueryLearningAlgorithm; +import org.dllearner.core.owl.Description; +import org.dllearner.core.owl.Intersection; +import org.dllearner.core.owl.NamedClass; +import org.dllearner.core.owl.ObjectProperty; +import org.dllearner.core.owl.Thing; import org.dllearner.kb.SparqlEndpointKS; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; @@ -71,6 +76,7 @@ import org.dllearner.reasoning.SPARQLReasoner; import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; +import org.semanticweb.HermiT.Configuration.DirectBlockingType; import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; @@ -78,6 +84,7 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.shared.UnknownPropertyException; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.hp.hpl.jena.vocabulary.RDFS; import com.jamonapi.Monitor; @@ -85,10 +92,12 @@ public class SPARQLTemplateBasedLearner2 implements SparqlQueryLearningAlgorithm{ - enum Ranking{ - LUCENE, SIMILARITY, NONE + + enum Mode{ + BEST_QUERY, BEST_NON_EMPTY_QUERY } + private Mode mode = Mode.BEST_QUERY; private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner2.class); private Monitor templateMon = MonitorFactory.getTimeMonitor("template"); @@ -123,12 +132,12 @@ private String question; private int learnedPos = -1; - private Map<String, Object> learnedSPARQLQueries; private Set<Template> templates; - private Collection<Query> sparqlQueryCandidates; private Map<Template, Collection<? extends Query>> template2Queries; private Map<Slot, List<String>> slot2URI; + private Collection<WeightedQuery> sparqlQueryCandidates; + private SortedSet<WeightedQuery> learnedSPARQLQueries; private SortedSet<WeightedQuery> generatedQueries; private SPARQLReasoner reasoner; @@ -217,6 +226,7 @@ datatypePropertiesIndex = propertiesIndex; objectPropertiesIndex = propertiesIndex; } + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint), cache); } public SPARQLTemplateBasedLearner2(Model model, Index resourcesIndex, Index classesIndex, Index propertiesIndex){ @@ -292,6 +302,7 @@ datatypePropertiesIndex = propertiesIndex; objectPropertiesIndex = propertiesIndex; } + reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); } /* @@ -349,7 +360,7 @@ } private void reset(){ - learnedSPARQLQueries = new HashMap<String, Object>(); + learnedSPARQLQueries = new TreeSet<WeightedQuery>(); template2Queries = new HashMap<Template, Collection<? extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); relevantKeywords = new HashSet<String>(); @@ -383,24 +394,36 @@ //get the weighted query candidates generatedQueries = getWeightedSPARQLQueries(templates); - sparqlQueryCandidates = new ArrayList<Query>(); + sparqlQueryCandidates = new ArrayList<WeightedQuery>(); int i = 0; for(WeightedQuery wQ : generatedQueries){ System.out.println(wQ.explain()); - sparqlQueryCandidates.add(wQ.getQuery()); + sparqlQueryCandidates.add(wQ); if(i == maxTestedQueries){ break; } i++; } - //test candidates - if(useRemoteEndpointValidation){ //on remote endpoint - validateAgainstRemoteEndpoint(sparqlQueryCandidates); - } else {//on local model - + if(mode == Mode.BEST_QUERY){ + double bestScore = -1; + for(WeightedQuery candidate : generatedQueries){ + double score = candidate.getScore(); + if(score >= bestScore){ + bestScore = score; + learnedSPARQLQueries.add(candidate); + } else { + break; + } + } + } else if(mode == Mode.BEST_NON_EMPTY_QUERY){ + //test candidates + if(useRemoteEndpointValidation){ //on remote endpoint + validateAgainstRemoteEndpoint(sparqlQueryCandidates); + } else {//on local model + + } } - } public SortedSet<WeightedQuery> getGeneratedQueries() { @@ -425,8 +448,8 @@ public List<String> getGeneratedSPARQLQueries(){ List<String> queries = new ArrayList<String>(); - for(Query q : sparqlQueryCandidates){ - queries.add(q.toString()); + for(WeightedQuery wQ : sparqlQueryCandidates){ + queries.add(wQ.getQuery().toString()); } return queries; @@ -595,7 +618,7 @@ for(WeightedQuery query : queries){ Query q = new Query(query.getQuery()); - boolean drop = false;/* + boolean drop = false; if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ String objectVar = triple.getValue().getName(); @@ -603,34 +626,34 @@ // System.out.println(triple); for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ // System.out.println(typeTriple); - if(isObjectProperty(a.getUri())){ - Set<String> ranges = getRanges(a.getUri()); + if(true){//reasoner.isObjectProperty(a.getUri())){ + Description range = reasoner.getRange(new ObjectProperty(a.getUri())); // System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); + if(range != null){ + Set<Description> allRanges = new HashSet<Description>(); + SortedSet<Description> superClasses; + if(range instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(range); + allRanges.addAll(superClasses); + } else { + for(Description nc : range.getChildren()){ + superClasses = reasoner.getSuperClasses(nc); + allRanges.addAll(superClasses); + } } - allRanges.addAll(ranges); - allRanges.remove("http://www.w3.org/2002/07/owl#Thing"); + allRanges.add(range); + allRanges.remove(new NamedClass(Thing.instance.getURI())); + + Set<Description> allTypes = new HashSet<Description>(); String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeURI); -// if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")){ -// System.out.println("RANGES: " + allRanges); -// System.out.println("TYPES: " + allTypes); -// } + Description type = new NamedClass(typeURI); + superClasses = reasoner.getSuperClasses(type); + allTypes.addAll(superClasses); + allTypes.add(type); if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ drop = true; -// if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer") && q.toString().contains("/Software>")){ -// System.out.println("RANGES: " + allRanges); -// System.out.println("TYPES: " + allTypes); -// System.out.println("DROPPING: \n" + q.toString()); -// } - } else { - - } + } } } else { drop = true; @@ -638,34 +661,40 @@ } for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ -// System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); + Description domain = reasoner.getDomain(new ObjectProperty(a.getUri())); // System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); + if(domain != null){ + Set<Description> allDomains = new HashSet<Description>(); + SortedSet<Description> superClasses; + if(domain instanceof NamedClass){ + superClasses = reasoner.getSuperClasses(domain); + allDomains.addAll(superClasses); + } else { + for(Description nc : domain.getChildren()){ + superClasses = reasoner.getSuperClasses(nc); + allDomains.addAll(superClasses); + } } - allDomains.addAll(domains); - allDomains.remove("http://www.w3.org/2002/07/owl#Thing"); + allDomains.add(domain); + allDomains.remove(new NamedClass(Thing.instance.getURI())); + + Set<Description> allTypes = new HashSet<Description>(); String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")){ -// System.out.println("DOMAINS: " + allDomains); -// System.out.println("TYPES: " + allTypes); -// } + Description type = new NamedClass(typeURI); + superClasses = reasoner.getSuperClasses(type); + allTypes.addAll(superClasses); + allTypes.add(type); if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ drop = true; -// System.out.println("DROPPING: \n" + q.toString()); +// System.err.println("DROPPING: \n" + q.toString()); } else { } } } } - }*/ + } if(!drop){ if(slot.getSlotType() == SlotType.RESOURCE){//avoid queries where predicate is data property and object resource->add REGEX filter in this case @@ -732,7 +761,40 @@ } - } + } else { + if(slot.getSlotType() == SlotType.SYMPROPERTY){ + for(WeightedQuery wQ : queries){ + List<SPARQL_Triple> triples = wQ.getQuery().getTriplesWithVar(slot.getAnchor()); + for(SPARQL_Triple triple : triples){ + String typeVar; + String resourceURI; + SymPropertyDirection direction; + if(triple.getValue().isVariable()){ + direction = SymPropertyDirection.VAR_RIGHT; + typeVar = triple.getValue().getName(); + resourceURI = triple.getVariable().getName(); + } else { + direction = SymPropertyDirection.VAR_LEFT; + typeVar = triple.getVariable().getName(); + resourceURI = triple.getValue().getName(); + } + resourceURI = resourceURI.replace("<", "").replace(">", ""); + List<SPARQL_Triple> typeTriples = wQ.getQuery().getRDFTypeTriples(typeVar); + for(SPARQL_Triple typeTriple : typeTriples){ + String typeURI = typeTriple.getValue().getName().replace("<", "").replace(">", ""); + System.out.println(typeURI + "---" + resourceURI); + List<Entry<String, Integer>> mostFrequentProperties = UnknownPropertyHelper.getMostFrequentProperties(endpoint, cache, typeURI, resourceURI, direction); + for(Entry<String, Integer> property : mostFrequentProperties){ + System.out.println(property); + wQ.getQuery().replaceVarWithURI(slot.getAnchor(), property.getKey()); + wQ.setScore(wQ.getScore() + 0.1); + } + } + + } + } + } + } // else if(slot.getSlotType() == SlotType.CLASS){ // String token = slot.getWords().get(0); // if(slot.getToken().contains("house")){ @@ -913,31 +975,23 @@ return index; } - private void validateAgainstRemoteEndpoint(Collection<? extends Query> queries){ - List<String> queryStrings = new ArrayList<String>(); - SPARQL_QueryType queryType = SPARQL_QueryType.SELECT; - for(Query query : queries){ - if(query.getQt() == SPARQL_QueryType.ASK){ - queryType = SPARQL_QueryType.ASK; - } else if(query.getQt() == SPARQL_QueryType.SELECT){ - queryType = SPARQL_QueryType.SELECT; - } - queryStrings.add(query.toString()); - } - validate(queryStrings, queryType); + private void validateAgainstRemoteEndpoint(Collection<WeightedQuery> queries){ + SPARQL_QueryType queryType = queries.iterator().next().getQuery().getQt(); + validate(queries, queryType); } - private void validate(List<String> queries, SPARQL_QueryType queryType){ + private void validate(Collection<WeightedQuery> queries, SPARQL_QueryType queryType){ logger.info("Testing candidate SPARQL queries on remote endpoint..."); sparqlMon.start(); if(queryType == SPARQL_QueryType.SELECT){ - for(String query : queries){ + for(WeightedQuery query : queries){ + learnedPos++; List<String> results; try { logger.info("Testing query:\n" + query); - com.hp.hpl.jena.query.Query q = QueryFactory.create(query, Syntax.syntaxARQ); + com.hp.hpl.jena.query.Query q = QueryFactory.create(query.getQuery().toString(), Syntax.syntaxARQ); q.setLimit(1); - ResultSet rs = executeSelect(q.toString());//executeSelect(query); + ResultSet rs = executeSelect(q.toString()); results = new ArrayList<String>(); QuerySolution qs; @@ -955,15 +1009,14 @@ if(!results.isEmpty()){ try{ int cnt = Integer.parseInt(results.get(0)); - if(cnt > 0){learnedPos = queries.indexOf(query); - learnedSPARQLQueries.put(query, results); + if(cnt > 0){ + learnedSPARQLQueries.add(query); if(stopIfQueryResultNotEmpty){ return; } } } catch (NumberFormatException e){ - learnedSPARQLQueries.put(query, results); - learnedPos = queries.indexOf(query); + learnedSPARQLQueries.add(query); if(stopIfQueryResultNotEmpty){ return; } @@ -976,14 +1029,15 @@ } } else if(queryType == SPARQL_QueryType.ASK){ - for(String query : queries){ + for(WeightedQuery query : queries){ + learnedPos++; logger.info("Testing query:\n" + query); - boolean result = executeAskQuery(query); - learnedSPARQLQueries.put(query, result); + boolean result = executeAskQuery(query.getQuery().toString()); + learnedSPARQLQueries.add(query); // if(stopIfQueryResultNotEmpty && result){ // return; // } - if(stopIfQueryResultNotEmpty){learnedPos = queries.indexOf(query); + if(stopIfQueryResultNotEmpty){ return; } logger.info("Result: " + result); @@ -1040,19 +1094,25 @@ @Override public List<String> getCurrentlyBestSPARQLQueries(int nrOfSPARQLQueries) { - return new ArrayList<String>(learnedSPARQLQueries.keySet()); + List<String> bestQueries = new ArrayList<String>(); + for(WeightedQuery wQ : learnedSPARQLQueries){ + bestQueries.add(wQ.getQuery().toString()); + } + return bestQueries; } @Override public String getBestSPARQLQuery() { if(!learnedSPARQLQueries.isEmpty()){ - return learnedSPARQLQueries.keySet().iterator().next(); + return learnedSPARQLQueries.iterator().next().getQuery().toString(); } else { return null; } } - + public SortedSet<WeightedQuery> getLearnedSPARQLQueries() { + return learnedSPARQLQueries; + } @Override public LearningProblem getLearningProblem() { @@ -1107,7 +1167,7 @@ //use the non manual indexes only if mapping based resultset is not empty and option is set if(!useManualMappingsIfExistOnly || rs.isEmpty()){ if(slot.getSlotType() == SlotType.RESOURCE){ - rs.add(index.getResourcesWithScores(word, 50)); + rs.add(index.getResourcesWithScores(word, 20)); } else { if(slot.getSlotType() == SlotType.CLASS){ word = PlingStemmer.stem(word); Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/UnknownPropertyHelper.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/UnknownPropertyHelper.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/UnknownPropertyHelper.java 2012-07-15 17:50:46 UTC (rev 3792) @@ -0,0 +1,102 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.commons.collections.keyvalue.DefaultMapEntry; +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; +import org.dllearner.utilities.MapUtils; + +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; + +public class UnknownPropertyHelper { + + public enum SymPropertyDirection { + VAR_LEFT, VAR_RIGHT, UNKNOWN + } + + private SparqlEndpoint endpoint; + private ExtractionDBCache cache; + + public UnknownPropertyHelper(SparqlEndpoint endpoint, ExtractionDBCache cache) { + this.endpoint = endpoint; + this.cache = cache; + } + + public static void getPopularity(SparqlEndpoint endpoint, ExtractionDBCache cache, String type, String resource){ + String query = String.format("SELECT ?p COUNT(?x) WHERE {?x a <%s>. <%s> ?p ?x.} GROUP BY ?p", type, resource); + System.out.println(query); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + while(rs.hasNext()){ + System.out.println(rs.next()); + } + + query = String.format("SELECT ?p COUNT(?x) WHERE {?x a <%s>. ?x ?p <%s>.} GROUP BY ?p", type, resource); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + while(rs.hasNext()){ + System.out.println(rs.next()); + } + } + + + public static List<Entry<String, Integer>> getMostFrequentProperties(SparqlEndpoint endpoint, ExtractionDBCache cache, String type, String resource, SymPropertyDirection direction){ + Map<String, Integer> property2Frequency = new HashMap<String, Integer>(); + String query; + ResultSet rs; + if(direction == SymPropertyDirection.VAR_LEFT){ + query = String.format("SELECT ?p (COUNT(?x) AS ?cnt) WHERE {?x a <%s>. ?x ?p <%s>.} GROUP BY ?p ORDER BY DESC(?cnt)", type, resource); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + String propertyURI = qs.getResource("p").getURI(); + int cnt = qs.getLiteral("cnt").getInt(); + property2Frequency.put(propertyURI, cnt); + } + } else if(direction == SymPropertyDirection.VAR_RIGHT){ + query = String.format("SELECT ?p (COUNT(?x) AS ?cnt) WHERE {?x a <%s>. <%s> ?p ?x.} GROUP BY ?p ORDER BY DESC(?cnt)", type, resource); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + String propertyURI = qs.getResource("p").getURI(); + int cnt = qs.getLiteral("cnt").getInt(); + property2Frequency.put(propertyURI, cnt); + } + } else if(direction == SymPropertyDirection.UNKNOWN){ + + } + List<Entry<String, Integer>> sortedProperty2Frequency = MapUtils.sortByValues(property2Frequency); + return sortedProperty2Frequency; + } + + public static SymPropertyDirection getDirection(SparqlEndpoint endpoint, ExtractionDBCache cache, String typeURI, String propertyURI){ + String query = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>. ?x <%s> ?o.}", typeURI, propertyURI); + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int classLeftCnt = 0; + while(rs.hasNext()){ + classLeftCnt = rs.next().getLiteral("cnt").getInt(); + } + + query = String.format("SELECT (COUNT(?x) AS ?cnt) WHERE {?x a <%s>. ?o <%s> ?x.}", typeURI, propertyURI); + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + int classRightCnt = 0; + while(rs.hasNext()){ + classRightCnt = rs.next().getLiteral("cnt").getInt(); + } + if(classLeftCnt > classRightCnt){ + return SymPropertyDirection.VAR_LEFT; + } else if(classRightCnt > classLeftCnt){ + return SymPropertyDirection.VAR_RIGHT; + } else { + return SymPropertyDirection.UNKNOWN; + } + } + +} Modified: trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-07-15 17:49:04 UTC (rev 3791) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-07-15 17:50:46 UTC (rev 3792) @@ -26,6 +26,8 @@ private String sortField; + private boolean restrictiveSearch = true; + public SOLRIndex(String solrServerURL){ try { server = new CommonsHttpSolrServer(solrServerURL); @@ -96,8 +98,26 @@ try { String solrString = queryString; if(primarySearchField != null){ - solrString = primarySearchField + ":" + "\"" + queryString + "\"" + "^2 " + queryString; + solrString = primarySearchField + ":" + "\"" + queryString + "\"" + "^2 "; + if(restrictiveSearch){ + String[] tokens = queryString.split(" "); + if(tokens.length > 1){ + solrString += " OR ("; + for(int i = 0; i < tokens.length; i++){ + String token = tokens[i]; + solrString += primarySearchField + ":" + token; + if(i < tokens.length-1){ + solrString += " AND "; + } + } + solrString += ")"; + } + + } else { + solrString += queryString; + } } + System.out.println(solrString); SolrQuery query = new SolrQuery(solrString); query.setRows(limit); query.setStart(offset); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |