From: <lor...@us...> - 2012-05-14 13:24:23
|
Revision: 3712 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3712&view=rev Author: lorenz_b Date: 2012-05-14 13:24:12 +0000 (Mon, 14 May 2012) Log Message: ----------- Started integration of classes for non-SOLR indexes to be more flexible in QTL and TBSL. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/common/ trunk/components-ext/src/main/java/org/dllearner/common/index/ trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java 2012-05-14 04:54:18 UTC (rev 3711) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/datastructures/impl/QueryTreeImpl.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -784,7 +784,7 @@ Literal l; while(iter.hasNext()){ l = iter.next(); - if(l.getDatatype() == XSDDatatype.XSDinteger){ + if(l.getDatatype() == XSDDatatype.XSDinteger || l.getDatatype() == XSDDatatype.XSDint){ min = (l.getInt() < min.getInt()) ? l : min; } else if(l.getDatatype() == XSDDatatype.XSDdouble){ min = (l.getDouble() < min.getDouble()) ? l : min; @@ -801,7 +801,7 @@ Literal l; while(iter.hasNext()){ l = iter.next(); - if(l.getDatatype() == XSDDatatype.XSDinteger){ + if(l.getDatatype() == XSDDatatype.XSDinteger || l.getDatatype() == XSDDatatype.XSDint){ max = (l.getInt() > max.getInt()) ? l : max; } else if(l.getDatatype() == XSDDatatype.XSDdouble){ max = (l.getDouble() > max.getDouble()) ? l : max; Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java 2012-05-14 04:54:18 UTC (rev 3711) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/impl/QueryTreeFactoryImpl.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -245,7 +245,10 @@ // subTree = new QueryTreeImpl<String>(lit.toString()); subTree.setId(nodeId++); subTree.setLiteralNode(true); - if(lit.getDatatype() == XSDDatatype.XSDinteger || lit.getDatatype() == XSDDatatype.XSDdouble || lit.getDatatype() == XSDDatatype.XSDdate){ + if(lit.getDatatype() == XSDDatatype.XSDinteger + || lit.getDatatype() == XSDDatatype.XSDdouble + || lit.getDatatype() == XSDDatatype.XSDdate + || lit.getDatatype() == XSDDatatype.XSDint){ subTree.addLiteral(lit); } tree.addChild(subTree, st.getPredicate().toString()); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java 2012-05-14 04:54:18 UTC (rev 3711) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/operations/NBR.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -35,9 +35,11 @@ import com.hp.hpl.jena.graph.Node; import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecutionFactory; import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.sparql.expr.E_Equals; import com.hp.hpl.jena.sparql.expr.E_LogicalNot; import com.hp.hpl.jena.sparql.expr.ExprVar; @@ -62,6 +64,7 @@ private ExtractionDBCache selectCache; private SparqlEndpoint endpoint; + private Model model; private String query; private int limit; @@ -94,6 +97,12 @@ noSequences = new ArrayList<List<QueryTreeChange>>(); } + public NBR(Model model){ + this.model = model; + + noSequences = new ArrayList<List<QueryTreeChange>>(); + } + public void setMaxExecutionTimeInSeconds(int maxExecutionTimeInSeconds){ this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds; } @@ -1385,16 +1394,22 @@ } private ResultSet executeSelectQuery(String query){ - ExtendedQueryEngineHTTP queryExecution = new ExtendedQueryEngineHTTP(endpoint.getURL().toString(), query); - queryExecution.setTimeOut(maxExecutionTimeInSeconds * 1000); - for (String dgu : endpoint.getDefaultGraphURIs()) { - queryExecution.addDefaultGraph(dgu); - } - for (String ngu : endpoint.getNamedGraphURIs()) { - queryExecution.addNamedGraph(ngu); - } - ResultSet resultset = queryExecution.execSelect(); - return resultset; + ResultSet rs; + if(model == null){ + ExtendedQueryEngineHTTP queryExecution = new ExtendedQueryEngineHTTP(endpoint.getURL().toString(), query); + queryExecution.setTimeOut(maxExecutionTimeInSeconds * 1000); + for (String dgu : endpoint.getDefaultGraphURIs()) { + queryExecution.addDefaultGraph(dgu); + } + for (String ngu : endpoint.getNamedGraphURIs()) { + queryExecution.addNamedGraph(ngu); + } + rs = queryExecution.execSelect(); + } else { + rs = QueryExecutionFactory.create(query, model).execSelect(); + } + + return rs; } } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2012-05-14 04:54:18 UTC (rev 3711) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -11,6 +11,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -18,6 +19,10 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import org.apache.log4j.Logger; import org.dllearner.algorithm.qtl.util.ModelGenerator; @@ -37,7 +42,6 @@ import org.dllearner.algorithm.tbsl.sparql.RatedQuery; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Triple; import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; @@ -60,14 +64,36 @@ import org.ini4j.InvalidFileFormatException; import org.ini4j.Options; +import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.core.Var; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; +import com.hp.hpl.jena.sparql.syntax.Element; +import com.hp.hpl.jena.sparql.syntax.ElementAssign; +import com.hp.hpl.jena.sparql.syntax.ElementBind; +import com.hp.hpl.jena.sparql.syntax.ElementDataset; +import com.hp.hpl.jena.sparql.syntax.ElementExists; +import com.hp.hpl.jena.sparql.syntax.ElementFetch; +import com.hp.hpl.jena.sparql.syntax.ElementFilter; +import com.hp.hpl.jena.sparql.syntax.ElementGroup; +import com.hp.hpl.jena.sparql.syntax.ElementMinus; +import com.hp.hpl.jena.sparql.syntax.ElementNamedGraph; +import com.hp.hpl.jena.sparql.syntax.ElementNotExists; +import com.hp.hpl.jena.sparql.syntax.ElementOptional; +import com.hp.hpl.jena.sparql.syntax.ElementPathBlock; +import com.hp.hpl.jena.sparql.syntax.ElementService; +import com.hp.hpl.jena.sparql.syntax.ElementSubQuery; +import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock; +import com.hp.hpl.jena.sparql.syntax.ElementUnion; +import com.hp.hpl.jena.sparql.syntax.ElementVisitor; import com.hp.hpl.jena.vocabulary.OWL; +import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDFS; import com.jamonapi.Monitor; import com.jamonapi.MonitorFactory; @@ -667,10 +693,29 @@ logger.info("Processing template:\n" + t.toString()); allocations = new TreeSet<Allocation>(); - for(Slot slot : t.getSlots()){ + ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); + List<Future<SortedSet<Allocation>>> list = new ArrayList<Future<SortedSet<Allocation>>>(); + + for (Slot slot : t.getSlots()) { + Callable<SortedSet<Allocation>> worker = new SlotProcessor(slot); + Future<SortedSet<Allocation>> submit = executor.submit(worker); + list.add(submit); + } + +// for (Future<SortedSet<Allocation>> future : list) { +// try { +// future.get(); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } catch (ExecutionException e) { +// e.printStackTrace(); +// } +// } + + /*for(Slot slot : t.getSlots()){ allocations = slot2Allocations2.get(slot); if(allocations == null){ - allocations = computeAllocations(slot, 20); + allocations = computeAllocations(slot, 10); slot2Allocations2.put(slot, allocations); } slot2Allocations.put(slot, allocations); @@ -687,7 +732,7 @@ } } allocations.addAll(tmp); - } + }*/ Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); @@ -868,7 +913,7 @@ if(slot.getSlotType() == SlotType.RESOURCE){ rs = index.getResourcesWithScores(word, 250); } else { - rs = index.getResourcesWithScores(word, 30); + rs = index.getResourcesWithScores(word, 20); } @@ -1514,7 +1559,7 @@ * @throws InvalidFileFormatException */ public static void main(String[] args) throws NoTemplateFoundException, InvalidFileFormatException, FileNotFoundException, IOException { -// Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); + // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); // String question = "Who/WP was/VBD the/DT wife/NN of/IN president/NN Lincoln/NNP"; @@ -1578,7 +1623,22 @@ // TODO Auto-generated method stub } + + class SlotProcessor implements Callable<SortedSet<Allocation>>{ + + private Slot slot; + + public SlotProcessor(Slot slot) { + this.slot = slot; + } + @Override + public SortedSet<Allocation> call() throws Exception { + return computeAllocations(slot); + } + + } + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2012-05-14 04:54:18 UTC (rev 3711) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -165,7 +165,13 @@ lastTotalHits = (int) docList.getNumFound(); for(SolrDocument d : docList){ - items.add(new SolrQueryResultItem((String) d.get(labelField), (String) d.get("uri"), (Float) d.get("score"))); + float score = 0; + if(d.get("score") instanceof ArrayList){ + score = ((Float)((ArrayList)d.get("score")).get(1)); + } else { + score = (Float) d.get("score"); + } + items.add(new SolrQueryResultItem((String) d.get(labelField), (String) d.get("uri"), score)); } } catch (SolrServerException e) { e.printStackTrace(); @@ -183,4 +189,8 @@ this.hitsPerPage = hitsPerPage; } + public static void main(String[] args) { + new SolrSearch("http://139.18.2.173:8080/apache-solr-3.3.0/dbpedia_classes").getResources("Leipzig"); + } + } Added: trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/Index.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -0,0 +1,9 @@ +package org.dllearner.common.index; + +import java.util.List; + +public interface Index { + List<String> getResources(String queryString); + List<String> getResources(String queryString, int limit); + List<String> getResources(String queryString, int limit, int offset); +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SOLRIndex.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -0,0 +1,61 @@ +package org.dllearner.common.index; + +import java.net.MalformedURLException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.BinaryRequestWriter; +import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.params.ModifiableSolrParams; + +public class SOLRIndex implements Index{ + +private CommonsHttpSolrServer server; + + private static final int DEFAULT_LIMIT = 10; + private static final int DEFAULT_OFFSET = 0; + + public SOLRIndex(String solrServerURL){ + try { + server = new CommonsHttpSolrServer(solrServerURL); + server.setRequestWriter(new BinaryRequestWriter()); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + } + + @Override + public List<String> getResources(String queryString) { + return getResources(queryString, DEFAULT_LIMIT); + } + + @Override + public List<String> getResources(String queryString, int limit) { + return getResources(queryString, limit, DEFAULT_OFFSET); + } + + @Override + public List<String> getResources(String queryString, int limit, int offset) { + List<String> resources = new ArrayList<String>(); + QueryResponse response; + try { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("q", queryString); + params.set("rows", limit); + params.set("start", offset); + response = server.query(params); + SolrDocumentList docList = response.getResults(); + for(SolrDocument d : docList){ + resources.add((String) d.get("uri")); + } + } catch (SolrServerException e) { + e.printStackTrace(); + } + return resources; + } + +} Added: trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/common/index/SPARQLIndex.java 2012-05-14 13:24:12 UTC (rev 3712) @@ -0,0 +1,90 @@ +package org.dllearner.common.index; + +import java.util.ArrayList; +import java.util.List; + +import org.dllearner.kb.sparql.ExtractionDBCache; +import org.dllearner.kb.sparql.SparqlEndpoint; +import org.dllearner.kb.sparql.SparqlQuery; + +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; + +public class SPARQLIndex implements Index{ + + private static final int DEFAULT_LIMIT = 10; + private static final int DEFAULT_OFFSET = 0; + + private SparqlEndpoint endpoint; + private ExtractionDBCache cache; + + private Model model; + + protected String queryTemplate = "SELECT DISTINCT(?uri) WHERE {\n" + + "?uri a ?type.\n" + + "?uri <http://www.w3.org/2000/01/rdf-schema#label> ?label\n" + + "FILTER(REGEX(STR(?label), '%s'))}\n" + + "LIMIT %d OFFSET %d"; + + + public SPARQLIndex(SparqlEndpoint endpoint) { + this(endpoint, null); + } + + public SPARQLIndex(Model model) { + this.model = model; + } + + public SPARQLIndex(SparqlEndpoint endpoint, ExtractionDBCache cache) { + this.endpoint = endpoint; + this.cache = cache; + } + + @Override + public List<String> getResources(String queryString) { + return getResources(queryString, DEFAULT_LIMIT); + } + + @Override + public List<String> getResources(String queryString, int limit) { + return getResources(queryString, limit, DEFAULT_OFFSET); + } + + @Override + public List<String> getResources(String queryString, int limit, int offset) { + List<String> resources = new ArrayList<String>(); + + String query = buildResourcesQuery(queryString, limit, offset); + + ResultSet rs; + if(model == null){ + if(cache == null){ + QueryEngineHTTP qe = new QueryEngineHTTP(endpoint.getURL().toString(), queryString); + qe.setDefaultGraphURIs(endpoint.getDefaultGraphURIs()); + rs = qe.execSelect(); + } else { + rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + } + } else { + rs = QueryExecutionFactory.create(queryString, model).execSelect(); + } + QuerySolution qs; + while(rs.hasNext()){ + qs = rs.next(); + RDFNode uriNode = qs.get("uri"); + if(uriNode.isURIResource()){ + resources.add(uriNode.asResource().getURI()); + } + } + return resources; + } + + protected String buildResourcesQuery(String searchTerm, int limit, int offset){ + return String.format(queryTemplate, searchTerm, limit, offset); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |