From: <lor...@us...> - 2012-07-02 11:58:36
|
Revision: 3769 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3769&view=rev Author: lorenz_b Date: 2012-07-02 11:58:25 +0000 (Mon, 02 Jul 2012) Log Message: ----------- Removed some unused classes and add option to filter predicates. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/CLI.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner2.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/LGGTest.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/Evaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/IndexEvaluation.java trunk/components-ext/src/test/java/org/dllearner/algorithm/tbsl/TBSLTest.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/common/index/ModelGenerator.java Removed Paths: ------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/cache/ModelCache.java trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/util/ModelGenerator.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/test/java/org/dllearner/algorithm/qtl/ModelCreationTest.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java 2012-07-02 11:47:59 UTC (rev 3768) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/QTL.java 2012-07-02 11:58:25 UTC (rev 3769) @@ -31,7 +31,6 @@ import org.apache.commons.collections15.ListUtils; import org.apache.log4j.Logger; -import org.dllearner.algorithm.qtl.cache.ModelCache; import org.dllearner.algorithm.qtl.cache.QueryTreeCache; import org.dllearner.algorithm.qtl.datastructures.QueryTree; import org.dllearner.algorithm.qtl.datastructures.impl.QueryTreeImpl; @@ -42,7 +41,6 @@ import org.dllearner.algorithm.qtl.operations.NBR; import org.dllearner.algorithm.qtl.operations.lgg.LGGGenerator; import org.dllearner.algorithm.qtl.operations.lgg.LGGGeneratorImpl; -import org.dllearner.algorithm.qtl.util.ModelGenerator; import org.dllearner.algorithm.qtl.util.SPARQLEndpointEx; import org.dllearner.core.AbstractComponent; import org.dllearner.core.AbstractLearningProblem; @@ -56,6 +54,9 @@ import org.dllearner.core.options.IntegerConfigOption; import org.dllearner.core.owl.Individual; import org.dllearner.kb.SparqlEndpointKS; +import org.dllearner.kb.sparql.CachingConciseBoundedDescriptionGenerator; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGenerator; +import org.dllearner.kb.sparql.ConciseBoundedDescriptionGeneratorImpl; import org.dllearner.kb.sparql.ExtractionDBCache; import org.dllearner.kb.sparql.SparqlEndpoint; import org.dllearner.kb.sparql.SparqlQuery; @@ -92,8 +93,6 @@ private ExtractionDBCache cache; private QueryTreeCache treeCache; - private ModelGenerator modelGen; - private ModelCache modelCache; private LGGGenerator<String> lggGenerator; private NBR<String> nbr; @@ -106,6 +105,8 @@ private QueryTreeFilter queryTreeFilter; + private ConciseBoundedDescriptionGenerator cbdGenerator; + private int maxExecutionTimeInSeconds = 60; private int maxQueryTreeDepth = 2; @@ -138,9 +139,8 @@ this.cache = cache; treeCache = new QueryTreeCache(); - modelGen = new ModelGenerator(endpoint, endpoint.getPredicateFilters(), cache); - modelCache = new ModelCache(modelGen); - modelCache.setRecursionDepth(maxQueryTreeDepth); + cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(endpoint, cache)); + cbdGenerator.setRecursionDepth(maxQueryTreeDepth); lggGenerator = new LGGGeneratorImpl<String>(); nbr = new NBR<String>(endpoint, cache); @@ -208,7 +208,7 @@ public void setMaxQueryTreeDepth(int maxQueryTreeDepth){ this.maxQueryTreeDepth = maxQueryTreeDepth; - modelCache.setRecursionDepth(maxQueryTreeDepth); + cbdGenerator.setRecursionDepth(maxQueryTreeDepth); } public String getSPARQLQuery(){ @@ -218,6 +218,10 @@ return lgg.toSPARQLQueryString(); } + public void setRestrictToNamespaces(List<String> namespaces){ + cbdGenerator.setRestrictToNamespaces(namespaces); + } + private void generatePositiveExampleTrees(){ posExampleTrees.clear(); posExampleTrees.addAll(getQueryTrees(posExamples)); @@ -236,7 +240,7 @@ if(logger.isDebugEnabled()){ logger.debug("Tree for resource " + resource); } - model = modelCache.getModel(resource); + model = cbdGenerator.getConciseBoundedDescription(resource); tree = treeCache.getQueryTree(resource, model); if(logger.isDebugEnabled()){ logger.debug(tree.getStringRepresentation()); @@ -324,9 +328,8 @@ endpoint = endpointKS.getEndpoint(); treeCache = new QueryTreeCache(); - modelGen = new ModelGenerator(endpoint); - modelCache = new ModelCache(modelGen); - modelCache.setRecursionDepth(maxQueryTreeDepth); + cbdGenerator = new CachingConciseBoundedDescriptionGenerator(new ConciseBoundedDescriptionGeneratorImpl(endpoint, cache)); + cbdGenerator.setRecursionDepth(maxQueryTreeDepth); lggGenerator = new LGGGeneratorImpl<String>(); nbr = new NBR<String>(endpoint); Deleted: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/cache/ModelCache.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/cache/ModelCache.java 2012-07-02 11:47:59 UTC (rev 3768) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/cache/ModelCache.java 2012-07-02 11:58:25 UTC (rev 3769) @@ -1,46 +0,0 @@ -package org.dllearner.algorithm.qtl.cache; - -import java.util.HashMap; -import java.util.Map; - -import org.dllearner.algorithm.qtl.util.ModelGenerator; -import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; - -import com.hp.hpl.jena.rdf.model.Model; - -public class ModelCache { - - private Map<String, Model> cache; - private ModelGenerator modelGen; - - private int recursionDepth = 2; - - - public ModelCache(ModelGenerator modelGen){ - this.modelGen = modelGen; - - cache = new HashMap<String, Model>(); - } - - public Model getModel(String uri){ - Model model = cache.get(uri); - if(model == null){ - model = modelGen.createModel(uri, Strategy.CHUNKS, recursionDepth); - cache.put(uri, model); - } - return cache.get(uri); - } - - public void setRecursionDepth(int recursionDepth){ - this.recursionDepth = recursionDepth; - } - - public void clear(){ - cache.clear(); - } - - public void dispose(){ - cache = null; - } - -} Deleted: trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/util/ModelGenerator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/util/ModelGenerator.java 2012-07-02 11:47:59 UTC (rev 3768) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/qtl/util/ModelGenerator.java 2012-07-02 11:58:25 UTC (rev 3769) @@ -1,248 +0,0 @@ -package org.dllearner.algorithm.qtl.util; - -import java.io.UnsupportedEncodingException; -import java.net.MalformedURLException; -import java.net.URL; -import java.sql.SQLException; -import java.util.Collections; -import java.util.Iterator; -import java.util.Set; - -import org.apache.log4j.Logger; -import org.dllearner.kb.sparql.ExtractionDBCache; -import org.dllearner.kb.sparql.SparqlEndpoint; - -import com.hp.hpl.jena.query.Query; -import com.hp.hpl.jena.query.QueryFactory; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.rdf.model.Statement; -import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.jamonapi.Monitor; -import com.jamonapi.MonitorFactory; - -public class ModelGenerator { - - private static final Logger logger = Logger.getLogger(ModelGenerator.class); - private Monitor queryMonitor = MonitorFactory.getTimeMonitor("SPARQL Query monitor"); - - private SparqlEndpoint endpoint; - private int recursionDepth = 1; - - private static final int CHUNK_SIZE = 1000; - - private ExtractionDBCache cache; - - private Set<String> predicateFilters; - - public enum Strategy{ - INCREMENTALLY, - CHUNKS - } - - public ModelGenerator(SparqlEndpoint endpoint){ - this(endpoint, Collections.<String>emptySet(), null); - } - - public ModelGenerator(SparqlEndpoint endpoint, Set<String> predicateFilters){ - this(endpoint, predicateFilters, null); - } - - public ModelGenerator(SparqlEndpoint endpoint, Set<String> predicateFilters, ExtractionDBCache cache){ - this.endpoint = endpoint; - this.predicateFilters = predicateFilters; - this.cache = cache; - } - - public ModelGenerator(SparqlEndpoint endpoint, ExtractionDBCache cache){ - this(endpoint, Collections.<String>emptySet(), cache); - } - - public ModelGenerator(String endpointURL){ - try { - this.endpoint = new SparqlEndpoint(new URL(endpointURL)); - } catch (MalformedURLException e) { - e.printStackTrace(); - } - } - - public Model createModel(String resource, Strategy strategy, int recursionDepth){ - this.recursionDepth = recursionDepth; - if(strategy == Strategy.INCREMENTALLY){ - return getModelIncrementallyRec(resource, 0); - } else if(strategy == Strategy.CHUNKS){ - return getModelChunked(resource); - } - return ModelFactory.createDefaultModel(); - } - - public void setRecursionDepth(int recursionDepth){ - this.recursionDepth = recursionDepth; - } - - - /** - * A SPARQL CONSTRUCT query is created, to get a RDF graph for the given example with a specific recursion depth. - * @param example The example resource for which a CONSTRUCT query is created. - * @return The JENA ARQ Query object. - */ - private String makeConstructQueryOptional(String resource, int limit, int offset, Set<String> predicateFilter){ - StringBuilder sb = new StringBuilder(); - sb.append("CONSTRUCT {\n"); - sb.append("<").append(resource).append("> ").append("?p0 ").append("?o0").append(".\n"); - for(int i = 1; i < recursionDepth; i++){ - sb.append("?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".\n"); - } - sb.append("}\n"); - sb.append("WHERE {\n"); - sb.append("<").append(resource).append("> ").append("?p0 ").append("?o0").append(".\n"); - for(int i = 1; i < recursionDepth; i++){ - sb.append("OPTIONAL{\n"); - sb.append("?o").append(i-1).append(" ").append("?p").append(i).append(" ").append("?o").append(i).append(".\n"); - } - for(int i = 1; i < recursionDepth; i++){ - sb.append("}"); - } - - - for(int i = 0; i < recursionDepth; i++){ - for(String predicate : predicateFilter){ - sb.append("FILTER (!REGEX (?p").append(i).append(", \"").append(predicate).append("\"))"); - } - - } - - sb.append("}\n"); -// sb.append("ORDER BY "); -// for(int i = 0; i < recursionDepth; i++){ -// sb.append("?p").append(i).append(" ").append("?o").append(i).append(" "); -// } -// sb.append("\n"); - sb.append("LIMIT ").append(limit).append("\n"); - sb.append("OFFSET ").append(offset); - - Query query = QueryFactory.create(sb.toString()); - - return sb.toString(); - } - - - /** - * A SPARQL CONSTRUCT query is created, to get a RDF graph for the given example. - * @param example The example resource for which a CONSTRUCT query is created. - * @return The JENA ARQ Query object. - */ - private String makeConstructQuery(String example, Set<String> predicateFilters){ - - StringBuilder sb = new StringBuilder(); - sb.append("CONSTRUCT {\n"); - sb.append("<").append(example).append("> ").append("?p ").append("?o").append(".\n"); - sb.append("}\n"); - sb.append("WHERE {\n"); - sb.append("<").append(example).append("> ").append("?p ").append("?o").append(".\n"); - - for(String predicate : predicateFilters){ - sb.append("FILTER (!REGEX (?p, \"").append(predicate).append("\"))"); - } - - sb.append("}\n"); - Query query = QueryFactory.create(sb.toString()); - - return sb.toString(); - } - - - - private Model getModelChunked(String resource){ -// logger.debug("Resource: " + resource); - String query = makeConstructQueryOptional(resource, CHUNK_SIZE, 0, predicateFilters); -// logger.debug("Sending SPARQL query ..."); -// logger.debug("Query:\n" + query.toString()); - queryMonitor.start(); - Model all = ModelFactory.createDefaultModel(); - try { - Model model; - if(cache == null){ - model = getModel(query); - } else { - model = cache.executeConstructQuery(endpoint, query); - } -// logger.debug("Got " + model.size() + " new triple in " + queryMonitor.getLastValue() + "ms."); - all.add(model); - queryMonitor.stop(); - int i = 1; - while(model.size() != 0){ - query = makeConstructQueryOptional(resource, CHUNK_SIZE, i * CHUNK_SIZE, predicateFilters); -// logger.debug("Sending SPARQL query ..."); -// logger.debug("Query:\n" + query.toString()); - queryMonitor.start(); - if(cache == null){ - model = getModel(query); - } else { - model = cache.executeConstructQuery(endpoint, query); - } - queryMonitor.stop(); -// logger.debug("Got " + model.size() + " new triple in " + queryMonitor.getLastValue() + "ms."); - all.add(model); - i++; - } - } catch (UnsupportedEncodingException e) { - logger.error(e); - } catch (SQLException e) { - logger.error(e); - } - return all; - } - - private Model getModelIncrementallyRec(String resource, int depth){ - logger.debug("Resource: " + resource); - String query = makeConstructQuery(resource, predicateFilters); - logger.debug("Sending SPARQL query ..."); - logger.debug("Query:\n" + query); - queryMonitor.start(); - Model model = null; - try { - if(cache == null){ - model = getModel(query); - } else { - model = cache.executeConstructQuery(endpoint, query); - } - } catch (UnsupportedEncodingException e) { - logger.error(e); - } catch (SQLException e) { - logger.error(e); - } - queryMonitor.stop(); - logger.debug("Got " + model.size() + " new triples in " + queryMonitor.getLastValue() + "ms:"); - Statement st = null; - for(Iterator<Statement> i = model.listStatements();i.hasNext(); st = i.next()){ - logger.debug(st); - } - if(depth < recursionDepth){ - Model tmp = ModelFactory.createDefaultModel(); - for(Iterator<Statement> i = model.listStatements(); i.hasNext();){ - st = i.next(); - if(st.getObject().isURIResource()){ - tmp.add(getModelIncrementallyRec(st.getObject().toString(), depth + 1)); - } - } - model.add(tmp); - } - - return model; - } - - private Model getModel(String query){ - QueryEngineHTTP queryExecution = new QueryEngineHTTP(endpoint.getURL().toString(), query); - for (String dgu : endpoint.getDefaultGraphURIs()) { - queryExecution.addDefaultGraph(dgu); - } - for (String ngu : endpoint.getNamedGraphURIs()) { - queryExecution.addNamedGraph(ngu); - } - Model model = queryExecution.execConstruct(); - return model; - } - - -} Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/CLI.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/CLI.java 2012-07-02 11:47:59 UTC (rev 3768) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/cli/CLI.java 2012-07-02 11:58:25 UTC (rev 3769) @@ -7,11 +7,11 @@ import java.net.URL; import java.util.Collections; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; import org.dllearner.algorithm.tbsl.learning.NoTemplateFoundException; -import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner; -import org.dllearner.algorithm.tbsl.templator.Templator; +import org.dllearner.algorithm.tbsl.learning.SPARQLTemplateBasedLearner2; +import org.dllearner.algorithm.tbsl.util.Knowledgebase; +import org.dllearner.common.index.Index; +import org.dllearner.common.index.SOLRIndex; import org.dllearner.kb.sparql.SparqlEndpoint; import org.ini4j.InvalidFileFormatException; @@ -21,11 +21,18 @@ public static void main(String[] args) throws InvalidFileFormatException, FileNotFoundException, IOException { // Logger.getLogger(SPARQLTemplateBasedLearner.class).setLevel(Level.OFF); + SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), Collections.singletonList("http://dbpedia.org"), Collections.<String>emptyList()); + + SOLRIndex resourcesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_resources"); + resourcesIndex.setPrimarySearchField("label"); +// resourcesIndex.setSortField("pagerank"); + Index classesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_classes"); + Index propertiesIndex = new SOLRIndex("http://dbpedia.aksw.org:8080/solr/dbpedia_properties"); + + + Knowledgebase kb = new Knowledgebase(endpoint, "DBpedia Live", "TODO", resourcesIndex, propertiesIndex, classesIndex, null); + SPARQLTemplateBasedLearner2 learner = new SPARQLTemplateBasedLearner2(kb); - SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); - SparqlEndpoint endpoint = new SparqlEndpoint(new URL("http://live.dbpedia.org/sparql"), - Collections.<String>singletonList(""), Collections.<String>emptyList()); - System.out.println("======= TBSL v0.1 ============="); System.out.println("\nType ':q' to quit."); Deleted: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2012-07-02 11:47:59 UTC (rev 3768) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2012-07-02 11:58:25 UTC (rev 3769) @@ -1,1644 +0,0 @@ -package org.dllearner.algorithm.tbsl.learning; - -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -import org.apache.log4j.Logger; -import org.dllearner.algorithm.qtl.util.ModelGenerator; -import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; -import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; -import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; -import org.dllearner.algorithm.tbsl.nlp.PartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.StanfordPartOfSpeechTagger; -import org.dllearner.algorithm.tbsl.nlp.WordNet; -import org.dllearner.algorithm.tbsl.search.HierarchicalSolrSearch; -import org.dllearner.algorithm.tbsl.search.SolrQueryResultItem; -import org.dllearner.algorithm.tbsl.search.SolrQueryResultSet; -import org.dllearner.algorithm.tbsl.search.SolrSearch; -import org.dllearner.algorithm.tbsl.search.ThresholdSlidingSolrSearch; -import org.dllearner.algorithm.tbsl.sparql.Allocation; -import org.dllearner.algorithm.tbsl.sparql.Query; -import org.dllearner.algorithm.tbsl.sparql.RatedQuery; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; -import org.dllearner.algorithm.tbsl.sparql.SPARQL_QueryType; -import org.dllearner.algorithm.tbsl.sparql.Slot; -import org.dllearner.algorithm.tbsl.sparql.SlotType; -import org.dllearner.algorithm.tbsl.sparql.Template; -import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; -import org.dllearner.algorithm.tbsl.templator.Templator; -import org.dllearner.algorithm.tbsl.util.Prefixes; -import org.dllearner.algorithm.tbsl.util.Similarity; -import org.dllearner.algorithm.tbsl.util.SolrQueryResultStringSimilarityComparator; -import org.dllearner.core.ComponentInitException; -import org.dllearner.core.LearningProblem; -import org.dllearner.core.Oracle; -import org.dllearner.core.SparqlQueryLearningAlgorithm; -import org.dllearner.core.owl.Description; -import org.dllearner.core.owl.NamedClass; -import org.dllearner.kb.SparqlEndpointKS; -import org.dllearner.kb.sparql.ExtractionDBCache; -import org.dllearner.kb.sparql.SparqlEndpoint; -import org.dllearner.kb.sparql.SparqlQuery; -import org.dllearner.reasoning.SPARQLReasoner; -import org.ini4j.InvalidFileFormatException; -import org.ini4j.Options; - -import com.hp.hpl.jena.graph.Triple; -import com.hp.hpl.jena.query.QueryExecution; -import com.hp.hpl.jena.query.QueryExecutionFactory; -import com.hp.hpl.jena.query.QueryFactory; -import com.hp.hpl.jena.query.QuerySolution; -import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.sparql.core.Var; -import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; -import com.hp.hpl.jena.sparql.syntax.Element; -import com.hp.hpl.jena.sparql.syntax.ElementAssign; -import com.hp.hpl.jena.sparql.syntax.ElementBind; -import com.hp.hpl.jena.sparql.syntax.ElementDataset; -import com.hp.hpl.jena.sparql.syntax.ElementExists; -import com.hp.hpl.jena.sparql.syntax.ElementFetch; -import com.hp.hpl.jena.sparql.syntax.ElementFilter; -import com.hp.hpl.jena.sparql.syntax.ElementGroup; -import com.hp.hpl.jena.sparql.syntax.ElementMinus; -import com.hp.hpl.jena.sparql.syntax.ElementNamedGraph; -import com.hp.hpl.jena.sparql.syntax.ElementNotExists; -import com.hp.hpl.jena.sparql.syntax.ElementOptional; -import com.hp.hpl.jena.sparql.syntax.ElementPathBlock; -import com.hp.hpl.jena.sparql.syntax.ElementService; -import com.hp.hpl.jena.sparql.syntax.ElementSubQuery; -import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock; -import com.hp.hpl.jena.sparql.syntax.ElementUnion; -import com.hp.hpl.jena.sparql.syntax.ElementVisitor; -import com.hp.hpl.jena.vocabulary.OWL; -import com.hp.hpl.jena.vocabulary.RDF; -import com.hp.hpl.jena.vocabulary.RDFS; -import com.jamonapi.Monitor; -import com.jamonapi.MonitorFactory; - -public class SPARQLTemplateBasedLearner implements SparqlQueryLearningAlgorithm{ - - //for debugging - List<String> exclusions = Arrays.asList(new String[]{"http://dbpedia.org/ontology/GeopoliticalOrganisation", - "http://dbpedia.org/ontology/Non-ProfitOrganisation"}); - - enum Ranking{ - LUCENE, SIMILARITY, NONE - } - - private static final String OPTIONS_FILE = SPARQLTemplateBasedLearner.class.getClassLoader().getResource("tbsl/tbsl.properties").getPath(); - - private static final Logger logger = Logger.getLogger(SPARQLTemplateBasedLearner.class); - private Monitor mon = MonitorFactory.getTimeMonitor("tbsl"); - - private static final int RECURSION_DEPTH = 2; - private static final int MAX_URIS_PER_SLOT = 10; - - private Ranking ranking; - private boolean useRemoteEndpointValidation; - private boolean stopIfQueryResultNotEmpty; - private int maxTestedQueriesPerTemplate = 50; - private int maxQueryExecutionTimeInSeconds; - - private int maxTestedQueries = 200; - - private SparqlEndpoint endpoint = SparqlEndpoint.getEndpointDBpediaLiveAKSW(); - private ExtractionDBCache cache = new ExtractionDBCache("cache"); - - private SolrSearch resource_index; - private SolrSearch class_index; - private SolrSearch property_index; - private SolrSearch boa_pattern_property_index; - private ModelGenerator modelGenenerator; - private Templator templateGenerator; - - private String question; - private int learnedPos = -1; - - private Oracle oracle; - - private Map<String, SolrQueryResultSet> resourcesURICache; - private Map<String, SolrQueryResultSet> classesURICache; - private Map<String, SolrQueryResultSet> propertiesURICache; - - private Map<String, Object> learnedSPARQLQueries; - private Set<Template> templates; - private Collection<Query> sparqlQueryCandidates; - private Map<Template, Collection<? extends Query>> template2Queries; - private Map<Slot, List<String>> slot2URI; - - private Set<WeightedQuery> generatedQueries; - - private Map<String, String> prefixMap; - - private Lemmatizer lemmatizer = new LingPipeLemmatizer();// StanfordLemmatizer(); - - private SPARQLReasoner reasoner; - - public SPARQLTemplateBasedLearner() throws InvalidFileFormatException, FileNotFoundException, IOException{ - this(OPTIONS_FILE); - } - - public SPARQLTemplateBasedLearner(String optionsFile) throws InvalidFileFormatException, FileNotFoundException, IOException{ - this(new Options(new FileInputStream(optionsFile))); - } - - public SPARQLTemplateBasedLearner(Options options){ - this(options, new StanfordPartOfSpeechTagger()); - } - - public SPARQLTemplateBasedLearner(Options options, PartOfSpeechTagger tagger){ - this(options, tagger, new WordNet()); - } - - public SPARQLTemplateBasedLearner(Options options, PartOfSpeechTagger tagger, WordNet wordNet){ - this(options, tagger, wordNet, "cache"); - } - - public SPARQLTemplateBasedLearner(Options options, PartOfSpeechTagger tagger, WordNet wordNet, String cacheDir){ - init(options); - - Set<String> predicateFilters = new HashSet<String>(); - predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); - predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - - prefixMap = Prefixes.getPrefixes(); - - modelGenenerator = new ModelGenerator(endpoint, predicateFilters); - - templateGenerator = new Templator(tagger, wordNet); - cache = new ExtractionDBCache(cacheDir); - } - - /* - * Only for Evaluation useful. - */ - public void setUseIdealTagger(boolean value){ - templateGenerator.setUNTAGGED_INPUT(!value); - } - - private void init(Options options){ - String resourcesIndexUrl = options.fetch("solr.resources.url"); - String resourcesIndexSearchField = options.fetch("solr.resources.searchfield"); - resource_index = new ThresholdSlidingSolrSearch(resourcesIndexUrl, resourcesIndexSearchField, "label", 1.0, 0.1); - - String classesIndexUrl = options.fetch("solr.classes.url"); - String classesIndexSearchField = options.fetch("solr.classes.searchfield"); - SolrSearch dbpediaClassIndex = new SolrSearch(classesIndexUrl, classesIndexSearchField, "label"); - - String yagoClassesIndexUrl = options.fetch("solr.yago.classes.url"); - String yagoClassesIndexSearchField = options.fetch("solr.yago.classes.searchfield"); - SolrSearch yagoClassIndex = new SolrSearch(yagoClassesIndexUrl, yagoClassesIndexSearchField); - - class_index = new ThresholdSlidingSolrSearch(dbpediaClassIndex);// new HierarchicalSolrSearch(dbpediaClassIndex, yagoClassIndex); - - String propertiesIndexUrl = options.fetch("solr.properties.url"); - String propertiesIndexSearchField = options.fetch("solr.properties.searchfield"); - SolrSearch labelBasedPropertyIndex = new ThresholdSlidingSolrSearch(propertiesIndexUrl, propertiesIndexSearchField, "label", 1.0, 0.1); - - String boaPatternIndexUrl = options.fetch("solr.boa.properties.url"); - String boaPatternIndexSearchField = options.fetch("solr.boa.properties.searchfield"); - SolrSearch patternBasedPropertyIndex = new SolrSearch(boaPatternIndexUrl, boaPatternIndexSearchField, "nlr-no-var"); - - //first BOA pattern then label based -// property_index = new HierarchicalSolrSearch(patternBasedPropertyIndex, labelBasedPropertyIndex); - - //first label based then BOA pattern - property_index = new HierarchicalSolrSearch(labelBasedPropertyIndex, patternBasedPropertyIndex); - - int maxIndexResults = Integer.parseInt(options.fetch("solr.query.limit"), 10); - - maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); - cache.setMaxExecutionTimeInSeconds(maxQueryExecutionTimeInSeconds); - - ranking = Ranking.valueOf(options.get("learning.ranking", "similarity").toUpperCase()); - useRemoteEndpointValidation = options.get("learning.validationType", "remote").equals("remote") ? true : false; - stopIfQueryResultNotEmpty = Boolean.parseBoolean(options.get("learning.stopAfterFirstNonEmptyQueryResult", "true")); - maxTestedQueriesPerTemplate = Integer.parseInt(options.get("learning.maxTestedQueriesPerTemplate", "20")); - - String wordnetPath = options.get("wordnet.dictionary", "tbsl/dict"); - wordnetPath = this.getClass().getClassLoader().getResource(wordnetPath).getPath(); - System.setProperty("wordnet.database.dir", wordnetPath); - } - - public void setEndpoint(SparqlEndpoint endpoint){ - this.endpoint = endpoint; - Set<String> predicateFilters = new HashSet<String>(); - predicateFilters.add("http://dbpedia.org/ontology/wikiPageWikiLink"); - predicateFilters.add("http://dbpedia.org/property/wikiPageUsesTemplate"); - modelGenenerator = new ModelGenerator(endpoint, predicateFilters); - - reasoner = new SPARQLReasoner(new SparqlEndpointKS(endpoint)); - reasoner.setCache(cache); - reasoner.prepareSubsumptionHierarchy(); - } - - public void setQuestion(String question){ - this.question = question; - } - - public void setUseRemoteEndpointValidation(boolean useRemoteEndpointValidation){ - this.useRemoteEndpointValidation = useRemoteEndpointValidation; - } - - public int getMaxQueryExecutionTimeInSeconds() { - return maxQueryExecutionTimeInSeconds; - } - - public void setMaxQueryExecutionTimeInSeconds(int maxQueryExecutionTimeInSeconds) { - this.maxQueryExecutionTimeInSeconds = maxQueryExecutionTimeInSeconds; - } - - public int getMaxTestedQueriesPerTemplate() { - return maxTestedQueriesPerTemplate; - } - - public void setMaxTestedQueriesPerTemplate(int maxTestedQueriesPerTemplate) { - this.maxTestedQueriesPerTemplate = maxTestedQueriesPerTemplate; - } - - public void setRanking(Ranking ranking) { - this.ranking = ranking; - } - - private void reset(){ - learnedSPARQLQueries = new HashMap<String, Object>(); - resourcesURICache = new HashMap<String, SolrQueryResultSet>(); - classesURICache = new HashMap<String, SolrQueryResultSet>(); - propertiesURICache = new HashMap<String, SolrQueryResultSet>(); - template2Queries = new HashMap<Template, Collection<? extends Query>>(); - slot2URI = new HashMap<Slot, List<String>>(); - } - - public void learnSPARQLQueries() throws NoTemplateFoundException{ - reset(); - //generate SPARQL query templates - logger.info("Generating SPARQL query templates..."); - mon.start(); - templates = templateGenerator.buildTemplates(question); - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - if(templates.isEmpty()){ - throw new NoTemplateFoundException(); - } - logger.info("Templates:"); - for(Template t : templates){ - logger.info(t); - } - -// //generate SPARQL query candidates, but select only a fixed number per template -// template2Queries = getSPARQLQueryCandidates(templates, ranking); -// sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); - - //get the weighted query candidates - generatedQueries = getWeightedSPARQLQueries(templates); - sparqlQueryCandidates = new ArrayList<Query>(); - int i = 0; - for(WeightedQuery wQ : generatedQueries){ - System.out.println(wQ.explain()); - sparqlQueryCandidates.add(wQ.getQuery()); - if(i == maxTestedQueries){ - break; - } - i++; - } - - //test candidates - if(useRemoteEndpointValidation){ //on remote endpoint - validateAgainstRemoteEndpoint(sparqlQueryCandidates); - } else {//on local model - validateAgainstLocalModel(sparqlQueryCandidates); - } - - } - - public Set<WeightedQuery> getGeneratedQueries() { - return generatedQueries; - } - - public Set<WeightedQuery> getGeneratedQueries(int topN) { - Set<WeightedQuery> topNQueries = new TreeSet<WeightedQuery>(); - int max = Math.min(topN, generatedQueries.size()); - for(WeightedQuery wQ : generatedQueries){ - topNQueries.add(wQ); - if(topNQueries.size() == max){ - break; - } - } - return topNQueries; - } - - public List<String> getSPARQLQueries() throws NoTemplateFoundException{ - logger.info("Generating SPARQL query templates..."); - mon.start(); - templates = templateGenerator.buildTemplates(question); - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - if(templates.isEmpty()){ - throw new NoTemplateFoundException(); - } - logger.info("Templates:"); - for(Template t : templates){ - logger.info(t); - } - - //generate SPARQL query candidates - logger.info("Generating SPARQL query candidates..."); - mon.start(); - Map<Template, Collection<? extends Query>> template2Queries = getSPARQLQueryCandidates(templates, ranking); - sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); - - - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - - List<String> queries = new ArrayList<String>(); - for(Query q : sparqlQueryCandidates){ - queries.add(q.toString()); - } - - return queries; - } - - public Set<Template> getTemplates(){ - return templates; - } - - public List<String> getGeneratedSPARQLQueries(){ - List<String> queries = new ArrayList<String>(); - for(Query q : sparqlQueryCandidates){ - queries.add(q.toString()); - } - - return queries; - } - - public Map<Template, Collection<? extends Query>> getTemplates2SPARQLQueries(){ - return template2Queries; - } - - public Map<Slot, List<String>> getSlot2URIs(){ - return slot2URI; - } - - private Model getWorkingModel(List<String> resources){ - logger.info("Generating local model..."); - mon.start(); - Model workingModel = ModelFactory.createDefaultModel(); - Model model; - for(String resource : resources){ - model = modelGenenerator.createModel(resource, Strategy.CHUNKS, RECURSION_DEPTH); - workingModel.add(model); - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - logger.info("Local model contains " + workingModel.size() + " triples."); - return workingModel; - } - - private Map<Template,Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates, Ranking ranking){ - switch(ranking){ - case LUCENE: return getSPARQLQueryCandidatesSortedByLucene(templates); - case SIMILARITY: return getSPARQLQueryCandidatesSortedBySimilarity(templates); - case NONE: return getSPARQLQueryCandidates(templates); - default: return null; - } - } - - /* - private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - double alpha = 0.8; - double beta = 1 - alpha; - Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); - - Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - - Set<Allocation> allAllocations; - for(Template t : templates){ - allAllocations = new HashSet<Allocation>(); - - for(Slot slot : t.getSlots()){ - Set<Allocation> allocations = computeAllocation(slot); - allAllocations.addAll(allocations); - slot2Allocations.put(slot, allocations); - } - - int min = Integer.MAX_VALUE; - int max = Integer.MIN_VALUE; - for(Allocation a : allAllocations){ - if(a.getInDegree() < min){ - min = a.getInDegree(); - } - if(a.getInDegree() > max){ - max = a.getInDegree(); - } - } - for(Allocation a : allAllocations){ - double prominence = a.getInDegree()/(max-min); - a.setProminence(prominence); - - double score = alpha * a.getSimilarity() + beta * a.getProminence(); - a.setScore(score); - - } -// System.out.println(allAllocations); - - Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); - Query cleanQuery = t.getQuery(); - queries.add(new WeightedQuery(cleanQuery)); - - Set<WeightedQuery> tmp = new HashSet<WeightedQuery>(); - List<Slot> sortedSlots = new ArrayList<Slot>(); - Set<Slot> classSlots = new HashSet<Slot>(); - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.CLASS){ - sortedSlots.add(slot); - classSlots.add(slot); - } - } - for(Slot slot : t.getSlots()){ - if(!sortedSlots.contains(slot)){ - sortedSlots.add(slot); - } - } - for(Slot slot : sortedSlots){ - if(!slot2Allocations.get(slot).isEmpty()){ - for(Allocation a : slot2Allocations.get(slot)){ - for(WeightedQuery query : queries){ - //check if the query is possible - if(slot.getSlotType() == SlotType.SYMPROPERTY){ - Query reversedQuery = new Query(query.getQuery()); - reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); - - boolean drop = false; - for(SPARQL_Triple triple : reversedQuery.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : reversedQuery.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("RANGES: " + ranges); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + reversedQuery.toString()); - } - } - } - for(SPARQL_Triple typeTriple : reversedQuery.getRDFTypeTriples(subjectVar)){ -// System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); -// System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("DOMAINS: " + domains); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + reversedQuery.toString()); - } - } - } - } - - if(!drop){ - reversedQuery.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(reversedQuery); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - tmp.add(w); - } - - } - Query q = new Query(query.getQuery()); - - boolean drop = false; - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("RANGES: " + ranges); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + q.toString()); - } - } - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ -// System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); -// System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); - } - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// System.out.println("DOMAINS: " + domains); -// System.out.println("TYPES: " + allTypes); - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; - } else { - System.out.println("DROPPING: \n" + q.toString()); - } - } - } - } - } - - - if(!drop){ - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - tmp.add(w); - } - - - } - } - queries.clear(); - queries.addAll(tmp);System.out.println(tmp); - tmp.clear(); - } - - } - for(WeightedQuery q : queries){ - q.setScore(q.getScore()/t.getSlots().size()); - } - allQueries.addAll(queries); - List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); - qList.add(wQ.getQuery()); - } - template2Queries.put(t, qList); - } - return allQueries; - } - */ - - private void normProminenceValues(Set<Allocation> allocations){ - double min = 0; - double max = 0; - for(Allocation a : allocations){ - if(a.getProminence() < min){ - min = a.getProminence(); - } - if(a.getProminence() > max){ - max = a.getProminence(); - } - } - for(Allocation a : allocations){ - double prominence = a.getProminence()/(max-min); - a.setProminence(prominence); - } - } - - private void computeScore(Set<Allocation> allocations){ - double alpha = 0.8; - double beta = 1 - alpha; - - for(Allocation a : allocations){ - double score = alpha * a.getSimilarity() + beta * a.getProminence(); - a.setScore(score); - } - - } - - private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ - logger.info("Generating SPARQL query candidates..."); - - Map<Slot, Set<Allocation>> slot2Allocations2 = new TreeMap<Slot, Set<Allocation>>(new Comparator<Slot>() { - - @Override - public int compare(Slot o1, Slot o2) { - if(o1.getSlotType() == o2.getSlotType()){ - return o1.getToken().compareTo(o2.getToken()); - } else { - return -1; - } - } - }); - - - Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); - - Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); - - Set<Allocation> allocations; - - for(Template t : templates){ - logger.info("Processing template:\n" + t.toString()); - allocations = new TreeSet<Allocation>(); - - ExecutorService executor = Executors.newFixedThreadPool(t.getSlots().size()); - List<Future<SortedSet<Allocation>>> list = new ArrayList<Future<SortedSet<Allocation>>>(); - - for (Slot slot : t.getSlots()) { - Callable<SortedSet<Allocation>> worker = new SlotProcessor(slot); - Future<SortedSet<Allocation>> submit = executor.submit(worker); - list.add(submit); - } - -// for (Future<SortedSet<Allocation>> future : list) { -// try { -// future.get(); -// } catch (InterruptedException e) { -// e.printStackTrace(); -// } catch (ExecutionException e) { -// e.printStackTrace(); -// } -// } - - /*for(Slot slot : t.getSlots()){ - allocations = slot2Allocations2.get(slot); - if(allocations == null){ - allocations = computeAllocations(slot, 10); - slot2Allocations2.put(slot, allocations); - } - slot2Allocations.put(slot, allocations); - - //for tests add the property URI with http://dbpedia.org/property/ namespace - //TODO should be replaced by usage of a separate SOLR index - Set<Allocation> tmp = new HashSet<Allocation>(); - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(Allocation a : allocations){ - String uri = "http://dbpedia.org/property/" + a.getUri().substring(a.getUri().lastIndexOf("/")+1); - Allocation newA = new Allocation(uri, a.getSimilarity(), a.getProminence()); - newA.setScore(a.getScore()-0.000001); - tmp.add(newA); - } - } - allocations.addAll(tmp); - }*/ - - - Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); - Query cleanQuery = t.getQuery(); - queries.add(new WeightedQuery(cleanQuery)); - - Set<WeightedQuery> tmp = new TreeSet<WeightedQuery>(); - List<Slot> sortedSlots = new ArrayList<Slot>(); - Set<Slot> classSlots = new HashSet<Slot>(); - for(Slot slot : t.getSlots()){ - if(slot.getSlotType() == SlotType.CLASS){ - sortedSlots.add(slot); - classSlots.add(slot); - } - } - for(Slot slot : t.getSlots()){ - if(!sortedSlots.contains(slot)){ - sortedSlots.add(slot); - } - } - //add for each SYMPROPERTY Slot the reversed query - for(Slot slot : sortedSlots){ - for(WeightedQuery wQ : queries){ - if(slot.getSlotType() == SlotType.SYMPROPERTY){ - Query reversedQuery = new Query(wQ.getQuery()); - reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); - tmp.add(new WeightedQuery(reversedQuery)); - } - tmp.add(wQ); - } - queries.clear(); - queries.addAll(tmp); - tmp.clear(); - } - - for(Slot slot : sortedSlots){ - if(!slot2Allocations.get(slot).isEmpty()){ - for(Allocation a : slot2Allocations.get(slot)){ - for(WeightedQuery query : queries){ - Query q = new Query(query.getQuery()); - - boolean drop = false;/* - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(SPARQL_Triple triple : q.getTriplesWithVar(slot.getAnchor())){ - String objectVar = triple.getValue().getName(); - String subjectVar = triple.getVariable().getName(); -// System.out.println(triple); - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(objectVar)){ -// System.out.println(typeTriple); - if(isObjectProperty(a.getUri())){ - Set<String> ranges = getRanges(a.getUri()); -// System.out.println(a); - if(!ranges.isEmpty()){ - Set<String> allRanges = new HashSet<String>(); - for(String range : ranges){ - allRanges.addAll(getSuperClasses(range)); - } - allRanges.addAll(ranges); - allRanges.remove("http://www.w3.org/2002/07/owl#Thing"); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeURI); -// if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")){ -// System.out.println("RANGES: " + allRanges); -// System.out.println("TYPES: " + allTypes); -// } - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allRanges, allTypes)){ - drop = true; -// if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer") && q.toString().contains("/Software>")){ -// System.out.println("RANGES: " + allRanges); -// System.out.println("TYPES: " + allTypes); -// System.out.println("DROPPING: \n" + q.toString()); -// } - } else { - - } - } - } else { - drop = true; - } - - } - for(SPARQL_Triple typeTriple : q.getRDFTypeTriples(subjectVar)){ -// System.out.println(typeTriple); - Set<String> domains = getDomains(a.getUri()); -// System.out.println(a); - if(!domains.isEmpty()){ - Set<String> allDomains = new HashSet<String>(); - for(String domain : domains){ - allDomains.addAll(getSuperClasses(domain)); - } - allDomains.addAll(domains); - allDomains.remove("http://www.w3.org/2002/07/owl#Thing"); - String typeURI = typeTriple.getValue().getName().substring(1,typeTriple.getValue().getName().length()-1); - Set<String> allTypes = getSuperClasses(typeURI); - allTypes.add(typeTriple.getValue().getName()); -// if(typeURI.equals("http://dbpedia.org/ontology/Organisation") && a.getUri().equals("http://dbpedia.org/ontology/developer")){ -// System.out.println("DOMAINS: " + allDomains); -// System.out.println("TYPES: " + allTypes); -// } - - if(!org.mindswap.pellet.utils.SetUtils.intersects(allDomains, allTypes)){ - drop = true; -// System.out.println("DROPPING: \n" + q.toString()); - } else { - - } - } - } - } - }*/ - - - if(!drop){ - q.replaceVarWithURI(slot.getAnchor(), a.getUri()); - WeightedQuery w = new WeightedQuery(q); - double newScore = query.getScore() + a.getScore(); - w.setScore(newScore); - w.addAllocations(query.getAllocations()); - w.addAllocation(a); - tmp.add(w); - } - - - } - } - queries.clear(); - queries.addAll(tmp);//System.out.println(tmp); - tmp.clear(); - } - - } - for(WeightedQuery q : queries){ - q.setScore(q.getScore()/t.getSlots().size()); - } - allQueries.addAll(queries); - List<Query> qList = new ArrayList<Query>(); - for(WeightedQuery wQ : queries){//System.err.println(wQ.getQuery()); - qList.add(wQ.getQuery()); - } - template2Queries.put(t, qList); - } - logger.info("...done in "); - return allQueries; - } - -/* - * for(SPARQL_Triple triple : t.getQuery().getTriplesWithVar(slot.getAnchor())){System.out.println(triple); - for(SPARQL_Triple typeTriple : t.getQuery().getRDFTypeTriples(triple.getVariable().getName())){ - System.out.println(typeTriple); - for(Allocation a : allocations){ - Set<String> domains = getDomains(a.getUri()); - System.out.println(a); - System.out.println(domains); - for(Slot s : classSlots){ - if(s.getAnchor().equals(triple.getVariable().getName())){ - for(Allocation all : slot2Allocations.get(s)){ - if(!domains.contains(all.getUri())){ - System.out.println("DROP " + a); - } - } - } - } - } - - - } - */ - - private SortedSet<Allocation> computeAllocations(Slot slot){ - SortedSet<Allocation> allocations = new TreeSet<Allocation>(); - - SolrSearch index = getIndexBySlotType(slot); - - SolrQueryResultSet rs; - for(String word : slot.getWords()){ - if(slot.getSlotType() == SlotType.RESOURCE){ - rs = index.getResourcesWithScores(word, 250); - } else { - rs = index.getResourcesWithScores(word, 20); - } - - - //debugging -// for(Iterator<SolrQueryResultItem> iter = rs.getItems().iterator();iter.hasNext();){ -// SolrQueryResultItem item = iter.next(); -// if(exclusions.contains(item.getUri())){ -// iter.remove(); -// } -// } - - for(SolrQueryResultItem item : rs.getItems()){ - double similarity = Similarity.getSimilarity(word, item.getLabel()); - //get the labels of the redirects and compute the highest similarity - if(slot.getSlotType() == SlotType.RESOURCE){ - Set<String> labels = getRedirectLabels(item.getUri()); - for(String label : labels){ - double tmp = Similarity.getSimilarity(word, label); - if(tmp > similarity){ - similarity = tmp; - } - } - } - double prominence = getProminenceValue(item.getUri(), slot.getSlotType()); - allocations.add(new Allocation(item.getUri(), prominence, similarity)); - } - - } - - normProminenceValues(allocations); - - computeScore(allocations); - return new TreeSet<Allocation>(allocations); - } - - private Set<Allocation> computeAllocations(Slot slot, int limit){ - logger.info("Computing allocations for " + slot); - SortedSet<Allocation> allocations = computeAllocations(slot); - - if(allocations.isEmpty()){ - logger.info("...done."); - return allocations; - } - - ArrayList<Allocation> l = new ArrayList<Allocation>(allocations); - Collections.sort(l, new Comparator<Allocation>() { - - @Override - public int compare(Allocation o1, Allocation o2) { - double dif = o1.getScore() - o2.getScore(); - if(dif < 0){ - return 1; - } else if(dif > 0){ - return -1; - } else { - return o1.getUri().compareTo(o2.getUri()); - } - } - }); - logger.info("...done."); - return new TreeSet<Allocation>(l.subList(0, Math.min(limit, allocations.size()))); - } - - private Set<String> getRedirectLabels(String uri){ - Set<String> labels = new HashSet<String>(); - String query = String.format("SELECT ?label WHERE {?s <http://dbpedia.org/ontology/wikiPageRedirects> <%s>. ?s <%s> ?label.}", uri, RDFS.label.getURI()); - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - QuerySolution qs; - while(rs.hasNext()){ - qs = rs.next(); - labels.add(qs.getLiteral("label").getLexicalForm()); - - } - return labels; - } - - private double getProminenceValue(String uri, SlotType type){ - int cnt = 1; - String query = null; - if(type == SlotType.CLASS){ - query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; - } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ - query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; - } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ - query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; - } - query = String.format(query, uri); - - ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); - QuerySolution qs; - String projectionVar; - while(rs.hasNext()){ - qs = rs.next(); - projectionVar = qs.varNames().next(); - cnt = qs.get(projectionVar).asLiteral().getInt(); - } -// if(cnt == 0){ -// return 0; -// } -// return Math.log(cnt); - return cnt; - } - - - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - Set<Query> queries = new HashSet<Query>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - for(Template template : templates){ - queries = new HashSet<Query>(); - queries.add(template.getQuery()); - template2Queries.put(template, queries); - for(Slot slot : template.getSlots()){ - Set<Query> tmp = new HashSet<Query>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(Query query : queries){ - Query newQuery = new Query(query); - newQuery.replaceVarWithURI(var, item.getUri()); - tmp.add(newQuery); - } - } - if(!words.isEmpty()){ - queries.clear(); - queries.addAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return template2Queries; - } - - private Map<String, Float> getCandidateRatedSPARQLQueries(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - Map<String, Float> query2Score = new HashMap<String, Float>(); - - Query query; - for(Template template : templates){ - query = template.getQuery(); - query2Score.put(query.toString(), Float.valueOf(0)); - for(Slot slot : template.getSlots()){ - Map<String, Float> tmp = new HashMap<String, Float>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(Entry<String, Float> entry2 : query2Score.entrySet()){ - tmp.put(entry2.getKey().replace("?" + var, "<" + item.getUri() + ">"), item.getScore() + entry2.getValue()); - } - } - if(!words.isEmpty()){ - query2Score.clear(); - query2Score.putAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return query2Score; - } - - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidatesSortedByLucene(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - SortedSet<RatedQuery> ratedQueries = new TreeSet<RatedQuery>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - - Query query; - for(Template template : templates){ - query = template.getQuery(); - ratedQueries = new TreeSet<RatedQuery>(); - ratedQueries.add(new RatedQuery(query, 0)); - template2Queries.put(template, ratedQueries); - for(Slot slot : template.getSlots()){ - Set<RatedQuery> tmp = new HashSet<RatedQuery>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ - for(RatedQuery rQ : ratedQueries){ - RatedQuery newRQ = new RatedQuery(rQ, rQ.getScore()); - newRQ.replaceVarWithURI(var, item.getUri()); - newRQ.setScore(newRQ.getScore() + item.getScore()); - tmp.add(newRQ); - } - } - if(!words.isEmpty()){ - ratedQueries.clear(); - ratedQueries.addAll(tmp); - } - } - } - mon.stop(); - logger.info("Done in " + mon.getLastValue() + "ms."); - return template2Queries; - } - - private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidatesSortedBySimilarity(Set<Template> templates){ - logger.info("Generating candidate SPARQL queries..."); - mon.start(); - List<Query> queries = new ArrayList<Query>(); - Map<Template, Collection<? extends Query>> template2Queries = new HashMap<Template, Collection<? extends Query>>(); - List<String> uriCandidates; - for(Template template : templates){ - queries = new ArrayList<Query>(); - queries.add(template.getQuery()); - template2Queries.put(template, queries); - for(Slot slot : template.getSlots()){ - List<Query> tmp = new ArrayList<Query>(); - String var = slot.getAnchor(); - List<String> words = slot.getWords(); - SPARQL_Prefix prefix = null; - uriCandidates = getCandidateURIsSortedBySimilarity(slot); - for(S... [truncated message content] |