From: <lor...@us...> - 2011-11-01 13:26:21
|
Revision: 3346 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3346&view=rev Author: lorenz_b Date: 2011-11-01 13:26:11 +0000 (Tue, 01 Nov 2011) Log Message: ----------- Started new method to build the queries weighted by similarity and prominence. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-31 22:11:38 UTC (rev 3345) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-11-01 13:26:11 UTC (rev 3346) @@ -26,6 +26,7 @@ import org.dllearner.algorithm.tbsl.search.SolrQueryResultSet; import org.dllearner.algorithm.tbsl.search.SolrSearch; import org.dllearner.algorithm.tbsl.search.ThresholdSlidingSolrSearch; +import org.dllearner.algorithm.tbsl.sparql.Allocation; import org.dllearner.algorithm.tbsl.sparql.Query; import org.dllearner.algorithm.tbsl.sparql.RatedQuery; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; @@ -33,8 +34,10 @@ import org.dllearner.algorithm.tbsl.sparql.Slot; import org.dllearner.algorithm.tbsl.sparql.SlotType; import org.dllearner.algorithm.tbsl.sparql.Template; +import org.dllearner.algorithm.tbsl.sparql.WeightedQuery; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Prefixes; +import org.dllearner.algorithm.tbsl.util.Similarity; import org.dllearner.algorithm.tbsl.util.SolrQueryResultStringSimilarityComparator; import org.dllearner.algorithm.tbsl.util.StringSimilarityComparator; import org.dllearner.core.ComponentInitException; @@ -238,10 +241,17 @@ logger.info(t); } - //generate SPARQL query candidates, but select only a fixed number per template - template2Queries = getSPARQLQueryCandidates(templates, ranking); - sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); +// //generate SPARQL query candidates, but select only a fixed number per template +// template2Queries = getSPARQLQueryCandidates(templates, ranking); +// sparqlQueryCandidates = getNBestQueryCandidatesForTemplates(template2Queries); + //get the weighted query candidates + Set<WeightedQuery> weightedQueries = getWeightedSPARQLQueries(templates); + sparqlQueryCandidates = new ArrayList<Query>(); + for(WeightedQuery wQ : weightedQueries){ + sparqlQueryCandidates.add(wQ.getQuery()); + } + //test candidates if(useRemoteEndpointValidation){ //on remote endpoint validateAgainstRemoteEndpoint(sparqlQueryCandidates); @@ -328,6 +338,122 @@ } } + private Set<WeightedQuery> getWeightedSPARQLQueries(Set<Template> templates){ + double alpha = 0.7; + double beta = 1 - alpha; + Map<Slot, Set<Allocation>> slot2Allocations = new HashMap<Slot, Set<Allocation>>(); + + Set<WeightedQuery> allQueries = new TreeSet<WeightedQuery>(); + + Set<Allocation> allAllocations; + for(Template t : templates){ + allAllocations = new HashSet<Allocation>(); + for(Slot slot : t.getSlots()){ + Set<Allocation> allocations = computeAllocation(slot); + allAllocations.addAll(allocations); + slot2Allocations.put(slot, allocations); + } + + int min = Integer.MAX_VALUE; + int max = Integer.MIN_VALUE; + for(Allocation a : allAllocations){ + if(a.getInDegree() < min){ + min = a.getInDegree(); + } + if(a.getInDegree() > max){ + max = a.getInDegree(); + } + } + for(Allocation a : allAllocations){ + double prominence = a.getInDegree()/(max-min); + a.setProminence(prominence); + + double score = alpha * a.getSimilarity() + beta * a.getProminence(); + a.setScore(score); + + } +// System.out.println(allAllocations); + + Set<WeightedQuery> queries = new HashSet<WeightedQuery>(); + Query cleanQuery = t.getQuery(); + queries.add(new WeightedQuery(cleanQuery)); + + Set<WeightedQuery> tmp = new HashSet<WeightedQuery>(); + for(Slot slot : t.getSlots()){ + for(Allocation a : slot2Allocations.get(slot)){ + for(WeightedQuery query : queries){ + if(slot.getSlotType() == SlotType.SYMPROPERTY){ + Query reversedQuery = new Query(query.getQuery()); + reversedQuery.getTriplesWithVar(slot.getAnchor()).iterator().next().reverse(); + reversedQuery.replaceVarWithURI(slot.getAnchor(), a.getUri()); + WeightedQuery w = new WeightedQuery(reversedQuery); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + tmp.add(w); + } + Query q = new Query(query.getQuery()); + q.replaceVarWithURI(slot.getAnchor(), a.getUri()); + WeightedQuery w = new WeightedQuery(q); + double newScore = query.getScore() + a.getScore(); + w.setScore(newScore); + tmp.add(w); + } + } + queries.clear(); + queries.addAll(tmp); + tmp.clear(); + } + for(WeightedQuery q : queries){ + q.setScore(q.getScore()/t.getSlots().size()); + } + allQueries.addAll(queries); + } + return allQueries; + } + + private Set<Allocation> computeAllocation(Slot slot){ + Set<Allocation> allocations = new HashSet<Allocation>(); + + SolrSearch index = getIndexBySlotType(slot); + + SolrQueryResultSet rs; + for(String word : slot.getWords()){ + rs = index.getResourcesWithScores(word, 10); + + for(SolrQueryResultItem item : rs.getItems()){ + int prominence = getProminenceValue(item.getUri(), slot.getSlotType()); + double similarity = Similarity.getSimilarity(word, item.getLabel()); + allocations.add(new Allocation(item.getUri(), prominence, similarity)); + } + + } + + return allocations; + } + + private int getProminenceValue(String uri, SlotType type){ + int cnt = 1; + String query = null; + if(type == SlotType.CLASS){ + query = "SELECT COUNT(?s) WHERE {?s a <%s>}"; + } else if(type == SlotType.PROPERTY || type == SlotType.SYMPROPERTY){ + query = "SELECT COUNT(*) WHERE {?s <%s> ?o}"; + } else if(type == SlotType.RESOURCE || type == SlotType.UNSPEC){ + query = "SELECT COUNT(*) WHERE {?s ?p <%s>}"; + } + query = String.format(query, uri); + + ResultSet rs = SparqlQuery.convertJSONtoResultSet(cache.executeSelectQuery(endpoint, query)); + QuerySolution qs; + String projectionVar; + while(rs.hasNext()){ + qs = rs.next(); + projectionVar = qs.varNames().next(); + cnt = qs.get(projectionVar).asLiteral().getInt(); + } + return cnt; + } + private Map<Template, Collection<? extends Query>> getSPARQLQueryCandidates(Set<Template> templates){ logger.info("Generating candidate SPARQL queries..."); mon.start(); @@ -483,11 +609,10 @@ return template2Queries; } - private Set<String> getCandidateURIs(Slot slot){ + private SolrQueryResultSet getCandidateURIs(Slot slot, int limit){ logger.info("Generating candidate URIs for " + slot.getWords() + "..."); mon.start(); SolrSearch index = null; - Set<String> uris = new HashSet<String>(); if(slot.getSlotType() == SlotType.CLASS){ index = class_index; } else if(slot.getSlotType() == SlotType.PROPERTY){ @@ -495,14 +620,13 @@ } else if(slot.getSlotType() == SlotType.RESOURCE){ index = resource_index; } + SolrQueryResultSet rs = new SolrQueryResultSet(); for(String word : slot.getWords()){ - uris.addAll(index.getResources("label:" + word)); - + rs.add(index.getResourcesWithScores(word, limit)); } mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); - logger.info("Candidate URIs: " + uris); - return uris; + return rs; } private List<String> getCandidateURIsSortedBySimilarity(Slot slot){ @@ -641,7 +765,7 @@ index = resource_index; sorted = true; } - SolrQueryResultSet resultSet = null; + SolrQueryResultSet resultSet = new SolrQueryResultSet(); for(String word : slot.getWords()){ resultSet.add(index.getResourcesWithScores("label:" + word, sorted)); } @@ -777,6 +901,8 @@ return resources; } + + /** * @param args @@ -789,7 +915,7 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Who developed the video game World of Warcraft?"; + String question = "Give me all films produced by Hal Roach?"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java 2011-11-01 13:26:11 UTC (rev 3346) @@ -0,0 +1,57 @@ +package org.dllearner.algorithm.tbsl.sparql; + + +public class Allocation { + + private String uri; + private int inDegree; + + private double similarity; + private double prominence; + + private double score; + + public Allocation(String uri, int inDegree, double similarity) { + this.uri = uri; + this.inDegree = inDegree; + this.similarity = similarity; + } + + public String getUri() { + return uri; + } + + public int getInDegree() { + return inDegree; + } + + public double getSimilarity() { + return similarity; + } + + public double getProminence() { + return prominence; + } + + public void setProminence(double prominence) { + this.prominence = prominence; + } + + public double getScore() { + return score; + } + + public void setScore(double score) { + this.score = score; + } + + @Override + public String toString() { + return uri + "(similarity: " + similarity + "; prominence: " + inDegree + ")"; + } + + + + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Allocation.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java 2011-11-01 13:26:11 UTC (rev 3346) @@ -0,0 +1,46 @@ +package org.dllearner.algorithm.tbsl.sparql; + +public class WeightedQuery implements Comparable<WeightedQuery>{ + + private double score; + private Query query; + + public WeightedQuery(Query query, double score) { + super(); + this.score = score; + this.query = query; + } + + public WeightedQuery(Query query) { + this(query, 0); + } + + public double getScore() { + return score; + } + + public void setScore(double score) { + this.score = score; + } + + public Query getQuery() { + return query; + } + + @Override + public int compareTo(WeightedQuery o) { + if(o.getScore() < this.score){ + return -1; + } else if(o.getScore() > this.score){ + return 1; + } else return query.toString().compareTo(o.getQuery().toString()); + } + + @Override + public String toString() { + return query.toString() + "\n(Score: " + score + ")"; + } + + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/WeightedQuery.java ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |