From: <lor...@us...> - 2011-10-26 13:49:04
|
Revision: 3326 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3326&view=rev Author: lorenz_b Date: 2011-10-26 13:48:52 +0000 (Wed, 26 Oct 2011) Log Message: ----------- Added wrapper class for SOLR search result. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java Added Paths: ----------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-26 13:42:40 UTC (rev 3325) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -22,6 +22,8 @@ import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; import org.dllearner.algorithm.tbsl.search.HierarchicalSolrSearch; +import org.dllearner.algorithm.tbsl.search.SolrQueryResultItem; +import org.dllearner.algorithm.tbsl.search.SolrQueryResultSet; import org.dllearner.algorithm.tbsl.search.SolrSearch; import org.dllearner.algorithm.tbsl.search.ThresholdSlidingSolrSearch; import org.dllearner.algorithm.tbsl.sparql.Query; @@ -33,6 +35,7 @@ import org.dllearner.algorithm.tbsl.sparql.Template; import org.dllearner.algorithm.tbsl.templator.Templator; import org.dllearner.algorithm.tbsl.util.Prefixes; +import org.dllearner.algorithm.tbsl.util.SolrQueryResultStringSimilarityComparator; import org.dllearner.algorithm.tbsl.util.StringSimilarityComparator; import org.dllearner.core.ComponentInitException; import org.dllearner.core.Oracle; @@ -86,9 +89,9 @@ private Oracle oracle; - private Map<String, List<String>> resourcesURICache; - private Map<String, List<String>> classesURICache; - private Map<String, List<String>> propertiesURICache; + private Map<String, SolrQueryResultSet> resourcesURICache; + private Map<String, SolrQueryResultSet> classesURICache; + private Map<String, SolrQueryResultSet> propertiesURICache; private Map<String, Object> learnedSPARQLQueries; private Set<Template> templates; @@ -211,9 +214,9 @@ private void reset(){ learnedSPARQLQueries = new HashMap<String, Object>(); - resourcesURICache = new HashMap<String, List<String>>(); - classesURICache = new HashMap<String, List<String>>(); - propertiesURICache = new HashMap<String, List<String>>(); + resourcesURICache = new HashMap<String, SolrQueryResultSet>(); + classesURICache = new HashMap<String, SolrQueryResultSet>(); + propertiesURICache = new HashMap<String, SolrQueryResultSet>(); template2Queries = new HashMap<Template, Collection<? extends Query>>(); slot2URI = new HashMap<Slot, List<String>>(); } @@ -337,10 +340,10 @@ Set<Query> tmp = new HashSet<Query>(); String var = slot.getAnchor(); List<String> words = slot.getWords(); - for(Entry<String, Float> entry1 : getCandidateURIsWithScore(slot).entrySet()){ + for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ for(Query query : queries){ Query newQuery = new Query(query); - newQuery.replaceVarWithURI(var, entry1.getKey()); + newQuery.replaceVarWithURI(var, item.getUri()); tmp.add(newQuery); } } @@ -368,9 +371,9 @@ Map<String, Float> tmp = new HashMap<String, Float>(); String var = slot.getAnchor(); List<String> words = slot.getWords(); - for(Entry<String, Float> entry1 : getCandidateURIsWithScore(slot).entrySet()){ + for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ for(Entry<String, Float> entry2 : query2Score.entrySet()){ - tmp.put(entry2.getKey().replace("?" + var, "<" + entry1.getKey() + ">"), Float.valueOf(entry1.getValue()+entry2.getValue())); + tmp.put(entry2.getKey().replace("?" + var, "<" + item.getUri() + ">"), item.getScore() + entry2.getValue()); } } if(!words.isEmpty()){ @@ -400,11 +403,11 @@ Set<RatedQuery> tmp = new HashSet<RatedQuery>(); String var = slot.getAnchor(); List<String> words = slot.getWords(); - for(Entry<String, Float> entry1 : getCandidateURIsWithScore(slot).entrySet()){ + for(SolrQueryResultItem item : getCandidateURIsWithScore(slot).getItems()){ for(RatedQuery rQ : ratedQueries){ RatedQuery newRQ = new RatedQuery(rQ, rQ.getScore()); - newRQ.replaceVarWithURI(var, entry1.getKey()); - newRQ.setScore(newRQ.getScore()+entry1.getValue()); + newRQ.replaceVarWithURI(var, item.getUri()); + newRQ.setScore(newRQ.getScore() + item.getScore()); tmp.add(newRQ); } } @@ -508,10 +511,10 @@ //get the appropriate index based on slot type SolrSearch index = getIndexBySlotType(slot); //get the appropriate cache for URIs to avoid redundant queries to index - Map<String, List<String>> uriCache = getCacheBySlotType(slot); + Map<String, SolrQueryResultSet> uriCache = getCacheBySlotType(slot); - SortedSet<String> tmp; - List<String> uris; + SortedSet<SolrQueryResultItem> tmp; + SolrQueryResultSet rs; //prune the word list only when slot type is not RESOURCE List<String> words; @@ -523,16 +526,18 @@ } for(String word : words){ - tmp = new TreeSet<String>(new StringSimilarityComparator(word)); - uris = uriCache.get(word); + tmp = new TreeSet<SolrQueryResultItem>(new SolrQueryResultStringSimilarityComparator(word)); + rs = uriCache.get(word); - if(uris == null){ - uris = index.getResources(word, 5); - uriCache.put(word, uris); + if(rs == null){ + rs = index.getResourcesWithScores(word, 50); + uriCache.put(word, rs); } - tmp.addAll(uris); - sortedURIs.addAll(tmp); + tmp.addAll(rs.getItems()); + for(SolrQueryResultItem item : tmp){ + sortedURIs.add(item.getUri()); + } tmp.clear(); } @@ -600,8 +605,8 @@ return index; } - private Map<String, List<String>> getCacheBySlotType(Slot slot){ - Map<String, List<String>> cache = null; + private Map<String, SolrQueryResultSet> getCacheBySlotType(Slot slot){ + Map<String, SolrQueryResultSet> cache = null; SlotType type = slot.getSlotType(); if(type == SlotType.CLASS){ cache = classesURICache; @@ -613,7 +618,7 @@ return cache; } - private Map<String, Float> getCandidateURIsWithScore(Slot slot){ + private SolrQueryResultSet getCandidateURIsWithScore(Slot slot){ logger.info("Generating candidate URIs for " + slot.getWords() + "..."); mon.start(); SolrSearch index = null; @@ -627,13 +632,14 @@ index = resource_index; sorted = true; } + SolrQueryResultSet resultSet = null; for(String word : slot.getWords()){ - uri2Score.putAll(index.getResourcesWithScores("label:" + word, sorted)); + resultSet.add(index.getResourcesWithScores("label:" + word, sorted)); } mon.stop(); logger.info("Done in " + mon.getLastValue() + "ms."); logger.info("Candidate URIs: " + uri2Score.keySet()); - return uri2Score; + return resultSet; } private List<Query> getNBestQueryCandidatesForTemplates(Map<Template, Collection<? extends Query>> template2Queries){ @@ -774,7 +780,7 @@ // Logger.getLogger(DefaultHttpParams.class).setLevel(Level.OFF); // Logger.getLogger(HttpClient.class).setLevel(Level.OFF); // Logger.getLogger(HttpMethodBase.class).setLevel(Level.OFF); - String question = "Who wrote the book The pillars of the Earth?"; + String question = "Is Natalie Portman an actress?"; // String question = "Give me all books written by authors influenced by Ernest Hemingway."; SPARQLTemplateBasedLearner learner = new SPARQLTemplateBasedLearner(); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-10-26 13:42:40 UTC (rev 3325) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/ltag/parser/Preprocessor.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -24,8 +24,8 @@ public Preprocessor(boolean n) { USE_NER = n; if (USE_NER) { - ner = new LingPipeNER(true); //not case sensitive best solution? -// ner = new DBpediaSpotlightNER(); +// ner = new LingPipeNER(true); //not case sensitive best solution? + ner = new DBpediaSpotlightNER(); } } Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -0,0 +1,67 @@ +package org.dllearner.algorithm.tbsl.search; + +public class SolrQueryResultItem { + + private String label; + private String uri; + private float score; + + public SolrQueryResultItem(String label, String uri) { + this(label, uri, -1); + } + + public SolrQueryResultItem(String label, String uri, float score) { + super(); + this.label = label; + this.uri = uri; + this.score = score; + } + + public String getLabel() { + return label; + } + + public String getUri() { + return uri; + } + + public float getScore() { + return score; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((label == null) ? 0 : label.hashCode()); + result = prime * result + Float.floatToIntBits(score); + result = prime * result + ((uri == null) ? 0 : uri.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SolrQueryResultItem other = (SolrQueryResultItem) obj; + if (label == null) { + if (other.label != null) + return false; + } else if (!label.equals(other.label)) + return false; + if (Float.floatToIntBits(score) != Float.floatToIntBits(other.score)) + return false; + if (uri == null) { + if (other.uri != null) + return false; + } else if (!uri.equals(other.uri)) + return false; + return true; + } + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultItem.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -0,0 +1,32 @@ +package org.dllearner.algorithm.tbsl.search; + +import java.util.HashSet; +import java.util.Set; + +public class SolrQueryResultSet { + + private Set<SolrQueryResultItem> items; + + public SolrQueryResultSet() { + items = new HashSet<SolrQueryResultItem>(); + } + + public SolrQueryResultSet(Set<SolrQueryResultItem> items) { + this.items = items; + } + + public Set<SolrQueryResultItem> getItems() { + return items; + } + + public void addItems(Set<SolrQueryResultItem> items) { + this.items.addAll(items); + } + + public void add(SolrQueryResultSet rs) { + this.items.addAll(rs.getItems()); + } + + + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrQueryResultSet.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-10-26 13:42:40 UTC (rev 3325) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -3,8 +3,10 @@ import java.net.MalformedURLException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; @@ -90,29 +92,54 @@ return resources; } - public Map<String, Float> getResourcesWithScores(String queryString) { + protected SolrQueryResultSet findResourcesWithScores(String queryString, int limit, int offset, boolean sorted){ + Set<SolrQueryResultItem> items = new HashSet<SolrQueryResultItem>(); + + QueryResponse response; + try { + SolrQuery query = new SolrQuery((searchField != null) ? searchField + ":" + queryString : queryString); + query.setRows(limit); + query.setStart(offset); + query.addField("score"); + if(sorted){ + query.addSortField("score", SolrQuery.ORDER.desc); + query.addSortField( "pagerank", SolrQuery.ORDER.desc ); + } + response = server.query(query); + SolrDocumentList docList = response.getResults(); + lastTotalHits = (int) docList.getNumFound(); + + for(SolrDocument d : docList){ + items.add(new SolrQueryResultItem((String) d.get("label"), (String) d.get("uri"), (Float) d.get("score"))); + } + } catch (SolrServerException e) { + e.printStackTrace(); + } + return new SolrQueryResultSet(items); + } + + public SolrQueryResultSet getResourcesWithScores(String queryString) { return getResourcesWithScores(queryString, hitsPerPage); } - public Map<String, Float> getResourcesWithScores(String queryString, boolean sorted) { + public SolrQueryResultSet getResourcesWithScores(String queryString, boolean sorted) { return getResourcesWithScores(queryString, hitsPerPage); } - public Map<String, Float> getResourcesWithScores(String queryString, int limit) { + public SolrQueryResultSet getResourcesWithScores(String queryString, int limit) { return getResourcesWithScores(queryString, limit, 0, false); } - public Map<String, Float> getResourcesWithScores(String queryString, int limit, boolean sorted) { + public SolrQueryResultSet getResourcesWithScores(String queryString, int limit, boolean sorted) { return getResourcesWithScores(queryString, limit, 0, sorted); } - public Map<String, Float> getResourcesWithScores(String queryString, int limit, int offset, boolean sorted) { - Map<String, Float> resource2ScoreMap = new HashMap<String, Float>(); + public SolrQueryResultSet getResourcesWithScores(String queryString, int limit, int offset, boolean sorted) { + Set<SolrQueryResultItem> items = new HashSet<SolrQueryResultItem>(); QueryResponse response; try { - SolrQuery query = new SolrQuery(); - query.setQuery(queryString); + SolrQuery query = new SolrQuery((searchField != null) ? searchField + ":" + queryString : queryString); query.setRows(hitsPerPage); query.setStart(offset); query.addField("score"); @@ -123,13 +150,14 @@ response = server.query(query); SolrDocumentList docList = response.getResults(); lastTotalHits = (int) docList.getNumFound(); + for(SolrDocument d : docList){ - resource2ScoreMap.put((String) d.get("uri"), (Float) d.get("score")); + items.add(new SolrQueryResultItem((String) d.get("label"), (String) d.get("uri"), (Float) d.get("score"))); } } catch (SolrServerException e) { e.printStackTrace(); } - return resource2ScoreMap; + return new SolrQueryResultSet(items); } @Override @@ -141,5 +169,5 @@ public void setHitsPerPage(int hitsPerPage) { this.hitsPerPage = hitsPerPage; } - + } Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-10-26 13:42:40 UTC (rev 3325) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -51,4 +51,23 @@ return resources; } + @Override + public SolrQueryResultSet getResourcesWithScores(String queryString, int limit, int offset, boolean sorted) { + SolrQueryResultSet rs = new SolrQueryResultSet(); + + double threshold = 1; + + String queryWithThreshold = queryString; + while(rs.getItems().size() < limit && threshold >= minThreshold){ + if(threshold < 1){ + queryWithThreshold = queryString + "~" + format.format(threshold); + } + + rs.add(findResourcesWithScores(queryWithThreshold, limit - rs.getItems().size(), 0, sorted)); + threshold -= step; + } + + return rs; + } + } \ No newline at end of file Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-10-26 13:42:40 UTC (rev 3325) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/sparql/Query.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -97,7 +97,7 @@ Set<SPARQL_Filter> filters = new HashSet<SPARQL_Filter>(); for(SPARQL_Filter filter : query.getFilters()){ for(SPARQL_Pair term : filter.getTerms()){ - + filters.add(new SPARQL_Filter(term)); } } this.filter = filters; Added: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java (rev 0) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java 2011-10-26 13:48:52 UTC (rev 3326) @@ -0,0 +1,29 @@ +package org.dllearner.algorithm.tbsl.util; + +import java.util.Comparator; + +import org.dllearner.algorithm.tbsl.search.SolrQueryResultItem; + +public class SolrQueryResultStringSimilarityComparator implements Comparator<SolrQueryResultItem>{ + private String s; + + public SolrQueryResultStringSimilarityComparator(String s) { + this.s = s; + } + + @Override + public int compare(SolrQueryResultItem item1, SolrQueryResultItem item2) { + + double sim1 = Similarity.getSimilarity(s, item1.getLabel()); + double sim2 = Similarity.getSimilarity(s, item2.getLabel()); + + if(sim1 < sim2){ + return 1; + } else if(sim1 > sim2){ + return -1; + } else { + return item1.getLabel().compareTo(item2.getLabel()); + } + } + +} Property changes on: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/util/SolrQueryResultStringSimilarityComparator.java ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |