From: <lor...@us...> - 2011-09-29 08:47:21
|
Revision: 3291 http://dl-learner.svn.sourceforge.net/dl-learner/?rev=3291&view=rev Author: lorenz_b Date: 2011-09-29 08:47:15 +0000 (Thu, 29 Sep 2011) Log Message: ----------- Integrated new indexes in learning process. Modified Paths: -------------- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-09-28 18:15:59 UTC (rev 3290) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/learning/SPARQLTemplateBasedLearner.java 2011-09-29 08:47:15 UTC (rev 3291) @@ -26,7 +26,9 @@ import org.dllearner.algorithm.qtl.util.ModelGenerator.Strategy; import org.dllearner.algorithm.tbsl.nlp.Lemmatizer; import org.dllearner.algorithm.tbsl.nlp.LingPipeLemmatizer; +import org.dllearner.algorithm.tbsl.search.HierarchicalSolrSearch; import org.dllearner.algorithm.tbsl.search.SolrSearch; +import org.dllearner.algorithm.tbsl.search.ThresholdSlidingSolrSearch; import org.dllearner.algorithm.tbsl.sparql.Query; import org.dllearner.algorithm.tbsl.sparql.RatedQuery; import org.dllearner.algorithm.tbsl.sparql.SPARQL_Prefix; @@ -129,20 +131,22 @@ private void init(Options options){ String resourcesIndexUrl = options.fetch("solr.resources.url"); String resourcesIndexSearchField = options.fetch("solr.resources.searchfield"); - resource_index = new SolrSearch(resourcesIndexUrl, resourcesIndexSearchField); + resource_index = new ThresholdSlidingSolrSearch(resourcesIndexUrl, resourcesIndexSearchField, 1.0, 0.1); String classesIndexUrl = options.fetch("solr.classes.url"); String classesIndexSearchField = options.fetch("solr.classes.searchfield"); - class_index = new SolrSearch(classesIndexUrl, classesIndexSearchField); + class_index = new ThresholdSlidingSolrSearch(classesIndexUrl, classesIndexSearchField, 1.0, 0.1); String propertiesIndexUrl = options.fetch("solr.properties.url"); String propertiesIndexSearchField = options.fetch("solr.properties.searchfield"); - property_index = new SolrSearch(propertiesIndexUrl, propertiesIndexSearchField); + SolrSearch labelBasedPropertyIndex = new SolrSearch(propertiesIndexUrl, propertiesIndexSearchField); String boaPatternIndexUrl = options.fetch("solr.boa.properties.url"); String boaPatternIndexSearchField = options.fetch("solr.boa.properties.searchfield"); - boa_pattern_property_index = new SolrSearch(boaPatternIndexUrl, boaPatternIndexSearchField); + SolrSearch patternBasedPropertyIndex = new SolrSearch(boaPatternIndexUrl, boaPatternIndexSearchField); + property_index = new HierarchicalSolrSearch(patternBasedPropertyIndex, labelBasedPropertyIndex); + int maxIndexResults = Integer.parseInt(options.fetch("solr.query.limit"), 10); maxQueryExecutionTimeInSeconds = Integer.parseInt(options.get("sparql.query.maxExecutionTimeInSeconds", "20")); @@ -507,41 +511,18 @@ words = pruneList(slot.getWords()); } - if(slot.getSlotType() == SlotType.PROPERTY || slot.getSlotType() == SlotType.SYMPROPERTY){ - for(String word : words){ - tmp = new TreeSet<String>(new StringSimilarityComparator(word)); - uris = uriCache.get(word); - index = boa_pattern_property_index; - if(uris == null){ - uris = index.getResources(word); - uriCache.put(word, uris); - } - index = property_index; - if(uris.size() < 10){ - uris.addAll(index.getResources(word)); - } - if(uris.size() < 10){ - uris.addAll(index.getResources("" + word + "~0.8")); - } - tmp.addAll(uris); - sortedURIs.addAll(tmp); - tmp.clear(); + for(String word : words){ + tmp = new TreeSet<String>(new StringSimilarityComparator(word)); + uris = uriCache.get(word); + + if(uris == null){ + uris = index.getResources(word, 5); + uriCache.put(word, uris); } - } else { - for(String word : words){ - tmp = new TreeSet<String>(new StringSimilarityComparator(word)); - uris = uriCache.get(word); - if(uris == null){ - uris = index.getResources(word); - uriCache.put(word, uris); - } - if(uris.size() < 10){ - uris.addAll(index.getResources("" + word + "~0.7")); - } - tmp.addAll(uris); - sortedURIs.addAll(tmp); - tmp.clear(); - } + + tmp.addAll(uris); + sortedURIs.addAll(tmp); + tmp.clear(); } slot2URI.put(slot, sortedURIs); Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-09-28 18:15:59 UTC (rev 3290) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/SolrSearch.java 2011-09-29 08:47:15 UTC (rev 3291) @@ -54,6 +54,10 @@ @Override public List<String> getResources(String queryString, int limit, int offset) { + return findResources(queryString, limit, offset); + } + + protected List<String> findResources(String queryString, int limit, int offset){ List<String> resources = new ArrayList<String>(); QueryResponse response; try { Modified: trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java =================================================================== --- trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-09-28 18:15:59 UTC (rev 3290) +++ trunk/components-ext/src/main/java/org/dllearner/algorithm/tbsl/search/ThresholdSlidingSolrSearch.java 2011-09-29 08:47:15 UTC (rev 3291) @@ -28,12 +28,14 @@ double threshold = 1; - while(resources.size() < limit){ - resources.addAll(getResources(queryString + "~" + threshold, limit - resources.size())); + String queryWithThreshold = queryString; + while(resources.size() < limit && threshold >= minThreshold){ + if(threshold < 1){ + queryWithThreshold = queryString + "~" + threshold; + } + System.out.println(queryWithThreshold); + resources.addAll(findResources(queryWithThreshold, limit - resources.size(), 0)); threshold -= step; - if(threshold < minThreshold){ - break; - } } @@ -42,4 +44,4 @@ -} +} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |