From: <mrp...@us...> - 2011-01-19 02:03:30
|
Revision: 4127 http://bigdata.svn.sourceforge.net/bigdata/?rev=4127&view=rev Author: mrpersonick Date: 2011-01-19 02:03:23 +0000 (Wed, 19 Jan 2011) Log Message: ----------- added support for exact match and prefix match from sparql Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2011-01-18 21:59:35 UTC (rev 4126) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2011-01-19 02:03:23 UTC (rev 4127) @@ -28,8 +28,6 @@ package com.bigdata.search; -import info.aduna.i18n.languagetag.IanaLanguageTag; - import java.io.IOException; import java.io.Reader; import java.io.StringReader; Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java 2011-01-18 21:59:35 UTC (rev 4126) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java 2011-01-19 02:03:23 UTC (rev 4127) @@ -14,6 +14,7 @@ import com.bigdata.rdf.internal.TermId; import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.internal.XSDDoubleIV; +import com.bigdata.rdf.lexicon.ITextIndexer; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; @@ -21,7 +22,6 @@ import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.BD; -import com.bigdata.rdf.store.IRawTripleStore; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.rule.IPredicate; import com.bigdata.relation.rule.ISolutionExpander; @@ -139,14 +139,28 @@ if (hiterator == null) { assert database!=null; assert query != null; - if (database.getLexiconRelation().getSearchEngine() == null) + + final ITextIndexer textNdx = + database.getLexiconRelation().getSearchEngine(); + + if (textNdx == null) throw new UnsupportedOperationException( "No free text index?"); + // final long begin = System.nanoTime(); - hiterator = database.getLexiconRelation() - .getSearchEngine().search(query.getLabel(), + + String s = query.getLabel(); + final boolean prefixMatch; + if (s.indexOf('*') >= 0) { + prefixMatch = true; + s = s.replaceAll("\\*", ""); + } else { + prefixMatch = false; + } + + hiterator = textNdx.search(s, query.getLanguage(), - false/* prefixMatch */, + prefixMatch, minRelevance == null ? 0d : minRelevance.doubleValue()/* minCosine */, maxHits == null ? 10000 : maxHits.intValue()+1/* maxRank */, 1000L/* timeout */, TimeUnit.MILLISECONDS); Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2011-01-18 21:59:35 UTC (rev 4126) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2011-01-19 02:03:23 UTC (rev 4127) @@ -925,6 +925,138 @@ } + { // exact match + + final String searchQuery = "brown cow"; + final double minRelevance = 0.0d; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + +// " ?o <"+BD.MIN_RELEVANCE+"> \""+minRelevance+"\" . " + +// " ?o <"+BD.MAX_HITS+"> \"5\" . " + + " filter regex(?o, \""+searchQuery+"\") " + + "} " + + "order by desc(?score)"; + + log.info("\n"+query); + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + log.info(i++ + ": " + result.next().toString()); + } + assertTrue("wrong # of results: " + i, i == 2); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + false, // prefixMatch + minRelevance, // minCosine + 10000, // maxRank (=maxResults + 1) + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + if (!o.getLabel().contains(searchQuery)) + continue; + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + log.info(bs); + answer.add(bs); + } + + compare(result, answer); + + } + + { // prefix match + + final String searchQuery = "bro*"; + final double minRelevance = 0.0d; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + +// " ?o <"+BD.MIN_RELEVANCE+"> \""+minRelevance+"\" . " + +// " ?o <"+BD.MAX_HITS+"> \"5\" . " + +// " filter regex(?o, \""+searchQuery+"\") " + + "} " + + "order by desc(?score)"; + + log.info("\n"+query); + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + log.info(i++ + ": " + result.next().toString()); + } + assertTrue("wrong # of results: " + i, i == 3); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + true, // prefixMatch + minRelevance, // minCosine + 10000, // maxRank (=maxResults + 1) + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + log.info(bs); + answer.add(bs); + } + + compare(result, answer); + + } + } finally { cxn.close(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |