From: <mrp...@us...> - 2010-12-21 19:30:57
Revision: 4034 http://bigdata.svn.sourceforge.net/bigdata/?rev=4034&view=rev Author: mrpersonick Date: 2010-12-21 19:30:51 +0000 (Tue, 21 Dec 2010) Log Message: ----------- added support for queries specifying text search maxHits and minRelevance Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java 2010-12-21 16:25:29 UTC (rev 4033) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java 2010-12-21 19:30:51 UTC (rev 4034) @@ -794,9 +794,10 @@ final StatementPattern sp = it.next().getKey(); final Value s = sp.getSubjectVar().getValue(); final Value p = sp.getPredicateVar().getValue(); - final Value o = sp.getObjectVar().getValue(); - if (s == null && p != null && o == null) { - if (BD.RELEVANCE.equals(p)) { + if (s == null && p != null) { + if (BD.RELEVANCE.equals(p) || + BD.MAX_HITS.equals(p) || + BD.MIN_RELEVANCE.equals(p)) { final Var sVar = sp.getSubjectVar(); Set<StatementPattern> metadata = searchMetadata2.get(sVar); if (metadata != null) { @@ -1507,15 +1508,14 @@ throw new IllegalArgumentException("not a valid magic search: " + sp); } - final ISolutionExpander expander = - new FreeTextSearchExpander(database, (Literal) objValue); - final Var subjVar = sp.getSubjectVar(); final IVariableOrConstant<IV> search = com.bigdata.relation.rule.Var.var(subjVar.getName()); IVariableOrConstant<IV> relevance = new Constant(DummyIV.INSTANCE); + Literal 
maxHits = null; + Literal minRelevance = null; for (StatementPattern meta : metadata) { if (!meta.getSubjectVar().equals(subjVar)) { @@ -1523,14 +1523,32 @@ } final Value pVal = meta.getPredicateVar().getValue(); final Var oVar = meta.getObjectVar(); - if (pVal == null || oVar.hasValue()) { + final Value oVal = oVar.getValue(); + if (pVal == null) { throw new IllegalArgumentException("illegal metadata: " + meta); } if (BD.RELEVANCE.equals(pVal)) { + if (oVar.hasValue()) { + throw new IllegalArgumentException("illegal metadata: " + meta); + } relevance = com.bigdata.relation.rule.Var.var(oVar.getName()); - } + } else if (BD.MAX_HITS.equals(pVal)) { + if (oVal == null || !(oVal instanceof Literal)) { + throw new IllegalArgumentException("illegal metadata: " + meta); + } + maxHits = (Literal) oVal; + } else if (BD.MIN_RELEVANCE.equals(pVal)) { + if (oVal == null || !(oVal instanceof Literal)) { + throw new IllegalArgumentException("illegal metadata: " + meta); + } + minRelevance = (Literal) oVal; + } } + final ISolutionExpander expander = + new FreeTextSearchExpander(database, (Literal) objValue, + maxHits, minRelevance); + return new SPOPredicate( new String[] { database.getSPORelation().getNamespace() }, -1, // partitionId Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java 2010-12-21 16:25:29 UTC (rev 4033) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java 2010-12-21 19:30:51 UTC (rev 4034) @@ -56,12 +56,18 @@ private final AbstractTripleStore database; - private final Literal query; + private final Literal query, maxHits, minRelevance; private Set<URI> graphs; public FreeTextSearchExpander(final AbstractTripleStore database, final Literal query) { + this(database, query, null, null); + } 
+ + public FreeTextSearchExpander(final AbstractTripleStore database, + final Literal query, final Literal maxHits, + final Literal minRelevance) { if (database == null) throw new IllegalArgumentException(); @@ -73,6 +79,10 @@ this.query = query; + this.maxHits = maxHits; + + this.minRelevance = minRelevance; + } public boolean backchain() { @@ -135,8 +145,10 @@ // final long begin = System.nanoTime(); hiterator = database.getLexiconRelation() .getSearchEngine().search(query.getLabel(), - query.getLanguage(), false/* prefixMatch */, - 0d/* minCosine */, 10000/* maxRank */, + query.getLanguage(), + false/* prefixMatch */, + minRelevance == null ? 0d : minRelevance.doubleValue()/* minCosine */, + maxHits == null ? 10000 : maxHits.intValue()+1/* maxRank */, 1000L/* timeout */, TimeUnit.MILLISECONDS); // hiterator = database.getSearchEngine().search // ( query.getLabel(), Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2010-12-21 16:25:29 UTC (rev 4033) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2010-12-21 19:30:51 UTC (rev 4034) @@ -677,7 +677,7 @@ } - public void testWithRelevance() throws Exception { + public void testWithMetadata() throws Exception { final BigdataSail sail = getSail(); try { @@ -755,16 +755,19 @@ " ?s <"+RDFS.LABEL+"> ?o . " + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + " ?o <"+BD.RELEVANCE+"> ?score . 
" + - "}"; + "} " + + "order by desc(?score)"; final TupleQuery tupleQuery = cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); tupleQuery.setIncludeInferred(true /* includeInferred */); TupleQueryResult result = tupleQuery.evaluate(); + int i = 0; while (result.hasNext()) { - System.err.println(result.next()); + System.err.println(i++ + ": " + result.next().toString()); } + assertTrue("wrong # of results", i == 7); result = tupleQuery.evaluate(); @@ -777,7 +780,7 @@ null, // languageCode false, // prefixMatch 0d, // minCosine - 10000, // maxRank + 10000, // maxRank (=maxResults + 1) 1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -788,16 +791,140 @@ final Literal score = vf.createLiteral(hit.getCosine()); final URI s = uris.get(id); final Literal o = literals.get(id); - answer.add(createBindingSet( + final BindingSet bs = createBindingSet( new BindingImpl("s", s), new BindingImpl("o", o), - new BindingImpl("score", score))); + new BindingImpl("score", score)); + System.err.println(bs); + answer.add(bs); } compare(result, answer); } + { + final String searchQuery = "how now brown cow"; + final int maxHits = 5; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + +// " ?o <"+BD.MIN_RELEVANCE+"> \"0.6\" . " + + " ?o <"+BD.MAX_HITS+"> \""+maxHits+"\" . 
" + + "} " + + "order by desc(?score)"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + System.err.println(i++ + ": " + result.next().toString()); + } + assertTrue("wrong # of results", i == 5); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + false, // prefixMatch + 0d, // minCosine + maxHits+1, // maxRank (=maxResults + 1) + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + System.err.println(bs); + answer.add(bs); + } + + compare(result, answer); + + } + + { + final String searchQuery = "how now brown cow"; + final double minRelevance = 0.6d; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + + " ?o <"+BD.MIN_RELEVANCE+"> \""+minRelevance+"\" . " + +// " ?o <"+BD.MAX_HITS+"> \"5\" . 
" + + "} " + + "order by desc(?score)"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + System.err.println(i++ + ": " + result.next().toString()); + } + assertTrue("wrong # of results", i == 3); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + false, // prefixMatch + minRelevance, // minCosine + 10000, // maxRank (=maxResults + 1) + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + System.err.println(bs); + answer.add(bs); + } + + compare(result, answer); + + } + } finally { cxn.close(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |