From: <br...@us...> - 2007-10-30 10:57:17
|
Revision: 6823 http://exist.svn.sourceforge.net/exist/?rev=6823&view=rev Author: brihaye Date: 2007-10-30 03:57:12 -0700 (Tue, 30 Oct 2007) Log Message: ----------- introduced hints in search methods. One more step to go : unify search methods. Modified Paths: -------------- branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/indexing/fulltext/FTIndexWorker.java branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/FulltextSearch.java branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/FuzzyMatchAll.java branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/MatchRegexp.java Modified: branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/indexing/fulltext/FTIndexWorker.java =================================================================== --- branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/indexing/fulltext/FTIndexWorker.java 2007-10-30 10:00:14 UTC (rev 6822) +++ branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/indexing/fulltext/FTIndexWorker.java 2007-10-30 10:57:12 UTC (rev 6823) @@ -504,9 +504,9 @@ Map hints) throws TerminatedException { //TODO : use this key List qnames = hints == null ? null : (List)hints.get(QNAMES_KEY); - //Expects a StringValue + //Expects a String Object start = hints == null ? null : hints.get(START_VALUE); - //Expects a StringValue + //Expects a String //TODO : possibly use this key ? Object end = hints == null ? null : hints.get(END_VALUE); TermMatcher matcher = hints == null ? null : (TermMatcher)hints.get(TOKEN_MATCHER); @@ -555,9 +555,16 @@ } //TODO : use generic index as well : or, better ?, add a method for generic index - public NodeSet search(XQueryContext context, int contextId, DocumentSet docs, NodeSet contextSet, int axis, List qnames, String token) - throws TerminatedException { - if (qnames == null || qnames.isEmpty()) + public NodeSet search(XQueryContext context, int contextId, DocumentSet docs, NodeSet contextSet, int axis, Map hints) + throws TerminatedException { + List qnames = hints == null ? null : (List)hints.get(QNAMES_KEY); + //Expects a String + Object start = hints == null ? null : hints.get(START_VALUE); + //Expects a String + //TODO : possibly use this key ? + Object end = hints == null ? null : hints.get(END_VALUE); + TermMatcher matcher = hints == null ? null : (TermMatcher)hints.get(TOKEN_MATCHER); + if (qnames == null || qnames.isEmpty()) //TODO : mmmmh : that prevents the use of the generic index. See above. qnames = getDefinedIndexes(context.getBroker(), docs); final NodeSet result = new ExtArrayNodeSet(docs.getLength(), 250); @@ -566,19 +573,14 @@ for (int i = 0; i < qnames.size(); i++) { QName qname = (QName) qnames.get(i); //TODO : use generic index as well according to the collection's configuration - FTQNamedTokenKey key = new FTQNamedTokenKey(collectionId, qname, context.getBroker().getSymbols(), token); + FTQNamedTokenKey key = new FTQNamedTokenKey(collectionId, qname, context.getBroker().getSymbols(), (String)start); final Lock lock = index.db.getLock(); try { lock.acquire(Lock.READ_LOCK); - //TODO : not sure about the 0 - //TODO : externalize that and pass the matcher as a "hint" - TermMatcher matcher = new RegexMatcher(token, DBBroker.MATCH_EXACT, 0, true); //SearchCallback cb = new SearchCallback(context, contextId, docs, contextSet, result, axis, qname, token); SearchCallback2 cb = new SearchCallback2(context, contextId, docs, contextSet, result, axis, qname, matcher); //TODO : revisit, particularly for the right truncature ; we match have several strategies there index.db.query(new IndexQuery(IndexQuery.TRUNC_RIGHT, key), cb); - } catch (EXistException e) { - LOG.warn("Failed to build a TermMatcher", e); } catch (LockException e) { LOG.warn("Failed to acquire lock for '" + index.db.getFile().getName() + "'", e); } catch (IOException e) { @@ -595,7 +597,14 @@ //TODO : remove this method ; replace with search() when the start key and the end key would be the same ones public NodeSet searchExact(XQueryContext context, int contextId, DocumentSet docs, NodeSet contextSet, int axis, - QName qname, String token) throws TerminatedException { + Map hints) throws TerminatedException { + List qname = hints == null ? null : (List)hints.get(QNAMES_KEY); + //Expects a String + Object start = hints == null ? null : hints.get(START_VALUE); + //Expects a String + //TODO : possibly use this key ? + Object end = hints == null ? null : hints.get(END_VALUE); + TermMatcher matcher = hints == null ? null : (TermMatcher)hints.get(TOKEN_MATCHER); final NodeSet result = new ExtArrayNodeSet(docs.getLength(), 250); for (Iterator iter = docs.getCollectionIterator(); iter.hasNext();) { final Collection collection = (Collection) iter.next(); @@ -603,24 +612,18 @@ Value key; if (qname == null) //TODO : check that the collecion is generic indexed ? - key = new FTGenericTokenKey(collectionId, token); + key = new FTGenericTokenKey(collectionId, (String)start); else { //TODO : check that the collecion is qname indexed ? - key = new FTQNamedTokenKey(collectionId, qname, context.getBroker().getSymbols(), token); + key = new FTQNamedTokenKey(collectionId, (QName)qname.get(0), context.getBroker().getSymbols(), (String)start); // LOG.debug("Using qname: " + qname.toString() + " " + key.dump() + " '" + key.toString() + "'"); } final Lock lock = index.db.getLock(); try { lock.acquire(Lock.READ_LOCK); - //SearchExactCallback cb = new SearchExactCallback(context, contextId, docs, contextSet, result, axis, qname, token); - //TODO : not sure about the 0 - //TODO : externalize that and pass the matcher as a "hint" - TermMatcher matcher = new RegexMatcher(token, DBBroker.MATCH_EXACT, 0, true); //SearchExactCallback cb = new SearchExactCallback(context, contextId, docs, contextSet, result, axis, qname, token); - SearchCallback2 cb = new SearchCallback2(context, contextId, docs, contextSet, result, axis, qname, matcher); - index.db.query(new IndexQuery(IndexQuery.EQ, key), cb); - } catch (EXistException e) { - LOG.warn("Failed to build a TermMatcher", e); + SearchCallback2 cb = new SearchCallback2(context, contextId, docs, contextSet, result, axis, (QName)qname.get(0), matcher); + index.db.query(new IndexQuery(IndexQuery.EQ, key), cb); } catch (LockException e) { LOG.warn("Failed to acquire lock for '" + index.db.getFile().getName() + "'", e); } catch (IOException e) { Modified: branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/FulltextSearch.java =================================================================== --- branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/FulltextSearch.java 2007-10-30 10:00:14 UTC (rev 6822) +++ branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/FulltextSearch.java 2007-10-30 10:57:12 UTC (rev 6823) @@ -1,8 +1,11 @@ package org.exist.xquery.modules.fulltext; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import org.exist.EXistException; import org.exist.dom.DocumentSet; import org.exist.dom.ExtArrayNodeSet; import org.exist.dom.Match; @@ -12,6 +15,9 @@ import org.exist.dom.QName; import org.exist.indexing.fulltext.FTIndex; import org.exist.indexing.fulltext.FTIndexWorker; +import org.exist.indexing.fulltext.utils.RegexMatcher; +import org.exist.indexing.fulltext.utils.TermMatcher; +import org.exist.storage.DBBroker; import org.exist.storage.ElementValue; import org.exist.xquery.AnalyzeContextInfo; import org.exist.xquery.Atomize; @@ -155,7 +161,7 @@ qnames = new ArrayList(1); qnames.add(contextQName); } - //TODO : what is qnames == null ? + //TODO : what if qnames == null ? result = processMatches(index, docs, qnames, token, inNodes, NodeSet.ANCESTOR); } } else { @@ -167,10 +173,22 @@ return result; } - private NodeSet processMatches(FTIndexWorker index, DocumentSet docs, List qnames, String token, NodeSet nodeSet, int axis) throws TerminatedException { + private NodeSet processMatches(FTIndexWorker index, DocumentSet docs, List qnames, String token, NodeSet nodeSet, int axis) + throws TerminatedException, XPathException { NodeSet result = null; long start = System.currentTimeMillis(); - NodeSet nodes = index.search(context, getExpressionId(), docs, nodeSet, axis, qnames, token); + Map hints = new HashMap(); + TermMatcher matcher = null; + //TODO : not sure about the 0 + try { + matcher = new RegexMatcher(token, DBBroker.MATCH_EXACT, 0, true); + } catch (EXistException e) { + throw new XPathException(e); + } + hints.put(FTIndexWorker.TOKEN_MATCHER, matcher); + hints.put(FTIndexWorker.START_VALUE, token); + hints.put(FTIndexWorker.QNAMES_KEY, qnames); + NodeSet nodes = index.search(context, getExpressionId(), docs, nodeSet, axis, hints); if (LOG.isTraceEnabled()) LOG.trace("Found " + nodes.getLength() + " for " + token + " in " + (System.currentTimeMillis() - start)); Modified: branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/FuzzyMatchAll.java =================================================================== --- branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/FuzzyMatchAll.java 2007-10-30 10:00:14 UTC (rev 6822) +++ branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/FuzzyMatchAll.java 2007-10-30 10:57:12 UTC (rev 6823) @@ -91,6 +91,7 @@ Map hints = new HashMap(); matcher = new FuzzyMatcher(term, threshold); hints.put(FTIndexWorker.TOKEN_MATCHER, matcher); + //TODO : check this substring hints.put(FTIndexWorker.START_VALUE, term.substring(0, 1).toLowerCase()); hits[k] = index.getNodes( Modified: branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/MatchRegexp.java =================================================================== --- branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/MatchRegexp.java 2007-10-30 10:00:14 UTC (rev 6822) +++ branches/meshram/eXist/extensions/indexes/fulltext/src/org/exist/xquery/modules/fulltext/MatchRegexp.java 2007-10-30 10:57:12 UTC (rev 6823) @@ -374,9 +374,10 @@ protected NodeSet[] getMatches(DocumentSet docs, NodeSet contextSet, int axis, QName qname, List terms, boolean matchAll) throws XPathException { FTIndexWorker index = (FTIndexWorker) - context.getBroker().getIndexController().getWorkerByIndexId(FTIndex.ID); - + context.getBroker().getIndexController().getWorkerByIndexId(FTIndex.ID); NodeSet hits[] = new NodeSet[terms.size()]; + Map hints = new HashMap(); + TermMatcher matcher; for (int k = 0; k < terms.size(); k++) { String token = (String) terms.get(k); //Moved from FTIndexWorker. TODO : rethink @@ -399,8 +400,17 @@ token = token.toLowerCase(); if (((FTIndex)index.getIndex()).isStemming()) token = ((FTIndex)index.getIndex()).getStemmer().stem(token); - - hits[k] = index.searchExact(context, contextId, docs, contextSet, axis, qname, token); + + try { + // TODO : not sure about the 0 + matcher = new RegexMatcher(token, DBBroker.MATCH_EXACT, 0, true); + } catch (EXistException e) { + throw new XPathException(e); + } + hints.put(FTIndexWorker.TOKEN_MATCHER, matcher); + hints.put(FTIndexWorker.START_VALUE, token); + hints.put(FTIndexWorker.QNAMES_KEY, qname); + hits[k] = index.searchExact(context, contextId, docs, contextSet, axis, hints); break; default : //Return early @@ -423,17 +433,15 @@ } start = buf; } - try { - Map hints = new HashMap(); - TermMatcher matcher = new RegexMatcher(token, matchType, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE, + try { + matcher = new RegexMatcher(token, matchType, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE, matchAll); - hints.put(FTIndexWorker.TOKEN_MATCHER, matcher); - hints.put(FTIndexWorker.START_VALUE, start); - hits[k] = index.getNodes(context, contextId, docs, contextSet, axis, qname, hints); } catch (EXistException e) { throw new XPathException(e); - } - + } + hints.put(FTIndexWorker.TOKEN_MATCHER, matcher); + hints.put(FTIndexWorker.START_VALUE, start); + hits[k] = index.getNodes(context, contextId, docs, contextSet, axis, qname, hints); } LOG.debug("Matches for " + token + ": " + hits[k].getLength()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |