From: <di...@us...> - 2011-04-30 14:08:11
|
Revision: 14349 http://exist.svn.sourceforge.net/exist/?rev=14349&view=rev Author: dizzzz Date: 2011-04-30 14:08:03 +0000 (Sat, 30 Apr 2011) Log Message: ----------- [ignore] simplified function Added Paths: ----------- branches/dizzzz/non-xml-indexing/extensions/indexes/lucene/src/org/exist/xquery/modules/lucene/Search.java Removed Paths: ------------- branches/dizzzz/non-xml-indexing/extensions/indexes/lucene/src/org/exist/xquery/modules/lucene/Search.java Deleted: branches/dizzzz/non-xml-indexing/extensions/indexes/lucene/src/org/exist/xquery/modules/lucene/Search.java =================================================================== --- branches/dizzzz/non-xml-indexing/extensions/indexes/lucene/src/org/exist/xquery/modules/lucene/Search.java 2011-04-30 13:16:25 UTC (rev 14348) +++ branches/dizzzz/non-xml-indexing/extensions/indexes/lucene/src/org/exist/xquery/modules/lucene/Search.java 2011-04-30 14:08:03 UTC (rev 14349) @@ -1,315 +0,0 @@ -package org.exist.xquery.modules.lucene; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; - -import org.apache.log4j.Logger; -import org.apache.lucene.queryParser.ParseException; -import org.exist.dom.DocumentSet; -import org.exist.dom.NodeSet; -import org.exist.dom.QName; -import org.exist.dom.VirtualNodeSet; -import org.exist.indexing.lucene.LuceneIndex; -import org.exist.indexing.lucene.LuceneIndexWorker; -import org.exist.storage.ElementValue; -import org.exist.xquery.*; -import org.exist.xquery.value.FunctionParameterSequenceType; -import org.exist.xquery.value.FunctionReturnSequenceType; -import org.exist.xquery.value.Item; -import org.exist.xquery.value.NodeValue; -import org.exist.xquery.value.Sequence; -import org.exist.xquery.value.SequenceType; -import org.exist.xquery.value.Type; -import org.w3c.dom.Element; - -public class Search extends Function implements Optimizable { - - protected static final Logger logger = Logger.getLogger(Search.class); - - public final static FunctionSignature[] signatures = { - new FunctionSignature( - new QName("search", LuceneModule.NAMESPACE_URI, LuceneModule.PREFIX), - "Queries a node set using a Lucene full text index; a lucene index " + - "must already be defined on the nodes, because if no index is available " + - "on a node, nothing will be found. Indexes on descendant nodes are not " + - "used. The context of the Lucene query is determined by the given input " + - "node set. The query is specified either as a query string based on " + - "Lucene's default query syntax or as an XML fragment. " + - "See http://exist-db.org/lucene.html#N1029E for complete documentation.", - new SequenceType[] { -// new FunctionParameterSequenceType("nodes", Type.NODE, Cardinality.ZERO_OR_MORE, -// "The node set to search using a Lucene full text index which is defined on those nodes"), - new FunctionParameterSequenceType("query", Type.ITEM, Cardinality.EXACTLY_ONE, - "The query to search for, provided either as a string or text in Lucene's default query " + - "syntax or as an XML fragment to bypass Lucene's default query parser") - }, - new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE, - "all nodes from the input node set matching the query. match highlighting information " + - "will be available for all returned nodes. Lucene's match score can be retrieved via " + - "the ft:score function.") - ), - new FunctionSignature( - new QName("search", LuceneModule.NAMESPACE_URI, LuceneModule.PREFIX), - "Queries a node set using a Lucene full text index; a lucene index " + - "must already be defined on the nodes, because if no index is available " + - "on a node, nothing will be found. Indexes on descendant nodes are not " + - "used. The context of the Lucene query is determined by the given input " + - "node set. The query is specified either as a query string based on " + - "Lucene's default query syntax or as an XML fragment. " + - "See http://exist-db.org/lucene.html#N1029E for complete documentation.", - new SequenceType[] { -// new FunctionParameterSequenceType("nodes", Type.NODE, Cardinality.ZERO_OR_MORE, -// "The node set to search using a Lucene full text index which is defined on those nodes"), - new FunctionParameterSequenceType("query", Type.ITEM, Cardinality.EXACTLY_ONE, - "The query to search for, provided either as a string or text in Lucene's default query " + - "syntax or as an XML fragment to bypass Lucene's default query parser"), - new FunctionParameterSequenceType("options", Type.NODE, Cardinality.ZERO_OR_ONE, - "An XML fragment containing options to be passed to Lucene's query parser. The following " + - "options are supported (a description can be found in the docs):\n" + - "<options>\n" + - " <default-operator>and|or</default-operator>\n" + - " <phrase-slop>number</phrase-slop>\n" + - " <leading-wildcard>yes|no</leading-wildcard>\n" + - " <filter-rewrite>yes|no</filter-rewrite>\n" + - "</options>") - }, - new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE, - "all nodes from the input node set matching the query. match highlighting information " + - "will be available for all returned nodes. Lucene's match score can be retrieved via " + - "the ft:score function.") - ) - }; - - private LocationStep contextStep = null; - protected QName contextQName = null; - protected int axis = Constants.UNKNOWN_AXIS; - private NodeSet preselectResult = null; - protected boolean optimizeSelf = false; - protected boolean optimizeChild = false; - - public Search(XQueryContext context, FunctionSignature signature) { - super(context, signature); - } - - public void setArguments(List<Expression> arguments) throws XPathException { - Expression path = arguments.get(0); - steps.add(path); - -// Expression arg = arguments.get(1); -// arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg, -// new org.exist.xquery.util.Error(org.exist.xquery.util.Error.FUNC_PARAM_CARDINALITY, "2", mySignature)); -// steps.add(arg); -// -// if (arguments.size() == 3) { -// arg = arguments.get(2); -// arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg, -// new org.exist.xquery.util.Error(org.exist.xquery.util.Error.FUNC_PARAM_CARDINALITY, "2", mySignature)); -// arg = new DynamicTypeCheck(context, Type.ELEMENT, arg); -// steps.add(arg); -// } - } - - /* (non-Javadoc) - * @see org.exist.xquery.PathExpr#analyze(org.exist.xquery.Expression) - */ - public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { - super.analyze(new AnalyzeContextInfo(contextInfo)); - - List<LocationStep> steps = BasicExpressionVisitor.findLocationSteps(getArgument(0)); - if (!steps.isEmpty()) { - LocationStep firstStep = steps.get(0); - LocationStep lastStep = steps.get(steps.size() - 1); - if (steps.size() == 1 && firstStep.getAxis() == Constants.SELF_AXIS) { - Expression outerExpr = contextInfo.getContextStep(); - if (outerExpr != null && outerExpr instanceof LocationStep) { - LocationStep outerStep = (LocationStep) outerExpr; - NodeTest test = outerStep.getTest(); - if (test.getName() == null) - contextQName = new QName(null, null, null); - else if (test.isWildcardTest()) - contextQName = test.getName(); - else - contextQName = new QName(test.getName()); - if (outerStep.getAxis() == Constants.ATTRIBUTE_AXIS || outerStep.getAxis() == Constants.DESCENDANT_ATTRIBUTE_AXIS) - contextQName.setNameType(ElementValue.ATTRIBUTE); - contextStep = firstStep; - axis = outerStep.getAxis(); - optimizeSelf = true; - } - } else { - NodeTest test = lastStep.getTest(); - if (test.getName() == null) - contextQName = new QName(null, null, null); - else if (test.isWildcardTest()) - contextQName = test.getName(); - else - contextQName = new QName(test.getName()); - if (lastStep.getAxis() == Constants.ATTRIBUTE_AXIS || lastStep.getAxis() == Constants.DESCENDANT_ATTRIBUTE_AXIS) - contextQName.setNameType(ElementValue.ATTRIBUTE); - axis = firstStep.getAxis(); - optimizeChild = steps.size() == 1 && - (axis == Constants.CHILD_AXIS || axis == Constants.ATTRIBUTE_AXIS); - contextStep = lastStep; - } - } - } - - public boolean canOptimize(Sequence contextSequence) { - return contextQName != null; - } - - public boolean optimizeOnSelf() { - return optimizeSelf; - } - - public boolean optimizeOnChild() { - return optimizeChild; - } - - public int getOptimizeAxis() { - return axis; - } - - public NodeSet preSelect(Sequence contextSequence, boolean useContext) throws XPathException { - if (contextSequence != null && !contextSequence.isPersistentSet()) - // in-memory docs won't have an index - return NodeSet.EMPTY_SET; - - long start = System.currentTimeMillis(); - // the expression can be called multiple times, so we need to clear the previous preselectResult - preselectResult = null; - LuceneIndexWorker index = (LuceneIndexWorker) - context.getBroker().getIndexController().getWorkerByIndexId(LuceneIndex.ID); - DocumentSet docs = contextSequence.getDocumentSet(); - Item key = getKey(contextSequence, null); - List<QName> qnames = new ArrayList<QName>(1); - qnames.add(contextQName); - Properties options = parseOptions(contextSequence, null); - try { - if (Type.subTypeOf(key.getType(), Type.ELEMENT)) - preselectResult = index.query(context, getExpressionId(), docs, useContext ? contextSequence.toNodeSet() : null, - qnames, (Element) ((NodeValue)key).getNode(), NodeSet.DESCENDANT, options); - else - preselectResult = index.query(context, getExpressionId(), docs, useContext ? contextSequence.toNodeSet() : null, - qnames, key.getStringValue(), NodeSet.DESCENDANT, options); - } catch (IOException e) { - throw new XPathException(this, "Error while querying full text index: " + e.getMessage(), e); - } catch (ParseException e) { - throw new XPathException(this, "Error while querying full text index: " + e.getMessage(), e); - } - LOG.debug("Lucene query took " + (System.currentTimeMillis() - start)); - if( context.getProfiler().traceFunctions() ) { - context.getProfiler().traceIndexUsage( context, "lucene", this, PerformanceStats.OPTIMIZED_INDEX, System.currentTimeMillis() - start ); - } - return preselectResult; - } - - public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException { - - if (contextItem != null) - contextSequence = contextItem.toSequence(); - - if (contextSequence != null && !contextSequence.isPersistentSet()) - // in-memory docs won't have an index - return Sequence.EMPTY_SEQUENCE; - - NodeSet result; - if (preselectResult == null) { - long start = System.currentTimeMillis(); - Sequence input = getArgument(0).eval(contextSequence); - if (!(input instanceof VirtualNodeSet) && input.isEmpty()) - result = NodeSet.EMPTY_SET; - else { - NodeSet inNodes = input.toNodeSet(); - DocumentSet docs = inNodes.getDocumentSet(); - LuceneIndexWorker index = (LuceneIndexWorker) - context.getBroker().getIndexController().getWorkerByIndexId(LuceneIndex.ID); -// Item key = getKey(contextSequence, contextItem); - String query = getQuery(contextSequence, contextItem); - List<QName> qnames = null; - if (contextQName != null) { - qnames = new ArrayList<QName>(1); - qnames.add(contextQName); - } - Properties options = parseOptions(contextSequence, contextItem); -// try { -// if (Type.subTypeOf(key.getType(), Type.ELEMENT)) - result = index.search(context, docs, query); -// else -// result = index.search(context, docs, query); // options - -// } catch (IOException e) { -// throw new XPathException(this, e.getMessage()); -// } catch (ParseException e) { -// throw new XPathException(this, e.getMessage()); -// } - } - if( context.getProfiler().traceFunctions() ) { - context.getProfiler().traceIndexUsage( context, "lucene", this, PerformanceStats.BASIC_INDEX, System.currentTimeMillis() - start ); - } - } else { - contextStep.setPreloadedData(contextSequence.getDocumentSet(), preselectResult); - result = getArgument(0).eval(contextSequence).toNodeSet(); - } - return result; - } - - protected Item getKey(Sequence contextSequence, Item contextItem) throws XPathException { - Sequence keySeq = getArgument(1).eval(contextSequence, contextItem); - Item key = keySeq.itemAt(0); - if (!(Type.subTypeOf(key.getType(), Type.STRING) || Type.subTypeOf(key.getType(), Type.NODE))) - throw new XPathException(this, "Second argument to ft:query should either be a query string or " + - "an XML element describing the query. Found: " + Type.getTypeName(key.getType())); - return key; - } - - protected String getQuery(Sequence contextSequence, Item contextItem) throws XPathException { - Sequence keySeq = getArgument(0).eval(contextSequence, contextItem); - Item key = keySeq.itemAt(0); - return key.getStringValue(); - } - - public int getDependencies() { - final Expression stringArg = getArgument(0); - if (Type.subTypeOf(stringArg.returnsType(), Type.NODE) && - !Dependency.dependsOn(stringArg, Dependency.CONTEXT_ITEM)) { - return Dependency.CONTEXT_SET; - } else { - return Dependency.CONTEXT_SET + Dependency.CONTEXT_ITEM; - } - } - - public int returnsType() { - return Type.NODE; - } - - protected Properties parseOptions(Sequence contextSequence, Item contextItem) throws XPathException { - if (getArgumentCount() < 3) - return null; - Properties options = new Properties(); - Sequence optSeq = getArgument(2).eval(contextSequence, contextItem); - NodeValue optRoot = (NodeValue) optSeq.itemAt(0); - try { - XMLStreamReader reader = context.getXMLStreamReader(optRoot); - reader.next(); - reader.next(); - while (reader.hasNext()) { - int status = reader.next(); - if (status == XMLStreamReader.START_ELEMENT) { - options.put(reader.getLocalName(), reader.getElementText()); - } - } - return options; - } catch (XMLStreamException e) { - throw new XPathException(this, "Error while parsing options to ft:query: " + e.getMessage(), e); - } catch (IOException e) { - throw new XPathException(this, "Error while parsing options to ft:query: " + e.getMessage(), e); - } - } -} - Copied: branches/dizzzz/non-xml-indexing/extensions/indexes/lucene/src/org/exist/xquery/modules/lucene/Search.java (from rev 14346, branches/dizzzz/non-xml-indexing/extensions/indexes/lucene/src/org/exist/xquery/modules/lucene/Index.java) =================================================================== --- branches/dizzzz/non-xml-indexing/extensions/indexes/lucene/src/org/exist/xquery/modules/lucene/Search.java (rev 0) +++ branches/dizzzz/non-xml-indexing/extensions/indexes/lucene/src/org/exist/xquery/modules/lucene/Search.java 2011-04-30 14:08:03 UTC (rev 14349) @@ -0,0 +1,107 @@ +/* + * eXist Open Source Native XML Database + * Copyright (C) 2001-07 The eXist Project + * http://exist-db.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * \$Id\$ + */ +package org.exist.xquery.modules.lucene; + +import org.apache.log4j.Logger; + +import org.exist.dom.DocumentImpl; +import org.exist.dom.QName; +import org.exist.indexing.StreamListener; +import org.exist.indexing.lucene.LuceneIndex; +import org.exist.indexing.lucene.LuceneIndexWorker; +import org.exist.storage.lock.Lock; +import org.exist.xmldb.XmldbURI; + +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +public class Search extends BasicFunction { + + private static final Logger logger = Logger.getLogger(Search.class); + public final static FunctionSignature signatures[] = { + new FunctionSignature( + new QName("search", LuceneModule.NAMESPACE_URI, LuceneModule.PREFIX), + "Search for (non-XML) data with lucene", + new SequenceType[]{ + new FunctionParameterSequenceType("path", Type.STRING, Cardinality.ZERO_OR_MORE, + "Path of documents or document in database."), + new FunctionParameterSequenceType("query", Type.STRING, Cardinality.EXACTLY_ONE, + "query string") + }, + new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_MORE, + "The documents that match the query")) + }; + + /* + * Constructor + */ + public Search(XQueryContext context, FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { + + DocumentImpl doc = null; + try { + // Get first parameter, this is the document + String path = args[0].itemAt(0).getStringValue(); + + doc = context.getBroker() + .getXMLResource(XmldbURI.xmldbUriFor(path), Lock.READ_LOCK); + + if(doc==null){ + throw new XPathException("Document "+path + " does not exist."); + } + + // Get node from second parameter + NodeValue descriptor = (NodeValue) args[1].itemAt(0); + + // Retrieve Lucene + LuceneIndexWorker index = (LuceneIndexWorker) context.getBroker() + .getIndexController().getWorkerByIndexId(LuceneIndex.ID); + + // Order is important + index.setDocument(doc, StreamListener.STORE); + index.setMode(StreamListener.STORE); + + // Pas document and index instructions to indexer + index.indexNonXML(doc, descriptor); + + // Make sure things are written + // TODO: to be removed? + index.flush(); + + } catch (Exception ex) { // PermissionDeniedException + logger.error(ex); + throw new XPathException(ex); + + } finally { + if (doc != null) { + doc.getUpdateLock().release(Lock.READ_LOCK); + } + } + + // Return nothing [status would be nice] + return Sequence.EMPTY_SEQUENCE; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |