From: <wol...@us...> - 2007-10-29 20:47:02
|
Revision: 6818 http://exist.svn.sourceforge.net/exist/?rev=6818&view=rev Author: wolfgang_m Date: 2007-10-29 13:46:58 -0700 (Mon, 29 Oct 2007) Log Message: ----------- Removed old workaround for FTMatchListener. Match highlighting for the full text index is now handled by the full text module. Modified Paths: -------------- trunk/eXist/src/org/exist/fulltext/FTIndexWorker.java trunk/eXist/src/org/exist/storage/NativeTextEngine.java trunk/eXist/src/org/exist/storage/serializers/Serializer.java Added Paths: ----------- trunk/eXist/src/org/exist/fulltext/FTMatch.java trunk/eXist/src/org/exist/fulltext/FTMatchListener.java Modified: trunk/eXist/src/org/exist/fulltext/FTIndexWorker.java =================================================================== --- trunk/eXist/src/org/exist/fulltext/FTIndexWorker.java 2007-10-29 20:09:30 UTC (rev 6817) +++ trunk/eXist/src/org/exist/fulltext/FTIndexWorker.java 2007-10-29 20:46:58 UTC (rev 6818) @@ -1,3 +1,24 @@ +/* + * eXist Open Source Native XML Database + * Copyright (C) 2001-07 The eXist Project + * http://exist-db.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * $Id$ + */ package org.exist.fulltext; import org.exist.collections.Collection; @@ -4,6 +25,7 @@ import org.exist.dom.*; import org.exist.indexing.*; import org.exist.storage.*; +import org.exist.fulltext.FTMatchListener; import org.exist.storage.txn.Txn; import org.exist.util.DatabaseConfigurationException; import org.exist.util.Occurrences; @@ -30,7 +52,8 @@ private int mode = StreamListener.UNKNOWN; private FTStreamListener listener = new FTStreamListener(); - + private FTMatchListener matchListener = null; + public FTIndexWorker(NativeTextEngine engine) { this.index = engine; } @@ -109,8 +132,22 @@ } public MatchListener getMatchListener(NodeProxy proxy) { - // Not implemented - return null; + boolean needToFilter = false; + Match nextMatch = proxy.getMatches(); + while (nextMatch != null) { + if (nextMatch.getIndexId() == ID) { + needToFilter = true; + break; + } + nextMatch = nextMatch.getNextMatch(); + } + if (!needToFilter) + return null; + if (matchListener == null) + matchListener = new FTMatchListener(proxy); + else + matchListener.reset(proxy); + return matchListener; } public void flush() { Added: trunk/eXist/src/org/exist/fulltext/FTMatch.java =================================================================== --- trunk/eXist/src/org/exist/fulltext/FTMatch.java (rev 0) +++ trunk/eXist/src/org/exist/fulltext/FTMatch.java 2007-10-29 20:46:58 UTC (rev 6818) @@ -0,0 +1,52 @@ +/* + * eXist Open Source Native XML Database + * Copyright (C) 2001-07 The eXist Project + * http://exist-db.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * $Id$ + */ +package org.exist.fulltext; + +import org.exist.dom.Match; +import org.exist.numbering.NodeId; + +public class FTMatch extends Match { + + public FTMatch(int contextId, NodeId nodeId, String matchTerm) { + super(contextId, nodeId, matchTerm); + } + + public FTMatch(int contextId, NodeId nodeId, String matchTerm, int frequency) { + super(contextId, nodeId, matchTerm, frequency); + } + + public FTMatch(Match match) { + super(match); + } + + public Match createInstance(int contextId, NodeId nodeId, String matchTerm) { + return new FTMatch(contextId, nodeId, matchTerm); + } + + public Match newCopy() { + return new FTMatch(this); + } + + public String getIndexId() { + return FTIndexWorker.ID; + } +} \ No newline at end of file Copied: trunk/eXist/src/org/exist/fulltext/FTMatchListener.java (from rev 6817, trunk/eXist/src/org/exist/storage/serializers/FTMatchListener.java) =================================================================== --- trunk/eXist/src/org/exist/fulltext/FTMatchListener.java (rev 0) +++ trunk/eXist/src/org/exist/fulltext/FTMatchListener.java 2007-10-29 20:46:58 UTC (rev 6818) @@ -0,0 +1,216 @@ +package org.exist.fulltext; + +import org.apache.log4j.Logger; +import org.exist.dom.*; +import org.exist.fulltext.FTIndexWorker; +import org.exist.indexing.AbstractMatchListener; +import org.exist.numbering.NodeId; +import org.exist.stax.EmbeddedXMLStreamReader; +import org.exist.util.FastQSort; +import org.exist.util.serializer.AttrList; +import org.xml.sax.SAXException; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Stack; + +/** + * Implementation of {@link org.exist.indexing.MatchListener} for the fulltext index. + * Right now, the serializer will directly plug this into the listener pipeline. This will + * change once we move the fulltext index into its own module. + */ +public class FTMatchListener extends AbstractMatchListener { + + private final static Logger LOG = Logger.getLogger(FTMatchListener.class); + + private Match match; + private Stack offsetStack = null; + + public FTMatchListener() { + } + + public FTMatchListener(NodeProxy proxy) { + reset(proxy); + } + + public boolean hasMatches(NodeProxy proxy) { + Match nextMatch = proxy.getMatches(); + while (nextMatch != null) { + if (nextMatch.getIndexId() == FTIndexWorker.ID) { + return true; + } + nextMatch = nextMatch.getNextMatch(); + } + return false; + } + + public void reset(NodeProxy proxy) { + this.match = proxy.getMatches(); + setNextInChain(null); + + /* Check if an index is defined on an ancestor of the current node. + * If yes, scan the ancestor to get the offset of the first character + * in the current node. For example, if the indexed node is <a>abc<b>de</b></a> + * and we query for //a[text:ngram-contains(., 'de')]/b, proxy will be a <b> node, but + * the offsets of the matches are relative to the start of <a>. + */ + NodeSet ancestors = null; + Match nextMatch = this.match; + while (nextMatch != null) { + if (proxy.getNodeId().isDescendantOf(nextMatch.getNodeId())) { + if (ancestors == null) + ancestors = new ExtArrayNodeSet(); + ancestors.add(new NodeProxy(proxy.getDocument(), nextMatch.getNodeId())); + } + nextMatch = nextMatch.getNextMatch(); + } + if (ancestors != null && !ancestors.isEmpty()) { + for (Iterator i = ancestors.iterator(); i.hasNext();) { + NodeProxy p = (NodeProxy) i.next(); + int startOffset = 0; + try { + XMLStreamReader reader = proxy.getDocument().getBroker().getXMLStreamReader(p, false); + while (reader.hasNext()) { + int ev = reader.next(); + NodeId nodeId = (NodeId) reader.getProperty(EmbeddedXMLStreamReader.PROPERTY_NODE_ID); + if (nodeId.equals(proxy.getNodeId())) + break; + if (ev == XMLStreamReader.CHARACTERS) + startOffset += reader.getText().length(); + } + } catch (IOException e) { + LOG.warn("Problem found while serializing XML: " + e.getMessage(), e); + } catch (XMLStreamException e) { + LOG.warn("Problem found while serializing XML: " + e.getMessage(), e); + } + if (offsetStack == null) + offsetStack = new Stack(); + offsetStack.push(new NodeOffset(p.getNodeId(), startOffset)); + } + } + } + + public void startElement(QName qname, AttrList attribs) throws SAXException { + Match nextMatch = match; + // check if there are any matches in the current element + // if yes, push a NodeOffset object to the stack to track + // the node contents + while (nextMatch != null) { + if (nextMatch.getNodeId().equals(getCurrentNode().getNodeId())) { + if (offsetStack == null) + offsetStack = new Stack(); + offsetStack.push(new NodeOffset(nextMatch.getNodeId())); + break; + } + nextMatch = nextMatch.getNextMatch(); + } + super.startElement(qname, attribs); + } + + public void endElement(QName qname) throws SAXException { + Match nextMatch = match; + // check if we need to pop the stack + while (nextMatch != null) { + if (nextMatch.getNodeId().equals(getCurrentNode().getNodeId())) { + offsetStack.pop(); + break; + } + nextMatch = nextMatch.getNextMatch(); + } + super.endElement(qname); + } + + public void characters(CharSequence seq) throws SAXException { + List offsets = null; // a list of offsets to process + if (offsetStack != null) { + // walk through the stack to find matches which start in + // the current string of text + for (int i = 0; i < offsetStack.size(); i++) { + NodeOffset no = (NodeOffset) offsetStack.get(i); + int end = no.offset + seq.length(); + // scan all matches + Match next = match; + while (next != null) { + if (next.getIndexId() == FTIndexWorker.ID && next.getNodeId().equals(no.nodeId)) { + int freq = next.getFrequency(); + for (int j = 0; j < freq; j++) { + Match.Offset offset = next.getOffset(j); + if (offset.getOffset() < end && + offset.getOffset() + offset.getLength() > no.offset) { + // add it to the list to be processed + if (offsets == null) { + offsets = new ArrayList(4); + } + // adjust the offset and add it to the list + int start = offset.getOffset() - no.offset; + int len = offset.getLength(); + if (start < 0) { + len = len - Math.abs(start); + start = 0; + } + if (start + len > seq.length()) + len = seq.length() - start; + offsets.add(new Match.Offset(start, len)); + } + } + } + next = next.getNextMatch(); + } + // add the length of the current text to the element content length + no.offset = end; + } + } + // walk through the matches a second time to find matches in the text node itself + Match next = match; + while (next != null) { + if (next.getIndexId() == FTIndexWorker.ID && + next.getNodeId().equals(getCurrentNode().getNodeId())) { + if (offsets == null) + offsets = new ArrayList(); + int freq = next.getFrequency(); + for (int i = 0; i < freq; i++) { + offsets.add(next.getOffset(i)); + } + } + next = next.getNextMatch(); + } + // now print out the text, marking all matches with a match element + if (offsets != null) { + FastQSort.sort(offsets, 0, offsets.size() - 1); + String s = seq.toString(); + int pos = 0; + for (int i = 0; i < offsets.size(); i++) { + Match.Offset offset = (Match.Offset) offsets.get(i); + if (offset.getOffset() > pos) { + super.characters(s.substring(pos, pos + (offset.getOffset() - pos))); + } + super.startElement(MATCH_ELEMENT, null); + super.characters(s.substring(offset.getOffset(), offset.getOffset() + offset.getLength())); + super.endElement(MATCH_ELEMENT); + pos = offset.getOffset() + offset.getLength(); + } + if (pos < s.length()) { + super.characters(s.substring(pos)); + } + } else + super.characters(seq); + } + + private class NodeOffset { + NodeId nodeId; + int offset = 0; + + public NodeOffset(NodeId nodeId) { + this.nodeId = nodeId; + } + + public NodeOffset(NodeId nodeId, int offset) { + this.nodeId = nodeId; + this.offset = offset; + } + } +} Property changes on: trunk/eXist/src/org/exist/fulltext/FTMatchListener.java ___________________________________________________________________ Name: svn:executable + * Modified: trunk/eXist/src/org/exist/storage/NativeTextEngine.java =================================================================== --- trunk/eXist/src/org/exist/storage/NativeTextEngine.java 2007-10-29 20:09:30 UTC (rev 6817) +++ trunk/eXist/src/org/exist/storage/NativeTextEngine.java 2007-10-29 20:46:58 UTC (rev 6818) @@ -26,8 +26,9 @@ import org.exist.EXistException; import org.exist.collections.Collection; import org.exist.dom.*; -import org.exist.fulltext.FTIndexWorker; import org.exist.fulltext.ElementContent; +import org.exist.fulltext.FTIndexWorker; +import org.exist.fulltext.FTMatch; import org.exist.numbering.NodeId; import org.exist.security.PermissionDeniedException; import org.exist.storage.analysis.TextToken; @@ -64,8 +65,6 @@ */ public class NativeTextEngine extends TextSearchEngine implements ContentLoadingObserver { - public static final String FT_MATCH_ID = NativeTextEngine.class.getName(); - public static final String FILE_NAME = "words.dbx"; public static final String FILE_KEY_IN_CONFIG = "db-connection.words"; @@ -497,7 +496,7 @@ throw new IllegalArgumentException("Invalid section type in '" + dbTokens.getFile().getName() + "'"); } if (parent != null) { - Match match = new FtMatch(-1, nodeId, token, freq); + Match match = new FTMatch(-1, nodeId, token, freq); readOccurrences(freq, is, match, token.length()); if (axis == NodeSet.ANCESTOR) { parent.addMatch(match); @@ -513,7 +512,7 @@ } // otherwise, we add all text nodes without check } else { - Match match = new FtMatch(-1, nodeId, token, freq); + Match match = new FTMatch(-1, nodeId, token, freq); readOccurrences(freq, is, match, token.length()); storedNode.addMatch(match); result.add(storedNode, Constants.NO_SIZE_HINT); @@ -1393,7 +1392,7 @@ throw new IllegalArgumentException("Invalid section type in '" + dbTokens.getFile().getName() + "'"); } if (parentNode != null) { - Match match = new FtMatch(-1, nodeId, word.toString(), freq); + Match match = new FTMatch(-1, nodeId, word.toString(), freq); readOccurrences(freq, is, match, word.length()); int sizeHint = contextSet.getSizeHint(storedDocument); if (axis == NodeSet.ANCESTOR) { @@ -1406,7 +1405,7 @@ } else is.skip(freq); } else { - Match match = new FtMatch(-1, nodeId, word.toString(), freq); + Match match = new FTMatch(-1, nodeId, word.toString(), freq); readOccurrences(freq, is, match, word.length()); storedNode.addMatch(match); result.add(storedNode, Constants.NO_SIZE_HINT); @@ -1699,31 +1698,4 @@ else return "no word"; } } - - public class FtMatch extends Match { - - public FtMatch(int contextId, NodeId nodeId, String matchTerm) { - super(contextId, nodeId, matchTerm); - } - - public FtMatch(int contextId, NodeId nodeId, String matchTerm, int frequency) { - super(contextId, nodeId, matchTerm, frequency); - } - - public FtMatch(Match match) { - super(match); - } - - public Match createInstance(int contextId, NodeId nodeId, String matchTerm) { - return new FtMatch(contextId, nodeId, matchTerm); - } - - public Match newCopy() { - return new FtMatch(this); - } - - public String getIndexId() { - return FT_MATCH_ID; - } - } } Modified: trunk/eXist/src/org/exist/storage/serializers/Serializer.java =================================================================== --- trunk/eXist/src/org/exist/storage/serializers/Serializer.java 2007-10-29 20:09:30 UTC (rev 6817) +++ trunk/eXist/src/org/exist/storage/serializers/Serializer.java 2007-10-29 20:46:58 UTC (rev 6818) @@ -167,10 +167,6 @@ protected LexicalHandler lexicalHandler = null; protected User user = null; - // match listener for the fulltext index. to be removed once the index has - // been moved to the new architecture - private FTMatchListener ftmatch = new FTMatchListener(); - public Serializer(DBBroker broker, Configuration config) { this.broker = broker; factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance(); @@ -552,12 +548,6 @@ if (root != null && getHighlightingMode() != TAG_NONE) { IndexController controller = broker.getIndexController(); MatchListener listener = controller.getMatchListener(root); - if (ftmatch.hasMatches(root)) { - ftmatch.reset(root); - ftmatch.setNextInChain(receiver); - receiver = ftmatch; - LOG.debug("Applying FTMatchListener"); - } if (listener != null) { MatchListener last = (MatchListener) listener.getLastInChain(); last.setNextInChain(receiver); @@ -571,12 +561,6 @@ if (getHighlightingMode() != TAG_NONE) { IndexController controller = broker.getIndexController(); MatchListener listener = controller.getMatchListener(p); - if (ftmatch.hasMatches(p)) { - ftmatch.reset(p); - ftmatch.setNextInChain(receiver); - receiver = ftmatch; - LOG.debug("Applying FTMatchListener"); - } if (listener != null) { MatchListener last = (MatchListener) listener.getLastInChain(); last.setNextInChain(receiver); @@ -737,11 +721,6 @@ if (root != null && getHighlightingMode() != TAG_NONE) { IndexController controller = broker.getIndexController(); MatchListener listener = controller.getMatchListener(root); - if (ftmatch.hasMatches(root)) { - ftmatch.reset(root); - ftmatch.setNextInChain(receiver); - receiver = ftmatch; - } if (listener != null) { MatchListener last = (MatchListener) listener.getLastInChain(); last.setNextInChain(receiver); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |