From: <wol...@us...> - 2007-07-30 21:50:02
|
Revision: 6313
          http://exist.svn.sourceforge.net/exist/?rev=6313&view=rev
Author:   wolfgang_m
Date:     2007-07-30 14:50:00 -0700 (Mon, 30 Jul 2007)

Log Message:
-----------
Added NGram functions ngram:starts-with and ngram:ends-with.

Modified Paths:
--------------
    trunk/eXist/extensions/indexes/ngram/src/org/exist/xquery/modules/ngram/NGramModule.java
    trunk/eXist/extensions/indexes/ngram/src/org/exist/xquery/modules/ngram/NGramSearch.java
    trunk/eXist/src/org/exist/dom/Match.java

Modified: trunk/eXist/extensions/indexes/ngram/src/org/exist/xquery/modules/ngram/NGramModule.java
===================================================================
--- trunk/eXist/extensions/indexes/ngram/src/org/exist/xquery/modules/ngram/NGramModule.java 2007-07-30 20:38:48 UTC (rev 6312)
+++ trunk/eXist/extensions/indexes/ngram/src/org/exist/xquery/modules/ngram/NGramModule.java 2007-07-30 21:50:00 UTC (rev 6313)
@@ -34,7 +34,9 @@
     public static final String PREFIX = "ngram";
     public static final FunctionDef[] functions = {
-        new FunctionDef(NGramSearch.signature, NGramSearch.class)
+        new FunctionDef(NGramSearch.signatures[0], NGramSearch.class),
+        new FunctionDef(NGramSearch.signatures[1], NGramSearch.class),
+        new FunctionDef(NGramSearch.signatures[2], NGramSearch.class)
     };
     public NGramModule() {

Modified: trunk/eXist/extensions/indexes/ngram/src/org/exist/xquery/modules/ngram/NGramSearch.java
===================================================================
--- trunk/eXist/extensions/indexes/ngram/src/org/exist/xquery/modules/ngram/NGramSearch.java 2007-07-30 20:38:48 UTC (rev 6312)
+++ trunk/eXist/extensions/indexes/ngram/src/org/exist/xquery/modules/ngram/NGramSearch.java 2007-07-30 21:50:00 UTC (rev 6313)
@@ -17,16 +17,35 @@
 public class NGramSearch extends Function implements Optimizable {
-    public final static FunctionSignature signature =
+    public final static FunctionSignature signatures[] = {
         new FunctionSignature(
             new QName("contains", NGramModule.NAMESPACE_URI, NGramModule.PREFIX),
             "",
-            new SequenceType[]{
+            new SequenceType[] {
                 new SequenceType(Type.NODE, Cardinality.ZERO_OR_MORE),
                 new SequenceType(Type.STRING, Cardinality.ZERO_OR_ONE)
             },
             new SequenceType(Type.NODE, Cardinality.ZERO_OR_MORE)
-        );
+        ),
+        new FunctionSignature(
+            new QName("ends-with", NGramModule.NAMESPACE_URI, NGramModule.PREFIX),
+            "",
+            new SequenceType[] {
+                new SequenceType(Type.NODE, Cardinality.ZERO_OR_MORE),
+                new SequenceType(Type.STRING, Cardinality.ZERO_OR_ONE)
+            },
+            new SequenceType(Type.NODE, Cardinality.ZERO_OR_MORE)
+        ),
+        new FunctionSignature(
+            new QName("starts-with", NGramModule.NAMESPACE_URI, NGramModule.PREFIX),
+            "",
+            new SequenceType[] {
+                new SequenceType(Type.NODE, Cardinality.ZERO_OR_MORE),
+                new SequenceType(Type.STRING, Cardinality.ZERO_OR_ONE)
+            },
+            new SequenceType(Type.NODE, Cardinality.ZERO_OR_MORE)
+        )
+    };
     private LocationStep contextStep = null;
     protected QName contextQName = null;
@@ -34,7 +53,7 @@
     private NodeSet preselectResult = null;
     protected boolean optimizeSelf = false;
-    public NGramSearch(XQueryContext context) {
+    public NGramSearch(XQueryContext context, FunctionSignature signature) {
         super(context, signature);
     }
@@ -164,9 +183,12 @@
             if (LOG.isTraceEnabled())
                 LOG.trace("Found " + nodes.getLength() + " for " + ngram + " in " +
                     (System.currentTimeMillis() - start));
-            if (result == null)
-                result = nodes;
-            else {
+            if (result == null) {
+                if (isCalledAs("starts-with"))
+                    result = startsWith(nodes);
+                else
+                    result = nodes;
+            } else {
                 NodeSet temp = new ExtArrayNodeSet();
                 for (NodeSetIterator iterator = nodes.iterator(); iterator.hasNext();) {
                     NodeProxy next = (NodeProxy) iterator.next();
@@ -201,10 +223,49 @@
                 result = temp;
             }
         }
+        if (isCalledAs("starts-with"))
+            result = startsWith(result);
+        else if (isCalledAs("ends-with"))
+            result = endsWith(result);
         return result;
     }
-    public int getDependencies() {
+    private NodeSet startsWith(NodeSet nodes) {
+        NodeSet temp = new ExtArrayNodeSet();
+        for (NodeSetIterator iterator = nodes.iterator(); iterator.hasNext();) {
+            NodeProxy next = (NodeProxy) iterator.next();
+            Match mn = next.getMatches();
+            while (mn != null) {
+                if (mn.hasMatchAt(0)) {
+                    temp.add(next);
+                    break;
+                }
+                mn = mn.getNextMatch();
+            }
+        }
+        return temp;
+    }
+
+    private NodeSet endsWith(NodeSet nodes) {
+        NodeSet temp = new ExtArrayNodeSet();
+        LOG.debug("Filtering " + nodes.getLength());
+        for (NodeSetIterator iterator = nodes.iterator(); iterator.hasNext();) {
+            NodeProxy next = (NodeProxy) iterator.next();
+            String data = next.getNodeValue();
+            int len = data.length();
+            Match mn = next.getMatches();
+            while (mn != null) {
+                if (mn.hasMatchEndingAt(len)) {
+                    temp.add(next);
+                    break;
+                }
+                mn = mn.getNextMatch();
+            }
+        }
+        return temp;
+    }
+
+    public int getDependencies() {
         final Expression stringArg = getArgument(0);
         if (Type.subTypeOf(stringArg.returnsType(), Type.NODE)
             && !Dependency.dependsOn(stringArg, Dependency.CONTEXT_ITEM)) {
@@ -217,4 +278,4 @@
     public int returnsType() {
         return Type.NODE;
     }
-}
+}
\ No newline at end of file

Modified: trunk/eXist/src/org/exist/dom/Match.java
===================================================================
--- trunk/eXist/src/org/exist/dom/Match.java 2007-07-30 20:38:48 UTC (rev 6312)
+++ trunk/eXist/src/org/exist/dom/Match.java 2007-07-30 21:50:00 UTC (rev 6313)
@@ -158,6 +158,35 @@
         return m;
     }
+    /**
+     * Return true if there's a match starting at the given
+     * character position.
+     *
+     * @param pos the position
+     * @return true if a match starts at the given position
+     */
+    public boolean hasMatchAt(int pos) {
+        for (int i = 0; i < currentOffset; i++) {
+            if (offsets[i] == pos)
+                return true;
+        }
+        return false;
+    }
+
+    /**
+     * Returns true if the given position is within a match.
+     *
+     * @param pos the position
+     * @return true if the given position is within a match
+     */
+    public boolean hasMatchAround(int pos) {
+        for (int i = 0; i < currentOffset; i++) {
+            if (offsets[i] + lengths[i] >= pos)
+                return true;
+        }
+        return false;
+    }
+
     public Match getNextMatch() {
         return nextMatch;
     }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |