From: <fn...@us...> - 2013-07-24 09:23:37
|
Revision: 4278 http://openutils.svn.sourceforge.net/openutils/?rev=4278&view=rev Author: fnecci Date: 2013-07-24 09:23:32 +0000 (Wed, 24 Jul 2013) Log Message: ----------- CRIT-55 Added unit test and patch taken from https://issues.apache.org/jira/browse/JCR-2732 Modified Paths: -------------- magnoliamodules/trunk/openutils-mgnlcriteria/src/test/java/net/sourceforge/openutils/mgnlcriteria/advanced/JcrContainsCriteriaSearchTest.java magnoliamodules/trunk/openutils-mgnlcriteria/src/test/resources/crit-bootstrap/website.contains.xml Added Paths: ----------- magnoliamodules/trunk/openutils-mgnlcriteria/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java Added: magnoliamodules/trunk/openutils-mgnlcriteria/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java =================================================================== --- magnoliamodules/trunk/openutils-mgnlcriteria/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java (rev 0) +++ magnoliamodules/trunk/openutils-mgnlcriteria/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java 2013-07-24 09:23:32 UTC (rev 4278) @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query.lucene; + +import java.io.IOException; +import java.io.StringReader; +import java.util.List; +import java.util.ArrayList; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.util.Version; + +/** + * <code>JackrabbitQueryParser</code> extends the standard lucene query parser + * and adds JCR specific customizations. + */ +public class JackrabbitQueryParser extends QueryParser { + + /** + * The Jackrabbit synonym provider or <code>null</code> if there is none. + */ + private final SynonymProvider synonymProvider; + + private final PerQueryCache cache; + + /** + * Creates a new query parser instance. + * + * @param fieldName the field name. + * @param analyzer the analyzer. + * @param synonymProvider the synonym provider or <code>null</code> if none + * is available. + */ + public JackrabbitQueryParser(String fieldName, + Analyzer analyzer, + SynonymProvider synonymProvider, + PerQueryCache cache) { + super(Version.LUCENE_24, fieldName, analyzer); + this.synonymProvider = synonymProvider; + this.cache = cache; + setAllowLeadingWildcard(true); + setDefaultOperator(Operator.AND); + } + + /** + * {@inheritDoc} + */ + public Query parse(String textsearch) throws ParseException { + // replace escaped ' with just ' + StringBuffer rewritten = new StringBuffer(); + // the default lucene query parser recognizes 'AND' and 'NOT' as + // keywords. + textsearch = textsearch.replaceAll("AND", "and"); + textsearch = textsearch.replaceAll("NOT", "not"); + boolean escaped = false; + for (int i = 0; i < textsearch.length(); i++) { + if (textsearch.charAt(i) == '\\') { + if (escaped) { + rewritten.append("\\\\"); + escaped = false; + } else { + escaped = true; + } + } else if (textsearch.charAt(i) == '\'') { + if (escaped) { + escaped = false; + } + rewritten.append(textsearch.charAt(i)); + } else if (textsearch.charAt(i) == '~') { + if (i == 0 || Character.isWhitespace(textsearch.charAt(i - 1))) { + // escape tilde so we can use it for similarity query + rewritten.append("\\"); + } + if (escaped) { + rewritten.append('\\'); + escaped = false; + } + rewritten.append('~'); + } else if (textsearch.charAt(i) == ':') { + // fields as known in lucene are not supported + rewritten.append("\\:"); + if (escaped) { + escaped = false; + } + } else { + if (escaped) { + rewritten.append('\\'); + escaped = false; + } + rewritten.append(textsearch.charAt(i)); + } + } + return super.parse(rewritten.toString()); + } + + /** + * Factory method for generating a synonym query. + * Called when parser parses an input term token that has the synonym + * prefix (~term) prepended. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * + * @return Resulting {@link Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getSynonymQuery(String field, String termStr) + throws ParseException { + List<BooleanClause> synonyms = new ArrayList<BooleanClause>(); + synonyms.add(new BooleanClause(getFieldQuery(field, termStr), + BooleanClause.Occur.SHOULD)); + if (synonymProvider != null) { + for (String term : synonymProvider.getSynonyms(termStr)) { + synonyms.add(new BooleanClause(getFieldQuery(field, term), BooleanClause.Occur.SHOULD)); + } + } + if (synonyms.size() == 1) { + return synonyms.get(0).getQuery(); + } else { + return getBooleanQuery(synonyms); + } + } + + + /** + * {@inheritDoc} + */ + protected Query getFieldQuery(String field, String queryText) + throws ParseException { + if (queryText.startsWith("~")) { + // synonym query + return getSynonymQuery(field, queryText.substring(1)); + } else { + return super.getFieldQuery(field, queryText); + } + } + + /** + * {@inheritDoc} + */ + protected Query getPrefixQuery(String field, String termStr) + throws ParseException { + // only create a prefix query when the term is a single word / token + Analyzer a = getAnalyzer(); + TokenStream ts = a.tokenStream(field, new StringReader(termStr)); + int count = 0; + boolean isCJ = false; + try { + TypeAttribute t = ts.addAttribute(TypeAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + count++; + isCJ = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.CJ].equals(t.type()); + } + ts.end(); + } catch (IOException e) { + throw new ParseException(e.getMessage()); + } finally { + try { + ts.close(); + } catch (IOException e) { + // ignore + } + } + if (count > 1 && isCJ) { + return getFieldQuery(field, termStr); + } else { + return getWildcardQuery(field, termStr + "*"); + } + } + + /** + * {@inheritDoc} + */ + protected Query getWildcardQuery(String field, String termStr) + throws ParseException { + if (getLowercaseExpandedTerms()) { + termStr = termStr.toLowerCase(); + } + return new WildcardQuery(field, null, translateWildcards(termStr), cache); + } + + /** + * Translates unescaped wildcards '*' and '?' into '%' and '_'. + * + * @param input the input String. + * @return the translated String. + */ + private String translateWildcards(String input) { + StringBuffer translated = new StringBuffer(input.length()); + boolean escaped = false; + for (int i = 0; i < input.length(); i++) { + if (input.charAt(i) == '\\') { + if (escaped) { + translated.append("\\\\"); + escaped = false; + } else { + escaped = true; + } + } else if (input.charAt(i) == '*') { + if (escaped) { + translated.append('*'); + escaped = false; + } else { + translated.append('%'); + } + } else if (input.charAt(i) == '?') { + if (escaped) { + translated.append('?'); + escaped = false; + } else { + translated.append('_'); + } + } else if (input.charAt(i) == '%' || input.charAt(i) == '_') { + // escape every occurrence of '%' and '_' + escaped = false; + translated.append('\\').append(input.charAt(i)); + } else { + if (escaped) { + translated.append('\\'); + escaped = false; + } + translated.append(input.charAt(i)); + } + } + return translated.toString(); + } +} Modified: magnoliamodules/trunk/openutils-mgnlcriteria/src/test/java/net/sourceforge/openutils/mgnlcriteria/advanced/JcrContainsCriteriaSearchTest.java =================================================================== --- magnoliamodules/trunk/openutils-mgnlcriteria/src/test/java/net/sourceforge/openutils/mgnlcriteria/advanced/JcrContainsCriteriaSearchTest.java 2013-07-05 14:37:39 UTC (rev 4277) +++ magnoliamodules/trunk/openutils-mgnlcriteria/src/test/java/net/sourceforge/openutils/mgnlcriteria/advanced/JcrContainsCriteriaSearchTest.java 2013-07-24 09:23:32 UTC (rev 4278) @@ -39,7 +39,6 @@ import javax.jcr.Node; import net.sourceforge.openutils.mgnlcriteria.jcr.query.AdvancedResult; -import net.sourceforge.openutils.mgnlcriteria.jcr.query.AdvancedResultItem; import net.sourceforge.openutils.mgnlcriteria.jcr.query.Criteria; import net.sourceforge.openutils.mgnlcriteria.jcr.query.JCRCriteriaFactory; import net.sourceforge.openutils.mgnlcriteria.jcr.query.JCRQueryException; @@ -230,6 +229,30 @@ Assert.assertEquals(CriteriaTestUtils.title(item), "hello \"Milano\" world"); } + @Test + public void testColonEscape() throws Exception + { + String textEnteredByUser = "l:u"; + Criteria criteria = criteria(textEnteredByUser, true); + Assert.assertEquals( + StringUtils.remove(criteria.toXpathExpression(), ' '), + "//*[((@jcr:primaryType='mgnl:content')and(jcr:contains(@title,'l\\:u')))]orderby@jcr:scoredescending"); + AdvancedResult advResult = null; + try + { + advResult = criteria.execute(); + } + catch (JCRQueryException e) + { + Assert.fail("Invalid query. " + e.getMessage()); + } + Assert.assertNotNull(advResult); + Assert.assertEquals(advResult.getTotalSize(), 1); + ResultIterator<? extends Node> items = advResult.getItems(); + Node item = items.next(); + Assert.assertEquals(CriteriaTestUtils.title(item), "hello l:u"); + } + private Criteria criteria(String titleSearch, boolean escape) { Criteria criteria = JCRCriteriaFactory.createCriteria().setWorkspace(RepositoryConstants.WEBSITE); Modified: magnoliamodules/trunk/openutils-mgnlcriteria/src/test/resources/crit-bootstrap/website.contains.xml =================================================================== --- magnoliamodules/trunk/openutils-mgnlcriteria/src/test/resources/crit-bootstrap/website.contains.xml 2013-07-05 14:37:39 UTC (rev 4277) +++ magnoliamodules/trunk/openutils-mgnlcriteria/src/test/resources/crit-bootstrap/website.contains.xml 2013-07-24 09:23:32 UTC (rev 4278) @@ -3,7 +3,7 @@ <sv:property sv:name="jcr:primaryType" sv:type="Name"> <sv:value>mgnl:content</sv:value> </sv:property> - <sv:property sv:name="jcr:mixinTypes" sv:type="Name"> + <sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multiple="true"> <sv:value>mix:lockable</sv:value> </sv:property> <sv:property sv:name="jcr:uuid" sv:type="String"> @@ -36,7 +36,7 @@ <sv:property sv:name="jcr:primaryType" sv:type="Name"> <sv:value>mgnl:content</sv:value> </sv:property> - <sv:property sv:name="jcr:mixinTypes" sv:type="Name"> + <sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multiple="true"> <sv:value>mix:lockable</sv:value> </sv:property> <sv:property sv:name="jcr:uuid" sv:type="String"> @@ -76,7 +76,7 @@ <sv:property sv:name="jcr:primaryType" sv:type="Name"> <sv:value>mgnl:content</sv:value> </sv:property> - <sv:property sv:name="jcr:mixinTypes" sv:type="Name"> + <sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multiple="true"> <sv:value>mix:lockable</sv:value> </sv:property> <sv:property sv:name="jcr:uuid" sv:type="String"> @@ -116,7 +116,7 @@ <sv:property sv:name="jcr:primaryType" sv:type="Name"> <sv:value>mgnl:content</sv:value> </sv:property> - <sv:property sv:name="jcr:mixinTypes" sv:type="Name"> + <sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multiple="true"> <sv:value>mix:lockable</sv:value> </sv:property> <sv:property sv:name="jcr:uuid" sv:type="String"> @@ -156,7 +156,7 @@ <sv:property sv:name="jcr:primaryType" sv:type="Name"> <sv:value>mgnl:content</sv:value> </sv:property> - <sv:property sv:name="jcr:mixinTypes" sv:type="Name"> + <sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multiple="true"> <sv:value>mix:lockable</sv:value> </sv:property> <sv:property sv:name="jcr:uuid" sv:type="String"> @@ -193,7 +193,7 @@ <sv:property sv:name="jcr:primaryType" sv:type="Name"> <sv:value>mgnl:content</sv:value> </sv:property> - <sv:property sv:name="jcr:mixinTypes" sv:type="Name"> + <sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multiple="true"> <sv:value>mix:lockable</sv:value> </sv:property> <sv:property sv:name="jcr:uuid" sv:type="String"> @@ -239,7 +239,7 @@ <sv:property sv:name="jcr:primaryType" sv:type="Name"> <sv:value>mgnl:content</sv:value> </sv:property> - <sv:property sv:name="jcr:mixinTypes" sv:type="Name"> + <sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multiple="true"> <sv:value>mix:lockable</sv:value> </sv:property> <sv:property sv:name="jcr:uuid" sv:type="String"> @@ -279,7 +279,7 @@ <sv:property sv:name="jcr:primaryType" sv:type="Name"> <sv:value>mgnl:content</sv:value> </sv:property> - <sv:property sv:name="jcr:mixinTypes" sv:type="Name"> + <sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multiple="true"> <sv:value>mix:lockable</sv:value> </sv:property> <sv:property sv:name="jcr:uuid" sv:type="String"> @@ -315,4 +315,41 @@ </sv:property> </sv:node> </sv:node> + <sv:node sv:name="colon"> + <sv:property sv:name="jcr:primaryType" sv:type="Name"> + <sv:value>mgnl:content</sv:value> + </sv:property> + <sv:property sv:name="jcr:mixinTypes" sv:type="Name" sv:multiple="true"> + <sv:value>mix:lockable</sv:value> + </sv:property> + <sv:property sv:name="jcr:uuid" sv:type="String"> + <sv:value>17d8648e-155e-47aa-b848-bcefd16aeb2e</sv:value> + </sv:property> + <sv:property sv:name="jcr:createdBy" sv:type="String"> + <sv:value>admin</sv:value> + </sv:property> + <sv:property sv:name="title" sv:type="String"> + <sv:value>hello l:u</sv:value> + </sv:property> + <sv:node sv:name="MetaData"> + <sv:property sv:name="jcr:primaryType" sv:type="Name"> + <sv:value>mgnl:metaData</sv:value> + </sv:property> + <sv:property sv:name="jcr:createdBy" sv:type="String"> + <sv:value>admin</sv:value> + </sv:property> + <sv:property sv:name="mgnl:authorid" sv:type="String"> + <sv:value>superuser</sv:value> + </sv:property> + <sv:property sv:name="mgnl:creationdate" sv:type="Date"> + <sv:value>2013-07-24T10:37:29.994+02:00</sv:value> + </sv:property> + <sv:property sv:name="mgnl:lastmodified" sv:type="Date"> + <sv:value>2013-07-24T10:38:00.532+02:00</sv:value> + </sv:property> + <sv:property sv:name="mgnl:template" sv:type="String"> + <sv:value>t-redirect</sv:value> + </sv:property> + </sv:node> + </sv:node> </sv:node> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |