From: <fg...@us...> - 2011-01-01 20:53:26
|
Revision: 3201 http://openutils.svn.sourceforge.net/openutils/?rev=3201&view=rev Author: fgiust Date: 2011-01-01 20:53:20 +0000 (Sat, 01 Jan 2011) Log Message: ----------- CRIT-30 Adds an utility package with commonly used lucene analyzers Modified Paths: -------------- trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingAnalyzer.java trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/Latin1Analyzer.java Added Paths: ----------- trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/BaseAnalyzer.java Modified: trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingAnalyzer.java =================================================================== --- trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingAnalyzer.java 2011-01-01 20:52:43 UTC (rev 3200) +++ trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/ASCIIFoldingAnalyzer.java 2011-01-01 20:53:20 UTC (rev 3201) @@ -18,61 +18,25 @@ */ package net.sourceforge.openutils.mgnlcriteria.utils; -import java.io.IOException; -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; /** * @author molaschi * @version $Id$ */ -public class ASCIIFoldingAnalyzer extends Analyzer +public class ASCIIFoldingAnalyzer extends BaseAnalyzer { @Override - public TokenStream tokenStream(String fieldName, Reader reader) + protected TokenStream tokenFiltersChain(TokenStream tokenStream) { - StandardTokenizer tokenStream = new StandardTokenizer(reader); - TokenStream result = new StandardFilter(tokenStream); result = new LowerCaseFilter(result); result = new 
ASCIIFoldingFilter(result); return result; } - @Override - public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException - { - SavedStreams streams = (SavedStreams) getPreviousTokenStream(); - if (streams == null) - { - streams = new SavedStreams(); - setPreviousTokenStream(streams); - streams.tokenStream = new StandardTokenizer(reader); - streams.filteredTokenStream = new StandardFilter(streams.tokenStream); - streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream); - streams.filteredTokenStream = new ASCIIFoldingFilter(streams.filteredTokenStream); - } - else - { - streams.tokenStream.reset(reader); - } - - return streams.filteredTokenStream; - } - - private static final class SavedStreams - { - - StandardTokenizer tokenStream; - - TokenStream filteredTokenStream; - } - } Added: trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/BaseAnalyzer.java =================================================================== --- trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/BaseAnalyzer.java (rev 0) +++ trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/BaseAnalyzer.java 2011-01-01 20:53:20 UTC (rev 3201) @@ -0,0 +1,89 @@ +/** + * + * Magnolia Criteria API (http://www.openmindlab.com/lab/products/mgnlcriteria.html) + * Copyright(C) 2009-2010, Openmind S.r.l. http://www.openmindonline.it + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +package net.sourceforge.openutils.mgnlcriteria.utils; + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardTokenizer; + + +/** + * Base analyzer class. Subclasses should simply implement the tokenFiltersChain() method in order to add TokenFilters. + * @author fgiust + * @version $Id$ + */ +public abstract class BaseAnalyzer extends Analyzer +{ + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) + { + StandardTokenizer tokenStream = tokenize(reader); + return tokenFiltersChain(tokenStream); + } + + @Override + public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException + { + SavedStreams streams = (SavedStreams) getPreviousTokenStream(); + if (streams == null) + { + streams = new SavedStreams(); + setPreviousTokenStream(streams); + streams.tokenStream = tokenize(reader); + streams.filteredTokenStream = tokenFiltersChain(streams.tokenStream); + } + else + { + streams.tokenStream.reset(reader); + } + + return streams.filteredTokenStream; + } + + /** + * Tokenize using a StandardTokenizer. Subclasses may override this method. 
+ * @param reader base reader + * @return tokenizer + */ + protected StandardTokenizer tokenize(Reader reader) + { + return new StandardTokenizer(reader); + } + + /** + * Apply a set of TokenFilters to the TokenStream + * @param tokenStream original tokenStream + * @return filtered tokenStream + */ + protected abstract TokenStream tokenFiltersChain(TokenStream tokenStream); + + private static final class SavedStreams + { + + StandardTokenizer tokenStream; + + TokenStream filteredTokenStream; + } + +} Property changes on: trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/BaseAnalyzer.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + Author Date Id Revision Added: svn:eol-style + native Modified: trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/Latin1Analyzer.java =================================================================== --- trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/Latin1Analyzer.java 2011-01-01 20:52:43 UTC (rev 3200) +++ trunk/openutils-mgnlcriteria/src/main/java/net/sourceforge/openutils/mgnlcriteria/utils/Latin1Analyzer.java 2011-01-01 20:53:20 UTC (rev 3201) @@ -19,62 +19,27 @@ package net.sourceforge.openutils.mgnlcriteria.utils; -import java.io.IOException; -import java.io.Reader; - -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.ISOLatin1AccentFilter; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; /** - * @author molaschi + * A filter for latin-1 chars. 
+ * @author fgiust * @version $Id$ */ -public class Latin1Analyzer extends Analyzer +public class Latin1Analyzer extends BaseAnalyzer { @Override - public TokenStream tokenStream(String fieldName, Reader reader) + protected TokenStream tokenFiltersChain(TokenStream tokenStream) { - StandardTokenizer tokenStream = new StandardTokenizer(reader); - TokenStream result = new StandardFilter(tokenStream); result = new LowerCaseFilter(result); result = new ISOLatin1AccentFilter(result); return result; } - @Override - public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException - { - SavedStreams streams = (SavedStreams) getPreviousTokenStream(); - if (streams == null) - { - streams = new SavedStreams(); - setPreviousTokenStream(streams); - streams.tokenStream = new StandardTokenizer(reader); - streams.filteredTokenStream = new StandardFilter(streams.tokenStream); - streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream); - streams.filteredTokenStream = new ISOLatin1AccentFilter(streams.filteredTokenStream); - } - else - { - streams.tokenStream.reset(reader); - } - - return streams.filteredTokenStream; - } - - private static final class SavedStreams - { - - StandardTokenizer tokenStream; - - TokenStream filteredTokenStream; - } - } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |