[Adapdev-commits] Adapdev/src/Adapdev/Text/Indexing/FullText/Filters AbstractFilter.cs,1.2,1.3 Condi
Status: Beta
Brought to you by:
intesar66
Update of /cvsroot/adapdev/Adapdev/src/Adapdev/Text/Indexing/FullText/Filters
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19977/src/Adapdev/Text/Indexing/FullText/Filters

Added Files:
    AbstractFilter.cs ConditionalFilter.cs RegexTokenFilter.cs SpecialCharactersFilter.cs TokenLengthFilter.cs WordFilter.cs
Log Message:
Reposting to the repository after it got hosed

--- NEW FILE: RegexTokenFilter.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using System.Text.RegularExpressions;
using Adapdev.Text.Indexing.FullText;

namespace Adapdev.Text.Indexing.FullText.Filters
{
    /// <summary>
    /// Filters off tokens based on a regular expression.
    /// </summary>
    /// <remarks>This filter will filter off
    /// any tokens that <b>match</b> the regular
    /// expression (and not the ones that don't)</remarks>
    [Serializable]
    public class RegexTokenFilter : ConditionalFilter
    {
        private Regex _regex;

        /// <summary>
        /// Creates a new filter that will filter off any
        /// tokens that match the regular expression passed
        /// as argument.
        /// </summary>
        /// <param name="regex">the regular expression</param>
        /// <exception cref="ArgumentNullException">regex is null</exception>
        public RegexTokenFilter(string regex) : this(null, regex)
        {
        }

        /// <summary>
        /// Creates a new filter that will filter off any
        /// tokens that match the regular expression passed
        /// as argument.
        /// </summary>
        /// <param name="previous">the previous tokenizer in the chain</param>
        /// <param name="regex">the regular expression</param>
        /// <exception cref="ArgumentNullException">regex is null</exception>
        public RegexTokenFilter(ITokenizer previous, string regex) : base(previous)
        {
            if (null == regex)
            {
                throw new ArgumentNullException("regex", "regex can't be null!");
            }
            _regex = new Regex(regex);
        }

        /// <summary>
        /// Creates a new filter that will filter off any
        /// tokens that match the regular expression passed
        /// as argument.
        /// </summary>
        /// <param name="regex">the regular expression</param>
        /// <exception cref="ArgumentNullException">regex is null</exception>
        public RegexTokenFilter(Regex regex) : this(null, regex)
        {
        }

        /// <summary>
        /// Creates a new filter that will filter off any
        /// tokens that match the regular expression passed
        /// as argument.
        /// </summary>
        /// <param name="previous">the previous tokenizer in the chain</param>
        /// <param name="regex">the regular expression</param>
        /// <exception cref="ArgumentNullException">regex is null</exception>
        public RegexTokenFilter(ITokenizer previous, Regex regex) : base(previous)
        {
            if (null == regex)
            {
                throw new ArgumentNullException("regex", "regex can't be null!");
            }
            _regex = regex;
        }

        /// <summary>
        /// See <see cref="ConditionalFilter.IsValidToken"/> for details.
        /// </summary>
        /// <param name="token">token to be tested</param>
        /// <returns>false when the token value matches the regular
        /// expression (the token is dropped), true otherwise</returns>
        protected override bool IsValidToken(Token token)
        {
            return !_regex.IsMatch(token.Value);
        }
    }
}

--- NEW FILE: AbstractFilter.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using Adapdev.Text.Indexing.FullText;

namespace Adapdev.Text.Indexing.FullText.Filters
{
    /// <summary>
    /// Basic implementation for ITokenFilter with
    /// support for tokenizer chaining.
    /// </summary>
    [Serializable]
    public abstract class AbstractFilter : ITokenFilter
    {
        /// <summary>
        /// the previous tokenizer in the chain
        /// </summary>
        protected ITokenizer _previous;

        /// <summary>
        /// Creates a new filter with no previous
        /// tokenizer.
        /// </summary>
        protected AbstractFilter()
        {
            _previous = null;
        }

        /// <summary>
        /// Creates a new filter with a previous
        /// tokenizer in a tokenizer chain.
        /// </summary>
        /// <param name="previous">the previous tokenizer
        /// in the chain</param>
        protected AbstractFilter(ITokenizer previous)
        {
            _previous = previous;
        }

        /// <summary>
        /// Gets/sets the previous tokenizer
        /// in the chain.
        /// </summary>
        public ITokenizer Previous
        {
            get
            {
                return _previous;
            }

            set
            {
                _previous = value;
            }
        }

        /// <summary>
        /// Returns a MemberwiseClone of this object
        /// with the guarantee that the tail argument
        /// will be the last tokenizer in the new
        /// tokenizer chain.
        /// </summary>
        /// <param name="tail">the last tokenizer for the
        /// new chain</param>
        /// <returns>cloned chain with tail as the
        /// last tokenizer in the chain</returns>
        public ITokenizer Clone(ITokenizer tail)
        {
            // MemberwiseClone returns an object of this instance's own
            // runtime type, so a direct cast cannot fail; unlike "as" it
            // would also surface a broken assumption immediately instead
            // of as a null dereference on the next line.
            AbstractFilter clone = (AbstractFilter)MemberwiseClone();

            // When this filter is the end of the chain the tail is attached
            // here, otherwise the rest of the chain is cloned recursively.
            clone._previous = (null == _previous) ? tail : _previous.Clone(tail);
            return clone;
        }

        /// <summary>
        /// Must be supplied by derived classes.
        /// </summary>
        /// <returns></returns>
        public abstract Token NextToken();
    }
}

--- NEW FILE: WordFilter.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using System.Collections;
using Adapdev.Text.Indexing.FullText;

namespace Adapdev.Text.Indexing.FullText.Filters
{
    /// <summary>
    /// Filters off tokens by word.
    /// </summary>
    [Serializable]
    public class WordFilter : ConditionalFilter
    {
        // Used as a set: only the keys matter, every value is null.
        private Hashtable _words;

        /// <summary>
        /// See <see cref="WordFilter(string[])"/>.
        /// </summary>
        /// <param name="previous">the previous tokenizer in the chain</param>
        /// <param name="words">list of words that should be filtered
        /// off the chain</param>
        /// <exception cref="ArgumentNullException">words is null</exception>
        public WordFilter(ITokenizer previous, params string[] words) : base(previous)
        {
            if (null == words)
            {
                throw new ArgumentNullException("words");
            }

            _words = new Hashtable(words.Length);
            foreach (string word in words)
            {
                _words[word] = null;
            }
        }

        /// <summary>
        /// Creates a new filter that will not allow
        /// any words in the list represented by the words
        /// argument to pass through the chain.
        /// </summary>
        /// <param name="words">list of words that should be filtered
        /// off the chain</param>
        /// <exception cref="ArgumentNullException">words is null</exception>
        public WordFilter(params string[] words) : this(null, words)
        {
        }

        /// <summary>
        /// See <see cref="ConditionalFilter.IsValidToken"/> for details.
        /// </summary>
        /// <param name="token">token to be tested</param>
        /// <returns>false when the token value is one of the filtered
        /// words (the token is dropped), true otherwise</returns>
        protected override bool IsValidToken(Token token)
        {
            return !_words.ContainsKey(token.Value);
        }
    }
}

--- NEW FILE: SpecialCharactersFilter.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using System.Globalization;
using System.Text;
using Adapdev.Text.Indexing.FullText;

namespace Adapdev.Text.Indexing.FullText.Filters
{
    /// <summary>
    /// A filter that replaces special characters by
    /// their simpler ASCII counterparts.
    /// </summary>
    [Serializable]
    public class SpecialCharactersFilter : AbstractFilter
    {
        /// <summary>
        /// Creates a new filter.
        /// </summary>
        public SpecialCharactersFilter()
        {
        }

        /// <summary>
        /// Creates a new filter in a tokenizer chain.
        /// </summary>
        /// <param name="previous">previous tokenizer in the chain</param>
        public SpecialCharactersFilter(ITokenizer previous) : base(previous)
        {
        }

        /// <summary>
        /// Gets the token from the previous tokenizer in the
        /// chain and replaces every "complex" character
        /// in the token by its simpler counterpart.
        /// </summary>
        /// <returns>the new token or null</returns>
        /// <exception cref="InvalidOperationException">the filter has no
        /// previous tokenizer to read tokens from</exception>
        public override Token NextToken()
        {
            // The parameterless constructor leaves _previous null; fail with
            // a descriptive error rather than a bare NullReferenceException.
            if (null == _previous)
            {
                throw new InvalidOperationException("No previous tokenizer has been set for this filter.");
            }

            Token token = _previous.NextToken();
            if (null != token)
            {
                token.Value = Filter(token.Value);
            }
            return token;
        }

        /// <summary>
        /// Lower-cases every character of the value and replaces the
        /// accented characters known to this filter by their unaccented
        /// ASCII counterparts. All other characters are only lower-cased.
        /// </summary>
        /// <param name="value">the text to be simplified</param>
        /// <returns>a new string with the same length as value</returns>
        /// <exception cref="ArgumentNullException">value is null</exception>
        public static string Filter(string value)
        {
            if (null == value)
            {
                throw new ArgumentNullException("value");
            }

            char[] mapped = new char[value.Length];
            for (int i = 0; i < value.Length; ++i)
            {
                // Lower-case with the invariant culture so that the result
                // does not depend on the culture of the current thread.
                char c = char.ToLower(value[i], CultureInfo.InvariantCulture);

                // The accented characters are written as Unicode escapes so
                // the mapping cannot be corrupted by a source file encoding
                // mismatch.
                switch (c)
                {
                    case '\u00E1': // a with acute
                    case '\u00E3': // a with tilde
                    case '\u00E2': // a with circumflex
                    case '\u00E0': // a with grave
                        c = 'a';
                        break;

                    case '\u00E9': // e with acute
                    case '\u00EA': // e with circumflex
                        c = 'e';
                        break;

                    case '\u00ED': // i with acute
                        c = 'i';
                        break;

                    case '\u00F3': // o with acute
                    case '\u00F5': // o with tilde
                    case '\u00F4': // o with circumflex
                        c = 'o';
                        break;

                    case '\u00FA': // u with acute
                    case '\u00FC': // u with diaeresis
                        c = 'u';
                        break;

                    case '\u00E7': // c with cedilla
                        c = 'c';
                        break;
                }
                mapped[i] = c;
            }
            return new string(mapped);
        }
    }
}

--- NEW FILE: ConditionalFilter.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using Adapdev.Text.Indexing.FullText;

namespace Adapdev.Text.Indexing.FullText.Filters
{
    /// <summary>
    /// Base class for filter implementations that
    /// exclude tokens from the stream based on
    /// a condition.
    /// </summary>
    [Serializable]
    public abstract class ConditionalFilter : AbstractFilter
    {
        /// <summary>
        /// Creates a standalone filter (with no previous
        /// tokenizer).
        /// </summary>
        protected ConditionalFilter()
        {
        }

        /// <summary>
        /// Creates a filter in a filter chain.
        /// </summary>
        /// <param name="previous">the previous tokenizer in the chain</param>
        protected ConditionalFilter(ITokenizer previous) : base(previous)
        {
        }

        /// <summary>
        /// Gets a token from the previous tokenizer in the chain
        /// and checks the condition implemented by IsValidToken,
        /// when IsValidToken returns false the token is discarded
        /// and a new one is tried. This process is repeated until
        /// IsValidToken returns true or the previous tokenizer
        /// returns null.
        /// </summary>
        /// <returns>the next token for which IsValidToken returns true or
        /// null when the previous tokenizer runs out of tokens</returns>
        /// <exception cref="InvalidOperationException">the filter has no
        /// previous tokenizer to read tokens from</exception>
        public override Token NextToken()
        {
            // The standalone constructor leaves _previous null; fail with a
            // descriptive error rather than a bare NullReferenceException.
            if (null == _previous)
            {
                throw new InvalidOperationException("No previous tokenizer has been set for this filter.");
            }

            Token token = _previous.NextToken();
            while (null != token && !IsValidToken(token))
            {
                token = _previous.NextToken();
            }
            return token;
        }

        /// <summary>
        /// Test if the token is a valid token and
        /// as such should be returned to the
        /// caller of <see cref="NextToken" />.
        /// </summary>
        /// <param name="token">token to be tested</param>
        /// <returns>true if the token is valid, false otherwise</returns>
        protected abstract bool IsValidToken(Token token);
    }
}

--- NEW FILE: TokenLengthFilter.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using Adapdev.Text.Indexing.FullText;

namespace Adapdev.Text.Indexing.FullText.Filters
{
    /// <summary>
    /// Filters off tokens by length.
    /// </summary>
    [Serializable]
    public class TokenLengthFilter : ConditionalFilter
    {
        private int _minTokenLength;

        /// <summary>
        /// Creates a new filter that will only allow
        /// tokens with at least minTokenLength
        /// characters to pass.
        /// </summary>
        /// <param name="minTokenLength">minimum token length</param>
        public TokenLengthFilter(int minTokenLength) : this(null, minTokenLength)
        {
        }

        /// <summary>
        /// See <see cref="TokenLengthFilter(int)"/>.
        /// </summary>
        /// <param name="previous">previous tokenizer in the chain</param>
        /// <param name="minTokenLength">minimum token length</param>
        public TokenLengthFilter(ITokenizer previous, int minTokenLength) : base(previous)
        {
            _minTokenLength = minTokenLength;
        }

        /// <summary>
        /// See <see cref="ConditionalFilter.IsValidToken"/> for details.
        /// </summary>
        /// <param name="token">token to be tested</param>
        /// <returns>true when the token value has at least the minimum
        /// number of characters, false otherwise</returns>
        protected override bool IsValidToken(Token token)
        {
            return (token.Value.Length >= _minTokenLength);
        }
    }
}
|