Thread: [Adapdev-commits] Adapdev/src/Adapdev/Text/Indexing/FullText FullTextSearchExpression.cs,1.4,1.5 Ful
Status: Beta
Brought to you by:
intesar66
From: Sean M. <int...@us...> - 2005-11-17 02:46:58
|
Update of /cvsroot/adapdev/Adapdev/src/Adapdev/Text/Indexing/FullText In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv11442/src/Adapdev/Text/Indexing/FullText Added Files: FullTextSearchExpression.cs FullTextSearchIndex.cs FullTextSearchMode.cs ITokenFilter.cs ITokenizer.cs IndexedField.cs IndexedFieldCollection.cs Posting.cs Postings.cs TermOccurenceCollection.cs TermOccurrence.cs Token.cs Log Message: --- NEW FILE: FullTextSearchMode.cs --- #region license // Bamboo.Prevalence - a .NET object prevalence engine // Copyright (C) 2004 Rodrigo B. de Oliveira // // Based on the original concept and implementation of Prevayler (TM) // by Klaus Wuestefeld. Visit http://www.prevayler.org for details. // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without restriction, // including without limitation the rights to use, copy, modify, merge, // publish, distribute, sublicense, and/or sell copies of the Software, // and to permit persons to whom the Software is furnished to do so, // subject to the following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE // OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // // Contact Information // // http://bbooprevalence.sourceforge.net // mailto:rod...@us... #endregion using System; namespace Adapdev.Text.Indexing.FullText { /// <summary> /// Search mode. 
/// </summary>
    public enum FullTextSearchMode
    {
        /// <summary>
        /// Include a record in the result if
        /// any of the words in the search expression
        /// occurs in the record.
        /// </summary>
        IncludeAny,

        /// <summary>
        /// Include a record in the result only
        /// if all of the words in the search expression
        /// occur in the record.
        /// </summary>
        IncludeAll
    }
}
--- NEW FILE: ITokenFilter.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// Marker interface for token filters.
/// </summary>
    /// <remarks>
    /// Declares no members of its own; the whole contract is
    /// inherited from <see cref="ITokenizer"/>.
    /// </remarks>
    public interface ITokenFilter : ITokenizer
    {
    }
}
--- NEW FILE: FullTextSearchIndex.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using System.Collections;
using Adapdev.Text.Indexing;
using Adapdev.Text.Indexing.FullText.Filters;
using Adapdev.Text.Indexing.FullText.Tokenizers;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// An index for full text searches over record objects.
/// </summary>
    /// <remarks>
    /// <b>The mutating methods of this class (such as
    /// <see cref="Add" />, <see cref="Remove" /> and
    /// <see cref="Update" />)
    /// are not thread-safe, all the
    /// synchronization work must be done by the
    /// application.</b><br /><br />
    /// Non mutating methods such as the various <see cref="Search" />
    /// implementations <b>can be safely called from multiple threads</b>
    /// simultaneously.
    /// </remarks>
    /// <example>
    /// <code>
    /// FullTextSearchIndex index = new FullTextSearchIndex();
    /// index.Fields.Add("title");
    /// index.Fields.Add("author");
    ///
    /// HashtableRecord book1 = new HashtableRecord();
    /// book1["title"] = "A Midsummer Night's Dream";
    /// book1["author"] = "Shakespeare, William";
    ///
    /// HashtableRecord book2 = new HashtableRecord();
    /// book2["title"] = "Much Ado About Nothing";
    /// book2["author"] = "Shakespeare, William";
    ///
    /// index.Add(book1);
    /// index.Add(book2);
    ///
    /// SearchResult result1 = index.Search("midsummer dream");
    /// AssertEquals(1, result1.Count);
    /// Assert(result1.Contains(book1));
    ///
    /// SearchResult result2 = index.Search("shakespeare");
    /// result2.SortByField("title");
    ///
    /// AssertEquals(2, result2.Count);
    /// AssertEquals(book1, result2[0].Record);
    /// AssertEquals(book2, result2[1].Record);
    /// </code>
    /// </example>
    [Serializable]
    public class FullTextSearchIndex : IIndex
    {
        /// <summary>
        /// A filter that considers only tokens with more than 2 characters
        /// and replaces special characters (like 'ç', 'ã') by their
        /// simpler counterparts ('c', 'a').
        /// </summary>
        public static ITokenFilter DefaultFilter = new SpecialCharactersFilter(new TokenLengthFilter(3));

        IndexedFieldCollection _fields;

        // term (token value) => Postings for that term
        Hashtable _postings;

        ITokenFilter _filter = FullTextSearchIndex.DefaultFilter;

        /// <summary>
        /// Creates an empty index.
        /// </summary>
        public FullTextSearchIndex()
        {
            _fields = new IndexedFieldCollection();
            _postings = new Hashtable();
        }

        /// <summary>
        /// Creates an empty index with a specific filter
        /// chain for token filtering.
        /// </summary>
        /// <param name="filter">the filter chain that
        /// should be used by the index to filter
        /// tokens</param>
        /// <exception cref="ArgumentNullException">if
        /// <paramref name="filter"/> is null</exception>
        public FullTextSearchIndex(ITokenFilter filter) : this()
        {
            Filter = filter;
        }

        /// <summary>
        /// Gets/sets the filter chain that will be used
        /// for all text preprocessing
        /// </summary>
        /// <exception cref="ArgumentNullException">when set to null</exception>
        public ITokenFilter Filter
        {
            get
            {
                return _filter;
            }

            set
            {
                if (null == value)
                {
                    throw new ArgumentNullException("value", "Filter cannot be null!");
                }
                _filter = value;
            }
        }

        /// <summary>
        /// Returns a snapshot of all the Postings held
        /// by this index. Each Postings instance represents
        /// a currently indexed term and all its occurrences.
        /// </summary>
        public Postings[] Postings
        {
            get
            {
                Postings[] postings = new Postings[_postings.Count];
                _postings.Values.CopyTo(postings, 0);
                return postings;
            }
        }

        /// <summary>
        /// Returns a snapshot of all the records currently
        /// held by this index.
        /// </summary>
        public IRecord[] Records
        {
            get
            {
                // The Hashtable is used as a set: the same record shows up
                // under every term it contains but is reported only once.
                Hashtable records = new Hashtable();
                foreach (Postings postings in _postings.Values)
                {
                    foreach (IRecord record in postings.Records)
                    {
                        records[record] = null;
                    }
                }

                IRecord[] returnValue = new IRecord[records.Count];
                records.Keys.CopyTo(returnValue, 0);
                return returnValue;
            }
        }

        /// <summary>
        /// Collection of fields that compose the index.
        /// </summary>
        public IndexedFieldCollection Fields
        {
            get
            {
                return _fields;
            }
        }

        #region Implementation of IIndex
        /// <summary>
        /// See <see cref="Adapdev.Text.Indexing.IIndex.Add"/> for details.
        /// </summary>
        /// <param name="record">record that should be indexed</param>
        /// <remarks>
        /// Indexes all the fields included in the
        /// <see cref="Fields"/> collection. Notice
        /// however that the record is never automatically
        /// reindexed should its fields change or should
        /// the collection of indexed fields (<see cref="Fields"/>)
        /// change.<br />
        /// The application is always responsible for calling
        /// <see cref="Update"/> in such cases.
        /// </remarks>
        public void Add(Adapdev.Text.Indexing.IRecord record)
        {
            foreach (IndexedField field in _fields)
            {
                IndexByField(record, field);
            }
        }

        /// <summary>
        /// Discards every term and posting held by the index.
        /// The <see cref="Fields"/> collection and the
        /// <see cref="Filter"/> are left untouched.
        /// </summary>
        public void Clear()
        {
            _postings.Clear();
        }

        /// <summary>
        /// See <see cref="Adapdev.Text.Indexing.IIndex.Remove"/> for details.
        /// </summary>
        /// <param name="record">record that should be removed from the index</param>
        /// <remarks>reference comparison is always used</remarks>
        public void Remove(Adapdev.Text.Indexing.IRecord record)
        {
            // NOTE(review): a term whose last record is removed keeps its
            // (now empty) Postings entry and is still returned by the
            // Postings property; confirm whether callers rely on that
            // before pruning empty entries here.
            foreach (Postings postings in _postings.Values)
            {
                postings.Remove(record);
            }
        }

        /// <summary>
        /// See <see cref="Adapdev.Text.Indexing.IIndex.Update"/> for details.
        /// </summary>
        /// <param name="record">existing record that should have its index information updated</param>
        /// <remarks>reference comparison is always used</remarks>
        public void Update(Adapdev.Text.Indexing.IRecord record)
        {
            Remove(record);
            Add(record);
        }

        /// <summary>
        /// When the expression passed as argument is an instance
        /// of FullTextSearchExpression this method behaves exactly
        /// as <see cref="Search(FullTextSearchExpression)" />, otherwise
        /// it behaves as expression.Evaluate(this).
        /// </summary>
        /// <param name="expression">search expression</param>
        /// <returns>the result of applying the search against this index</returns>
        /// <exception cref="ArgumentNullException">if
        /// <paramref name="expression"/> is null</exception>
        public Adapdev.Text.Indexing.SearchResult Search(Adapdev.Text.Indexing.ISearchExpression expression)
        {
            if (null == expression)
            {
                throw new ArgumentNullException("expression");
            }

            FullTextSearchExpression ftexpression = expression as FullTextSearchExpression;
            if (null != ftexpression)
            {
                return Search(ftexpression);
            }
            return expression.Evaluate(this);
        }
        #endregion

        /// <summary>
        /// Convenience method that creates a new <see cref="FullTextSearchExpression"/>
        /// for the expression passed as argument and calls
        /// <see cref="Search(FullTextSearchExpression)"/>.
        /// </summary>
        /// <param name="expression">search expression</param>
        /// <returns><see cref="Search(FullTextSearchExpression)"/></returns>
        public Adapdev.Text.Indexing.SearchResult Search(string expression)
        {
            return Search(new FullTextSearchExpression(expression));
        }

        /// <summary>
        /// Searches the index for the words included in
        /// the expression passed as argument. <br />
        /// All the fields are searched for every word
        /// in the expression.<br />
        /// </summary>
        /// <param name="expression">search expression</param>
        /// <returns>
        /// When expression.SearchMode is
        /// <see cref="FullTextSearchMode.IncludeAny"/> every
        /// record for which at least one word in the expression
        /// implies a match will be returned.<br />
        /// When expression.SearchMode is
        /// <see cref="FullTextSearchMode.IncludeAll" /> only
        /// those records for which all of the words in the expression
        /// imply a match will be returned.
        /// </returns>
        /// <exception cref="ArgumentNullException">if
        /// <paramref name="expression"/> is null</exception>
        /// <exception cref="ArgumentException">if the expression
        /// yields no token after filtering</exception>
        public Adapdev.Text.Indexing.SearchResult Search(FullTextSearchExpression expression)
        {
            if (null == expression)
            {
                throw new ArgumentNullException("expression");
            }

            ITokenizer tokenizer = CreateTokenizer(expression.Expression);
            Token token = tokenizer.NextToken();
            if (null == token)
            {
                throw new ArgumentException("Invalid search expression. The expression must contain at least one valid token!", "expression");
            }

            int begin = System.Environment.TickCount;

            SearchResult result = null;
            if (expression.SearchMode == FullTextSearchMode.IncludeAny)
            {
                result = IncludeAny(tokenizer, token);
            }
            else
            {
                result = IncludeAll(tokenizer, token);
            }

            // Environment.TickCount is a 32-bit counter that wraps around;
            // unchecked int subtraction still yields the right delta across
            // a wrap, whereas widening to long first would go negative.
            result.ElapsedTime = unchecked(System.Environment.TickCount - begin);

            return result;
        }

        // Collects into a single result the hits of every token produced by
        // the tokenizer, starting with the already fetched first token.
        SearchResult IncludeAny(ITokenizer tokenizer, Token token)
        {
            SearchResult result = new SearchResult();
            while (null != token)
            {
                SearchToken(result, token);
                token = tokenizer.NextToken();
            }
            return result;
        }

        // Searches every token separately and intersects the per-token
        // results; intersecting stops as soon as the running result is empty.
        SearchResult IncludeAll(ITokenizer tokenizer, Token token)
        {
            ArrayList results = new ArrayList();
            while (null != token)
            {
                SearchResult tokenResult = new SearchResult();
                SearchToken(tokenResult, token);
                results.Add(tokenResult);

                token = tokenizer.NextToken();
            }

            // The caller guarantees a non-null first token, so results[0] exists.
            SearchResult result = (SearchResult)results[0];
            for (int i = 1; i < results.Count && result.Count > 0; ++i)
            {
                result = result.Intersect((SearchResult)results[i]);
            }
            return result;
        }

        // Tokenizes the value of one field of the record and indexes each token.
        void IndexByField(IRecord record, IndexedField field)
        {
            string value = (string)record[field.Name];
            ITokenizer tokenizer = CreateTokenizer(value);
            Token token = tokenizer.NextToken();
            while (null != token)
            {
                IndexByToken(token, record, field);
                token = tokenizer.NextToken();
            }
        }

        // Registers one occurrence of the token, creating the Postings entry
        // for the term on first use.
        void IndexByToken(Token token, IRecord record, IndexedField field)
        {
            Postings postings = (Postings)_postings[token.Value];
            if (null == postings)
            {
                postings = new Postings(token.Value);
                _postings[token.Value] = postings;
            }
            postings.Add(record, field, token.Position);
        }

        // Adds to the result a hit for every record indexed under the token's term.
        void SearchToken(SearchResult result, Token token)
        {
            Postings postings = (Postings)_postings[token.Value];
            if (null != postings)
            {
                AddToResult(result, postings);
            }
        }

        void AddToResult(SearchResult result, Postings found)
        {
            foreach (Posting posting in found)
            {
                result.Add(new SearchHit(posting.Record));
            }
        }

        // Builds the tokenizer chain for a piece of text: a clone of the
        // configured filter chain with a StringTokenizer over the text as
        // its tail. Null or empty text gets the shared NullTokenizer.
        ITokenizer CreateTokenizer(string value)
        {
            if (null == value || 0 == value.Length)
            {
                return NullTokenizer.Instance;
            }
            return _filter.Clone(new StringTokenizer(value));
        }
    }
}
--- NEW FILE: Postings.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using System.Collections;
using System.Text;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// A collection of Posting objects for a
    /// specific term.<br />
    /// Posting objects are indexed by record for
    /// fast Add and Remove operations.
    /// </summary>
    [Serializable]
    public class Postings : System.Collections.IEnumerable
    {
        Hashtable _postings;

        string _term;

        /// <summary>
        /// Creates a new Postings object for
        /// a term.
/// </summary>
        /// <param name="term">the term</param>
        public Postings(string term)
        {
            _term = term;
            _postings = new Hashtable();
        }

        /// <summary>
        /// the term
        /// </summary>
        public string Term
        {
            get
            {
                return _term;
            }
        }

        /// <summary>
        /// Returns a snapshot of all the
        /// records currently indexed by the term
        /// </summary>
        public IRecord[] Records
        {
            get
            {
                IRecord[] records = new IRecord[_postings.Count];
                _postings.Keys.CopyTo(records, 0);
                return records;
            }
        }

        /// <summary>
        /// Adds a new occurrence of the term. The occurrence
        /// information (field and position) will be added
        /// to an existing Posting object whenever possible.
        /// </summary>
        /// <param name="record">the record where the term was found</param>
        /// <param name="field">the field where the term was found</param>
        /// <param name="position">the position in the field where the term was found</param>
        public void Add(IRecord record, IndexedField field, int position)
        {
            Posting posting = _postings[record] as Posting;
            if (null == posting)
            {
                // first occurrence of the term in this record
                posting = new Posting(record);
                _postings[record] = posting;
            }
            posting.Occurrences.Add(field, position);
        }

        /// <summary>
        /// Removes all information related to a
        /// specific record from this object.
        /// </summary>
        /// <param name="record">the record to be removed</param>
        public void Remove(IRecord record)
        {
            _postings.Remove(record);
        }

        /// <summary>
        /// Enumerates through all the Posting objects.
        /// </summary>
        /// <returns>an enumerator over the Posting values</returns>
        public System.Collections.IEnumerator GetEnumerator()
        {
            return _postings.Values.GetEnumerator();
        }

        /// <summary>
        /// Builds a readable representation of this object.
        /// </summary>
        /// <returns>
        /// the term followed by " => [" , the comma separated
        /// postings and "]"; an object without postings yields
        /// "term => []"
        /// </returns>
        public override string ToString()
        {
            StringBuilder builder = new StringBuilder();
            builder.Append(_term);
            builder.Append(" => [");

            // Write the separator before every posting but the first, so
            // there is never a trailing ", " to strip. Stripping it
            // unconditionally ate the " [" when there were no postings.
            bool first = true;
            foreach (Posting posting in _postings.Values)
            {
                if (!first)
                {
                    builder.Append(", ");
                }
                builder.Append(posting.ToString());
                first = false;
            }

            builder.Append("]");
            return builder.ToString();
        }
    }
}
--- NEW FILE: ITokenizer.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// A tokenizer is a source of or a filter for <see cref="Token"/> objects.
    /// </summary>
    public interface ITokenizer
    {
        /// <summary>
        /// For chaining tokenizers together: the tokenizer that
        /// comes before this one in the chain. May be null, in
        /// which case <see cref="Clone"/> substitutes its tail
        /// argument.
        /// </summary>
        ITokenizer Previous
        {
            get;
            set;
        }

        /// <summary>
        /// Clone the tokenizer. If the tokenizer
        /// supports chaining it should also clone
        /// the Previous tokenizer in the chain. If
        /// Previous is null the value of the tail
        /// parameter should be used instead (but without cloning
        /// the tail).
        /// </summary>
        /// <param name="tail">the last tokenizer in the chain</param>
        /// <returns>the cloned tokenizer</returns>
        /// <example>
        /// <code>
        /// public ITokenizer Clone(ITokenizer tail)
        /// {
        ///     ITokenizer clone = this.MemberwiseClone() as ITokenizer;
        ///     if (null == this.Previous)
        ///     {
        ///         clone.Previous = tail;
        ///     }
        ///     else
        ///     {
        ///         clone.Previous = this.Previous.Clone(tail);
        ///     }
        ///     return clone;
        /// }
        /// </code>
        /// </example>
        ITokenizer Clone(ITokenizer tail);

        /// <summary>
        /// Retrieves the next token.
        /// </summary>
        /// <returns>
        /// next token or null if no more tokens
        /// are available.
        /// </returns>
        Token NextToken();
    }
}
--- NEW FILE: Posting.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using System.Collections;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// Ties a record to the occurrences of one term inside it
    /// (in which fields does the term occur? how many
    /// times?).<br />
    /// The posting does not store the term itself; the term
    /// should be the key under which the posting is kept in a
    /// dictionary.
    /// </summary>
    [Serializable]
    public class Posting
    {
        // Field names are left as they are: the class is [Serializable].
        IRecord _record;

        TermOccurrenceCollection _occurrences;

        /// <summary>
        /// Creates a posting for a record, with no occurrences yet.
        /// </summary>
        /// <param name="record">the record</param>
        public Posting(IRecord record)
        {
            this._occurrences = new TermOccurrenceCollection();
            this._record = record;
        }

        /// <summary>
        /// The occurrences of the term recorded for this record.
        /// </summary>
        internal TermOccurrenceCollection Occurrences
        {
            get
            {
                return this._occurrences;
            }
        }

        /// <summary>
        /// The record this posting refers to.
        /// </summary>
        public IRecord Record
        {
            get
            {
                return this._record;
            }
        }

        /// <summary>
        /// Renders the posting as "&lt;record =&gt; occurrences&gt;".
        /// </summary>
        /// <returns>the textual representation</returns>
        public override string ToString()
        {
            return string.Concat("<", _record, " => ", _occurrences, ">");
        }
    }
}
--- NEW FILE: TermOccurrence.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using System.Text;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// Answers the questions:
    /// <list type="bullet">
    /// <item>in which field does the term occur?</item>
    /// <item>how many times does the term occur in that specific field?</item>
    /// <item>in which positions?</item>
    /// </list>
    /// This information can be used for ranking the search and
    /// for specific search types such as proximity search,
    /// searching for terms only in specific fields, etc.
    /// </summary>
    [Serializable]
    public class TermOccurrence
    {
        IndexedField _field;

        int[] _positions;

        /// <summary>
        /// Creates a new TermOccurrence for the
        /// field and position passed as arguments.
        /// </summary>
        /// <param name="field">the field where the term was found</param>
        /// <param name="position">the position where the term was found</param>
        public TermOccurrence(IndexedField field, int position)
        {
            _field = field;
            _positions = new int[] { position };
        }

        /// <summary>
        /// Field where the term was found
        /// </summary>
        public IndexedField Field
        {
            get
            {
                return _field;
            }
        }

        /// <summary>
        /// Positions in the field where
        /// the term was found.
        /// </summary>
        /// <remarks>
        /// The internal array itself is returned, not a copy.
        /// </remarks>
        public int[] Positions
        {
            get
            {
                return _positions;
            }
        }

        /// <summary>
        /// Records one more position for the term in this field.
        /// </summary>
        /// <param name="position">the additional position</param>
        internal void Add(int position)
        {
            int[] newPositions = new int[_positions.Length + 1];
            Array.Copy(_positions, newPositions, _positions.Length);
            newPositions[_positions.Length] = position;

            // Publish the grown array; without this assignment the new
            // position was computed and then thrown away.
            _positions = newPositions;
        }

        /// <summary>
        /// More readable representation of the object, in the form
        /// "&lt;field =&gt; [p1, p2, ...]&gt;".
        /// </summary>
        /// <returns>the textual representation</returns>
        public override string ToString()
        {
            StringBuilder builder = new StringBuilder();
            builder.Append("<");
            builder.Append(_field.ToString());
            builder.Append(" => ");
            builder.Append("[");
            for (int i = 0; i < _positions.Length; ++i)
            {
                // separator goes between items only, never after the last one
                if (i > 0)
                {
                    builder.Append(", ");
                }
                builder.Append(_positions[i]);
            }
            // close the list and the "<" opened above
            builder.Append("]>");
            return builder.ToString();
        }
    }
}
--- NEW FILE: FullTextSearchExpression.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B.
de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Contact Information
//
// http://bbooprevalence.sourceforge.net
// mailto:rod...@us...
#endregion

using System;
using Adapdev.Text.Indexing;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// Search expression class for searches over
    /// a <see cref="FullTextSearchIndex"/>.
    /// </summary>
    [Serializable]
    public class FullTextSearchExpression : ISearchExpression
    {
        string _expression;

        FullTextSearchMode _mode;

        /// <summary>
        /// Creates a new search expression that will
        /// return all the records that include any of
        /// the words (<see cref="FullTextSearchMode.IncludeAny"/>)
        /// in the expression passed as argument.
        /// </summary>
        /// <param name="expression">words to search for</param>
        public FullTextSearchExpression(string expression)
            : this(expression, FullTextSearchMode.IncludeAny)
        {
        }

        /// <summary>
        /// Creates a new search expression for
        /// the words in the expression argument
        /// with the specific behavior indicated by the mode
        /// argument.
        /// </summary>
        /// <param name="expression">words to search for</param>
        /// <param name="mode">search mode</param>
        public FullTextSearchExpression(string expression, FullTextSearchMode mode)
        {
            _expression = expression;
            _mode = mode;
        }

        /// <summary>
        /// Search expression
        /// </summary>
        public string Expression
        {
            get
            {
                return _expression;
            }
        }

        /// <summary>
        /// Search mode
        /// </summary>
        public FullTextSearchMode SearchMode
        {
            get
            {
                return _mode;
            }
        }

        #region Implementation of ISearchExpression
        /// <summary>
        /// Delegates to <see cref="Adapdev.Text.Indexing.FullText.FullTextSearchIndex.Search"/>.
        /// </summary>
        /// <param name="index">index</param>
        /// <returns>the result of searching the index for this expression</returns>
        /// <exception cref="ArgumentException">if the
        /// index argument is not of the correct type</exception>
        public Adapdev.Text.Indexing.SearchResult Evaluate(Adapdev.Text.Indexing.IIndex index)
        {
            FullTextSearchIndex ftindex = index as FullTextSearchIndex;
            if (null == ftindex)
            {
                // A null index also ends up here, since "as" yields null for it.
                // The parameter name is passed so the exception identifies the
                // offending argument.
                throw new ArgumentException("FullTextSearchExpression objects can be evaluated against FullTextSearchIndex objects only!", "index");
            }
            return ftindex.Search(this);
        }
        #endregion
    }
}
--- NEW FILE: IndexedFieldCollection.cs ---
#region license
// Bamboo.Prevalence - a .NET object prevalence engine
// Copyright (C) 2004 Rodrigo B. de Oliveira
//
// Based on the original concept and implementation of Prevayler (TM)
// by Klaus Wuestefeld. Visit http://www.prevayler.org for details.
// // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without restriction, // including without limitation the rights to use, copy, modify, merge, // publish, distribute, sublicense, and/or sell copies of the Software, // and to permit persons to whom the Software is furnished to do so, // subject to the following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE // OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // // Contact Information // // http://bbooprevalence.sourceforge.net // mailto:rod...@us... #endregion
using System;
using System.Collections;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// The set of fields that compose
    /// a <see cref="FullTextSearchIndex"/>.
    /// </summary>
    [Serializable]
    public class IndexedFieldCollection : CollectionBase
    {
        /// <summary>
        /// Creates a collection with no fields in it.
        /// </summary>
        public IndexedFieldCollection()
        {
        }

        /// <summary>
        /// Wraps the given field name in a new
        /// <see cref="IndexedField"/> and appends it
        /// to the collection.
        /// </summary>
        /// <param name="field">name of the
        /// record field to be indexed</param>
        public void Add(string field)
        {
            IndexedField indexedField = new IndexedField(field);
            InnerList.Add(indexedField);
        }
    }
}
--- NEW FILE: TermOccurenceCollection.cs --- #region license // Bamboo.Prevalence - a .NET object prevalence engine // Copyright (C) 2004 Rodrigo B.
de Oliveira // // Based on the original concept and implementation of Prevayler (TM) // by Klaus Wuestefeld. Visit http://www.prevayler.org for details. // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without restriction, // including without limitation the rights to use, copy, modify, merge, // publish, distribute, sublicense, and/or sell copies of the Software, // and to permit persons to whom the Software is furnished to do so, // subject to the following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE // OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // // Contact Information // // http://bbooprevalence.sourceforge.net // mailto:rod...@us... #endregion
using System;
using System.Collections;
using System.Text;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// A collection of TermOccurrence objects.
    /// </summary>
    [Serializable]
    public class TermOccurrenceCollection : CollectionBase
    {
        /// <summary>
        /// Creates an empty collection.
        /// </summary>
        public TermOccurrenceCollection()
        {
        }

        /// <summary>
        /// Adds the information related to the new
        /// occurrence of the term in the field and
        /// position passed as argument. If a TermOccurrence
        /// object for the specified field
        /// is already in the collection, the new position
        /// information is simply added to the existing
        /// TermOccurrence object. Otherwise a new TermOccurrence
        /// object will be created and added to the
        /// collection.
        /// </summary>
        /// <remarks>
        /// The existing entries are scanned linearly and their
        /// Field is matched against the argument with the
        /// <c>==</c> operator.
        /// </remarks>
        /// <param name="field">field where the term
        /// was found</param>
        /// <param name="position">
        /// position in the field where the term was found</param>
        public void Add(IndexedField field, int position)
        {
            foreach (TermOccurrence occurrence in InnerList)
            {
                if (occurrence.Field == field)
                {
                    // The field is already tracked: just record the new position.
                    occurrence.Add(position);
                    return;
                }
            }

            InnerList.Add(new TermOccurrence(field, position));
        }

        /// <summary>
        /// Builds a readable representation of this object:
        /// the string form of every TermOccurrence, separated
        /// by ", " and enclosed in square brackets.
        /// </summary>
        /// <returns>"[]" for an empty collection, otherwise
        /// "[first, second, ...]" with no trailing separator</returns>
        public override string ToString()
        {
            StringBuilder builder = new StringBuilder();
            builder.Append("[");

            // Emit the separator before every element except the first so
            // that the list never ends with a dangling ", ".
            bool first = true;
            foreach (TermOccurrence occurrence in InnerList)
            {
                if (!first)
                {
                    builder.Append(", ");
                }
                builder.Append(occurrence.ToString());
                first = false;
            }

            builder.Append("]");
            return builder.ToString();
        }
    }
}
--- NEW FILE: IndexedField.cs --- #region license // Bamboo.Prevalence - a .NET object prevalence engine // Copyright (C) 2004 Rodrigo B. de Oliveira // // Based on the original concept and implementation of Prevayler (TM) // by Klaus Wuestefeld. Visit http://www.prevayler.org for details. // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without restriction, // including without limitation the rights to use, copy, modify, merge, // publish, distribute, sublicense, and/or sell copies of the Software, // and to permit persons to whom the Software is furnished to do so, // subject to the following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE // OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // // Contact Information // // http://bbooprevalence.sourceforge.net // mailto:rod...@us... #endregion
using System;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// Describes a record field that takes part
    /// in full text indexing.
    /// </summary>
    [Serializable]
    public class IndexedField
    {
        private string _name;

        /// <summary>
        /// Creates the metadata object for one field.
        /// </summary>
        /// <param name="name">name of the record field
        /// to be indexed</param>
        public IndexedField(string name)
        {
            _name = name;
        }

        /// <summary>
        /// The field name given at construction time.
        /// </summary>
        public string Name
        {
            get { return _name; }
        }

        /// <summary>
        /// Renders the field as <c>&lt;IndexedField "name"&gt;</c>.
        /// </summary>
        /// <returns>the readable representation of this field</returns>
        public override string ToString()
        {
            // String.Concat treats a null name as an empty string,
            // exactly like the + operator does.
            return String.Concat("<IndexedField \"", _name, "\">");
        }
    }
}
--- NEW FILE: Token.cs --- #region license // Bamboo.Prevalence - a .NET object prevalence engine // Copyright (C) 2004 Rodrigo B. de Oliveira // // Based on the original concept and implementation of Prevayler (TM) // by Klaus Wuestefeld. Visit http://www.prevayler.org for details. // // Permission is hereby granted, free of charge, to any person // obtaining a copy of this software and associated documentation // files (the "Software"), to deal in the Software without restriction, // including without limitation the rights to use, copy, modify, merge, // publish, distribute, sublicense, and/or sell copies of the Software, // and to permit persons to whom the Software is furnished to do so, // subject to the following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software.
// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE // OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // // Contact Information // // http://bbooprevalence.sourceforge.net // mailto:rod...@us... #endregion
using System;

namespace Adapdev.Text.Indexing.FullText
{
    /// <summary>
    /// A token: a piece of text (its image) together with the
    /// position in the original text it was extracted from.
    /// </summary>
    [Serializable]
    public class Token
    {
        int _position;
        string _value;

        /// <summary>
        /// Creates a new token.
        /// </summary>
        /// <param name="value">token image</param>
        /// <param name="position">absolute position of the
        /// image in the original text; zero or greater</param>
        /// <exception cref="ArgumentNullException">if value is null</exception>
        /// <exception cref="ArgumentOutOfRangeException">if position
        /// is negative</exception>
        public Token(string value, int position)
        {
            if (null == value)
            {
                throw new ArgumentNullException("value");
            }

            if (position < 0)
            {
                // Report the parameter that is actually out of range; zero is a
                // valid position, so the requirement is "non-negative".
                throw new ArgumentOutOfRangeException("position", "position must be a non-negative number");
            }

            _value = value;
            _position = position;
        }

        /// <summary>
        /// Token image. Can be replaced, but never with null.
        /// </summary>
        /// <remarks>
        /// The value takes part in <see cref="Equals"/> and
        /// <see cref="GetHashCode"/>, so changing it changes the
        /// token's hash code.
        /// </remarks>
        /// <exception cref="ArgumentNullException">if the value
        /// being assigned is null</exception>
        public string Value
        {
            get
            {
                return _value;
            }

            set
            {
                if (null == value)
                {
                    throw new ArgumentNullException("value");
                }
                _value = value;
            }
        }

        /// <summary>
        /// Absolute position in the original text from
        /// which this token was extracted.
        /// </summary>
        public int Position
        {
            get
            {
                return _position;
            }
        }

        /// <summary>
        /// Tokens are equal if both properties,
        /// Value and Position, are considered
        /// equal.
        /// </summary>
        /// <param name="other">object to test equality for</param>
        /// <returns>true if the objects are considered equal;
        /// false when other is null or is not a Token</returns>
        public override bool Equals(object other)
        {
            Token token = other as Token;
            if (null == token)
            {
                return false;
            }
            return _position == token._position &&
                _value == token._value;
        }

        /// <summary>
        /// Calculates a hashcode based on the properties
        /// Value and Position.
        /// </summary>
        /// <returns>the combined hashcode of both properties</returns>
        public override int GetHashCode()
        {
            return _position.GetHashCode() ^ _value.GetHashCode();
        }

        /// <summary>
        /// Builds a more human friendly representation of the token,
        /// in the form <c>&lt;"value" at position&gt;</c>.
        /// </summary>
        /// <returns>a readable representation of the token</returns>
        public override string ToString()
        {
            return "<\"" + _value + "\" at " + _position + ">";
        }
    }
}
|