From: <ust...@us...> - 2006-06-29 12:45:19
|
Revision: 2242 Author: ustramooner Date: 2006-06-29 05:45:03 -0700 (Thu, 29 Jun 2006) ViewCVS: http://svn.sourceforge.net/clucene/?rev=2242&view=rev Log Message: ----------- added WildcardFilter and bitset fix Modified Paths: -------------- trunk/src/CLucene/index/MultiReader.h trunk/src/CLucene/index/SegmentTermDocs.cpp trunk/src/CLucene/index/SegmentTermEnum.h trunk/src/CLucene/index/Terms.h trunk/src/CLucene/search/BooleanScorer.cpp trunk/src/CLucene/search/FuzzyQuery.h trunk/src/CLucene/search/WildcardQuery.cpp trunk/src/CLucene/search/WildcardQuery.h trunk/src/CLucene/search/WildcardTermEnum.h trunk/src/CLucene/util/BitSet.cpp trunk/test/search/TestWildcard.cpp Modified: trunk/src/CLucene/index/MultiReader.h =================================================================== --- trunk/src/CLucene/index/MultiReader.h 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/src/CLucene/index/MultiReader.h 2006-06-29 12:45:03 UTC (rev 2242) @@ -94,6 +94,10 @@ //Closes the set of enumerations in the queue void close(); + + + const char* getObjectName(){ return MultiTermEnum::getClassName(); } + static const char* getClassName(){ return "MultiTermEnum"; } }; Modified: trunk/src/CLucene/index/SegmentTermDocs.cpp =================================================================== --- trunk/src/CLucene/index/SegmentTermDocs.cpp 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/src/CLucene/index/SegmentTermDocs.cpp 2006-06-29 12:45:03 UTC (rev 2242) @@ -63,15 +63,11 @@ TermInfo* ti=NULL; // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs - try{ - //todo: this is a really dodgy way of doing this - //we try and convert the Term Enum to a segment term enum... if its - //not a segment term enum, then we throw and assume it is not a segmenttermenum - //should have a way of knowing what type the TermEnum is... + if ( termEnum->getObjectName() == SegmentTermEnum::getClassName() ){ SegmentTermEnum* te = (SegmentTermEnum*)termEnum; te->fieldInfos = parent->fieldInfos; ti = te->getTermInfo(); - }catch(...){ + }else{ ti = parent->tis->get(termEnum->term(false)); } Modified: trunk/src/CLucene/index/SegmentTermEnum.h =================================================================== --- trunk/src/CLucene/index/SegmentTermEnum.h 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/src/CLucene/index/SegmentTermEnum.h 2006-06-29 12:45:03 UTC (rev 2242) @@ -93,6 +93,9 @@ //Returns a clone of this instance SegmentTermEnum* clone() const; + const char* getObjectName(){ return SegmentTermEnum::getClassName(); } + static const char* getClassName(){ return "SegmentTermEnum"; } + private: //Reads the next term in the enumeration Term* readTerm(Term* reuse); Modified: trunk/src/CLucene/index/Terms.h =================================================================== --- trunk/src/CLucene/index/Terms.h 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/src/CLucene/index/Terms.h 2006-06-29 12:45:03 UTC (rev 2242) @@ -135,6 +135,11 @@ } while (target->compareTo(term(false)) > 0); return true; } + + /** + * Because we need to know how to cast the object, we need the objects name. + */ + virtual const char* getObjectName() = 0; }; Modified: trunk/src/CLucene/search/BooleanScorer.cpp =================================================================== --- trunk/src/CLucene/search/BooleanScorer.cpp 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/src/CLucene/search/BooleanScorer.cpp 2006-06-29 12:45:03 UTC (rev 2242) @@ -48,23 +48,23 @@ // check prohibited & required if ((current->bits & prohibitedMask) == 0 && (current->bits & requiredMask) == requiredMask) { - return true; + return true; + } } - } - - // refill the queue - more = false; - end += BooleanScorer::BucketTable_SIZE; - for (SubScorer* sub = scorers; sub != NULL; sub = sub->next) { - Scorer* scorer = sub->scorer; - while (!sub->done && scorer->doc() < end) { - sub->collector->collect(scorer->doc(), scorer->score()); - sub->done = !scorer->next(); + + // refill the queue + more = false; + end += BooleanScorer::BucketTable_SIZE; + for (SubScorer* sub = scorers; sub != NULL; sub = sub->next) { + Scorer* scorer = sub->scorer; + while (!sub->done && scorer->doc() < end) { + sub->collector->collect(scorer->doc(), scorer->score()); + sub->done = !scorer->next(); + } + if (!sub->done) { + more = true; + } } - if (!sub->done) { - more = true; - } - } } while (bucketTable->first != NULL || more); return false; @@ -226,9 +226,6 @@ BucketTable* table = bucketTable; const int32_t i = doc & (BooleanScorer::BucketTable_SIZE-1); Bucket* bucket = &table->buckets[i]; - //if (bucket == NULL) - // bucket = _CLNEW Bucket(); - // table.buckets[i] = bucket; if (bucket->doc != doc) { // invalid bucket bucket->doc = doc; // set doc Modified: trunk/src/CLucene/search/FuzzyQuery.h =================================================================== --- trunk/src/CLucene/search/FuzzyQuery.h 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/src/CLucene/search/FuzzyQuery.h 2006-06-29 12:45:03 UTC (rev 2242) @@ -149,7 +149,10 @@ * multiplied by the scale factor */ float_t difference(); - + + + const char* getObjectName(){ return FuzzyTermEnum::getClassName(); } + static const char* getClassName(){ return "FuzzyTermEnum"; } }; CL_NS_END #endif Modified: trunk/src/CLucene/search/WildcardQuery.cpp =================================================================== --- trunk/src/CLucene/search/WildcardQuery.cpp 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/src/CLucene/search/WildcardQuery.cpp 2006-06-29 12:45:03 UTC (rev 2242) @@ -7,8 +7,10 @@ #include "CLucene/StdHeader.h" #ifndef NO_WILDCARD_QUERY #include "WildcardQuery.h" +#include "CLucene/util/BitSet.h" CL_NS_USE(index) +CL_NS_USE(util) CL_NS_DEF(search) @@ -64,5 +66,82 @@ return (this->getBoost() == tq->getBoost()) && getTerm()->equals(tq->getTerm()); } + + + + + + + + + + + + +WildcardFilter::WildcardFilter( Term* term ) +{ + this->term = _CL_POINTER(term); +} + +WildcardFilter::~WildcardFilter() +{ + _CLDECDELETE(term); +} + +WildcardFilter::WildcardFilter( const WildcardFilter& copy ) : + term( _CL_POINTER(copy.term) ) +{ +} + +Filter* WildcardFilter::clone() const { + return _CLNEW WildcardFilter(*this ); +} + + +TCHAR* WildcardFilter::toString() +{ + //Instantiate a stringbuffer buffer to store the readable version temporarily + CL_NS(util)::StringBuffer buffer; + //check if field equal to the field of prefix + if( term->field() != NULL != 0 ) { + //Append the field of prefix to the buffer + buffer.append(term->field()); + //Append a colon + buffer.append(_T(":") ); + } + //Append the text of the prefix + buffer.append(term->text()); + + //Convert StringBuffer buffer to TCHAR block and return it + return buffer.toString(); +} + + +/** Returns a BitSet with true for documents which should be permitted in +search results, and false for those that should not. */ +BitSet* WildcardFilter::bits( IndexReader* reader ) +{ + BitSet* bts = _CLNEW BitSet( reader->maxDoc() ); + + WildcardTermEnum termEnum (reader, term); + TermDocs* termDocs = reader->termDocs(); + + try{ + do{ + termDocs->seek(&termEnum); + + while (termDocs->next()) { + bts->set(termDocs->doc()); + } + }while(termEnum.next()); + } _CLFINALLY( + termDocs->close(); + _CLDELETE(termDocs); + termEnum.close(); + ) + + return bts; +} + CL_NS_END #endif Modified: trunk/src/CLucene/search/WildcardQuery.h =================================================================== --- trunk/src/CLucene/search/WildcardQuery.h 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/src/CLucene/search/WildcardQuery.h 2006-06-29 12:45:03 UTC (rev 2242) @@ -44,7 +44,29 @@ bool equals(Query* other) const; Query* clone() const; }; + + + +class WildcardFilter: public Filter +{ +private: + CL_NS(index)::Term* term; +protected: + WildcardFilter( const WildcardFilter& copy ); + +public: + WildcardFilter(CL_NS(index)::Term* term); + ~WildcardFilter(); + /** Returns a BitSet with true for documents which should be permitted in + search results, and false for those that should not. */ + CL_NS(util)::BitSet* bits( CL_NS(index)::IndexReader* reader ); + + Filter* clone() const; + TCHAR* toString(); +}; + + CL_NS_END #endif #endif Modified: trunk/src/CLucene/search/WildcardTermEnum.h =================================================================== --- trunk/src/CLucene/search/WildcardTermEnum.h 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/src/CLucene/search/WildcardTermEnum.h 2006-06-29 12:45:03 UTC (rev 2242) @@ -61,6 +61,9 @@ static bool wildcardEquals(const TCHAR* pattern, int32_t patternLen, int32_t patternIdx, const TCHAR* str, int32_t strLen, int32_t stringIdx); void close(); + + const char* getObjectName(){ return WildcardTermEnum::getClassName(); } + static const char* getClassName(){ return "WildcardTermEnum"; } }; CL_NS_END #endif Modified: trunk/src/CLucene/util/BitSet.cpp =================================================================== --- trunk/src/CLucene/util/BitSet.cpp 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/src/CLucene/util/BitSet.cpp 2006-06-29 12:45:03 UTC (rev 2242) @@ -22,7 +22,8 @@ ///Create a bitset with the specified size BitSet::BitSet ( int32_t size ): - _size(size) + _size(size), + _count(-1) { int32_t len = (_size >> 3) + 1; bits = _CL_NEWARRAY(uint8_t, len); Modified: trunk/test/search/TestWildcard.cpp =================================================================== --- trunk/test/search/TestWildcard.cpp 2006-06-29 12:43:33 UTC (rev 2241) +++ trunk/test/search/TestWildcard.cpp 2006-06-29 12:45:03 UTC (rev 2242) @@ -8,6 +8,27 @@ #ifndef NO_WILDCARD_QUERY +void _testWildcard(CuTest* tc, IndexSearcher* searcher, const TCHAR* qt, int expectedLen){ + Term* term = _CLNEW Term(_T("body"), qt); + Query* query = _CLNEW WildcardQuery(term); + + //test the wildcardquery + Hits* result = searcher->search(query); + CLUCENE_ASSERT(expectedLen == result->length()); + _CLDELETE(result); + _CLDELETE(query); + + + //now test wildcardfilter + Filter* filter = _CLNEW WildcardFilter(term); + BitSet* bits = filter->bits(searcher->getReader()); + CLUCENE_ASSERT(expectedLen == bits->count()); + _CLDELETE(filter); + _CLDELETE(bits); + + _CLDECDELETE(term); +} + void testFuzzyQuery(CuTest *tc){ RAMDirectory ram; @@ -44,7 +65,7 @@ _CLDELETE(doc); //--- - writer->optimize(); + //writer->optimize(); //test unoptimized writer->close(); _CLDELETE(writer); @@ -86,44 +107,21 @@ IndexReader* reader = IndexReader::open(&indexStore); IndexSearcher* searcher = _CLNEW IndexSearcher(reader); + _testWildcard(tc, searcher, _T("metal*"), 2); + _testWildcard(tc, searcher, _T("m*tal"), 1); + _testWildcard(tc, searcher, _T("m*tal*"), 2); + + Term* term = _CLNEW Term(_T("body"), _T("metal")); - Query* query1 = _CLNEW TermQuery(term); + Query* query1 = _CLNEW TermQuery(term); _CLDECDELETE(term); - term = _CLNEW Term(_T("body"), _T("metal*")); - Query* query2 = _CLNEW WildcardQuery(term); - _CLDECDELETE(term); - - term = _CLNEW Term(_T("body"), _T("m*tal")); - Query* query3 = _CLNEW WildcardQuery(term); - _CLDECDELETE(term); - - term = _CLNEW Term(_T("body"), _T("m*tal*")); - Query* query4 = _CLNEW WildcardQuery(term); - _CLDECDELETE(term); - - Hits* result = NULL; - - result = searcher->search(query1); + Hits* result = searcher->search(query1); CLUCENE_ASSERT(1 == result->length()); _CLDELETE(result); _CLDELETE(query1); - result = searcher->search(query2); - CLUCENE_ASSERT(2 == result->length()); - _CLDELETE(result); - _CLDELETE(query2); - result = searcher->search(query3); - CLUCENE_ASSERT(1 == result->length()); - _CLDELETE(result); - _CLDELETE(query3); - - result = searcher->search(query4); - CLUCENE_ASSERT(2 == result->length()); - _CLDELETE(result); - _CLDELETE(query4); - indexStore.close(); searcher->close(); reader->close(); @@ -155,45 +153,12 @@ IndexReader* reader = IndexReader::open(&indexStore); IndexSearcher* searcher = _CLNEW IndexSearcher(reader); - Term* term = _CLNEW Term(_T("body"), _T("m?tal")); - Query* query1 = _CLNEW WildcardQuery(term); - _CLDECDELETE(term); - term = _CLNEW Term(_T("body"), _T("metal?")); - Query* query2 = _CLNEW WildcardQuery(term); - _CLDECDELETE(term); + _testWildcard(tc, searcher, _T("m?tal"), 1); + _testWildcard(tc, searcher, _T("metal?"), 2); + _testWildcard(tc, searcher, _T("metals?"), 1); + _testWildcard(tc, searcher, _T("m?t?ls"), 3); - term = _CLNEW Term(_T("body"), _T("metals?")); - Query* query3 = _CLNEW WildcardQuery(term); - _CLDECDELETE(term); - - term = _CLNEW Term(_T("body"), _T("m?t?ls")); - Query* query4 = _CLNEW WildcardQuery(term); - _CLDECDELETE(term); - - - Hits* result = NULL; - - result = searcher->search(query1); - CLUCENE_ASSERT(1 == result->length()); - _CLDELETE(result); - _CLDELETE(query1); - - result = searcher->search(query2); - CLUCENE_ASSERT(2 == result->length()); - _CLDELETE(result); - _CLDELETE(query2); - - result = searcher->search(query3); - CLUCENE_ASSERT(1 == result->length()); - _CLDELETE(result); - _CLDELETE(query3); - - result = searcher->search(query4); - CLUCENE_ASSERT(3 == result->length()); - _CLDELETE(result); - _CLDELETE(query4); - indexStore.close(); reader->close(); searcher->close(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |