You can subscribe to this list here.
2004 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(25) |
Dec
(67) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2005 |
Jan
(125) |
Feb
(23) |
Mar
(167) |
Apr
(2) |
May
|
Jun
(19) |
Jul
(304) |
Aug
(181) |
Sep
(189) |
Oct
(145) |
Nov
(110) |
Dec
(44) |
2006 |
Jan
(303) |
Feb
(40) |
Mar
(2) |
Apr
(143) |
May
|
Jun
(74) |
Jul
(31) |
Aug
(7) |
Sep
(21) |
Oct
(33) |
Nov
(102) |
Dec
(36) |
2007 |
Jan
|
Feb
(16) |
Mar
(38) |
Apr
(34) |
May
(3) |
Jun
(4) |
Jul
(4) |
Aug
(13) |
Sep
(5) |
Oct
|
Nov
|
Dec
|
2008 |
Jan
(2) |
Feb
|
Mar
(13) |
Apr
|
May
(18) |
Jun
(48) |
Jul
(136) |
Aug
(45) |
Sep
(21) |
Oct
(32) |
Nov
|
Dec
(9) |
2009 |
Jan
(4) |
Feb
|
Mar
(33) |
Apr
(23) |
May
(6) |
Jun
(3) |
Jul
(11) |
Aug
|
Sep
(5) |
Oct
|
Nov
|
Dec
|
2010 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(3) |
Sep
|
Oct
|
Nov
|
Dec
|
From: <syn...@us...> - 2009-05-03 21:01:53
|
Revision: 3005 http://clucene.svn.sourceforge.net/clucene/?rev=3005&view=rev Author: synhershko Date: 2009-05-03 21:01:38 +0000 (Sun, 03 May 2009) Log Message: ----------- removing redundant NULL checks on deletor macros Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/debug/mem.h Modified: branches/lucene2_3_2/src/core/CLucene/debug/mem.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/debug/mem.h 2009-05-02 21:26:19 UTC (rev 3004) +++ branches/lucene2_3_2/src/core/CLucene/debug/mem.h 2009-05-03 21:01:38 UTC (rev 3005) @@ -18,7 +18,7 @@ #else #define _CLNEW new //todo: get this working again... - //#define LUCENE_BASE_CHECK(obj) (obj)->dummy__see_mem_h_for_details + //#define LUCENE_BASE_CHECK(obj) if (obj) obj->dummy__see_mem_h_for_details #endif #define _CL_POINTER(x) (x==NULL?NULL:(x->__cl_addref()>=0?x:x)) //return a add-ref'd object @@ -33,19 +33,19 @@ #if defined(_MSC_VER) && (_MSC_VER < 1300) //6.0 - #define _CLDELETE_CARRAY(x) if (x!=NULL){delete[] const_cast<TCHAR*>(x); x=NULL;} - #define _CLDELETE_CaARRAY(x) if (x!=NULL){delete[] const_cast<char*>(x); x=NULL;} - #define _CLDELETE_LCARRAY(x) if (x!=NULL){delete[] const_cast<TCHAR*>(x);} - #define _CLDELETE_LCaARRAY(x) if (x!=NULL){delete[] const_cast<char*>(x);} + #define _CLDELETE_CARRAY(x) delete[] const_cast<TCHAR*>(x); x=NULL; + #define _CLDELETE_CaARRAY(x) delete[] const_cast<char*>(x); x=NULL; + #define _CLDELETE_LCARRAY(x) delete[] const_cast<TCHAR*>(x); + #define _CLDELETE_LCaARRAY(x) delete[] const_cast<char*>(x); #endif //Macro for creating new arrays #define _CL_NEWARRAY(type,size) new type[size] -#define _CLDELETE_ARRAY(x) if (x!=NULL){delete [] x; x=NULL;} -#define _CLDELETE_LARRAY(x) if (x!=NULL){delete [] x;} +#define _CLDELETE_ARRAY(x) {delete[] x;x=NULL;} +#define _CLDELETE_LARRAY(x) {delete[] x;} #ifndef _CLDELETE_CARRAY - #define _CLDELETE_CARRAY(x) if (x!=NULL){delete [] x; x=NULL;} - #define 
_CLDELETE_LCARRAY(x) if (x!=NULL){delete [] x;} + #define _CLDELETE_CARRAY(x) {delete[] x;x=NULL;} + #define _CLDELETE_LCARRAY(x) {delete[] x;} #endif //a shortcut for deleting a carray and all its contents @@ -63,8 +63,10 @@ #define _CLDELETE(x) if (x!=NULL){ CND_PRECONDITION((x)->__cl_refcount>=0,"__cl_refcount was < 0"); if ((x)->__cl_decref() <= 0)delete x; x=NULL; } #define _CLLDELETE(x) if (x!=NULL){ CND_PRECONDITION((x)->__cl_refcount>=0,"__cl_refcount was < 0"); if ((x)->__cl_decref() <= 0)delete x; } #else - #define _CLDELETE(x) if (x!=NULL){ LUCENE_BASE_CHECK(x); delete x; x=NULL; } - #define _CLLDELETE(x) if (x!=NULL){ LUCENE_BASE_CHECK(x); delete x; } + // Here we had a redundant check for NULL and LUCENE_BASE_CHECK(x), which were removed once the internal memory + // tracking code was put out + #define _CLDELETE(x) {delete x;x=NULL;} + #define _CLLDELETE(x) {delete x;} #endif //_CLDECDELETE deletes objects which are *always* refcounted @@ -74,6 +76,6 @@ //_VDelete should be used for deleting non-clucene objects. //when using reference counting, _CLDELETE casts the object //into a LuceneBase*. -#define _CLVDELETE(x) if(x!=NULL){delete x; x=NULL;} +#define _CLVDELETE(x) {delete x;x=NULL;} #endif //_lucene_debug_lucenebase_ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-05-02 21:26:27
|
Revision: 3004 http://clucene.svn.sourceforge.net/clucene/?rev=3004&view=rev Author: synhershko Date: 2009-05-02 21:26:19 +0000 (Sat, 02 May 2009) Log Message: ----------- Updated QueryParser to conform with JLucene 2.3.2: * Old queryParser is still available under queryParser::legacy. It's no longer supported, and will probably be removed before the final release. * Added NO_RESOLUTION to DateTools::Resolution to allow for NULL resolution in QP * MultiFieldQueryParser is temporarily unavailable. * Several v2.1+ Query classes have not been ported yet, hence the QP still does not support them. * FuzzyQuery::toString and BooleanQuery::toString were updated to conform with JL 2.3.2 * New QP might differ in syntax from the legacy one, as it completely conforms with JL 2.3.2. Examples for such differences are in the tests, where incompatible queries were commented out * Breaking change: TokenStream::next(Token*) signature was changed - it now accepts Token*& and returns Token*. If a NULL pointer is passed, a new Token object will be created. This also affects all derived classes (Filters, Tokenizers and Analyzers). * Tests were updated to comply with the above change. * DocumentWriter::invertDocument was also updated to comply with this change * LUCENE_TOKEN_WORD_LENGTH macro is not supported in the current QP implementation for queryParser::Token. 
Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.cpp branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.h branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.h branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h branches/lucene2_3_2/src/core/CLucene/document/DateTools.h branches/lucene2_3_2/src/core/CLucene/files_list.txt branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/util/Equators.h branches/lucene2_3_2/src/core/CLucene/util/VoidMap.h branches/lucene2_3_2/src/core/CMakeLists.txt branches/lucene2_3_2/src/test/CuTest.cpp branches/lucene2_3_2/src/test/CuTest.h branches/lucene2_3_2/src/test/analysis/TestAnalysis.cpp branches/lucene2_3_2/src/test/analysis/TestAnalyzers.cpp branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp branches/lucene2_3_2/src/test/search/TestSearch.cpp branches/lucene2_3_2/src/test/tests.cpp Added Paths: ----------- branches/lucene2_3_2/src/core/CLucene/queryParser/CharStream.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/FastCharStream.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserConstants.h 
branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.h branches/lucene2_3_2/src/core/CLucene/queryParser/Token.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/Token.h branches/lucene2_3_2/src/core/CLucene/queryParser/_CharStream.h branches/lucene2_3_2/src/core/CLucene/queryParser/_FastCharStream.h branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/ branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/Lexer.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/MultiFieldQueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/MultiFieldQueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParserBase.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryToken.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryToken.h branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/TokenList.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/_Lexer.h branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/_TokenList.h Removed Paths: ------------- branches/lucene2_3_2/src/core/CLucene/queryParser/Lexer.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserBase.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryToken.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryToken.h branches/lucene2_3_2/src/core/CLucene/queryParser/TokenList.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/_Lexer.h branches/lucene2_3_2/src/core/CLucene/queryParser/_TokenList.h Modified: branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-05-02 19:53:35 UTC (rev 3003) +++ 
branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-05-02 21:26:19 UTC (rev 3004) @@ -240,7 +240,6 @@ TokenStream::~TokenStream(){ } - TokenFilter::TokenFilter(TokenStream* in, bool deleteTS): input(in), deleteTokenStream(deleteTS) Modified: branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h 2009-05-02 21:26:19 UTC (rev 3004) @@ -186,8 +186,29 @@ */ class CLUCENE_EXPORT TokenStream:LUCENE_BASE { public: - /** Sets token to the next token in the stream, returns false at the EOS. */ - virtual bool next(Token* token) = 0; + /** Returns the next token in the stream, or null at EOS. + * When possible, the input Token should be used as the + * returned Token (this gives fastest tokenization + * performance), but this is not required and a new Token + * may be returned (pass NULL for this). + * Callers may re-use a single Token instance for successive + * calls to this method. + * <p> + * This implicitly defines a "contract" between + * consumers (callers of this method) and + * producers (implementations of this method + * that are the source for tokens): + * <ul> + * <li>A consumer must fully consume the previously + * returned Token before calling this method again.</li> + * <li>A producer must call {@link Token#clear()} + * before setting the fields in it & returning it</li> + * </ul> + * Note that a {@link TokenFilter} is considered a consumer. + * @param result a Token that may or may not be used to return + * @return next token in the stream or null if end-of-stream was hit + */ + virtual Token* next(Token*& token) = 0; /** This is for backwards compatibility only. You should pass the token you want to fill * to next(), this will save a lot of object construction and destructions. 
@@ -205,7 +226,7 @@ * of a TokenStream are intended to be consumed more than * once, it is necessary to implement reset(). */ - //virtual void reset(CL_NS(util)::Reader* _input=NULL) = 0; + //virtual void reset(CL_NS(util)::Reader* _input=NULL){} virtual ~TokenStream(); }; Modified: branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.cpp 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.cpp 2009-05-02 21:26:19 UTC (rev 3004) @@ -28,7 +28,7 @@ { return c; } -bool CharTokenizer::next(Token* token){ +Token* CharTokenizer::next(Token*& token){ int32_t length = 0; int32_t start = offset; while (true) { @@ -36,15 +36,13 @@ offset++; if (bufferIndex >= dataLen) { dataLen = input->read(ioBuffer, 1, LUCENE_IO_BUFFER_SIZE ); - if (dataLen == -1) - dataLen = 0; bufferIndex = 0; } if (dataLen <= 0 ) { if (length > 0) break; else - return false; + return NULL; }else c = ioBuffer[bufferIndex++]; if (isTokenChar(c)) { // if it's a token TCHAR @@ -62,8 +60,11 @@ } buffer[length]=0; - token->set( buffer, start, start+length); - return true; + if (token != NULL) + token->set( buffer, start, start+length); + else + token = _CLNEW Token( buffer, start, start+length ); + return token; } void CharTokenizer::reset(CL_NS(util)::Reader* input) { @@ -147,11 +148,11 @@ LowerCaseFilter::~LowerCaseFilter(){ } -bool LowerCaseFilter::next(Token* t){ - if (!input->next(t)) - return false; +Token* LowerCaseFilter::next(Token*& t){ + if (input->next(t) == NULL) + return NULL; stringCaseFold( t->_termText ); - return true; + return t; } bool StopFilter::ENABLE_POSITION_INCREMENTS_DEFAULT = false; @@ -206,7 +207,7 @@ stopTable->insert( stopWords[i] ); } -bool StopFilter::next(Token* token) { +Token* StopFilter::next(Token*& token) { // return the first non-stop word found int32_t skippedPositions = 0; while 
(input->next(token)){ @@ -215,13 +216,13 @@ if (enablePositionIncrements) { token->setPositionIncrement(token->getPositionIncrement() + skippedPositions); } - return true; + return token; } skippedPositions += token->getPositionIncrement(); } // reached EOS -- return nothing - return false; + return NULL; } StopAnalyzer::StopAnalyzer(const char* stopwordsFile, const char* enc): @@ -312,7 +313,7 @@ } ISOLatin1AccentFilter::~ISOLatin1AccentFilter(){ } -bool ISOLatin1AccentFilter::next(Token* token){ +Token* ISOLatin1AccentFilter::next(Token*& token){ if ( input->next(token) ){ int32_t l = token->termLength(); const TCHAR* chars = token->termBuffer(); @@ -329,7 +330,7 @@ } if ( !doProcess ) { - return true; + return token; } StringBuffer output(l*2); @@ -466,9 +467,9 @@ } } token->setText(output.getBuffer()); - return true; + return token; } - return false; + return NULL; } @@ -498,8 +499,10 @@ KeywordTokenizer::~KeywordTokenizer(){ } -bool KeywordTokenizer::next(Token* token){ +Token* KeywordTokenizer::next(Token*& token){ if (!done) { + if (token==NULL) + token = _CLNEW Token(); done = true; int32_t rd; const TCHAR* buffer=0; @@ -517,9 +520,9 @@ } token->_termText[token->_termTextLen]=0; token->set(token->_termText,0,token->_termTextLen); - return true; + return token; } - return false; + return NULL; } void KeywordTokenizer::reset(CL_NS(util)::Reader* input) { @@ -535,18 +538,18 @@ this->_max = _max; } -bool LengthFilter::next(Token* token) +Token* LengthFilter::next(Token*& token) { // return the first non-stop word found while ( input->next(token) ) { size_t len = token->termLength(); if (len >= _min && len <= _max) - return true; + return token; // note: else we ignore it but should we index each part of it? 
} // reached EOS -- return null - return false; + return NULL; } Modified: branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.h 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.h 2009-05-02 21:26:19 UTC (rev 3004) @@ -38,7 +38,7 @@ public: CharTokenizer(CL_NS(util)::Reader* in); - bool next(Token* token); + Token* next(Token*& token); void reset(CL_NS(util)::Reader* input); virtual ~CharTokenizer(); @@ -126,7 +126,7 @@ public: LowerCaseFilter(TokenStream* in, bool deleteTokenStream); virtual ~LowerCaseFilter(); - bool next(Token* token); + Token* next(Token*& token); }; @@ -169,7 +169,7 @@ /** * Returns the next input Token whose termText() is not a stop word. */ - bool next(Token* token); + Token* next(Token*& token); /** @@ -336,7 +336,7 @@ /** * To replace accented characters in a String by unaccented equivalents. 
*/ - bool next(Token* token); + Token* next(Token*& token); virtual ~ISOLatin1AccentFilter(); }; @@ -352,7 +352,7 @@ int bufferSize; public: KeywordTokenizer(CL_NS(util)::Reader* input, int bufferSize=-1); - bool next(Token* token); + Token* next(Token*& token); void reset(CL_NS(util)::Reader* input); virtual ~KeywordTokenizer(); @@ -389,7 +389,7 @@ /** * Returns the next input Token whose termText() is the right len */ - bool next(Token* token); + Token* next(Token*& token); }; Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.cpp 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.cpp 2009-05-02 21:26:19 UTC (rev 3004) @@ -23,9 +23,9 @@ StandardFilter::~StandardFilter(){ } - bool StandardFilter::next(Token* t) { - if (!input->next(t)) - return false; + Token* StandardFilter::next(Token*& t) { + if (input->next(t) == NULL) + return NULL; TCHAR* text = t->_termText; const int32_t textLength = t->termLength(); @@ -38,7 +38,7 @@ text[textLength-2]=0; t->resetTermTextLen(); - return true; + return t; } else if ( type == tokenImage[ACRONYM] ) { // remove dots int32_t j = 0; @@ -47,10 +47,10 @@ text[j++]=text[i]; } text[j]=0; - return true; + return t; } else { - return true; + return t; } } Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.h 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.h 2009-05-02 21:26:19 UTC (rev 3004) @@ -29,7 +29,7 @@ * <p>Removes <tt>'s</tt> from the end of words. * <p>Removes dots from acronyms. 
*/ - bool next(Token* token); + Token* next(Token*& token); }; CL_NS_END2 #endif Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp 2009-05-02 21:26:19 UTC (rev 3004) @@ -131,8 +131,13 @@ return true; } - bool StandardTokenizer::next(Token* t) { + Token* StandardTokenizer::next(Token*& t) { int ch=0; + + bool bOwnsToken = (t==NULL); + if (bOwnsToken) + t = _CLNEW Token(); + while (!EOS) { ch = readChar(); @@ -142,19 +147,20 @@ continue; } else if (ALPHA || UNDERSCORE) { tokenStart = rdPos; - return ReadAlphaNum(ch,t); + if(ReadAlphaNum(ch,t))return t; } else if (DIGIT || NEGATIVE_SIGN_ || DECIMAL) { tokenStart = rdPos; /* ReadNumber returns NULL if it fails to extract a valid number; in ** that case, we just continue. */ if (ReadNumber(NULL, ch,t)) - return true; + return t; } else if ( _CJK ){ if ( ReadCJK(ch,t) ) - return true; + return t; } } - return false; + if (bOwnsToken) _CLDELETE(t); + return NULL; } bool StandardTokenizer::ReadNumber(const TCHAR* previousNumber, const TCHAR prev,Token* t) { Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h 2009-05-02 21:26:19 UTC (rev 3004) @@ -62,7 +62,7 @@ /** Returns the next token in the stream, or false at end-of-stream. * The returned token's type is set to an element of * StandardTokenizerConstants::tokenImage. 
*/ - bool next(Token* token); + Token* next(Token*& token); // Reads for number like "1"/"1234.567", or IP address like "192.168.1.2". bool ReadNumber(const TCHAR* previousNumber, const TCHAR prev, Token* t); Modified: branches/lucene2_3_2/src/core/CLucene/document/DateTools.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/document/DateTools.h 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/document/DateTools.h 2009-05-02 21:26:19 UTC (rev 3004) @@ -14,6 +14,7 @@ public: enum Resolution { + NO_RESOLUTION = NULL, YEAR_FORMAT, // yyyy MONTH_FORMAT, // yyyyMM DAY_FORMAT, // yyyyMMdd Modified: branches/lucene2_3_2/src/core/CLucene/files_list.txt =================================================================== (Binary files differ) Modified: branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp 2009-05-02 21:26:19 UTC (rev 3004) @@ -287,21 +287,21 @@ // Tokenize field and add to postingTable. 
CL_NS(analysis)::TokenStream* stream = analyzer->tokenStream(fieldName, reader); + CL_NS(analysis)::Token* t = NULL; try { - CL_NS(analysis)::Token t; int32_t lastTokenEndOffset = -1; - while (stream->next(&t)) { - position += (t.getPositionIncrement() - 1); + while (stream->next(t)) { + position += (t->getPositionIncrement() - 1); if(field->isStoreOffsetWithTermVector()){ TermVectorOffsetInfo tio; - tio.setStartOffset(offset + t.startOffset()); - tio.setEndOffset(offset + t.endOffset()); - addPosition(fieldName, t.termBuffer(), position++, &tio); + tio.setStartOffset(offset + t->startOffset()); + tio.setEndOffset(offset + t->endOffset()); + addPosition(fieldName, t->termBuffer(), position++, &tio); }else - addPosition(fieldName, t.termBuffer(), position++, NULL); + addPosition(fieldName, t->termBuffer(), position++, NULL); - lastTokenEndOffset = t.endOffset(); + lastTokenEndOffset = t->endOffset(); length++; // Apply field truncation policy. if (maxFieldLength != IndexWriter::FIELD_TRUNC_POLICY__WARN) { @@ -342,7 +342,8 @@ offset += lastTokenEndOffset + 1; } _CLFINALLY ( stream->close(); - _CLDELETE(stream); + _CLLDELETE(stream); + _CLLDELETE(t); ); } _CLFINALLY ( if (delReader) { Added: branches/lucene2_3_2/src/core/CLucene/queryParser/CharStream.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/CharStream.cpp (rev 0) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/CharStream.cpp 2009-05-02 21:26:19 UTC (rev 3004) @@ -0,0 +1,16 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +#include "CLucene/_ApiHeader.h" +#include "_CharStream.h" + +CL_NS_DEF(queryParser) + +~CharStream::~CharStream() +{ +} + +CL_NS_END Added: branches/lucene2_3_2/src/core/CLucene/queryParser/FastCharStream.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/FastCharStream.cpp (rev 0) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/FastCharStream.cpp 2009-05-02 21:26:19 UTC (rev 3004) @@ -0,0 +1,119 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/_ApiHeader.h" +#include "_CharStream.h" +#include "_FastCharStream.h" +#include "CLucene/util/CLStreams.h" + +CL_NS_DEF(queryParser) + +FastCharStream::FastCharStream(CL_NS(util)::Reader* r, bool ownsReader) : _bufferSize(0),buffer(NULL), + bufferLength(0),bufferPosition(0),tokenStart(0),bufferStart(0),input(r),_ownsReader(ownsReader) +{ +} + +FastCharStream::~FastCharStream() +{ + if (_ownsReader ){ + _CLLDELETE(input); + } + _CLDELETE_LCARRAY(buffer); +} + +TCHAR FastCharStream::readChar() { + if (bufferPosition >= bufferLength) + refill(); + return buffer[bufferPosition++]; +} + +void FastCharStream::refill() { + int32_t newPosition = bufferLength - tokenStart; + + if (tokenStart == 0) { // token won't fit in buffer + if (buffer == NULL) { // first time: alloc buffer + buffer = _CL_NEWARRAY(TCHAR, 2048); + _bufferSize = 2048; + } else if (bufferLength == _bufferSize) { // grow buffer + _bufferSize *= 2; + TCHAR* newBuffer = _CL_NEWARRAY(TCHAR, _bufferSize); + _tcsncpy(newBuffer, buffer, bufferLength); + 
_CLDELETE_LCARRAY(buffer); + buffer = newBuffer; + } + } else { // shift token to front + _tcsncpy(buffer, buffer+tokenStart,newPosition); + } + + bufferLength = newPosition; // update state + bufferPosition = newPosition; + bufferStart += tokenStart; + tokenStart = 0; + + const TCHAR* charBuf = NULL; + int32_t charsRead = // fill space in buffer + input->read(charBuf, newPosition, _bufferSize-newPosition); + if (charsRead == -1){ + _CLTHROWA(CL_ERR_IO, "read past eof"); + } + else { + memcpy(buffer, charBuf, charsRead * sizeof(TCHAR)); // TODO: Can we use the reader buffer instead of copying to our own? + bufferLength += charsRead; + } +} + +void FastCharStream::backup(const int32_t amount) { + bufferPosition -= amount; +} + +TCHAR* FastCharStream::GetImage() { + size_t len = bufferPosition - tokenStart; + TCHAR* ret = _CL_NEWARRAY(TCHAR, len + 1); + _tcsncpy(ret, buffer+tokenStart, len); + ret[len] = 0; // NULL terminated string + return ret; +} + +TCHAR* FastCharStream::GetSuffix(const int32_t len) { + TCHAR* value = _CL_NEWARRAY(TCHAR, len + 1); + _tcsncpy(value, buffer+(bufferPosition - len), len); + value[len] = 0; // NULL terminated string + return value; +} + +void FastCharStream::Done() { + try { + //input->close(); + } _CLCATCH_ERR(CL_ERR_IO, /*cleanup code*/, { + /*System.err.println("Caught: " + e + "; ignoring.");*/ + }) +} + +TCHAR FastCharStream::BeginToken() { + tokenStart = bufferPosition; + return readChar(); +} + +int32_t FastCharStream::getColumn() const { + return bufferStart + bufferPosition; +} +int32_t FastCharStream::getLine() const { + return 1; +} +int32_t FastCharStream::getEndColumn() const { + return bufferStart + bufferPosition; +} +int32_t FastCharStream::getEndLine() const { + return 1; +} +int32_t FastCharStream::getBeginColumn() const { + return bufferStart + tokenStart; +} +int32_t FastCharStream::getBeginLine() const { + return 1; +} + +CL_NS_END Deleted: branches/lucene2_3_2/src/core/CLucene/queryParser/Lexer.cpp 
=================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/Lexer.cpp 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/Lexer.cpp 2009-05-02 21:26:19 UTC (rev 3004) @@ -1,371 +0,0 @@ -/*------------------------------------------------------------------------------ -* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or -* the GNU Lesser General Public License, as specified in the COPYING file. -------------------------------------------------------------------------------*/ -#include "CLucene/_ApiHeader.h" -#include "QueryParser.h" -#include "_TokenList.h" -#include "QueryToken.h" -#include "_Lexer.h" - -#include "CLucene/util/CLStreams.h" -#include "CLucene/util/StringBuffer.h" -#include "CLucene/util/_FastCharStream.h" - -CL_NS_USE(util) - -CL_NS_DEF(queryParser) -Lexer::Lexer(QueryParserBase* queryparser, const TCHAR* query) { - //Func - Constructor - //Pre - query != NULL and contains the query string - //Post - An instance of Lexer has been created - - this->queryparser = queryparser; - - CND_PRECONDITION(query != NULL, "query is NULL"); - - //The InputStream of Reader must be destroyed in the destructor - delSR = true; - - StringReader *r = _CLNEW StringReader(query); - - //Check to see if r has been created properly - CND_CONDITION(r != NULL, "Could not allocate memory for StringReader r"); - - //Instantie a FastCharStream instance using r and assign it to reader - reader = _CLNEW FastCharStream(r); - - //Check to see if reader has been created properly - CND_CONDITION(reader != NULL, "Could not allocate memory for FastCharStream reader"); - - //The InputStream of Reader must be destroyed in the destructor - delSR = true; - -} - - -Lexer::Lexer(QueryParserBase* queryparser, BufferedReader* source) { - //Func - Constructor - // Initializes a new instance of the Lexer 
class with the specified - // TextReader to lex. - //Pre - Source contains a valid reference to a Reader - //Post - An instance of Lexer has been created using source as the reader - - this->queryparser = queryparser; - - //Instantie a FastCharStream instance using r and assign it to reader - reader = _CLNEW FastCharStream(source); - - //Check to see if reader has been created properly - CND_CONDITION(reader != NULL, "Could not allocate memory for FastCharStream reader"); - - //The InputStream of Reader must not be destroyed in the destructor - delSR = false; -} - - -Lexer::~Lexer() { - //Func - Destructor - //Pre - true - //Post - if delSR was true the InputStream input of reader has been deleted - // The instance of Lexer has been destroyed - - if (delSR) { - _CLDELETE(reader->input); - } - - _CLDELETE(reader); -} - - -void Lexer::Lex(TokenList *tokenList) { - //Func - Breaks the input stream onto the tokens list tokens - //Pre - tokens != NULL and contains a TokenList in which the tokens can be stored - //Post - The tokens have been added to the TokenList tokens - - CND_PRECONDITION(tokenList != NULL, "tokens is NULL"); - - //Get all the tokens - while(true) { - //Add the token to the tokens list - - //Get the next token - QueryToken* token = _CLNEW QueryToken; - if ( !GetNextToken(token) ){ - _CLDELETE(token); - break; - } - tokenList->add(token); - } - - //The end has been reached so create an EOF_ token - //Add the final token to the TokenList _tokens - tokenList->add(_CLNEW QueryToken( QueryToken::EOF_)); -} - - -bool Lexer::GetNextToken(QueryToken* token) { - while(!reader->Eos()) { - int ch = reader->GetNext(); - - if ( ch == -1 ) - break; - - // skipping whitespaces - if( _istspace(ch)!=0 ) { - continue; - } - TCHAR buf[2] = {ch,'\0'}; - switch(ch) { - case '+': - token->set(buf, QueryToken::PLUS); - return true; - case '-': - token->set(buf, QueryToken::MINUS); - return true; - case '(': - token->set(buf, QueryToken::LPAREN); - return true; - case ')': - 
token->set(buf, QueryToken::RPAREN); - return true; - case ':': - token->set(buf, QueryToken::COLON); - return true; - case '!': - token->set(buf, QueryToken::NOT); - return true; - case '^': - token->set(buf, QueryToken::CARAT); - return true; - case '~': - if( _istdigit( reader->Peek() )!=0 ) { - TCHAR number[LUCENE_MAX_FIELD_LEN]; - ReadIntegerNumber(ch, number,LUCENE_MAX_FIELD_LEN); - token->set(number, QueryToken::SLOP); - return true; - }else{ - token->set(buf, QueryToken::FUZZY); - return true; - } - break; - case '"': - return ReadQuoted(ch, token); - case '[': - return ReadInclusiveRange(ch, token); - case '{': - return ReadExclusiveRange(ch, token); - case ']': - case '}': - case '*': - queryparser->throwParserException( _T("Unrecognized char %d at %d::%d."), - ch, reader->Column(), reader->Line() ); - return false; - default: - return ReadTerm(ch, token); - - // end of swith - } - - } - return false; -} - - -void Lexer::ReadIntegerNumber(const TCHAR ch, TCHAR* buf, int buflen) { - int bp=0; - buf[bp++] = ch; - - int c = reader->Peek(); - while( c!=-1 && _istdigit(c)!=0 && bp<buflen-1 ) { - buf[bp++] = reader->GetNext(); - c = reader->Peek(); - } - buf[bp++] = 0; -} - - -bool Lexer::ReadInclusiveRange(const TCHAR prev, QueryToken* token) { - int ch = prev; - StringBuffer range; - range.appendChar(ch); - - while(!reader->Eos()) { - ch = reader->GetNext(); - if ( ch == -1 ) - break; - range.appendChar(ch); - - if(ch == ']'){ - token->set(range.getBuffer(), QueryToken::RANGEIN); - return true; - } - } - queryparser->throwParserException(_T("Unterminated inclusive range! 
%d %d::%d"),' ', - reader->Column(),reader->Column()); - return false; -} - - -bool Lexer::ReadExclusiveRange(const TCHAR prev, QueryToken* token) { - int ch = prev; - StringBuffer range; - range.appendChar(ch); - - while(!reader->Eos()) { - ch = reader->GetNext(); - - if (ch==-1) - break; - range.appendChar(ch); - - if(ch == '}'){ - token->set(range.getBuffer(), QueryToken::RANGEEX); - return true; - } - } - queryparser->throwParserException(_T("Unterminated exclusive range! %d %d::%d"),' ', - reader->Column(),reader->Column() ); - return false; -} - -bool Lexer::ReadQuoted(const TCHAR prev, QueryToken* token) { - int ch = prev; - StringBuffer quoted; - quoted.appendChar(ch); - - while(!reader->Eos()) { - ch = reader->GetNext(); - - if (ch==-1) - break; - - quoted.appendChar(ch); - - if(ch == '"'){ - token->set(quoted.getBuffer(), QueryToken::QUOTED); - return true; - } - } - queryparser->throwParserException(_T("Unterminated string! %d %d::%d"),' ', - reader->Column(),reader->Column()); - return false; -} - - -bool Lexer::ReadTerm(const TCHAR prev, QueryToken* token) { - int ch = prev; - bool completed = false; - int32_t asteriskCount = 0; - bool hasQuestion = false; - - StringBuffer val; - TCHAR buf[3]; //used for readescaped - - while(true) { - switch(ch) { - case -1: - break; - case '\\': - { - if ( ReadEscape(ch, buf) ) - val.append( buf ); - else - return false; - } - break; - - case LUCENE_WILDCARDTERMENUM_WILDCARD_STRING: - asteriskCount++; - val.appendChar(ch); - break; - case LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR: - hasQuestion = true; - val.appendChar(ch); - break; - case '\n': - case '\t': - case ' ': - case '+': - case '-': - case '!': - case '(': - case ')': - case ':': - case '^': - case '[': - case ']': - case '{': - case '}': - case '~': - case '"': - // create new QueryToken - reader->UnGet(); - completed = true; - break; - default: - val.appendChar(ch); - break; - // end of switch - } - - if(completed || ch==-1 || reader->Eos() ) - break; - else 
- ch = reader->GetNext(); - } - - // create new QueryToken - if(hasQuestion) - token->set(val.getBuffer(), QueryToken::WILDTERM); - else if(asteriskCount == 1 && val.getBuffer()[val.length() - 1] == '*') - token->set(val.getBuffer(), QueryToken::PREFIXTERM); - else if(asteriskCount > 0) - token->set(val.getBuffer(), QueryToken::WILDTERM); - else if( _tcsicmp(val.getBuffer(), _T("AND"))==0 || _tcscmp(val.getBuffer(), _T("&&"))==0 ) - token->set(val.getBuffer(), QueryToken::AND_); - else if( _tcsicmp(val.getBuffer(), _T("OR"))==0 || _tcscmp(val.getBuffer(), _T("||"))==0) - token->set(val.getBuffer(), QueryToken::OR); - else if( _tcsicmp(val.getBuffer(), _T("NOT"))==0 ) - token->set(val.getBuffer(), QueryToken::NOT); - else { - bool isnum = true; - int32_t nlen=val.length(); - for (int32_t i=0;i<nlen;++i) { - TCHAR ch=val.getBuffer()[i]; - if ( _istalpha(ch) ) { - isnum=false; - break; - } - } - - if ( isnum ) - token->set(val.getBuffer(), QueryToken::NUMBER); - else - token->set(val.getBuffer(), QueryToken::TERM); - } - return true; -} - - -bool Lexer::ReadEscape(TCHAR prev, TCHAR* buf) { - TCHAR ch = prev; - int bp=0; - buf[bp++] = ch; - - ch = reader->GetNext(); - int32_t idx = _tcscspn( buf, _T("\\+-!():^[]{}\"~*") ); - if(idx == 0) { - buf[bp++] = ch; - buf[bp++]=0; - return true; - } - queryparser->throwParserException(_T("Unrecognized escape sequence at %d %d::%d"), ' ', - reader->Column(),reader->Line()); - return false; -} - - -CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h 2009-05-02 21:26:19 UTC (rev 3004) @@ -4,8 +4,8 @@ * Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in 
the COPYING file. ------------------------------------------------------------------------------*/ -#ifndef MultiFieldQueryParser_H -#define MultiFieldQueryParser_H +#ifndef _lucene_queryParser_MultiFieldQueryParser_ +#define _lucene_queryParser_MultiFieldQueryParser_ //#include "CLucene/analysis/AnalysisHeader.h" Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-05-02 19:53:35 UTC (rev 3003) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-05-02 21:26:19 UTC (rev 3004) @@ -5,18 +5,30 @@ * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" +#include "_CharStream.h" +#include "_FastCharStream.h" +#include "QueryParserConstants.h" +#include "QueryParserTokenManager.h" #include "QueryParser.h" #include "CLucene/analysis/AnalysisHeader.h" -#include "CLucene/util/CLStreams.h" + #include "CLucene/search/SearchHeader.h" -#include "CLucene/search/BooleanClause.h" + #include "CLucene/search/Query.h" +#include "CLucene/search/TermQuery.h" +#include "CLucene/search/BooleanQuery.h" +#include "CLucene/search/FuzzyQuery.h" +#include "CLucene/search/PhraseQuery.h" +#include "CLucene/search/WildcardQuery.h" +#include "CLucene/search/PrefixQuery.h" +#include "CLucene/search/RangeQuery.h" + #include "CLucene/index/Term.h" -#include "QueryToken.h" +#include "Token.h" -#include "_TokenList.h" -#include "_Lexer.h" +#include "CLucene/util/CLStreams.h" +#include "CLucene/util/StringBuffer.h" CL_NS_USE(util) CL_NS_USE(index) @@ -25,484 +37,1424 @@ CL_NS_DEF(queryParser) - QueryParser::QueryParser(const TCHAR* _field, Analyzer* _analyzer) : QueryParserBase(_analyzer){ - //Func - Constructor. 
- // Instantiates a QueryParser for the named field _field - //Pre - _field != NULL - //Post - An instance has been created +const TCHAR* QueryParserConstants::tokenImage[] = { + _T("<EOF>"), + _T("<_NUM_CHAR>"), + _T("<_ESCAPED_CHAR>"), + _T("<_TERM_START_CHAR>"), + _T("<_TERM_CHAR>"), + _T("<_WHITESPACE>"), + _T("<token of kind 6>"), + _T("<AND>"), + _T("<OR>"), + _T("<NOT>"), + _T("\"+\""), + _T("\"-\""), + _T("\"(\""), + _T("\")\""), + _T("\":\""), + _T("\"*\""), + _T("\"^\""), + _T("<QUOTED>"), + _T("<TERM>"), + _T("<FUZZY_SLOP>"), + _T("<PREFIXTERM>"), + _T("<WILDTERM>"), + _T("\"[\""), + _T("\"{\""), + _T("<NUMBER>"), + _T("\"TO\""), + _T("\"]\""), + _T("<RANGEIN_QUOTED>"), + _T("<RANGEIN_GOOP>"), + _T("\"TO\""), + _T("\"}\""), + _T("<RANGEEX_QUOTED>"), + _T("<RANGEEX_GOOP>") +}; - if ( _field ) - field = STRDUP_TtoT(_field); - else - field = NULL; - tokens = NULL; - lowercaseExpandedTerms = true; - } +const int32_t QueryParser::jj_la1_0[] = {0x180,0x180,0xe00,0xe00,0x1f69f80,0x48000,0x10000,0x1f69000,0x1348000,0x80000,0x80000,0x10000,0x18000000,0x2000000,0x18000000,0x10000,0x80000000,0x20000000,0x80000000,0x10000,0x80000,0x10000,0x1f68000}; +const int32_t QueryParser::jj_la1_1[] = {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0}; - QueryParser::~QueryParser() { - //Func - Destructor - //Pre - true - //Post - The instance has been destroyed +QueryParser::QueryParser(const TCHAR* f, Analyzer* a) : _operator(OR_OPERATOR), + lowercaseExpandedTerms(true),useOldRangeQuery(false),allowLeadingWildcard(false),enablePositionIncrements(false), + analyzer(a),field(NULL),phraseSlop(0),fuzzyMinSim(FuzzyQuery::defaultMinSimilarity), + fuzzyPrefixLength(FuzzyQuery::defaultPrefixLength),/*locale(NULL),*/ + dateResolution(CL_NS(document)::DateTools::NO_RESOLUTION),fieldToDateResolution(NULL), + token_source(NULL),token(NULL),jj_nt(NULL),_firstToken(NULL),jj_ntk(-1),jj_scanpos(NULL),jj_lastpos(NULL),jj_la(0), + 
lookingAhead(false),jj_gen(0),jj_2_rtns(NULL),jj_rescan(false),jj_gc(0),jj_expentries(NULL),jj_expentry(NULL), + jj_kind(-1),jj_endpos(0) +{ + StringReader* rdr = _CLNEW StringReader(_T("")); + _init(_CLNEW FastCharStream(rdr, true)); - _CLDELETE_CARRAY(field); + if ( f ) + field = STRDUP_TtoT(f); +} + +void QueryParser::_deleteTokens(){ + Token* t = _firstToken; + while (true){ + if (_firstToken == NULL) break; + t = _firstToken->next; + _CLLDELETE(_firstToken); + _firstToken = t; } +} - //static - Query* QueryParser::parse(const TCHAR* query, const TCHAR* field, Analyzer* analyzer){ - //Func - Returns a new instance of the Query class with a specified query, field and - // analyzer values. - //Pre - query != NULL and holds the query to parse - // field != NULL and holds the default field for query terms - // analyzer holds a valid reference to an Analyzer and is used to - // find terms in the query text - //Post - query has been parsed and an instance of Query has been returned +QueryParser::~QueryParser(){ + _CLLDELETE(fieldToDateResolution); + _CLLDELETE(token_source); - CND_PRECONDITION(query != NULL, "query is NULL"); - CND_PRECONDITION(field != NULL, "field is NULL"); + _deleteTokens(); - QueryParser parser(field, analyzer); - return parser.parse(query); + _CLLDELETE(jj_expentries); + _CLLDELETE(jj_expentry); + _CLLDELETE(jj_2_rtns); + + _CLLDELETE(field); +} + +Query* QueryParser::parse(const TCHAR* _query) +{ + StringReader* rdr = _CLNEW StringReader(_query); + ReInit(_CLNEW FastCharStream(rdr, true)); + try { + // TopLevelQuery is a Query followed by the end-of-input (EOF) + Query* res = TopLevelQuery(field); + return (res!=NULL) ? 
res : _CLNEW BooleanQuery(); } + catch (CLuceneError& e) { + // rethrow to include the original query: + if (e.number()==CL_ERR_Parse || e.number()==CL_ERR_TokenMgr) { + TCHAR* _twhat = e.twhat(); + const size_t errLen = _tcslen(_twhat) + _tcslen(_query) + 20; // make sure we have enough room for our error message + TCHAR *err = _CL_NEWARRAY(TCHAR,errLen); + cl_stprintf(err, errLen, _T("Cannot parse '%s': %s"), _query,_twhat); + _CLTHROWT_DEL(CL_ERR_Parse, err); + } else if (e.number()==CL_ERR_TooManyClauses) { + const size_t errLen = _tcslen(_query) + 25; // make sure we have enough room for our error message + TCHAR *err = _CL_NEWARRAY(TCHAR,errLen); + cl_stprintf(err, errLen, _T("Cannot parse '%s': too many boolean clauses"), _query); + _CLTHROWT_DEL(CL_ERR_Parse, err); + } else + throw e; + } +} - Query* QueryParser::parse(const TCHAR* query){ - //Func - Returns a parsed Query instance - //Pre - query != NULL and contains the query value to be parsed - //Post - Returns a parsed Query Instance +Analyzer* QueryParser::getAnalyzer() const { + return analyzer; +} - CND_PRECONDITION(query != NULL, "query is NULL"); +TCHAR* QueryParser::getField() const { + return field; +} - //Instantie a Stringer that can read the query string - BufferedReader* r = _CLNEW StringReader(query); +float_t QueryParser::getFuzzyMinSim() const { + return fuzzyMinSim; +} - //Check to see if r has been created properly - CND_CONDITION(r != NULL, "Could not allocate memory for StringReader r"); +void QueryParser::setFuzzyMinSim(const float_t _fuzzyMinSim) { + fuzzyMinSim = _fuzzyMinSim; +} - //Pointer for the return value - Query* ret = NULL; +int32_t QueryParser::getFuzzyPrefixLength() const { + return fuzzyPrefixLength; +} - try{ - //Parse the query managed by the StringReader R and return a parsed Query instance - //into ret - ret = parse(r); - }_CLFINALLY ( - _CLDELETE(r); - ); +void QueryParser::setFuzzyPrefixLength(const int32_t _fuzzyPrefixLength) { + fuzzyPrefixLength = 
_fuzzyPrefixLength; +} - return ret; +void QueryParser::setPhraseSlop(const int32_t _phraseSlop) { + phraseSlop = _phraseSlop; +} +int32_t QueryParser::getPhraseSlop() const { + return phraseSlop; +} +void QueryParser::setAllowLeadingWildcard(const bool _allowLeadingWildcard) { + allowLeadingWildcard = _allowLeadingWildcard; +} +bool QueryParser::getAllowLeadingWildcard() const { + return allowLeadingWildcard; +} +void QueryParser::setEnablePositionIncrements(const bool _enable) { + enablePositionIncrements = _enable; +} +bool QueryParser::getEnablePositionIncrements() const { + return enablePositionIncrements; +} +void QueryParser::setDefaultOperator(Operator _op) { + _operator = _op; +} +QueryParser::Operator QueryParser::getDefaultOperator() const { + return _operator; +} +void QueryParser::setLowercaseExpandedTerms(const bool _lowercaseExpandedTerms) { + lowercaseExpandedTerms = _lowercaseExpandedTerms; +} +bool QueryParser::getLowercaseExpandedTerms() const { + return lowercaseExpandedTerms; +} +void QueryParser::setUseOldRangeQuery(const bool _useOldRangeQuery) { + useOldRangeQuery = _useOldRangeQuery; +} +bool QueryParser::getUseOldRangeQuery() const { + return useOldRangeQuery; +} +void QueryParser::setDateResolution(const CL_NS(document)::DateTools::Resolution _dateResolution) { + dateResolution = _dateResolution; +} +void QueryParser::setDateResolution(const TCHAR* fieldName, const CL_NS(document)::DateTools::Resolution _dateResolution) { + if (fieldName == NULL) + _CLTHROWA(CL_ERR_IllegalArgument, "Field cannot be null."); + + if (fieldToDateResolution == NULL) { + // lazily initialize HashMap + fieldToDateResolution = _CLNEW CL_NS(util)::CLHashMap<const TCHAR*, + CL_NS(document)::DateTools::Resolution, + CL_NS(util)::Compare::TChar, + CL_NS(util)::Equals::TChar, + CL_NS(util)::Deletor::tcArray, + CL_NS(util)::Deletor::DummyInt32 + >(); } - Query* QueryParser::parse(BufferedReader* reader){ - //Func - Returns a parsed Query instance - //Pre - reader 
contains a valid reference to a Reader and manages the query string - //Post - A parsed Query instance has been returned or + fieldToDateResolution->put(fieldName, _dateResolution); +} +CL_NS(document)::DateTools::Resolution QueryParser::getDateResolution(const TCHAR* fieldName) const { + if (fieldName == NULL) + _CLTHROWA(CL_ERR_IllegalArgument,"Field cannot be null."); - //instantiate the TokenList tokens - TokenList _tokens; - this->tokens = &_tokens; + if (fieldToDateResolution == NULL) { + // no field specific date resolutions set; return default date resolution instead + return dateResolution; + } - //Instantiate a lexer - Lexer lexer(this, reader); + CL_NS(document)::DateTools::Resolution resolution = fieldToDateResolution->get(fieldName); + if (resolution == NULL) { + // no date resolutions set for the given field; return default date resolution instead + resolution = dateResolution; + } - //tokens = lexer.Lex(); - //Lex the tokens - lexer.Lex(tokens); + return resolution; +} - //Peek to the first token and check if is an EOF - if (tokens->peek()->Type == QueryToken::EOF_){ - // The query string failed to yield any tokens. We discard the - // TokenList tokens and raise an exceptioin. 
- QueryToken* token = this->tokens->extract(); - _CLDELETE(token); - _CLTHROWA(CL_ERR_Parse, "No query given."); - } +void QueryParser::addClause(std::vector<BooleanClause*>& clauses, int32_t conj, int32_t mods, Query* q){ + bool required, prohibited; - //Return the parsed Query instance - Query* ret = MatchQuery(field); - this->tokens = NULL; - return ret; + // If this term is introduced by AND, make the preceding term required, + // unless it's already prohibited + const uint32_t nPreviousClauses = clauses.size(); + if (nPreviousClauses > 0 && conj == CONJ_AND) { + BooleanClause* c = clauses[nPreviousClauses-1]; + if (!c->isProhibited()) + c->setOccur(BooleanClause::MUST); } - int32_t QueryParser::MatchConjunction(){ - //Func - matches for CONJUNCTION - // CONJUNCTION ::= <AND> | <OR> - //Pre - tokens != NULL - //Post - if the first token is an AND or an OR then - // the token is extracted and deleted and CONJ_AND or CONJ_OR is returned - // otherwise CONJ_NONE is returned + if (nPreviousClauses > 0 && _operator == AND_OPERATOR && conj == CONJ_OR) { + // If this term is introduced by OR, make the preceding term optional, + // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) + // notice if the input is a OR b, first term is parsed as required; without + // this modification a OR b would parsed as +a OR b + BooleanClause* c = clauses[nPreviousClauses-1]; + if (!c->isProhibited()) + c->setOccur(BooleanClause::SHOULD); + } - CND_PRECONDITION(tokens != NULL, "tokens is NULL"); + // We might have been passed a null query; the term might have been + // filtered away by the analyzer. 
+ if (q == NULL) + return; - switch(tokens->peek()->Type){ - case QueryToken::AND_ : - //Delete the first token of tokenlist - ExtractAndDeleteToken(); - return CONJ_AND; - case QueryToken::OR : - //Delete the first token of tokenlist - ExtractAndDeleteToken(); - return CONJ_OR; - default : - return CONJ_NONE; + if (_operator == OR_OPERATOR) { + // We set REQUIRED if we're introduced by AND or +; PROHIBITED if + // introduced by NOT or -; make sure not to set both. + prohibited = (mods == MOD_NOT); + required = (mods == MOD_REQ); + if (conj == CONJ_AND && !prohibited) { + required = true; } + } else { + // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED + // if not PROHIBITED and not introduced by OR + prohibited = (mods == MOD_NOT); + required = (!prohibited && conj != CONJ_OR); } + if (required && !prohibited) + clauses.push_back(_CLNEW BooleanClause(q,true, BooleanClause::MUST)); + else if (!required && !prohibited) + clauses.push_back(_CLNEW BooleanClause(q,true, BooleanClause::SHOULD)); + else if (!required && prohibited) + clauses.push_back(_CLNEW BooleanClause(q,true, BooleanClause::MUST_NOT)); + else { + _CLTHROWA(CL_ERR_Runtime, "Clause cannot be both required and prohibited"); + } +} - int32_t QueryParser::MatchModifier(){ - //Func - matches for MODIFIER - // MODIFIER ::= <PLUS> | <MINUS> | <NOT> - //Pre - tokens != NULL - //Post - if the first token is a PLUS the token is extracted and deleted and MOD_REQ is returned - // if the first token is a MINUS or NOT the token is extracted and deleted and MOD_NOT is returned - // otherwise MOD_NONE is returned - CND_PRECONDITION(tokens != NULL, "tokens is NULL"); +Query* QueryParser::getFieldQuery(const TCHAR* _field, const TCHAR* queryText) { + // Use the analyzer to get all the tokens, and then build a TermQuery, + // PhraseQuery, or nothing based on the term count - switch(tokens->peek()->Type){ - case QueryToken::PLUS : - //Delete the first token of tokenlist - ExtractAndDeleteToken(); - 
return MOD_REQ; - case QueryToken::MINUS : - case QueryToken::NOT : - //Delete the first token of tokenlist - ExtractAndDeleteToken(); - return MOD_NOT; - default : - return MOD_NONE; + StringReader reader(queryText); + TokenStream* source = analyzer->tokenStream(_field, &reader); + + CLVector<CL_NS(analysis)::Token*, Deletor::Object<CL_NS(analysis)::Token> > v; + CL_NS(analysis)::Token* t = NULL; + int32_t positionCount = 0; + bool severalTokensAtSamePosition = false; + + while (true) { + t = NULL; + try { + t = source->next(t); } + _CLCATCH_ERR(CL_ERR_IO, _CLLDELETE(source);_CLLDELETE(t);_CLDELETE_LCARRAY(queryText);,{ + t = NULL; + }); + if (t == NULL) + break; + v.push_back(t); + if (t->getPositionIncrement() != 0) + positionCount += t->getPositionIncrement(); + else + severalTokensAtSamePosition = true; } + try { + source->close(); + } + _CLCATCH_ERR(CL_ERR_IO, {_CLLDELETE(source);_CLLDELETE(t);_CLDELETE_LCARRAY(queryText);},/*ignore CL_ERR_IO */); + _CLLDELETE(source); - Query* QueryParser::MatchQuery(const TCHAR* field){ - //Func - matches for QUERY - // QUERY ::= [MODIFIER] QueryParser::CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)* - //Pre - field != NULL - //Post - + if (v.size() == 0) + return NULL; + else if (v.size() == 1) { + Term* tm = _CLNEW Term(_field, v.at(0)->termBuffer()); + Query* ret = _CLNEW TermQuery( tm ); + _CLDECDELETE(tm); + return ret; + } else { + if (severalTokensAtSamePosition) { + if (positionCount == 1) { + // no phrase query: + BooleanQuery* q = _CLNEW BooleanQuery(true); + for(size_t i=0; i<v.size(); i++ ){ + Term* tm = _CLNEW Term(_field, v.at(i)->termBuffer()); + q->add(_CLNEW TermQuery(tm),BooleanClause::SHOULD); + _CLDECDELETE(tm); + } + return q; + } + else { + _CLDELETE_LCARRAY(queryText); + _CLTHROWA(CL_ERR_UnsupportedOperation, "MultiPhraseQuery NOT Implemented"); + /* + // TODO: phrase query: + MultiPhraseQuery* mpq = _CLNEW MultiPhraseQuery(); + mpq.setSlop(phraseSlop); + List multiTerms = new ArrayList(); + int32_t 
position = -1; + for (int32_t i = 0; i < v.size(); i++) { + t = (org.apache.lucene.analysis.Token) v.elementAt(i); + if (t.getPositionIncrement() > 0 && multiTerms.size() > 0) { + if (enablePositionIncrements) { + mpq.add((Term[])multiTerms.toArray(new Term[0]),position); + } else { + mpq.add((Term[])multiTerms.toArray(new Term[0])); + } + multiTerms.clear(); + } + position += t.getPositionIncrement(); + multiTerms.add(_CLNEW Term(field, t.termText())); + } + if (enablePositionIncrements) { + mpq.add((Term[])multiTerms.toArray(new Term[0]),position); + } else { + mpq.add((Term[])multiTerms.toArray(new Term[0])); + } + return mpq; + */ + } + } + else { + PhraseQuery* pq = _CLNEW PhraseQuery(); + pq->setSlop(phraseSlop); + int32_t position = -1; - CND_PRECONDITION(tokens != NULL, "tokens is NULL"); + for (size_t i = 0; i < v.size(); i++) { + t = v.at(i); + Term* tm = _CLNEW Term(_field, t->termBuffer()); + if (enablePositionIncrements) { + position += t->getPositionIncrement(); + pq->add(tm,position); + } else { + pq->add(tm); + } + _CLDECDELETE(tm); + } + return pq; + } + } +} - vector<BooleanClause*> clauses; +Query* QueryParser::getFieldQuery(const TCHAR* _field, const TCHAR* queryText, const int32_t slop) { + Query* query = getFieldQuery(_field, queryText); - Query* q = NULL; + if ( query && strcmp(query->getQueryName(),PhraseQuery::getClassName()) == 0) { + static_cast<PhraseQuery*>(query)->setSlop(slop); + } + /* + // TODO: Add MultiPhraseQuery support + if (query instanceof MultiPhraseQuery) { + ((MultiPhraseQuery) query).setSlop(slop); + } + */ + return query; +} - int32_t mods = MOD_NONE; - int32_t conj = CONJ_NONE; +Query* QueryParser::getRangeQuery(const TCHAR* _field, TCHAR* part1, TCHAR* part2, const bool inclusive) +{ + if (lowercaseExpandedTerms) { + _tcslwr(part1); + _tcslwr(part2); + } + /* + // TODO: Complete porting of the code below + try { + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); + df.setLenient(true); + Date d1 = 
df.parse(part1); + Date d2 = df.parse(part2); + if (inclusive) { + // The user can only specify the date, not the time, so make sure + // the time is set to the latest possible time of that date to really + // include all documents: + Calendar cal = Calendar.getInstance(locale); + cal.setTime(d2); + cal.set(Calendar.HOUR_OF_DAY, 23); + cal.set(Calendar.MINUTE, 59); + cal.set(Calendar.SECOND, 59); + cal.set(Calendar.MILLISECOND, 999); + d2 = cal.getTime(); + } + CL_NS(document)::DateTools::Resolution resolution = getDateResolution(_field); + if (resolution == NULL) { + // no default or field specific date resolution has been set, + // use deprecated DateField to maintain compatibilty with + // pre-1.9 Lucene versions. + part1 = DateField.dateToString(d1); + part2 = DateField.dateToString(d2); + } else { + part1 = CL_NS(document)::DateTools::dateToString(d1, resolution); + part2 = CL_NS(document)::DateTools::dateToString(d2, resolution); + } + } + catch (...) { } + */ - //match for MODIFIER - mods = MatchModifier(); + //if(useOldRangeQuery) + //{ + Term* t1 = _CLNEW Term(_field,part1); + Term* t2 = _CLNEW Term(_field,part2); + Query* ret = _CLNEW RangeQuery(t1, t2, inclusive); + _CLDECDELETE(t1); + _CLDECDELETE(t2); + return ret; + /*} + else + { + // TODO: Port ConstantScoreRangeQuery and enable this section + return _CLNEW ConstantScoreRangeQuery(_field,part1,part2,inclusive,inclusive); + }*/ +} - //match for CLAUSE - q = MatchClause(field); - AddClause(clauses, CONJ_NONE, mods, q); +Query* QueryParser::getBooleanQuery(std::vector<CL_NS(search)::BooleanClause*>& clauses, bool disableCoord) +{ + if (clauses.size()==0) { + return NULL; // all clause words were filtered away by the analyzer. 
+ } + BooleanQuery* query = _CLNEW BooleanQuery(disableCoord); - // match for CLAUSE* - while(true){ - QueryToken* p = tokens->peek(); - if(p->Type == QueryToken::EOF_){ - QueryToken* qt = MatchQueryToken(QueryToken::EOF_); - _CLDELETE(qt); - break; - } + for (size_t i = 0; i < clauses.size(); i++) { + query->add(clauses[i]); + } + return query; +} - if(p->Type == QueryToken::RPAREN){ - //MatchQueryToken(QueryToken::RPAREN); - break; - } +Query* QueryParser::getWildcardQuery(const TCHAR* _field, TCHAR* termStr) +{ + if (_tcscmp(_T("*"), _field) == 0) { + if (_tcscmp(_T("*"), termStr) == 0) return NULL; + // TODO: Implement MatchAllDocsQuery + //return _CLNEW MatchAllDocsQuery(); + } + if (!allowLeadingWildcard && (termStr[0]==_T('*') || termStr[0]==_T('?'))){ + _CLDELETE_LCARRAY(termStr); + _CLTHROWT(CL_ERR_Parse,_T("'*' or '?' not allowed as first character in WildcardQuery")); + } + if (lowercaseExpandedTerms) { + _tcslwr(termStr); + } - //match for a conjuction (AND OR NOT) - conj = MatchConjunction(); - //match for a modifier - mods = MatchModifier(); + Term* t = _CLNEW Term(_field, termStr); + Query* q = _CLNEW WildcardQuery(t); + _CLDECDELETE(t); - q = MatchClause(field); - if ( q != NULL ) - AddClause(clauses, conj, mods, q); - } + return q; +} - // finalize query - if(clauses.size() == 1){ //bvk: removed this && firstQuery != NULL - BooleanClause* c = clauses[0]; - Query* q = c->getQuery(); +Query* QueryParser::getPrefixQuery(const TCHAR* _field, TCHAR* _termStr) +{ + if (!allowLeadingWildcard && _termStr[0] == _T('*')){ + _CLDELETE_LCARRAY(_termStr); + _CLTHROWT(CL_ERR_Parse,_T("'*' not allowed as first character in PrefixQuery")); + } + if (lowercaseExpandedTerms) { + _tcslwr(_termStr); + } + Term* t = _CLNEW Term(_field, _termStr); + Query *q = _CLNEW PrefixQuery(t); + _CLDECDELETE(t); + return q; +} - //Condition check to be sure clauses[0] is valid - CND_CONDITION(c != NULL, "c is NULL"); +Query* QueryParser::getFuzzyQuery(const TCHAR* _field, TCHAR* 
termStr, const float_t minSimilarity) +{ + if (lowercaseExpandedTerms) { + _tcslwr(termStr); + } - //Tell the boolean clause not to delete its query - c->deleteQuery=false; - //Clear the clauses list - clauses.clear(); - _CLDELETE(c); + Term* t = _CLNEW Term(_field, termStr); + Query *q = _CLNEW FuzzyQuery(t, minSimilarity, fuzzyPrefixLength); + _CLDECDELETE(t); + return q; +} - return q; - }else{ - return GetBooleanQuery(clauses); - } +TCHAR* QueryParser::discardEscapeChar(TCHAR* input, TCHAR* output) { + // Create char array to hold unescaped char sequence + const size_t inputLen = _tcslen(input); + bool outputOwned=false; + if (output == NULL){ + output = _CL_NEWARRAY(TCHAR, inputLen + 1); + outputOwned=true; } - Query* QueryParser::MatchClause(const TCHAR* field){ - //Func - matches for CLAUSE - // CLAUSE ::= [TERM <COLONQueryParser::>] ( TERM | (<LPAREN> QUERY <RPAREN>)) - //Pre - field != NULL - //Post - + // The length of the output can be less than the input + // due to discarded escape chars. This variable holds + // the actual length of the output + int32_t length = 0; - Query* q = NULL; - const TCHAR* sfield = field; - bool delField = false; + // We remember whether the last processed character was + // an escape character + bool lastCharWasEscapeChar = false; - QueryToken *DelToken = NULL; + // The multiplier the current unicode digit must be multiplied with. + // E. g. the first digit must be multiplied with 16^3, the second with 16^2... 
+ uint32_t codePointMultiplier = 0; - //match for [TERM <COLON>] - QueryToken* term = tokens->extract(); - if(term->Type == QueryToken::TERM && tokens->peek()->Type == QueryToken::COLON){ - DelToken = MatchQueryToken(QueryToken::COLON); + // Used to calculate the codepoint of the escaped unicode character + int32_t codePoint = 0; - CND_CONDITION(DelToken != NULL,"DelToken is NULL"); - _CLDELETE(DelToken); - - TCHAR* tmp = STRDUP_TtoT(term->Value); - discardEscapeChar(tmp); - delField = true; - sfield = tmp; - _CLDELETE(term); - }else{ - tokens->push(term); - term = NULL; + for (size_t i = 0; i < in... [truncated message content] |
From: <syn...@us...> - 2009-05-02 19:53:48
|
Revision: 3003 http://clucene.svn.sourceforge.net/clucene/?rev=3003&view=rev Author: synhershko Date: 2009-05-02 19:53:35 +0000 (Sat, 02 May 2009) Log Message: ----------- renaming macro to _CLCATCH_ERR and fixing a typo Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/debug/error.h Modified: branches/lucene2_3_2/src/core/CLucene/debug/error.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/debug/error.h 2009-04-28 11:27:08 UTC (rev 3002) +++ branches/lucene2_3_2/src/core/CLucene/debug/error.h 2009-05-02 19:53:35 UTC (rev 3003) @@ -78,7 +78,7 @@ //#define _THROWS //does nothing #define _TRY try - #define _CLCATCH_ERR(err_num, cleanup, x) catch(CLuceneError& err){if (err.number!=err_num){cleanup;throw err;}else {x;}} + #define _CLCATCH_ERR(err_num, cleanup, x) catch(CLuceneError& err){if (err.number()!=err_num){cleanup;throw err;}else {x;}} #define _CLFINALLY(x) catch(...){ x; throw; } x //note: code x is not run if return is called #define _CLTHROWA(number, str) throw CLuceneError(number, str,false) #define _CLTHROWT(number, str) throw CLuceneError(number, str,false) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-28 11:27:20
|
Revision: 3002 http://clucene.svn.sourceforge.net/clucene/?rev=3002&view=rev Author: synhershko Date: 2009-04-28 11:27:08 +0000 (Tue, 28 Apr 2009) Log Message: ----------- Fixing initialization issue in BooleanClause, introduced in rev. 2668 Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp Modified: branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp 2009-04-22 18:12:11 UTC (rev 3001) +++ branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp 2009-04-28 11:27:08 UTC (rev 3002) @@ -511,7 +511,7 @@ } BooleanClause::BooleanClause(Query* q, const bool DeleteQuery, Occur o): - query(query), + query(q), occur(o), deleteQuery(DeleteQuery) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-22 18:12:21
|
Revision: 3001 http://clucene.svn.sourceforge.net/clucene/?rev=3001&view=rev Author: synhershko Date: 2009-04-22 18:12:11 +0000 (Wed, 22 Apr 2009) Log Message: ----------- Tweaking CLuceneError a bit Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/debug/error.cpp branches/lucene2_3_2/src/core/CLucene/debug/error.h Modified: branches/lucene2_3_2/src/core/CLucene/debug/error.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/debug/error.cpp 2009-04-22 17:54:32 UTC (rev 3000) +++ branches/lucene2_3_2/src/core/CLucene/debug/error.cpp 2009-04-22 18:12:11 UTC (rev 3001) @@ -43,8 +43,8 @@ this->_twhat = STRDUP_TtoT(clone._twhat); } CLuceneError::~CLuceneError() throw(){ - _CLDELETE_CARRAY(_twhat); - _CLDELETE_CaARRAY(_awhat); + _CLDELETE_LCARRAY(_twhat); + _CLDELETE_LCaARRAY(_awhat); } char* CLuceneError::what(){ #ifdef _ASCII Modified: branches/lucene2_3_2/src/core/CLucene/debug/error.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/debug/error.h 2009-04-22 17:54:32 UTC (rev 3000) +++ branches/lucene2_3_2/src/core/CLucene/debug/error.h 2009-04-22 18:12:11 UTC (rev 3001) @@ -55,29 +55,30 @@ */ #else - class CLUCENE_EXPORT CLuceneError - { +class CLUCENE_EXPORT CLuceneError +{ char* _awhat; TCHAR* _twhat; - int error_number; - public: - CLuceneError(); - CLuceneError(const CLuceneError& clone); - CLuceneError(int num, const char* str, bool ownstr); + int error_number; +public: + CLuceneError(); + CLuceneError(const CLuceneError& clone); + CLuceneError(int num, const char* str, bool ownstr); #ifdef _UCS2 - CLuceneError(int num, const TCHAR* str, bool ownstr); + CLuceneError(int num, const TCHAR* str, bool ownstr); #endif - int number(){return error_number;} - char* what(); - TCHAR* twhat(); - ~CLuceneError() throw(); + int number(){return error_number;} + char* what(); + TCHAR* twhat(); + ~CLuceneError() throw(); - void set(int 
num, const char*, bool ownstr=false); - void set(int num, const TCHAR*, bool ownstr=false); - }; + void set(int num, const char*, bool ownstr=false); + void set(int num, const TCHAR*, bool ownstr=false); +}; //#define _THROWS //does nothing #define _TRY try + #define _CLCATCH_ERR(err_num, cleanup, x) catch(CLuceneError& err){if (err.number!=err_num){cleanup;throw err;}else {x;}} #define _CLFINALLY(x) catch(...){ x; throw; } x //note: code x is not run if return is called #define _CLTHROWA(number, str) throw CLuceneError(number, str,false) #define _CLTHROWT(number, str) throw CLuceneError(number, str,false) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-22 17:54:37
|
Revision: 3000 http://clucene.svn.sourceforge.net/clucene/?rev=3000&view=rev Author: synhershko Date: 2009-04-22 17:54:32 +0000 (Wed, 22 Apr 2009) Log Message: ----------- signed/unsigned mismatch fix Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.cpp Modified: branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.cpp 2009-04-22 16:59:00 UTC (rev 2999) +++ branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.cpp 2009-04-22 17:54:32 UTC (rev 3000) @@ -102,7 +102,7 @@ { //now see if term->text() starts with prefixText - int32_t termLen = lastTerm->textLength(); + size_t termLen = lastTerm->textLength(); if ( prefixLen>termLen ) break; //the prefix is longer than the term, can't be matched @@ -243,7 +243,7 @@ lastTerm = enumerator->term(false); if (lastTerm != NULL && lastTerm->field() == prefixField ){ //now see if term->text() starts with prefixText - int32_t termLen = lastTerm->textLength(); + size_t termLen = lastTerm->textLength(); if ( prefixLen>termLen ) break; //the prefix is longer than the term, can't be matched This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-22 16:59:14
|
Revision: 2999 http://clucene.svn.sourceforge.net/clucene/?rev=2999&view=rev Author: synhershko Date: 2009-04-22 16:59:00 +0000 (Wed, 22 Apr 2009) Log Message: ----------- Added cl_stprintf Modified Paths: -------------- branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h Modified: branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h 2009-04-22 16:56:58 UTC (rev 2998) +++ branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h 2009-04-22 16:59:00 UTC (rev 2999) @@ -33,9 +33,11 @@ #ifdef _CL_HAVE_SAFE_CRT #define cl_sprintf sprintf_s + #define cl_stprintf _stprintf_s #define cl_strcpy(Dst,Src,DstLen) strcpy_s(Dst,DstLen,Src) #else #define cl_sprintf _snprintf + #define cl_stprintf _sntprintf #define cl_strcpy(Dst,Src,DstLen) strcpy(Dst,Src) #endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-22 16:57:08
|
Revision: 2998 http://clucene.svn.sourceforge.net/clucene/?rev=2998&view=rev Author: synhershko Date: 2009-04-22 16:56:58 +0000 (Wed, 22 Apr 2009) Log Message: ----------- Comment correction Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/util/_FastCharStream.h Modified: branches/lucene2_3_2/src/core/CLucene/util/_FastCharStream.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/_FastCharStream.h 2009-04-20 16:19:18 UTC (rev 2997) +++ branches/lucene2_3_2/src/core/CLucene/util/_FastCharStream.h 2009-04-22 16:56:58 UTC (rev 2998) @@ -21,7 +21,7 @@ int64_t resetPos; int32_t col; int32_t line; - // read character from stream return false on error + // read character from stream throws an exception on error void readChar(TCHAR &); public: BufferedReader* input; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-04-20 16:19:28
|
Revision: 2997 http://clucene.svn.sourceforge.net/clucene/?rev=2997&view=rev Author: ustramooner Date: 2009-04-20 16:19:18 +0000 (Mon, 20 Apr 2009) Log Message: ----------- what is ben working on? Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/files_list.txt Modified: branches/lucene2_3_2/src/core/CLucene/files_list.txt =================================================================== (Binary files differ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-04-20 12:54:21
|
Revision: 2996 http://clucene.svn.sourceforge.net/clucene/?rev=2996&view=rev Author: ustramooner Date: 2009-04-20 12:54:17 +0000 (Mon, 20 Apr 2009) Log Message: ----------- support dmalloc. a few linux fixes Modified Paths: -------------- branches/lucene2_3_2/CMakeLists.txt branches/lucene2_3_2/INSTALL branches/lucene2_3_2/cmake/DefineOptions.cmake branches/lucene2_3_2/src/contribs/benchmarker/CMakeLists.txt branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt branches/lucene2_3_2/src/contribs-lib/CMakeLists.txt branches/lucene2_3_2/src/core/CLucene/index/SegmentTermEnum.cpp branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h branches/lucene2_3_2/src/core/CMakeLists.txt branches/lucene2_3_2/src/demo/CMakeLists.txt branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h branches/lucene2_3_2/src/shared/CMakeLists.txt branches/lucene2_3_2/src/test/CMakeLists.txt branches/lucene2_3_2/src/test/testall.cpp Modified: branches/lucene2_3_2/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/CMakeLists.txt 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/CMakeLists.txt 2009-04-20 12:54:17 UTC (rev 2996) @@ -50,6 +50,9 @@ OPTION(DISABLE_MULTITHREADING "disable multithreading - remove all locking code" OFF) +OPTION(ENABLE_DMALLOC + "enable dmalloc memory leak checker" + OFF) OPTION(ENABLE_ASCII_MODE "enable ascii support" OFF) Modified: branches/lucene2_3_2/INSTALL =================================================================== --- branches/lucene2_3_2/INSTALL 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/INSTALL 2009-04-20 12:54:17 UTC (rev 2996) @@ -140,6 +140,21 @@ #valgrind --leak-check=full <program> +Memory leak tracking with dmalloc +--------------------------------- +dmalloc (http://dmalloc.com/) is also a nice tool for finding memory leaks. +To enable, set the ENABLE_DMALLOC flag to ON in cmake. 
You will of course +have to have the dmalloc lib installed for this to work. + +The cl_test file will by default print a low number of errors and leaks into +the dmalloc.log.txt file (however, this has a tendency to print false positives). +You can override this by setting your environment variable DMALLOC_OPTIONS. +See http://dmalloc.com/ or dmalloc --usage for more information on how to use dmalloc + +For example: +# DMALLOC_OPTIONS=medium,log=dmalloc.log.txt +# export DMALLOC_OPTIONS + Performance with gprof ---------------------- Compile with gprof turned on (ENABLE_GPROF in cmake gui or using ccmake). Modified: branches/lucene2_3_2/cmake/DefineOptions.cmake =================================================================== --- branches/lucene2_3_2/cmake/DefineOptions.cmake 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/cmake/DefineOptions.cmake 2009-04-20 12:54:17 UTC (rev 2996) @@ -1,26 +1,35 @@ #define global options, this makes it easy to use ccmake, or the cmake gui -MACRO (DEFINE_OPTIONS extraOptions) - IF(ENABLE_DEBUG) - SET (${extraOptions} "${${extraOptions}} -D_DEBUG") - ENDIF(ENABLE_DEBUG) - - IF(ENABLE_MMAP) - SET (${extraOptions} "${${extraOptions}} -DLUCENE_FS_MMAP") - ENDIF(ENABLE_MMAP) - - IF(DISABLE_MULTITHREADING) - SET (${extraOptions} "${${extraOptions}} -D_CL_DISABLE_MULTITHREADING") - ELSE(DISABLE_MULTITHREADING) - SET(${extraOptions} "${${extraOptions}} -D_REENTRANT") - ENDIF(DISABLE_MULTITHREADING) - - IF(ENABLE_ASCII_MODE) - SET (${extraOptions} "${${extraOptions}} -D_ASCII") - ELSE(ENABLE_ASCII_MODE) - SET (${extraOptions} "${${extraOptions}} -D_UCS2") - SET (${extraOptions} "${${extraOptions}} -D_UNICODE") - ENDIF(ENABLE_ASCII_MODE) +MACRO (DEFINE_OPTIONS extraOptions extraLibs) + IF(ENABLE_DEBUG) + SET (${extraOptions} "${${extraOptions}} -D_DEBUG") + ENDIF(ENABLE_DEBUG) + IF(ENABLE_MMAP) + SET (${extraOptions} "${${extraOptions}} -DLUCENE_FS_MMAP") + ENDIF(ENABLE_MMAP) + + IF(ENABLE_DMALLOC) + SET (${extraOptions} 
"${${extraOptions}} -DDMALLOC") + IF ( DISABLE_MULTITHREADING ) + SET (${extraLibs} ${${extraLibs}} "dmalloccxx") + ELSE( DISABLE_MULTITHREADING ) + SET (${extraLibs} ${${extraLibs}} "dmallocthcxx") + ENDIF ( DISABLE_MULTITHREADING ) + ENDIF(ENABLE_DMALLOC) + + IF(DISABLE_MULTITHREADING) + SET (${extraOptions} "${${extraOptions}} -D_CL_DISABLE_MULTITHREADING") + ELSE(DISABLE_MULTITHREADING) + SET(${extraOptions} "${${extraOptions}} -D_REENTRANT") + ENDIF(DISABLE_MULTITHREADING) + + IF(ENABLE_ASCII_MODE) + SET (${extraOptions} "${${extraOptions}} -D_ASCII") + ELSE(ENABLE_ASCII_MODE) + SET (${extraOptions} "${${extraOptions}} -D_UCS2") + SET (${extraOptions} "${${extraOptions}} -D_UNICODE") + ENDIF(ENABLE_ASCII_MODE) + IF ( MSVC80 OR MSVC90) #todo: remove this once crt functions are fixed... SET (${extraOptions} "${${extraOptions}} -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE") Modified: branches/lucene2_3_2/src/contribs/benchmarker/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/CMakeLists.txt 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/contribs/benchmarker/CMakeLists.txt 2009-04-20 12:54:17 UTC (rev 2996) @@ -1,7 +1,7 @@ PROJECT(clucene-benchmarker) INCLUDE (DefineOptions) -DEFINE_OPTIONS(EXTRA_OPTIONS) +DEFINE_OPTIONS(EXTRA_OPTIONS EXTRA_LIBS) ADD_DEFINITIONS(${EXTRA_OPTIONS}) file(GLOB_RECURSE benchmarker_HEADERS ${clucene-benchmarker_SOURCE_DIR}/*.h) @@ -17,4 +17,4 @@ ) ADD_EXECUTABLE(cl_benchmarker EXCLUDE_FROM_ALL ${benchmarker_files} ) -TARGET_LINK_LIBRARIES(cl_benchmarker clucene-core clucene-shared) +TARGET_LINK_LIBRARIES(cl_benchmarker clucene-core clucene-shared ${EXTRA_LIBS}) Modified: branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt 2009-04-16 17:00:05 UTC (rev 2995) +++ 
branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt 2009-04-20 12:54:17 UTC (rev 2996) @@ -1,7 +1,7 @@ PROJECT(clucene-contribs-lib-test) INCLUDE (DefineOptions) -DEFINE_OPTIONS(EXTRA_OPTIONS) +DEFINE_OPTIONS(EXTRA_OPTIONS EXTRA_LIBS) ADD_DEFINITIONS(${EXTRA_OPTIONS}) INCLUDE_DIRECTORIES( ${clucene-contribs-lib-test_SOURCE_DIR} ) @@ -29,5 +29,5 @@ #link the executable against the releavent clucene-shared library (if we aren't using the object files) IF ( NOT USE_SHARED_OBJECT_FILES ) - TARGET_LINK_LIBRARIES(cl_contribs-lib-test clucene-core clucene-shared clucene-contribs-lib) + TARGET_LINK_LIBRARIES(cl_contribs-lib-test clucene-core clucene-shared clucene-contribs-lib ${EXTRA_LIBS}) ENDIF ( NOT USE_SHARED_OBJECT_FILES ) Modified: branches/lucene2_3_2/src/contribs-lib/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CMakeLists.txt 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/contribs-lib/CMakeLists.txt 2009-04-20 12:54:17 UTC (rev 2996) @@ -1,7 +1,7 @@ PROJECT(clucene-contribs-lib) INCLUDE (DefineOptions) -DEFINE_OPTIONS(EXTRA_OPTIONS) +DEFINE_OPTIONS(EXTRA_OPTIONS EXTRA_LIBS) ADD_DEFINITIONS(${EXTRA_OPTIONS} -DMAKE_CLUCENE_CONTRIBS_LIB) set(CMAKE_MODULE_PATH "${clucene-contribs-lib_SOURCE_DIR}/cmake") @@ -71,7 +71,7 @@ ./CLucene/snowball/src_c/stem_UTF_8_spanish.c ./CLucene/snowball/src_c/stem_UTF_8_swedish.c ) -SET ( clucene_contrib_extras clucene-core clucene-shared ) +SET ( clucene_contrib_extra_libs clucene-core clucene-shared ${EXTRA_LIBS}) #find our headers file(GLOB_RECURSE HEADERS ${clucene-contribs-lib_SOURCE_DIR}/*.h) @@ -82,7 +82,7 @@ MESSAGE ( FATAL "ZLib not found" ) ENDIF ( NOT ZLIB_FOUND ) INCLUDE_DIRECTORIES( ${ZLIB_INCLUDE_DIR} ) -SET ( clucene_contrib_extras "${clucene_contrib_extras}" ${ZLIB_LIBRARIES} ) +SET ( clucene_contrib_extra_libs "${clucene_contrib_extra_libs}" ${ZLIB_LIBRARIES} ) find_package(Iconv) #find_package(Strigi) @@ 
-99,7 +99,7 @@ add_library(clucene-contribs-lib SHARED ${clucene_contribs_Files} ${clucene_shared_Files} ${HEADERS} ) -TARGET_LINK_LIBRARIES(clucene-contribs-lib ${clucene_contrib_extras}) +TARGET_LINK_LIBRARIES(clucene-contribs-lib ${clucene_contrib_extra_libs}) #set properties on the libraries SET_TARGET_PROPERTIES(clucene-contribs-lib PROPERTIES Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentTermEnum.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentTermEnum.cpp 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentTermEnum.cpp 2009-04-20 12:54:17 UTC (rev 2996) @@ -137,7 +137,7 @@ //Delete the buffer if necessary - free(buffer); + if ( buffer != NULL ) free(buffer); //Delete termInfo if necessary _CLDELETE(termInfo); Modified: branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h 2009-04-20 12:54:17 UTC (rev 2996) @@ -330,7 +330,7 @@ * @throws IOException */ //bool getTermFreqVectors(int32_t docNumber, CL_NS(util)::ObjectArray<TermFreqVector>& result); - CL_NS(util)::ObjectArray<TermFreqVector>* SegmentReader::getTermFreqVectors(int32_t docNumber); + CL_NS(util)::ObjectArray<TermFreqVector>* getTermFreqVectors(int32_t docNumber); private: //Open all norms files for all fields void openNorms(CL_NS(store)::Directory* cfsDir); Modified: branches/lucene2_3_2/src/core/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/core/CMakeLists.txt 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/core/CMakeLists.txt 2009-04-20 12:54:17 UTC (rev 2996) @@ -2,7 +2,7 @@ #define command line options INCLUDE (DefineOptions) 
-DEFINE_OPTIONS(EXTRA_OPTIONS) +DEFINE_OPTIONS(EXTRA_OPTIONS EXTRA_LIBS) ADD_DEFINITIONS(${EXTRA_OPTIONS} -DMAKE_CLUCENE_CORE_LIB) #add the files to our groups and core @@ -148,7 +148,7 @@ #link the clucene-core library against the releavent clucene-shared library (if we aren't using the object files) IF ( NOT USE_SHARED_OBJECT_FILES ) - TARGET_LINK_LIBRARIES(clucene-core clucene-shared) + TARGET_LINK_LIBRARIES(clucene-core clucene-shared ${EXTRA_LIBS}) ENDIF ( NOT USE_SHARED_OBJECT_FILES ) Modified: branches/lucene2_3_2/src/demo/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/demo/CMakeLists.txt 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/demo/CMakeLists.txt 2009-04-20 12:54:17 UTC (rev 2996) @@ -1,7 +1,7 @@ PROJECT(cl_demo) INCLUDE (DefineOptions) -DEFINE_OPTIONS(EXTRA_OPTIONS) +DEFINE_OPTIONS(EXTRA_OPTIONS EXTRA_LIBS) ADD_DEFINITIONS(${EXTRA_OPTIONS}) INCLUDE_DIRECTORIES( ${clucene-demo_SOURCE_DIR} ) @@ -21,4 +21,4 @@ ${demo_HEADERS} ) -TARGET_LINK_LIBRARIES(cl_demo clucene-core clucene-shared) +TARGET_LINK_LIBRARIES(cl_demo clucene-core clucene-shared ${EXTRA_LIBS}) Modified: branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h 2009-04-20 12:54:17 UTC (rev 2996) @@ -87,7 +87,7 @@ #define CL_CLASS_DEF(sub,clazz) namespace lucene { namespace sub{ class clazz; } } #define CL_CLASS_DEF2(sub,sub2, clazz) namespace lucene { namespace sub{ namespace sub2{ class clazz; } } } - #define CL_TEMPATE_DEF(sub, clazz, typedefs) namespace lucene { namespace sub{ template<typedefs> class clazz; }} + #define CL_TEMPATE_DEF(sub, clazz, typedefs) namespace lucene { namespace sub{ template<typedefs> class clazz; }} #define CL_TYPE_DEF(sub, clazz, def) namespace lucene { namespace 
sub{ typedef def clazz; }} #else #define CL_NS_DEF(sub) @@ -170,14 +170,14 @@ //////////////////////////////////////////////////////// -//todo: put this logic in cmake +//todo: put this logic in cmake #if defined(_MSC_VER) #if _MSC_FULL_VER >= 140050320 #define _CL_DEPRECATE_TEXT(_Text) __declspec(deprecated(_Text)) #elif _MSC_VER >= 1300 #define _CL_DEPRECATE_TEXT(_Text) __declspec(deprecated) #else - #define _CL_DEPRECATE_TEXT(_Text) + #define _CL_DEPRECATE_TEXT(_Text) #endif #elif (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) #define _CL_DEPRECATE_TEXT(_Text) __attribute__((__deprecated__)) @@ -196,4 +196,10 @@ //memory handling macros/functions #include "CLucene/debug/mem.h" +#ifdef DMALLOC + #include <stdlib.h> + #include <string.h> + #include <dmalloc.h> +#endif + #endif //lucene_sharedheader_h Modified: branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h 2009-04-20 12:54:17 UTC (rev 2996) @@ -24,6 +24,7 @@ #include "CLucene/LuceneThreads.h" #include "CLucene/config/repl_tchar.h" #include "CLucene/config/repl_wchar.h" +#include "CLucene/config/repl_wctype.h" //replacements for functions #define cl_min(a,b) ((a)>(b) ? (b) : (a)) #define cl_min3(a,b,c) ((a)<(b) ? ((a)<(c) ? (a) : (c)) : ((b)<(c) ? (b) : (c))) @@ -56,9 +57,6 @@ //if a wide character is being converted to a ascii character and it //cannot fit, this character is used instead. 
#define LUCENE_OOR_CHAR(c) ((char)(((unsigned short)c)&0xFF)) + - -#include "CLucene/config/repl_tchar.h" //replacements for functions -#include "CLucene/config/repl_wctype.h" //replacements for functions - #endif //lucene_internal_sharedheader_h Modified: branches/lucene2_3_2/src/shared/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/shared/CMakeLists.txt 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/shared/CMakeLists.txt 2009-04-20 12:54:17 UTC (rev 2996) @@ -2,7 +2,7 @@ #define command line options INCLUDE (DefineOptions) -DEFINE_OPTIONS(EXTRA_OPTIONS) +DEFINE_OPTIONS(EXTRA_OPTIONS EXTRA_LIBS) ADD_DEFINITIONS(${EXTRA_OPTIONS} -DMAKE_CLUCENE_SHARED_LIB) # include specific modules @@ -302,6 +302,9 @@ SOVERSION ${CLUCENE_SOVERSION} COMPILE_DEFINITIONS_DEBUG _DEBUG ) +IF ( ${EXTRA_LIBS} ) + TARGET_LINK_LIBRARIES(clucene-shared ${EXTRA_LIBS}) +ENDIF ( ${EXTRA_LIBS} ) install(TARGETS clucene-shared DESTINATION lib Modified: branches/lucene2_3_2/src/test/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/test/CMakeLists.txt 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/test/CMakeLists.txt 2009-04-20 12:54:17 UTC (rev 2996) @@ -1,7 +1,7 @@ PROJECT(clucene-test) INCLUDE (DefineOptions) -DEFINE_OPTIONS(EXTRA_OPTIONS) +DEFINE_OPTIONS(EXTRA_OPTIONS EXTRA_LIBS) ADD_DEFINITIONS(${EXTRA_OPTIONS}) INCLUDE_DIRECTORIES( ${clucene-test_SOURCE_DIR} ) @@ -53,7 +53,7 @@ #link the executable against the releavent clucene-shared library (if we aren't using the object files) IF ( NOT USE_SHARED_OBJECT_FILES ) - TARGET_LINK_LIBRARIES(cl_test clucene-core clucene-shared) + TARGET_LINK_LIBRARIES(cl_test clucene-core clucene-shared ${EXTRA_LIBS}) ENDIF ( NOT USE_SHARED_OBJECT_FILES ) ############################ Modified: branches/lucene2_3_2/src/test/testall.cpp =================================================================== 
--- branches/lucene2_3_2/src/test/testall.cpp 2009-04-16 17:00:05 UTC (rev 2995) +++ branches/lucene2_3_2/src/test/testall.cpp 2009-04-20 12:54:17 UTC (rev 2996) @@ -39,6 +39,17 @@ _crtBreakAlloc=-1; #endif #endif + + #ifdef DMALLOC + if ( getenv("DMALLOC_OPTIONS") == NULL ){ + dmalloc_debug_setup("low,log=dmalloc.log.txt"); + }else{ + //apparently cygwin has to have this code.... + dmalloc_debug_setup(getenv("DMALLOC_OPTIONS")); + } + #endif + + int ret_result = 0; int i=0; int exclude = 0; @@ -203,8 +214,7 @@ _CLDELETE_CaARRAY(cl_tempDir) _lucene_shutdown(); //clears all static memory - //print lucenebase debug - + if ( ret_result != 0 ) return ret_result; else This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-04-16 17:00:08
|
Revision: 2995 http://clucene.svn.sourceforge.net/clucene/?rev=2995&view=rev Author: ustramooner Date: 2009-04-16 17:00:05 +0000 (Thu, 16 Apr 2009) Log Message: ----------- fix contribs tests Modified Paths: -------------- branches/lucene2_3_2/src/contribs/contribs-lib-test/TestHighlight.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/TestStreams.cpp Modified: branches/lucene2_3_2/src/contribs/contribs-lib-test/TestHighlight.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/contribs-lib-test/TestHighlight.cpp 2009-04-16 16:59:19 UTC (rev 2994) +++ branches/lucene2_3_2/src/contribs/contribs-lib-test/TestHighlight.cpp 2009-04-16 17:00:05 UTC (rev 2995) @@ -245,7 +245,7 @@ CuSuite *testhighlighter(void) { CuSuite *suite = CuSuiteNew(_T("CLucene Highlight Test")); - (suite, setupHighlighter); + SUITE_ADD_TEST(suite, setupHighlighter); SUITE_ADD_TEST(suite, testSimpleHighlighter); SUITE_ADD_TEST(suite, testGetBestFragmentsSimpleQuery); Modified: branches/lucene2_3_2/src/contribs/contribs-lib-test/TestStreams.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/contribs-lib-test/TestStreams.cpp 2009-04-16 16:59:19 UTC (rev 2994) +++ branches/lucene2_3_2/src/contribs/contribs-lib-test/TestStreams.cpp 2009-04-16 17:00:05 UTC (rev 2995) @@ -40,7 +40,7 @@ InputStream* sb2 = doc2.getField(_T("test"))->streamValue(); GZipInputStream zip2(sb2, GZipInputStream::ZLIBFORMAT); - int rd = zip2.read(tmp, 1, 0); + int rd = zip2.read(tmp, 100000, 0); std::string str((const char*) tmp, rd); CLUCENE_ASSERT(str.compare(str2) == 0); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-04-16 16:59:24
|
Revision: 2994 http://clucene.svn.sourceforge.net/clucene/?rev=2994&view=rev Author: ustramooner Date: 2009-04-16 16:59:19 +0000 (Thu, 16 Apr 2009) Log Message: ----------- export contribs-lib properly for windows Modified Paths: -------------- branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h Modified: branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h 2009-04-16 16:57:04 UTC (rev 2993) +++ branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h 2009-04-16 16:59:19 UTC (rev 2994) @@ -145,7 +145,7 @@ #else #define CLUCENE_EXPORT CLUCENE_IMPORT_DECL #endif -#if defined(clucene_contribs_EXPORTS) +#if defined(clucene_contribs_lib_EXPORTS) #define CLUCENE_CONTRIBS_EXPORT CLUCENE_EXPORT_DECL #define CLUCENE_LOCAL CLUCENE_LOCAL_DECL #elif defined(MAKE_CLUCENE_CONTRIBS_LIB) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-04-16 16:57:14
|
Revision: 2993 http://clucene.svn.sourceforge.net/clucene/?rev=2993&view=rev Author: ustramooner Date: 2009-04-16 16:57:04 +0000 (Thu, 16 Apr 2009) Log Message: ----------- fix for new stream mechanism Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp Modified: branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp 2009-04-16 16:46:14 UTC (rev 2992) +++ branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp 2009-04-16 16:57:04 UTC (rev 2993) @@ -169,7 +169,7 @@ //how do wemake sure we read the entire index in now??? //todo: we need to have a max amount, and guarantee its all in or throw an error.. //todo: make this value configurable.... - int32_t rl = stream->read(sd, sz, 0); + int32_t rl = stream->read(sd, sz, 1); if ( rl < 0 ){ fieldsStream->writeVInt(0); //todo: could we detect this earlier and not actually write the field?? @@ -183,11 +183,15 @@ }else if ( field->stringValue() == NULL ){ //we must be using readerValue CND_PRECONDITION(!field->isIndexed(), "Cannot store reader if it is indexed too") - Reader* r = field->readerValue(); + Reader* r = field->readerValue(); + int32_t sz = r->size(); + if ( sz < 0 ) + sz = 10000000; //todo: we should warn the developer here.... + //read the entire string const TCHAR* rv; - int64_t rl = r->read(rv, LUCENE_INT32_MAX_SHOULDBE, 0); + int64_t rl = r->read(rv, sz, 1); if ( rl > LUCENE_INT32_MAX_SHOULDBE ) _CLTHROWA(CL_ERR_Runtime,"Field length too long"); else if ( rl < 0 ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-04-16 16:46:17
|
Revision: 2992 http://clucene.svn.sourceforge.net/clucene/?rev=2992&view=rev Author: ustramooner Date: 2009-04-16 16:46:14 +0000 (Thu, 16 Apr 2009) Log Message: ----------- fix tests, fix code for windows Modified Paths: -------------- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Encoder.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Formatter.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Fragmenter.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/HighlightScorer.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryTermExtractor.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Scorer.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleFragmenter.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLEncoder.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLFormatter.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TextFragment.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/WeightedTerm.h branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballAnalyzer.h branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipcompressstream.h branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.h branches/lucene2_3_2/src/contribs-lib/CMakeLists.txt Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Encoder.h 
=================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Encoder.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Encoder.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -23,7 +23,7 @@ * Encodes original text. The Encoder works with the Formatter to generate the output. * */ -class Encoder:LUCENE_BASE +class CLUCENE_CONTRIBS_EXPORT Encoder:LUCENE_BASE { public: /** Virtual destructor */ Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Formatter.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Formatter.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Formatter.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -25,7 +25,7 @@ * of mark-up to highlight terms in HTML search results pages. * */ -class Formatter:LUCENE_BASE +class CLUCENE_CONTRIBS_EXPORT Formatter:LUCENE_BASE { public: Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Fragmenter.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Fragmenter.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Fragmenter.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -27,7 +27,7 @@ * by the {@link Highlighter} class. A sophisticated implementation may do this on the basis * of detecting end of sentences in the text. 
*/ -class Fragmenter:LUCENE_BASE +class CLUCENE_CONTRIBS_EXPORT Fragmenter:LUCENE_BASE { public: /** Virtual destructor */ Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/HighlightScorer.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/HighlightScorer.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/HighlightScorer.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -27,7 +27,7 @@ /** * Adds to the score for a fragment based on its tokens */ -class HighlightScorer:LUCENE_BASE +class CLUCENE_CONTRIBS_EXPORT HighlightScorer:LUCENE_BASE { public: virtual ~HighlightScorer(){ Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -41,7 +41,7 @@ * and tokenizers. * {@link Encoder} and tokenizers. 
*/ -class Highlighter :LUCENE_BASE +class CLUCENE_CONTRIBS_EXPORT Highlighter :LUCENE_BASE { private: int32_t maxDocBytesToAnalyze; Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.cpp 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.cpp 2009-04-16 16:46:14 UTC (rev 2992) @@ -82,7 +82,7 @@ float_t QueryScorer::getTokenScore(Token * token) { - const TCHAR* termText=token->termText(); + const TCHAR* termText=token->termBuffer(); const WeightedTerm* queryTerm = _termsToFind.get(termText); if(queryTerm==NULL) Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -39,7 +39,7 @@ */ //TODO: provide option to boost score of fragments near beginning of document // based on fragment.getFragNum() -class QueryScorer : public HighlightScorer +class CLUCENE_CONTRIBS_EXPORT QueryScorer : public HighlightScorer { private: TextFragment * _currentTextFragment; Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryTermExtractor.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryTermExtractor.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryTermExtractor.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -34,7 +34,7 @@ * expanded terms. 
* */ -class QueryTermExtractor +class CLUCENE_CONTRIBS_EXPORT QueryTermExtractor { QueryTermExtractor(){ } Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Scorer.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Scorer.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Scorer.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -28,10 +28,10 @@ class TextFragment; -class Scorer +class CLUCENE_CONTRIBS_EXPORT Scorer { public: - virtual ~Scorer() = 0; + virtual ~Scorer(){}; /** * called when a new fragment is started for consideration * @param newFragment Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleFragmenter.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleFragmenter.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleFragmenter.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -27,7 +27,7 @@ * fragments with no concerns over spotting sentence boundaries. 
*/ -class SimpleFragmenter:public Fragmenter +class CLUCENE_CONTRIBS_EXPORT SimpleFragmenter:public Fragmenter { private: LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_FRAGMENT_SIZE =100 ); Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLEncoder.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLEncoder.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLEncoder.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -26,7 +26,7 @@ * Simple {@link Encoder} implementation to escape text for HTML output * */ -class SimpleHTMLEncoder:public Encoder +class CLUCENE_CONTRIBS_EXPORT SimpleHTMLEncoder:public Encoder { public: SimpleHTMLEncoder(void); Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLFormatter.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLFormatter.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLFormatter.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -26,7 +26,7 @@ * Simple {@link Formatter} implementation to highlight terms with a pre and post tag * */ -class SimpleHTMLFormatter :public Formatter +class CLUCENE_CONTRIBS_EXPORT SimpleHTMLFormatter :public Formatter { private: const TCHAR* _preTag; Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TextFragment.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TextFragment.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TextFragment.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -27,7 +27,7 @@ * Low-level class used to record information about a section of a document * with a score. 
*/ -class TextFragment:LUCENE_BASE +class CLUCENE_CONTRIBS_EXPORT TextFragment:LUCENE_BASE { int32_t _fragNum; int32_t _textStartPos; Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.cpp 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.cpp 2009-04-16 16:46:14 UTC (rev 2992) @@ -47,7 +47,7 @@ startOffset=cl_min(startOffset,token->startOffset()); endOffset=cl_max(endOffset,token->endOffset()); } - tokens[numTokens].set(token->termText(),token->startOffset(),token->endOffset(),token->type());; + tokens[numTokens].set(token->termBuffer(),token->startOffset(),token->endOffset(),token->type());; scores[numTokens]=score; numTokens++; } Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -26,7 +26,7 @@ * One, or several overlapping tokens, along with the score(s) and the * scope of the original text */ -class TokenGroup: LUCENE_BASE +class CLUCENE_CONTRIBS_EXPORT TokenGroup: LUCENE_BASE { LUCENE_STATIC_CONSTANT(int32_t,MAX_NUM_TOKENS_PER_GROUP=50); CL_NS(analysis)::Token* tokens; Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp 2009-04-16 16:46:14 UTC (rev 2992) @@ -98,7 +98,7 @@ } */ //code to reconstruct the original sequence of Tokens 
const TCHAR** terms=tpv->getTerms(); - const Array<int32_t>* freq=tpv->getTermFrequencies(); + const ValueArray<int32_t>* freq=tpv->getTermFrequencies(); size_t totalTokens=0; for (int32_t i = 0; i < freq->length; i++) @@ -108,11 +108,11 @@ CLSetList<Token*,TokenOrderCompare>* unsortedTokens = NULL; for (int32_t t = 0; t < freq->length; t++) { - Array<TermVectorOffsetInfo>* offsets=tpv->getOffsets(t); + ObjectArray<TermVectorOffsetInfo>* offsets=tpv->getOffsets(t); if(offsets==NULL) return NULL; - Array<int32_t>* pos=NULL; + ValueArray<int32_t>* pos=NULL; if(tokenPositionsGuaranteedContiguous) { //try get the token position info to speed up assembly of tokens into sorted sequence @@ -130,8 +130,8 @@ for (int32_t tp=0; tp < offsets->length; tp++) { unsortedTokens->insert(_CLNEW Token(terms[t], - (*offsets)[tp].getStartOffset(), - (*offsets)[tp].getEndOffset())); + (*offsets)[tp]->getStartOffset(), + (*offsets)[tp]->getEndOffset())); } } else @@ -145,8 +145,8 @@ for (int32_t tp = 0; tp < pos->length; tp++) { tokensInOriginalOrder[(*pos)[tp]]=_CLNEW Token(terms[t], - (*offsets)[tp].getStartOffset(), - (*offsets)[tp].getEndOffset()); + (*offsets)[tp]->getStartOffset(), + (*offsets)[tp]->getEndOffset()); } } } @@ -192,8 +192,9 @@ //convenience method TokenStream* TokenSources::getTokenStream(IndexReader* reader,int32_t docId, TCHAR* field,Analyzer* analyzer) { - CL_NS(document)::Document* doc=reader->document(docId); - const TCHAR* contents=doc->get(field); + CL_NS(document)::Document doc; + reader->document(docId, doc); + const TCHAR* contents=doc.get(field); if(contents==NULL) { TCHAR buf[250]; @@ -218,7 +219,7 @@ } Token* t = tokens[currentToken++]; - token->set(t->termText(),t->startOffset(),t->endOffset(),t->type());; + token->set(t->termBuffer(),t->startOffset(),t->endOffset(),t->type());; return true; } void TokenSources::StoredTokenStream::close(){ Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.h 
=================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -16,7 +16,7 @@ CL_NS_DEF2(search,highlight) -class TokenSources: LUCENE_BASE +class CLUCENE_CONTRIBS_EXPORT TokenSources: LUCENE_BASE { //an object used to iterate across an array of tokens class StoredTokenStream:public CL_NS(analysis)::TokenStream Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/WeightedTerm.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/WeightedTerm.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/WeightedTerm.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -24,7 +24,7 @@ /** Lightweight class to hold term and a weight value used for scoring this term */ -class WeightedTerm:LUCENE_BASE +class CLUCENE_CONTRIBS_EXPORT WeightedTerm:LUCENE_BASE { private: float_t _weight; // multiplier Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballAnalyzer.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballAnalyzer.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballAnalyzer.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -13,7 +13,7 @@ * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in * {@link EnglishStemmer} is named "English". 
*/ -class SnowballAnalyzer: public Analyzer { +class CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer { const TCHAR* language; CLTCSetList* stopSet; Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipcompressstream.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipcompressstream.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipcompressstream.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -26,7 +26,7 @@ CL_NS_DEF(util) -class GZipCompressInputStream : public InputStream{ +class CLUCENE_CONTRIBS_EXPORT GZipCompressInputStream : public InputStream{ private: class Internal; Internal* internal; Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.cpp 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.cpp 2009-04-16 16:46:14 UTC (rev 2992) @@ -52,7 +52,7 @@ break; case Z_STREAM_END: if (zstream->avail_in) { - input->reset(input->getPosition()-zstream->avail_in); + input->reset(input->position()-zstream->avail_in); } // we are finished decompressing, // (but this stream is not yet finished) @@ -72,7 +72,7 @@ const signed char* begin; int32_t nread; - int64_t pos = input->getPosition(); + int64_t pos = input->position(); nread = input->read(begin, 2, 2); input->reset(pos); if (nread != 2) { Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.h 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.h 2009-04-16 16:46:14 UTC (rev 2992) @@ -13,7 +13,7 @@ CL_NS_DEF(util) -class GZipInputStream : public 
CL_NS(util)::BufferedInputStream { +class CLUCENE_CONTRIBS_EXPORT GZipInputStream : public CL_NS(util)::BufferedInputStream { public: enum ZipFormat { ZLIBFORMAT, GZIPFORMAT, ZIPFORMAT}; private: Modified: branches/lucene2_3_2/src/contribs-lib/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CMakeLists.txt 2009-04-16 16:25:40 UTC (rev 2991) +++ branches/lucene2_3_2/src/contribs-lib/CMakeLists.txt 2009-04-16 16:46:14 UTC (rev 2992) @@ -73,6 +73,9 @@ ) SET ( clucene_contrib_extras clucene-core clucene-shared ) +#find our headers +file(GLOB_RECURSE HEADERS ${clucene-contribs-lib_SOURCE_DIR}/*.h) + #add extra capabilities find_package(ZLIB) IF ( NOT ZLIB_FOUND ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-04-16 16:25:51
|
Revision: 2991 http://clucene.svn.sourceforge.net/clucene/?rev=2991&view=rev Author: ustramooner Date: 2009-04-16 16:25:40 +0000 (Thu, 16 Apr 2009) Log Message: ----------- various fixes for Itamar's code. rollback Token code, potential problems there... fix some deprecation stuff Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/_TermVector.h branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.h branches/lucene2_3_2/src/core/CLucene/util/Equators.cpp branches/lucene2_3_2/src/core/CLucene/util/Equators.h branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h Modified: branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-04-13 05:33:02 UTC (rev 2990) +++ branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-04-16 16:25:40 UTC (rev 2991) @@ -67,8 +67,7 @@ Token::~Token(){ #ifndef LUCENE_TOKEN_WORD_LENGTH - //free(_termText); - delete[] _termText; + free(_termText); #endif _CLLDELETE(payload); } @@ -148,16 +147,13 @@ if(bufferTextLen>=size) return; #ifndef LUCENE_TOKEN_WORD_LENGTH - if ( _termText == NULL ) - //_termText = (TCHAR*)malloc( size * sizeof(TCHAR) ); - _termText = new TCHAR[size * sizeof(TCHAR)]; - else{ - // ISH: Use new/delete[] instead of realloc, since a copy is being made anyway and there's no - // need to preserve the current content - //_termText = (TCHAR*)realloc( _termText, size * sizeof(TCHAR) ); - TCHAR* __termText = new TCHAR[size * sizeof(TCHAR)]; - delete[] _termText; - _termText = __termText; + if ( _termText == NULL ){ + _termText = (TCHAR*)malloc( size * sizeof(TCHAR) ); + _termText[0] = NULL; + }else{ + //use realloc. 
growBuffer is public, therefore could be called + //without a subsequent call to overwriting the memory + _termText = (TCHAR*)realloc( _termText, size * sizeof(TCHAR) ); } bufferTextLen = size; #else Modified: branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp 2009-04-13 05:33:02 UTC (rev 2990) +++ branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp 2009-04-16 16:25:40 UTC (rev 2991) @@ -223,4 +223,4 @@ return _CLNEW SkipBuffer(*this); } -CL_NS_END \ No newline at end of file +CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/index/_TermVector.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_TermVector.h 2009-04-13 05:33:02 UTC (rev 2990) +++ branches/lucene2_3_2/src/core/CLucene/index/_TermVector.h 2009-04-16 16:25:40 UTC (rev 2991) @@ -204,8 +204,8 @@ CL_NS(store)::IndexInput* tvf; int64_t _size; // TODO: size_t ? - // The docID offset where our docs begin in the index - // file. This will be 0 if we have our own private file. + // The docID offset where our docs begin in the index + // file. This will be 0 if we have our own private file. int32_t docStoreOffset; int32_t tvdFormat; @@ -231,12 +231,12 @@ public: void get(const int32_t docNum, const TCHAR* field, TermVectorMapper* mapper); - /** - * Retrieve the term vector for the given document and field - * @param docNum The document number to retrieve the vector for - * @param field The field within the document to retrieve - * @return The TermFreqVector for the document and field or null if there is no termVector for this field. 
- * @throws IOException if there is an error reading the term vector files + /** + * Retrieve the term vector for the given document and field + * @param docNum The document number to retrieve the vector for + * @param field The field within the document to retrieve + * @return The TermFreqVector for the document and field or null if there is no termVector for this field. + * @throws IOException if there is an error reading the term vector files */ TermFreqVector* get(const int32_t docNum, const TCHAR* field); @@ -253,19 +253,19 @@ void get(const int32_t docNumber, TermVectorMapper* mapper); private: - CL_NS(util)::ObjectArray<SegmentTermVector>* readTermVectors(const int32_t docNum, - const TCHAR** fields, const int64_t* tvfPointers, const int32_t len); - - void readTermVectors(const TCHAR** fields, const int64_t* tvfPointers, - const int32_t len, TermVectorMapper* mapper); + CL_NS(util)::ObjectArray<SegmentTermVector>* readTermVectors(const int32_t docNum, + const TCHAR** fields, const int64_t* tvfPointers, const int32_t len); - /** - * - * @param field The field to read in - * @param tvfPointer The pointer within the tvf file where we should start reading - * @param mapper The mapper used to map the TermVector - * @return The TermVector located at that position - * @throws IOException + void readTermVectors(const TCHAR** fields, const int64_t* tvfPointers, + const int32_t len, TermVectorMapper* mapper); + + /** + * + * @param field The field to read in + * @param tvfPointer The pointer within the tvf file where we should start reading + * @param mapper The mapper used to map the TermVector + * @return The TermVector located at that position + * @throws IOException */ void readTermVector(const TCHAR* field, const int64_t tvfPointer, TermVectorMapper* mapper); @@ -429,4 +429,4 @@ }; CL_NS_END -#endif \ No newline at end of file +#endif Modified: branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp 
=================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp 2009-04-13 05:33:02 UTC (rev 2990) +++ branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp 2009-04-16 16:25:40 UTC (rev 2991) @@ -183,7 +183,7 @@ positions->push_back(position); } - void PhraseQuery::getPositions(Array<int32_t>& result) const{ + void PhraseQuery::getPositions(ValueArray<int32_t>& result) const{ result.length = positions->size(); result.values = _CL_NEWARRAY(int32_t,result.length); for(size_t i = 0; i < result.length; i++){ @@ -353,7 +353,7 @@ Scorer* ret = NULL; - Array<int32_t> positions; + ValueArray<int32_t> positions; _this->getPositions(positions); int32_t slop = _this->getSlop(); if ( slop != 0) Modified: branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.h 2009-04-13 05:33:02 UTC (rev 2990) +++ branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.h 2009-04-16 16:25:40 UTC (rev 2991) @@ -102,7 +102,7 @@ /** * Returns the relative positions of terms in this phrase. */ - void getPositions(CL_NS(util)::Array<int32_t>& result) const; + void getPositions(CL_NS(util)::ValueArray<int32_t>& result) const; const TCHAR* getFieldName() const{ return field; } /** Prints a user-readable version of this query. 
*/ Modified: branches/lucene2_3_2/src/core/CLucene/util/Equators.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/Equators.cpp 2009-04-13 05:33:02 UTC (rev 2990) +++ branches/lucene2_3_2/src/core/CLucene/util/Equators.cpp 2009-04-16 16:25:40 UTC (rev 2991) @@ -89,7 +89,23 @@ size_t Compare::Char::operator()( const char* val1) const{ return CL_NS(util)::Misc::ahashCode(val1); } +const char* Compare::Char::getValue() const{ return s; } +Compare::Char::Char(){ + s=NULL; +} + Compare::Char::Char(const char* str){ + this->s = str; +} +int32_t Compare::Char::compareTo(void* o){ + try{ + Char* os = (Char*)o; + return strcmp(s,os->s); + }catch(...){ + _CLTHROWA(CL_ERR_Runtime,"Couldnt compare types"); + } +} + #ifdef _UCS2 bool Compare::WChar::operator()( const wchar_t* val1, const wchar_t* val2 ) const{ if ( val1==val2) @@ -100,17 +116,16 @@ size_t Compare::WChar::operator()( const wchar_t* val1) const{ return CL_NS(util)::Misc::whashCode(val1); } -#endif -const TCHAR* Compare::TChar::getValue() const{ return s; } +const wchar_t* Compare::WChar::getValue() const{ return s; } -Compare::TChar::TChar(){ +Compare::WChar::WChar(){ s=NULL; } - Compare::TChar::TChar(const TCHAR* str){ + Compare::WChar::WChar(const wchar_t* str){ this->s = str; } -int32_t Compare::TChar::compareTo(void* o){ +int32_t Compare::WChar::compareTo(void* o){ try{ TChar* os = (TChar*)o; return _tcscmp(s,os->s); @@ -120,14 +135,7 @@ } -bool Compare::TChar::operator()( const TCHAR* val1, const TCHAR* val2 ) const{ - if ( val1==val2) - return false; - bool ret = (_tcscmp( val1,val2 ) < 0); - return ret; -} -size_t Compare::TChar::operator()( const TCHAR* val1) const{ - return CL_NS(util)::Misc::thashCode(val1); -} +#endif + CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/util/Equators.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/Equators.h 2009-04-13 
05:33:02 UTC (rev 2990) +++ branches/lucene2_3_2/src/core/CLucene/util/Equators.h 2009-04-16 16:25:40 UTC (rev 2991) @@ -107,34 +107,37 @@ }; - class CLUCENE_INLINE_EXPORT Char: public _base //<char*> + class CLUCENE_INLINE_EXPORT Char: public _base, public Comparable //<char*> { + const char* s; public: + const char* getValue() const; + Char(); + Char(const char* str); + int32_t compareTo(void* o); + bool operator()( const char* val1, const char* val2 ) const; size_t operator()( const char* val1) const; }; #ifdef _UCS2 - class CLUCENE_INLINE_EXPORT WChar: public _base //<wchar_t*> + class CLUCENE_INLINE_EXPORT WChar: public _base, public Comparable //<wchar_t*> { + const wchar_t* s; public: + const wchar_t* getValue() const; + WChar(); + WChar(const wchar_t* str); + int32_t compareTo(void* o); + bool operator()( const wchar_t* val1, const wchar_t* val2 ) const; size_t operator()( const wchar_t* val1) const; }; + typedef WChar TChar; +#else + typedef Char TChar; #endif - class CLUCENE_INLINE_EXPORT TChar: public _base, public Comparable{ - const TCHAR* s; - public: - const TCHAR* getValue() const; - TChar(); - TChar(const TCHAR* str); - int32_t compareTo(void* o); - bool operator()( const TCHAR* val1, const TCHAR* val2 ) const; - size_t operator()( const TCHAR* val1) const; - }; - - template<typename _cl> class CLUCENE_INLINE_EXPORT Void:public _base //<const void*,const void*,bool> { Modified: branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h 2009-04-13 05:33:02 UTC (rev 2990) +++ branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h 2009-04-16 16:25:40 UTC (rev 2991) @@ -36,7 +36,7 @@ { private: struct Internal; - Internal* internal; + Internal* _internal; public: mutex_thread(const mutex_thread& clone); mutex_thread(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source 
development site. |
From: Ben v. K. <bva...@gm...> - 2009-04-16 15:15:42
|
I've reverted this change. growBuffer is public, therefore can be called without a subsequent call to overwrite the memory. ben 2009/4/12 synhershko <syn...@us...>: > Revision: 2987 > http://clucene.svn.sourceforge.net/clucene/?rev=2987&view=rev > Author: synhershko > Date: 2009-04-12 17:12:31 +0000 (Sun, 12 Apr 2009) > > Log Message: > ----------- > * Fixes memory leak in StringBuffer which was introduced in revision 2948 > * Tweaks Token::growBuffer by using new/delete[] instead of realloc since a memory copy is being performed right after it anyway > > Revision Links: > -------------- > http://clucene.svn.sourceforge.net/clucene/?rev=2948&view=rev > > Modified Paths: > -------------- > branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp > branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp > > Modified: branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp > =================================================================== > --- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-04-12 13:42:34 UTC (rev 2986) > +++ branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-04-12 17:12:31 UTC (rev 2987) > @@ -67,7 +67,8 @@ > > Token::~Token(){ > #ifndef LUCENE_TOKEN_WORD_LENGTH > - free(_termText); > + //free(_termText); > + delete[] _termText; > #endif > _CLLDELETE(payload); > } > @@ -148,9 +149,16 @@ > return; > #ifndef LUCENE_TOKEN_WORD_LENGTH > if ( _termText == NULL ) > - _termText = (TCHAR*)malloc( size * sizeof(TCHAR) ); > - else > - _termText = (TCHAR*)realloc( _termText, size * sizeof(TCHAR) ); > + //_termText = (TCHAR*)malloc( size * sizeof(TCHAR) ); > + _termText = new TCHAR[size * sizeof(TCHAR)]; > + else{ > + // ISH: Use new/delete[] instead of realloc, since a copy is being made anyway and there's no > + // need to preserve the current content > + //_termText = (TCHAR*)realloc( _termText, size * sizeof(TCHAR) ); > + TCHAR* __termText = new TCHAR[size * sizeof(TCHAR)]; > + delete[] 
_termText; > + _termText = __termText; > + } > bufferTextLen = size; > #else > _CLTHROWA(CL_ERR_TokenMgr,"Couldn't grow Token buffer"); > > Modified: branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp > =================================================================== > --- branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp 2009-04-12 13:42:34 UTC (rev 2986) > +++ branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp 2009-04-12 17:12:31 UTC (rev 2987) > @@ -40,7 +40,7 @@ > len = 0; > //Allocate a buffer of length bufferLength > buffer = _CL_NEWARRAY(TCHAR,bufferLength); > - bufferOwner = !consumeBuffer; > + bufferOwner = consumeBuffer; > } > > StringBuffer::StringBuffer(const TCHAR* value){ > > > This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. > > ------------------------------------------------------------------------------ > This SF.net email is sponsored by: > High Quality Requirements in a Collaborative Environment. > Download a free trial of Rational Requirements Composer Now! > http://p.sf.net/sfu/www-ibm-com > _______________________________________________ > Clucene-cvs mailing list > Clu...@li... > https://lists.sourceforge.net/lists/listinfo/clucene-cvs > |
From: <syn...@us...> - 2009-04-13 05:33:06
|
Revision: 2990 http://clucene.svn.sourceforge.net/clucene/?rev=2990&view=rev Author: synhershko Date: 2009-04-13 05:33:02 +0000 (Mon, 13 Apr 2009) Log Message: ----------- Some code cleanup Modified Paths: -------------- branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h branches/lucene2_3_2/src/shared/CLucene/config/threads.cpp Modified: branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h 2009-04-12 22:19:07 UTC (rev 2989) +++ branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h 2009-04-13 05:33:02 UTC (rev 2990) @@ -57,7 +57,7 @@ { private: struct Internal; - Internal* internal; + Internal* _internal; public: mutex_thread(const mutex_thread& clone); mutex_thread(); Modified: branches/lucene2_3_2/src/shared/CLucene/config/threads.cpp =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/config/threads.cpp 2009-04-12 22:19:07 UTC (rev 2989) +++ branches/lucene2_3_2/src/shared/CLucene/config/threads.cpp 2009-04-13 05:33:02 UTC (rev 2990) @@ -29,30 +29,30 @@ }; mutex_thread::mutex_thread(const mutex_thread& clone): - internal(new Internal) + _internal(new Internal) { - InitializeCriticalSection(&internal->mtx); + InitializeCriticalSection(&_internal->mtx); } mutex_thread::mutex_thread(): - internal(new Internal) + _internal(new Internal) { - InitializeCriticalSection(&internal->mtx); + InitializeCriticalSection(&_internal->mtx); } mutex_thread::~mutex_thread() { - DeleteCriticalSection(&internal->mtx); - delete internal; + DeleteCriticalSection(&_internal->mtx); + delete _internal; } void mutex_thread::lock() { - EnterCriticalSection(&internal->mtx); + EnterCriticalSection(&_internal->mtx); } void mutex_thread::unlock() { - LeaveCriticalSection(&internal->mtx); + LeaveCriticalSection(&_internal->mtx); } _LUCENE_THREADID_TYPE mutex_thread::_GetCurrentThreadId(){ @@ -95,23 
+95,23 @@ }; mutex_thread::mutex_thread(const mutex_thread& clone): - internal(new Internal) + _internal(new Internal) { #ifdef _CL_HAVE_PTHREAD_MUTEX_RECURSIVE - _CLPTHREAD_CHECK(pthread_mutex_init(&internal->mtx, &mutex_thread_attr), "mutex_thread(clone) constructor failed") + _CLPTHREAD_CHECK(pthread_mutex_init(&_internal->mtx, &mutex_thread_attr), "mutex_thread(clone) constructor failed") #else #if defined(__hpux) && defined(_DECTHREADS_) - _CLPTHREAD_CHECK(pthread_mutex_init(&internal->mtx, pthread_mutexattr_default), "mutex_thread(clone) constructor failed") + _CLPTHREAD_CHECK(pthread_mutex_init(&_internal->mtx, pthread_mutexattr_default), "mutex_thread(clone) constructor failed") #else - _CLPTHREAD_CHECK(pthread_mutex_init(&internal->mtx, 0), "mutex_thread(clone) constructor failed") + _CLPTHREAD_CHECK(pthread_mutex_init(&_internal->mtx, 0), "mutex_thread(clone) constructor failed") #endif - internal->lockCount=0; - internal->lockOwner=0; + _internal->lockCount=0; + _internal->lockOwner=0; #endif } mutex_thread::mutex_thread(): - internal(new Internal) + _internal(new Internal) { #ifdef _CL_HAVE_PTHREAD_MUTEX_RECURSIVE @@ -120,22 +120,22 @@ pthread_mutexattr_settype(&mutex_thread_attr, PTHREAD_MUTEX_RECURSIVE); mutex_pthread_attr_initd = true; } - _CLPTHREAD_CHECK(pthread_mutex_init(&internal->mtx, &mutex_thread_attr), "mutex_thread(clone) constructor failed") + _CLPTHREAD_CHECK(pthread_mutex_init(&_internal->mtx, &mutex_thread_attr), "mutex_thread(clone) constructor failed") #else #if defined(__hpux) && defined(_DECTHREADS_) - _CLPTHREAD_CHECK(pthread_mutex_init(&internal->mtx, pthread_mutexattr_default), "mutex_thread(clone) constructor failed") + _CLPTHREAD_CHECK(pthread_mutex_init(&_internal->mtx, pthread_mutexattr_default), "mutex_thread(clone) constructor failed") #else - _CLPTHREAD_CHECK(pthread_mutex_init(&internal->mtx, 0), "mutex_thread(clone) constructor failed") + _CLPTHREAD_CHECK(pthread_mutex_init(&_internal->mtx, 0), "mutex_thread(clone) 
constructor failed") #endif - internal->lockCount=0; - internal->lockOwner=0; + _internal->lockCount=0; + _internal->lockOwner=0; #endif } mutex_thread::~mutex_thread() { - _CLPTHREAD_CHECK(pthread_mutex_destroy(&internal->mtx), "~mutex_thread destructor failed") - delete internal; + _CLPTHREAD_CHECK(pthread_mutex_destroy(&_internal->mtx), "~mutex_thread destructor failed") + delete _internal; } _LUCENE_THREADID_TYPE mutex_thread::_GetCurrentThreadId(){ @@ -155,29 +155,29 @@ { #ifndef _CL_HAVE_PTHREAD_MUTEX_RECURSIVE pthread_t currentThread = pthread_self(); - if( pthread_equal( internal->lockOwner, currentThread ) ) { - ++internal->lockCount; + if( pthread_equal( _internal->lockOwner, currentThread ) ) { + ++_internal->lockCount; } else { - _CLPTHREAD_CHECK(pthread_mutex_lock(&internal->mtx), "mutex_thread::lock") - internal->lockOwner = currentThread; - internal->lockCount = 1; + _CLPTHREAD_CHECK(pthread_mutex_lock(&_internal->mtx), "mutex_thread::lock") + _internal->lockOwner = currentThread; + _internal->lockCount = 1; } #else - _CLPTHREAD_CHECK(pthread_mutex_lock(&internal->mtx), "mutex_thread::lock") + _CLPTHREAD_CHECK(pthread_mutex_lock(&_internal->mtx), "mutex_thread::lock") #endif } void mutex_thread::unlock() { #ifndef _CL_HAVE_PTHREAD_MUTEX_RECURSIVE - --internal->lockCount; - if( internal->lockCount == 0 ) + --_internal->lockCount; + if( _internal->lockCount == 0 ) { - internal->lockOwner = 0; - _CLPTHREAD_CHECK(pthread_mutex_unlock(&internal->mtx), "mutex_thread::unlock") + _internal->lockOwner = 0; + _CLPTHREAD_CHECK(pthread_mutex_unlock(&_internal->mtx), "mutex_thread::unlock") } #else - _CLPTHREAD_CHECK(pthread_mutex_unlock(&internal->mtx), "mutex_thread::unlock") + _CLPTHREAD_CHECK(pthread_mutex_unlock(&_internal->mtx), "mutex_thread::unlock") #endif } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-12 22:19:30
|
Revision: 2989 http://clucene.svn.sourceforge.net/clucene/?rev=2989&view=rev Author: synhershko Date: 2009-04-12 22:19:07 +0000 (Sun, 12 Apr 2009) Log Message: ----------- Fixes many memory leaks resulted by recent changes. There're still several more, will be eliminated soon Also adds various minor tweaks Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.cpp branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/FieldInfos.cpp branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp branches/lucene2_3_2/src/core/CLucene/index/_DocumentWriter.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h branches/lucene2_3_2/src/test/search/TestTermVector.cpp Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -10,7 +10,6 @@ #include "../AnalysisHeader.h" #include "../Analyzers.h" #include "StandardTokenizerConstants.h" -#include "CLucene/util/StringBuffer.h" CL_NS_USE(analysis) CL_NS_USE(util) Modified: branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp =================================================================== --- 
branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -13,7 +13,6 @@ #include "NumberTools.h" #include "CLucene/util/Misc.h" -#include "CLucene/util/StringBuffer.h" CL_NS_DEF(document) Modified: branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -35,38 +35,28 @@ /*Posting*/ -DocumentWriter::Posting::Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset):offsets(NULL) +DocumentWriter::Posting::Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset) { -//Func - Constructor -//Pre - t contains a valid reference to a Term -//Post - Instance has been created freq = 1; term = _CL_POINTER(t); - this->positions = _CLNEW ValueArray<int32_t>(1); - this->positions->values[0] = position; - //positions.values = (int32_t*)malloc(sizeof(int32_t)); - //positions.values[0] = position; - //positions.length = 1; + positions.values = (int32_t*)malloc(sizeof(int32_t)); + positions.values[0] = position; + positions.length = 1; if ( offset != NULL ){ - this->offsets = _CLNEW ObjectArray<TermVectorOffsetInfo>(1); - this->offsets->values[0] = offset; - //this->offsets.values = (TermVectorOffsetInfo**)malloc(sizeof(TermVectorOffsetInfo)); - //this->offsets.values[0] = offset; - //this->offsets.length = 1; + this->offsets.values = (TermVectorOffsetInfo**)malloc(sizeof(TermVectorOffsetInfo*)); + this->offsets.values[0] = offset; + this->offsets.length = 1; } } DocumentWriter::Posting::~Posting(){ -//Func - Destructor -//Pre - true -//Post - The instance has been destroyed - - _CLDELETE_LARRAY(this->positions->values); - 
//_CLLDELETE(this->positions); - //_CLLDELETE(this->offsets); - if ( this->offsets != NULL ) - _CLDELETE_LARRAY(this->offsets->values); + free(this->positions.values); + this->positions.values = NULL; + if (this->offsets.values != NULL){ + free(this->offsets.values); + this->offsets.values=NULL; + } _CLDECDELETE(this->term); } @@ -82,18 +72,18 @@ postingTable(_CLNEW PostingTableType), fieldLengths(NULL), fieldPositions(NULL), + fieldOffsets(NULL), fieldBoosts(NULL), - termBuffer(_CLNEW Term){ + termBuffer(_CLNEW Term) +{ //Pre - d contains a valid reference to a Directory // d contains a valid reference to a Analyzer // mfl > 0 and contains the maximum field length //Post - Instance has been created -CND_PRECONDITION(((mfl > 0) || (mfl == IndexWriter::FIELD_TRUNC_POLICY__WARN)), - "mfl is 0 or smaller than IndexWriter::FIELD_TRUNC_POLICY__WARN") + CND_PRECONDITION(((mfl > 0) || (mfl == IndexWriter::FIELD_TRUNC_POLICY__WARN)), + "mfl is 0 or smaller than IndexWriter::FIELD_TRUNC_POLICY__WARN") - fieldInfos = NULL; - fieldLengths = NULL; } DocumentWriter::DocumentWriter(CL_NS(store)::Directory* d, CL_NS(analysis)::Analyzer* a, IndexWriter* writer): @@ -106,19 +96,17 @@ postingTable(_CLNEW PostingTableType), fieldLengths(NULL), fieldPositions(NULL), + fieldOffsets(NULL), fieldBoosts(NULL), - termBuffer(_CLNEW Term){ + termBuffer(_CLNEW Term) +{ //Pre - d contains a valid reference to a Directory // d contains a valid reference to a Analyzer // mfl > 0 and contains the maximum field length //Post - Instance has been created -CND_PRECONDITION(((maxFieldLength > 0) || (maxFieldLength == IndexWriter::FIELD_TRUNC_POLICY__WARN)), - "mfl is 0 or smaller than IndexWriter::FIELD_TRUNC_POLICY__WARN") - - fieldInfos = NULL; - fieldLengths = NULL; - + CND_PRECONDITION(((maxFieldLength > 0) || (maxFieldLength == IndexWriter::FIELD_TRUNC_POLICY__WARN)), + "mfl is 0 or smaller than IndexWriter::FIELD_TRUNC_POLICY__WARN") } DocumentWriter::~DocumentWriter(){ @@ -171,7 +159,9 @@ 
fieldLengths = _CL_NEWARRAY(int32_t,size); // init fieldLengths fieldPositions = _CL_NEWARRAY(int32_t,size); // init fieldPositions fieldOffsets = _CL_NEWARRAY(int32_t,size); // init fieldOffsets + memset(fieldLengths, 0, sizeof(int32_t) * size); memset(fieldPositions, 0, sizeof(int32_t) * size); + memset(fieldOffsets, 0, sizeof(int32_t) * size); //initialise fieldBoost array with default boost int32_t fbl = fieldInfos->size(); @@ -217,19 +207,19 @@ _CLDELETE_ARRAY( postings ); } -void DocumentWriter::sortPostingTable(Posting**& array, int32_t& arraySize) { +void DocumentWriter::sortPostingTable(Posting**& _array, int32_t& arraySize) { // copy postingTable into an array arraySize = postingTable->size(); - array = _CL_NEWARRAY(Posting*,arraySize); + _array = _CL_NEWARRAY(Posting*,arraySize); PostingTableType::iterator postings = postingTable->begin(); int32_t i=0; while ( postings != postingTable->end() ){ - array[i] = (Posting*)postings->second; + _array[i] = (Posting*)postings->second; postings++; i++; } // sort the array - quickSort(array, 0, i - 1); + quickSort(_array, 0, i - 1); } @@ -369,7 +359,7 @@ } _CLFINALLY ( _CLDELETE(fields); ); -} // Document:;invertDocument +} // Document::invertDocument void DocumentWriter::addPosition(const TCHAR* field, @@ -382,19 +372,19 @@ Posting* ti = postingTable->get(termBuffer); if (ti != NULL) { // word seen before int32_t freq = ti->freq; - if (ti->positions->length == freq) { - // positions array is full, realloc its size - ti->positions->length = freq*2; - ti->positions->values = (int32_t*)realloc(ti->positions->values, ti->positions->length * sizeof(int32_t)); + if (ti->positions.length == freq) { + // positions array is full, realloc its size + ti->positions.length = freq*2; + ti->positions.values = (int32_t*)realloc(ti->positions.values, ti->positions.length * sizeof(int32_t)); } - ti->positions->values[freq] = position; // add new position + ti->positions.values[freq] = position; // add new position if (offset != 
NULL) { - if (ti->offsets->length == freq){ - ti->offsets->length = freq*2; - ti->offsets->values = (TermVectorOffsetInfo**)realloc(ti->offsets->values, ti->offsets->length * sizeof(TermVectorOffsetInfo)); + if (ti->offsets.length == freq){ + ti->offsets.length = freq*2; + ti->offsets.values = (TermVectorOffsetInfo**)realloc(ti->offsets.values, ti->offsets.length * sizeof(TermVectorOffsetInfo*)); } - ti->offsets->values[freq] = offset; + ti->offsets.values[freq] = offset; } ti->freq = freq + 1; // update frequency @@ -496,8 +486,8 @@ int32_t lastPosition = 0; // write positions for (int32_t j = 0; j < postingFreq; ++j) { // use delta-encoding - prox->writeVInt(posting->positions->values[j] - lastPosition); - lastPosition = posting->positions->values[j]; + prox->writeVInt(posting->positions.values[j] - lastPosition); + lastPosition = posting->positions.values[j]; } // check to see if we switched to a new field @@ -519,7 +509,7 @@ } } if (termVectorWriter != NULL && termVectorWriter->isFieldOpen()) { - termVectorWriter->addTerm(posting->term->text(), postingFreq, posting->positions, posting->offsets); + termVectorWriter->addTerm(posting->term->text(), postingFreq, &posting->positions, &posting->offsets); } } if (termVectorWriter != NULL) Modified: branches/lucene2_3_2/src/core/CLucene/index/FieldInfos.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/FieldInfos.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/FieldInfos.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -90,7 +90,7 @@ add(field->name(), field->isIndexed(), field->isTermVectorStored(), field->isStorePositionWithTermVector(), field->isStoreOffsetWithTermVector(), field->getOmitNorms()); } - _CLDELETE(fields); + _CLLDELETE(fields); } void FieldInfos::addIndexed(const TCHAR** names, const bool storeTermVectors, const bool storePositionWithTermVector, Modified: 
branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -107,7 +107,7 @@ if (field->isStored()) storedCount++; } - _CLDELETE(fields); + _CLLDELETE(fields); fieldsStream->writeVInt(storedCount); fields = doc->getFields(); Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -366,10 +366,6 @@ //return the enumeration return _termPositions; } - - bool IndexReader::getTermFreqVectors(int32_t docNumber, ObjectArray<TermFreqVector>& result){ - return this->getTermFreqVectors(docNumber, result); - } bool IndexReader::document(int32_t n, CL_NS(document)::Document* doc){ return document(n, *doc); Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h 2009-04-12 22:19:07 UTC (rev 2989) @@ -260,7 +260,7 @@ * @throws IOException if index cannot be accessed * @see org.apache.lucene.document.Field.TermVector */ - virtual bool getTermFreqVectors(int32_t docNumber, CL_NS(util)::ObjectArray<TermFreqVector>& result) =0; + virtual CL_NS(util)::ObjectArray<TermFreqVector>* getTermFreqVectors(int32_t docNumber) =0; /** * Return a term frequency vector for the specified document and field. 
The Modified: branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -103,9 +103,9 @@ _CLDELETE(internal); } -bool MultiReader::getTermFreqVectors(int32_t n, ObjectArray<TermFreqVector>& result){ +ObjectArray<TermFreqVector>* MultiReader::getTermFreqVectors(int32_t n){ int32_t i = readerIndex(n); // find segment num - return subReaders[i]->getTermFreqVectors(n - starts[i], result); // dispatch to segment + return subReaders[i]->getTermFreqVectors(n - starts[i]); // dispatch to segment } TermFreqVector* MultiReader::getTermFreqVector(int32_t n, const TCHAR* field){ Modified: branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h 2009-04-12 22:19:07 UTC (rev 2989) @@ -58,7 +58,7 @@ * in a given vectorized field. * If no such fields existed, the method returns null. 
*/ - bool getTermFreqVectors(int32_t n, CL_NS(util)::ObjectArray<TermFreqVector>& result); + CL_NS(util)::ObjectArray<TermFreqVector>* getTermFreqVectors(int32_t n); TermFreqVector* getTermFreqVector(int32_t n, const TCHAR* field); Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -307,10 +307,11 @@ if (reader->isDeleted(docNum)) continue; - ObjectArray<TermFreqVector> tmp; - if ( reader->getTermFreqVectors(docNum, (ObjectArray<TermFreqVector>&)tmp) ) - termVectorsWriter->addAllDocVectors((ObjectArray<TermFreqVector>&)tmp); - tmp.deleteValues(); + ObjectArray<TermFreqVector>* tmp = reader->getTermFreqVectors(docNum); + if ( tmp != NULL ) + termVectorsWriter->addAllDocVectors(*tmp); + //tmp->deleteValues(); + _CLLDELETE(tmp); } } }_CLFINALLY( _CLDELETE(termVectorsWriter); ); Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -817,16 +817,15 @@ return termVectorsReader->get(docNumber, field); } - bool SegmentReader::getTermFreqVectors(int32_t docNumber, ObjectArray<TermFreqVector>& result) { + ObjectArray<TermFreqVector>* SegmentReader::getTermFreqVectors(int32_t docNumber) { if (termVectorsReaderOrig == NULL) - return false; + return NULL; TermVectorsReader* termVectorsReader = getTermVectorsReader(); if (termVectorsReader == NULL) - return false; + return NULL; - result = (*termVectorsReader->get(docNumber)); - return true; + return termVectorsReader->get(docNumber); } CL_NS_END 
Modified: branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -202,7 +202,9 @@ ParallelArrayTermVectorMapper* mapper = _CLNEW ParallelArrayTermVectorMapper(); get(docNum, field, (TermVectorMapper*)mapper); - return mapper->materializeVector(); + TermFreqVector* ret = mapper->materializeVector(); + _CLLDELETE(mapper); + return ret; } @@ -305,12 +307,13 @@ ObjectArray<SegmentTermVector>* TermVectorsReader::readTermVectors(const int32_t docNum, const TCHAR** fields, const int64_t* tvfPointers, const int32_t len){ ObjectArray<SegmentTermVector>* res = _CLNEW CL_NS(util)::ObjectArray<SegmentTermVector>(len); + ParallelArrayTermVectorMapper* mapper = _CLNEW ParallelArrayTermVectorMapper(); for (int32_t i = 0; i < len; i++) { - ParallelArrayTermVectorMapper* mapper = _CLNEW ParallelArrayTermVectorMapper(); mapper->setDocumentNumber(docNum); readTermVector(fields[i], tvfPointers[i], mapper); res->values[i] = static_cast<SegmentTermVector*>(mapper->materializeVector()); } + _CLLDELETE(mapper); return res; } @@ -490,7 +493,7 @@ { } ParallelArrayTermVectorMapper::~ParallelArrayTermVectorMapper(){ - _CLDELETE_LCARRAY(field); + //_CLDELETE_LCARRAY(field); } void ParallelArrayTermVectorMapper::setExpectations(const TCHAR* _field, const int32_t numTerms, Modified: branches/lucene2_3_2/src/core/CLucene/index/_DocumentWriter.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_DocumentWriter.h 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/_DocumentWriter.h 2009-04-12 22:19:07 UTC (rev 2989) @@ -32,8 +32,8 @@ public: Term* term; // the Term int32_t freq; // its frequency in doc - 
CL_NS(util)::ValueArray<int32_t>* positions; // positions it occurs at - CL_NS(util)::ObjectArray<TermVectorOffsetInfo>* offsets; + CL_NS(util)::ValueArray<int32_t> positions; // positions it occurs at + CL_NS(util)::ObjectArray<TermVectorOffsetInfo> offsets; Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset); ~Posting(); @@ -79,7 +79,7 @@ void addPosition(const TCHAR* field, const TCHAR* text, const int32_t position, TermVectorOffsetInfo* offset); - void sortPostingTable(Posting**& array, int32_t& arraySize); + void sortPostingTable(Posting**& _array, int32_t& arraySize); static void quickSort(Posting**& postings, const int32_t lo, const int32_t hi); Modified: branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h 2009-04-12 22:19:07 UTC (rev 2989) @@ -329,7 +329,8 @@ * If no such fields existed, the method returns null. 
* @throws IOException */ - bool getTermFreqVectors(int32_t docNumber, CL_NS(util)::ObjectArray<TermFreqVector>& result); + //bool getTermFreqVectors(int32_t docNumber, CL_NS(util)::ObjectArray<TermFreqVector>& result); + CL_NS(util)::ObjectArray<TermFreqVector>* SegmentReader::getTermFreqVectors(int32_t docNumber); private: //Open all norms files for all fields void openNorms(CL_NS(store)::Directory* cfsDir); Modified: branches/lucene2_3_2/src/test/search/TestTermVector.cpp =================================================================== --- branches/lucene2_3_2/src/test/search/TestTermVector.cpp 2009-04-12 18:06:46 UTC (rev 2988) +++ branches/lucene2_3_2/src/test/search/TestTermVector.cpp 2009-04-12 22:19:07 UTC (rev 2989) @@ -6,8 +6,8 @@ ------------------------------------------------------------------------------*/ #include "test.h" -IndexSearcher* tv_searcher; -RAMDirectory* tv_directory; +IndexSearcher* tv_searcher = NULL; +RAMDirectory* tv_directory = NULL; void testTermPositionVectors(CuTest *tc) { CLUCENE_ASSERT(tv_searcher!=NULL); @@ -21,10 +21,11 @@ for (int32_t i = 0; i < hits->length(); i++) { - ObjectArray<TermFreqVector> vector; - CLUCENE_ASSERT(tv_searcher->getReader()->getTermFreqVectors(hits->id(i), *(ObjectArray<TermFreqVector>*)(&vector))); - CLUCENE_ASSERT(vector.length== 1); - vector.deleteValues(); + ObjectArray<TermFreqVector>* vector = tv_searcher->getReader()->getTermFreqVectors(hits->id(i)); + CLUCENE_ASSERT(vector != NULL); + CLUCENE_ASSERT(vector->length== 1); + vector->deleteValues(); + _CLLDELETE(vector); } _CLDELETE(hits); @@ -48,10 +49,11 @@ for (int32_t i = 0; i < hits->length(); i++) { - ObjectArray<TermFreqVector> vector; - CLUCENE_ASSERT(tv_searcher->getReader()->getTermFreqVectors(hits->id(i), (ObjectArray<TermFreqVector>&)vector)); - CLUCENE_ASSERT(vector.length == 1); - vector.deleteValues(); + ObjectArray<TermFreqVector>* vector = tv_searcher->getReader()->getTermFreqVectors(hits->id(i)); + CLUCENE_ASSERT(vector != NULL); 
+ CLUCENE_ASSERT(vector->length == 1); + vector->deleteValues(); + _CLLDELETE(vector); } //test mem leaks with vectors @@ -70,11 +72,6 @@ } } -void testTVCleanup(CuTest *tc) { - _CLDELETE(tv_searcher); - tv_directory->close(); - _CLDELETE(tv_directory); -} void testTVSetup(CuTest *tc) { SimpleAnalyzer a; tv_directory = _CLNEW RAMDirectory(); @@ -82,7 +79,7 @@ writer.setUseCompoundFile(false); TCHAR buf[200]; - for (int32_t i = 0; i < 1000; i++) { //todo: was 1000 + for (int32_t i = 0; i < 1000; i++) { Document doc; English::IntToEnglish(i,buf,200); @@ -90,7 +87,7 @@ int mod2 = i % 2; int termVector = 0; if (mod2 == 0 && mod3 == 0) - termVector = Field::TERMVECTOR_WITH_POSITIONS; + termVector = Field::TERMVECTOR_WITH_POSITIONS_OFFSETS; else if (mod2 == 0) termVector = Field::TERMVECTOR_WITH_POSITIONS; else if (mod3 == 0) @@ -104,8 +101,12 @@ writer.close(); tv_searcher = _CLNEW IndexSearcher(tv_directory); } +void testTVCleanup(CuTest *tc) { + _CLDELETE(tv_searcher); + tv_directory->close(); + _CLDELETE(tv_directory); +} - void setupDoc(Document& doc, const TCHAR* text) { doc.add(*new Field(_T("field"), text, Field::STORE_YES | This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-12 18:06:51
|
Revision: 2988 http://clucene.svn.sourceforge.net/clucene/?rev=2988&view=rev Author: synhershko Date: 2009-04-12 18:06:46 +0000 (Sun, 12 Apr 2009) Log Message: ----------- signed/unsigned mismatch Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp branches/lucene2_3_2/src/core/CLucene/document/NumberTools.h Modified: branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp 2009-04-12 17:12:31 UTC (rev 2987) +++ branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp 2009-04-12 18:06:46 UTC (rev 2988) @@ -39,7 +39,7 @@ _i64tot(l, tmp, NUMBERTOOLS_RADIX); size_t len = _tcslen(tmp); _tcscpy(buf+(STR_SIZE-len),tmp); - for ( int32_t i=1;i<STR_SIZE-len;i++ ) + for ( size_t i=1;i<STR_SIZE-len;i++ ) buf[i] = (int)'0'; buf[STR_SIZE] = 0; Modified: branches/lucene2_3_2/src/core/CLucene/document/NumberTools.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/document/NumberTools.h 2009-04-12 17:12:31 UTC (rev 2987) +++ branches/lucene2_3_2/src/core/CLucene/document/NumberTools.h 2009-04-12 18:06:46 UTC (rev 2988) @@ -47,7 +47,7 @@ /** * The length of (all) strings returned by {@link #longToString} */ - LUCENE_STATIC_CONSTANT (int32_t, STR_SIZE = 14); + LUCENE_STATIC_CONSTANT (size_t, STR_SIZE = 14); /** * Converts a long to a String suitable for indexing. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-12 17:12:50
|
Revision: 2987 http://clucene.svn.sourceforge.net/clucene/?rev=2987&view=rev Author: synhershko Date: 2009-04-12 17:12:31 +0000 (Sun, 12 Apr 2009) Log Message: ----------- * Fixes memory leak in StringBuffer which was introduced in revision 2948 * Tweaks Token::growBuffer by using new/delete[] instead of realloc since a memory copy is being performed right after it anyway Revision Links: -------------- http://clucene.svn.sourceforge.net/clucene/?rev=2948&view=rev Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp Modified: branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-04-12 13:42:34 UTC (rev 2986) +++ branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-04-12 17:12:31 UTC (rev 2987) @@ -67,7 +67,8 @@ Token::~Token(){ #ifndef LUCENE_TOKEN_WORD_LENGTH - free(_termText); + //free(_termText); + delete[] _termText; #endif _CLLDELETE(payload); } @@ -148,9 +149,16 @@ return; #ifndef LUCENE_TOKEN_WORD_LENGTH if ( _termText == NULL ) - _termText = (TCHAR*)malloc( size * sizeof(TCHAR) ); - else - _termText = (TCHAR*)realloc( _termText, size * sizeof(TCHAR) ); + //_termText = (TCHAR*)malloc( size * sizeof(TCHAR) ); + _termText = new TCHAR[size * sizeof(TCHAR)]; + else{ + // ISH: Use new/delete[] instead of realloc, since a copy is being made anyway and there's no + // need to preserve the current content + //_termText = (TCHAR*)realloc( _termText, size * sizeof(TCHAR) ); + TCHAR* __termText = new TCHAR[size * sizeof(TCHAR)]; + delete[] _termText; + _termText = __termText; + } bufferTextLen = size; #else _CLTHROWA(CL_ERR_TokenMgr,"Couldn't grow Token buffer"); Modified: branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp 
=================================================================== --- branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp 2009-04-12 13:42:34 UTC (rev 2986) +++ branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp 2009-04-12 17:12:31 UTC (rev 2987) @@ -40,7 +40,7 @@ len = 0; //Allocate a buffer of length bufferLength buffer = _CL_NEWARRAY(TCHAR,bufferLength); - bufferOwner = !consumeBuffer; + bufferOwner = consumeBuffer; } StringBuffer::StringBuffer(const TCHAR* value){ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-12 13:42:38
|
Revision: 2986 http://clucene.svn.sourceforge.net/clucene/?rev=2986&view=rev Author: synhershko Date: 2009-04-12 13:42:34 +0000 (Sun, 12 Apr 2009) Log Message: ----------- Fixes issues with previous commit Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h Modified: branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp 2009-04-12 13:21:07 UTC (rev 2985) +++ branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp 2009-04-12 13:42:34 UTC (rev 2986) @@ -19,9 +19,9 @@ } DefaultSkipListReader::~DefaultSkipListReader(){ - _CLLDELETE(freqPointer); - _CLLDELETE(proxPointer); - _CLLDELETE(payloadLength); + _CLDELETE_LARRAY(freqPointer); + _CLDELETE_LARRAY(proxPointer); + _CLDELETE_LARRAY(payloadLength); } void DefaultSkipListReader::init(const int64_t _skipPointer, const int64_t freqBasePointer, const int64_t proxBasePointer, const int32_t df, const bool storesPayloads) { @@ -48,14 +48,14 @@ } void DefaultSkipListReader::seekChild(const int32_t level) { - //super.seekChild(level); + MultiLevelSkipListReader::seekChild(level); freqPointer[level] = lastFreqPointer; proxPointer[level] = lastProxPointer; payloadLength[level] = lastPayloadLength; } void DefaultSkipListReader::setLastSkipData(const int32_t level) { - //super.setLastSkipData(level); + MultiLevelSkipListReader::setLastSkipData(level); lastFreqPointer = freqPointer[level]; lastProxPointer = proxPointer[level]; lastPayloadLength = payloadLength[level]; @@ -83,4 +83,4 @@ return delta; } -CL_NS_END \ No newline at end of file +CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp 
=================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp 2009-04-12 13:21:07 UTC (rev 2985) +++ branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp 2009-04-12 13:42:34 UTC (rev 2986) @@ -12,15 +12,14 @@ MultiLevelSkipListReader::MultiLevelSkipListReader(IndexInput* _skipStream, const int32_t maxSkipLevels, const int32_t _skipInterval): - numberOfLevelsToBuffer(1),skipStream(NULL),skipPointer(NULL),skipInterval(NULL), - numSkipped(NULL),skipDoc(_CL_NEWARRAY(int32_t,maxSkipLevels)),childPointer(NULL) + maxNumberOfSkipLevels(maxSkipLevels),numberOfLevelsToBuffer(1), + skipStream(_CL_NEWARRAY(IndexInput*,maxSkipLevels)), + skipPointer(_CL_NEWARRAY(int64_t,maxSkipLevels)), + skipInterval(_CL_NEWARRAY(int32_t,maxSkipLevels)), + numSkipped(_CL_NEWARRAY(int32_t,maxSkipLevels)), + skipDoc(_CL_NEWARRAY(int32_t,maxSkipLevels)), + childPointer(_CL_NEWARRAY(int64_t,maxSkipLevels)) { - this->skipStream = _CL_NEWARRAY(IndexInput*,maxSkipLevels); - this->skipPointer = _CL_NEWARRAY(int64_t,maxSkipLevels); - this->childPointer = _CL_NEWARRAY(int64_t,maxSkipLevels); - this->numSkipped = _CL_NEWARRAY(int32_t,maxSkipLevels); - this->maxNumberOfSkipLevels = maxSkipLevels; - this->skipInterval = _CL_NEWARRAY(int32_t,maxSkipLevels); this->skipStream[0] = _skipStream; this->inputIsBuffered = (strcmp(_skipStream->getObjectName(),"BufferedIndexInput") == 0); this->skipInterval[0] = _skipInterval; @@ -203,12 +202,12 @@ } void MultiLevelSkipListReader::SkipBuffer::readBytes(uint8_t* b, const int32_t len) { - memcpy(b,data+pos,len); + memcpy(b,data+pos,len*sizeof(uint8_t)); pos += len; } void MultiLevelSkipListReader::SkipBuffer::seek(const int64_t _pos) { - this->pos = (int32_t) (_pos - pointer); + this->pos = static_cast<int32_t>(_pos - pointer); } const char* MultiLevelSkipListReader::SkipBuffer::getObjectName(){ return "SkipBuffer"; } Modified: 
branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h 2009-04-12 13:21:07 UTC (rev 2985) +++ branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h 2009-04-12 13:42:34 UTC (rev 2986) @@ -118,6 +118,7 @@ uint8_t readByte(); + /* Make sure b is passed after the offset has been calculated into it, if necessary! */ void readBytes(uint8_t* b, const int32_t len); void seek(const int64_t _pos); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-12 13:21:14
|
Revision: 2985 http://clucene.svn.sourceforge.net/clucene/?rev=2985&view=rev Author: synhershko Date: 2009-04-12 13:21:07 +0000 (Sun, 12 Apr 2009) Log Message: ----------- Brings 2.3.2 support for TermVectorReader and supporting classes. Also: * Updates ObjectArray with a better coding interface and memory management * Makes several dependent classes to use ObjectArray/ValueArray instead of Array<> * Updates tests to comply with new Array.h changes Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/files_list.txt branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermDocs.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermVector.cpp branches/lucene2_3_2/src/core/CLucene/index/TermVector.h branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp branches/lucene2_3_2/src/core/CLucene/index/TermVectorWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/Terms.h branches/lucene2_3_2/src/core/CLucene/index/_DocumentWriter.h branches/lucene2_3_2/src/core/CLucene/index/_MultiReader.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h branches/lucene2_3_2/src/core/CLucene/index/_TermVector.h branches/lucene2_3_2/src/core/CLucene/search/PhrasePositions.cpp branches/lucene2_3_2/src/core/CLucene/search/_PhrasePositions.h branches/lucene2_3_2/src/core/CLucene/util/Array.h branches/lucene2_3_2/src/core/CMakeLists.txt branches/lucene2_3_2/src/test/search/TestTermVector.cpp Added Paths: ----------- 
branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.h branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h Modified: branches/lucene2_3_2/src/core/CLucene/files_list.txt =================================================================== (Binary files differ) Added: branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp (rev 0) +++ branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -0,0 +1,86 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ + +#include "CLucene/_ApiHeader.h" +#include "DefaultSkipListReader.h" + +CL_NS_DEF(index) + +DefaultSkipListReader::DefaultSkipListReader(CL_NS(store)::IndexInput* _skipStream, const int32_t maxSkipLevels, const int32_t _skipInterval) + : MultiLevelSkipListReader(_skipStream, maxSkipLevels, _skipInterval) +{ + freqPointer = _CL_NEWARRAY(int64_t,maxSkipLevels); + proxPointer = _CL_NEWARRAY(int64_t,maxSkipLevels); + payloadLength = _CL_NEWARRAY(int32_t,maxSkipLevels); +} + +DefaultSkipListReader::~DefaultSkipListReader(){ + _CLLDELETE(freqPointer); + _CLLDELETE(proxPointer); + _CLLDELETE(payloadLength); +} + +void DefaultSkipListReader::init(const int64_t _skipPointer, const int64_t freqBasePointer, const int64_t proxBasePointer, const int32_t df, const bool storesPayloads) { + MultiLevelSkipListReader::init(_skipPointer, df); + this->currentFieldStoresPayloads = storesPayloads; + lastFreqPointer = freqBasePointer; + lastProxPointer = proxBasePointer; + + for (int32_t j=0; j<numberOfSkipLevels; j++){ + freqPointer[j] = freqBasePointer; + proxPointer[j] = proxBasePointer; + payloadLength[j] = 0; + } +} + +int64_t DefaultSkipListReader::getFreqPointer() const { + return lastFreqPointer; +} +int64_t DefaultSkipListReader::getProxPointer() const { + return lastProxPointer; +} +int32_t DefaultSkipListReader::getPayloadLength() const { + return lastPayloadLength; +} + +void DefaultSkipListReader::seekChild(const int32_t level) { + //super.seekChild(level); + freqPointer[level] = lastFreqPointer; + proxPointer[level] = lastProxPointer; + payloadLength[level] = lastPayloadLength; +} + +void DefaultSkipListReader::setLastSkipData(const int32_t level) { + //super.setLastSkipData(level); + lastFreqPointer = freqPointer[level]; + lastProxPointer = proxPointer[level]; + lastPayloadLength = payloadLength[level]; +} + +int32_t DefaultSkipListReader::readSkipData(const int32_t level, 
CL_NS(store)::IndexInput* _skipStream) { + int32_t delta; + if (currentFieldStoresPayloads) { + // the current field stores payloads. + // if the doc delta is odd then we have + // to read the current payload length + // because it differs from the length of the + // previous payload + delta = _skipStream->readVInt(); + if ((delta & 1) != 0) { + payloadLength[level] = _skipStream->readVInt(); + } + delta = (int32_t)(((uint32_t)delta) >> (uint32_t)1); + } else { + delta = _skipStream->readVInt(); + } + freqPointer[level] += _skipStream->readVInt(); + proxPointer[level] += _skipStream->readVInt(); + + return delta; +} + +CL_NS_END \ No newline at end of file Added: branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.h (rev 0) +++ branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.h 2009-04-12 13:21:07 UTC (rev 2985) @@ -0,0 +1,57 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_index_DefaultSkipListReader_ +#define _lucene_index_DefaultSkipListReader_ + +#include "MultiLevelSkipListReader.h" + +CL_NS_DEF(index) + +/** + * Implements the skip list reader for the default posting list format + * that stores positions and payloads. 
+ * + */ +class DefaultSkipListReader: public MultiLevelSkipListReader { +private: + bool currentFieldStoresPayloads; + int64_t* freqPointer; + int64_t* proxPointer; + int32_t* payloadLength; + + int64_t lastFreqPointer; + int64_t lastProxPointer; + int32_t lastPayloadLength; + +public: + DefaultSkipListReader(CL_NS(store)::IndexInput* _skipStream, const int32_t maxSkipLevels, const int32_t _skipInterval); + virtual ~DefaultSkipListReader(); + + void init(const int64_t _skipPointer, const int64_t freqBasePointer, const int64_t proxBasePointer, const int32_t df, const bool storesPayloads); + + /** Returns the freq pointer of the doc to which the last call of + * {@link MultiLevelSkipListReader#skipTo(int)} has skipped. */ + int64_t getFreqPointer() const; + + /** Returns the prox pointer of the doc to which the last call of + * {@link MultiLevelSkipListReader#skipTo(int)} has skipped. */ + int64_t getProxPointer() const; + + /** Returns the payload length of the payload stored just before + * the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} + * has skipped. 
*/ + int32_t getPayloadLength() const; + +protected: + void seekChild(const int32_t level); + + void setLastSkipData(const int32_t level); + + int32_t readSkipData(const int32_t level, CL_NS(store)::IndexInput* _skipStream); +}; +CL_NS_END +#endif Modified: branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -35,7 +35,7 @@ /*Posting*/ -DocumentWriter::Posting::Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset) +DocumentWriter::Posting::Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset):offsets(NULL) { //Func - Constructor //Pre - t contains a valid reference to a Term @@ -43,14 +43,18 @@ freq = 1; term = _CL_POINTER(t); - positions.values = (int32_t*)malloc(sizeof(int32_t)); - positions.values[0] = position; - positions.length = 1; + this->positions = _CLNEW ValueArray<int32_t>(1); + this->positions->values[0] = position; + //positions.values = (int32_t*)malloc(sizeof(int32_t)); + //positions.values[0] = position; + //positions.length = 1; if ( offset != NULL ){ - this->offsets.values = (TermVectorOffsetInfo*)malloc(sizeof(TermVectorOffsetInfo)); - this->offsets.values[0] = *offset; - this->offsets.length = 1; + this->offsets = _CLNEW ObjectArray<TermVectorOffsetInfo>(1); + this->offsets->values[0] = offset; + //this->offsets.values = (TermVectorOffsetInfo**)malloc(sizeof(TermVectorOffsetInfo)); + //this->offsets.values[0] = offset; + //this->offsets.length = 1; } } DocumentWriter::Posting::~Posting(){ @@ -58,9 +62,11 @@ //Pre - true //Post - The instance has been destroyed - free(this->positions.values); - if ( this->offsets.values != NULL ) - free(this->offsets.values); + _CLDELETE_LARRAY(this->positions->values); + 
//_CLLDELETE(this->positions); + //_CLLDELETE(this->offsets); + if ( this->offsets != NULL ) + _CLDELETE_LARRAY(this->offsets->values); _CLDECDELETE(this->term); } @@ -376,19 +382,19 @@ Posting* ti = postingTable->get(termBuffer); if (ti != NULL) { // word seen before int32_t freq = ti->freq; - if (ti->positions.length == freq) { + if (ti->positions->length == freq) { // positions array is full, realloc its size - ti->positions.length = freq*2; - ti->positions.values = (int32_t*)realloc(ti->positions.values, ti->positions.length * sizeof(int32_t)); + ti->positions->length = freq*2; + ti->positions->values = (int32_t*)realloc(ti->positions->values, ti->positions->length * sizeof(int32_t)); } - ti->positions.values[freq] = position; // add new position + ti->positions->values[freq] = position; // add new position if (offset != NULL) { - if (ti->offsets.length == freq){ - ti->offsets.length = freq*2; - ti->offsets.values = (TermVectorOffsetInfo*)realloc(ti->offsets.values, ti->offsets.length * sizeof(TermVectorOffsetInfo)); + if (ti->offsets->length == freq){ + ti->offsets->length = freq*2; + ti->offsets->values = (TermVectorOffsetInfo**)realloc(ti->offsets->values, ti->offsets->length * sizeof(TermVectorOffsetInfo)); } - ti->offsets[freq] = *offset; + ti->offsets->values[freq] = offset; } ti->freq = freq + 1; // update frequency @@ -490,8 +496,8 @@ int32_t lastPosition = 0; // write positions for (int32_t j = 0; j < postingFreq; ++j) { // use delta-encoding - prox->writeVInt(posting->positions.values[j] - lastPosition); - lastPosition = posting->positions.values[j]; + prox->writeVInt(posting->positions->values[j] - lastPosition); + lastPosition = posting->positions->values[j]; } // check to see if we switched to a new field @@ -513,7 +519,7 @@ } } if (termVectorWriter != NULL && termVectorWriter->isFieldOpen()) { - termVectorWriter->addTerm(posting->term->text(), postingFreq, &posting->positions, &posting->offsets); + termVectorWriter->addTerm(posting->term->text(), 
postingFreq, posting->positions, posting->offsets); } } if (termVectorWriter != NULL) Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -367,7 +367,7 @@ return _termPositions; } - bool IndexReader::getTermFreqVectors(int32_t docNumber, Array<TermFreqVector*>& result){ + bool IndexReader::getTermFreqVectors(int32_t docNumber, ObjectArray<TermFreqVector>& result){ return this->getTermFreqVectors(docNumber, result); } Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h 2009-04-12 13:21:07 UTC (rev 2985) @@ -260,7 +260,7 @@ * @throws IOException if index cannot be accessed * @see org.apache.lucene.document.Field.TermVector */ - virtual bool getTermFreqVectors(int32_t docNumber, CL_NS(util)::Array<TermFreqVector*>& result) =0; + virtual bool getTermFreqVectors(int32_t docNumber, CL_NS(util)::ObjectArray<TermFreqVector>& result) =0; /** * Return a term frequency vector for the specified document and field. 
The Added: branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp (rev 0) +++ branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -0,0 +1,227 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#include "CLucene/_ApiHeader.h" +#include "MultiLevelSkipListReader.h" + +CL_NS_USE(store) +CL_NS_DEF(index) + +MultiLevelSkipListReader::MultiLevelSkipListReader(IndexInput* _skipStream, const int32_t maxSkipLevels, + const int32_t _skipInterval): + numberOfLevelsToBuffer(1),skipStream(NULL),skipPointer(NULL),skipInterval(NULL), + numSkipped(NULL),skipDoc(_CL_NEWARRAY(int32_t,maxSkipLevels)),childPointer(NULL) +{ + this->skipStream = _CL_NEWARRAY(IndexInput*,maxSkipLevels); + this->skipPointer = _CL_NEWARRAY(int64_t,maxSkipLevels); + this->childPointer = _CL_NEWARRAY(int64_t,maxSkipLevels); + this->numSkipped = _CL_NEWARRAY(int32_t,maxSkipLevels); + this->maxNumberOfSkipLevels = maxSkipLevels; + this->skipInterval = _CL_NEWARRAY(int32_t,maxSkipLevels); + this->skipStream[0] = _skipStream; + this->inputIsBuffered = (strcmp(_skipStream->getObjectName(),"BufferedIndexInput") == 0); + this->skipInterval[0] = _skipInterval; + for (int32_t i = 1; i < maxSkipLevels; i++) { + // cache skip intervals + this->skipInterval[i] = this->skipInterval[i - 1] * _skipInterval; + } + memset(skipDoc,0,maxSkipLevels*sizeof(int32_t)); // TODO: artificial init +} +MultiLevelSkipListReader::~MultiLevelSkipListReader(){ + close(); + 
_CLDELETE_LARRAY(skipStream); + _CLDELETE_LARRAY(skipPointer); + _CLDELETE_LARRAY(childPointer); + _CLDELETE_LARRAY(numSkipped); + _CLDELETE_LARRAY(skipInterval); + _CLDELETE_LARRAY(skipDoc); +} + +int32_t MultiLevelSkipListReader::getDoc() const { + return lastDoc; +} + +int32_t MultiLevelSkipListReader::skipTo(const int32_t target) { + if (!haveSkipped) { + // first time, load skip levels + loadSkipLevels(); + haveSkipped = true; + } + + // walk up the levels until highest level is found that has a skip + // for this target + int32_t level = 0; + while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) { + level++; + } + + while (level >= 0) { + if (target > skipDoc[level]) { + if (!loadNextSkip(level)) { + continue; + } + } else { + // no more skips on this level, go down one level + if (level > 0 && lastChildPointer > skipStream[level - 1]->getFilePointer()) { + seekChild(level - 1); + } + level--; + } + } + + return numSkipped[0] - skipInterval[0] - 1; +} + +bool MultiLevelSkipListReader::loadNextSkip(const int32_t level) { + // we have to skip, the target document is greater than the current + // skip list entry + setLastSkipData(level); + + numSkipped[level] += skipInterval[level]; + + if (numSkipped[level] > docCount) { + // this skip list is exhausted + skipDoc[level] = LUCENE_INT32_MAX_SHOULDBE; + if (numberOfSkipLevels > level) numberOfSkipLevels = level; + return false; + } + + // read next skip entry + skipDoc[level] += readSkipData(level, skipStream[level]); + + if (level != 0) { + // read the child pointer if we are not on the leaf level + childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1]; + } + return true; +} + +void MultiLevelSkipListReader::seekChild(const int32_t level) { + skipStream[level]->seek(lastChildPointer); + numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1]; + skipDoc[level] = lastDoc; + if (level > 0) { + childPointer[level] = skipStream[level]->readVLong() + skipPointer[level 
- 1]; + } +} + +void MultiLevelSkipListReader::close() { + for (int32_t i = 1; i < maxNumberOfSkipLevels; i++) { + if (skipStream[i] != NULL) { + //skipStream[i]->close(); + _CLLDELETE(skipStream[i]); + } + } +} + +void MultiLevelSkipListReader::init(const int64_t _skipPointer, const int32_t df) { + this->skipPointer[0] = _skipPointer; + this->docCount = df; + for (int32_t j=0; j<numberOfSkipLevels; j++){ + skipDoc[j] = 0; + numSkipped[j] = 0; + childPointer[j] = 0; + } + + haveSkipped = false; + for (int32_t i = 1; i < numberOfSkipLevels; i++) { + _CLDELETE(skipStream[i]); + } +} + +void MultiLevelSkipListReader::loadSkipLevels() { + numberOfSkipLevels = (docCount == 0) ? 0 : (int32_t)floor(log((double)docCount) / log((double)skipInterval[0])); + if (numberOfSkipLevels > maxNumberOfSkipLevels) { + numberOfSkipLevels = maxNumberOfSkipLevels; + } + + skipStream[0]->seek(skipPointer[0]); + + int32_t toBuffer = numberOfLevelsToBuffer; + + for (int32_t i = numberOfSkipLevels - 1; i > 0; i--) { + // the length of the current level + int64_t length = skipStream[0]->readVLong(); + + // the start pointer of the current level + skipPointer[i] = skipStream[0]->getFilePointer(); + if (toBuffer > 0) { + // buffer this level + skipStream[i] = static_cast<IndexInput*>(_CLNEW SkipBuffer(skipStream[0], (int32_t) length)); + toBuffer--; + } else { + // clone this stream, it is already at the start of the current level + skipStream[i] = (IndexInput*) skipStream[0]->clone(); + if (inputIsBuffered && length < BufferedIndexInput::BUFFER_SIZE) { + ((BufferedIndexInput*) skipStream[i])->setBufferSize((int32_t) length); + } + + // move base stream beyond the current level + skipStream[0]->seek(skipStream[0]->getFilePointer() + length); + } + } + + // use base stream for the lowest level + skipPointer[0] = skipStream[0]->getFilePointer(); +} + +void MultiLevelSkipListReader::setLastSkipData(const int32_t level) { + lastDoc = skipDoc[level]; + lastChildPointer = childPointer[level]; +} + 
+MultiLevelSkipListReader::SkipBuffer::SkipBuffer(IndexInput* input, const int32_t _length):pos(0) +{ + data = _CL_NEWARRAY(uint8_t,_length); + this->_datalength = _length; + pointer = input->getFilePointer(); + input->readBytes(data, _length); +} +MultiLevelSkipListReader::SkipBuffer::~SkipBuffer() +{ + _CLLDELETE(data); +} + +void MultiLevelSkipListReader::SkipBuffer::close() { + _CLDELETE(data); + _datalength=0; +} + +int64_t MultiLevelSkipListReader::SkipBuffer::getFilePointer() const { + return pointer + pos; +} + +int64_t MultiLevelSkipListReader::SkipBuffer::length() const { + return _datalength; +} + +uint8_t MultiLevelSkipListReader::SkipBuffer::readByte() { + return data[pos++]; +} + +void MultiLevelSkipListReader::SkipBuffer::readBytes(uint8_t* b, const int32_t len) { + memcpy(b,data+pos,len); + pos += len; +} + +void MultiLevelSkipListReader::SkipBuffer::seek(const int64_t _pos) { + this->pos = (int32_t) (_pos - pointer); +} + +const char* MultiLevelSkipListReader::SkipBuffer::getObjectName(){ return "SkipBuffer"; } +const char* MultiLevelSkipListReader::SkipBuffer::getDirectoryType() const{ return "SKIP"; } +MultiLevelSkipListReader::SkipBuffer::SkipBuffer(const SkipBuffer& other): IndexInput(other){ + data = _CL_NEWARRAY(uint8_t,other._datalength); + memcpy(data,other.data,other._datalength * sizeof(uint8_t)); + this->_datalength = other._datalength; + this->pointer = other.pointer; + this->pos = other.pos; +} +IndexInput* MultiLevelSkipListReader::SkipBuffer::clone() const{ + return _CLNEW SkipBuffer(*this); +} + +CL_NS_END \ No newline at end of file Added: branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h (rev 0) +++ branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h 2009-04-12 13:21:07 UTC (rev 2985) @@ -0,0 +1,132 @@ 
+/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ +#ifndef _lucene_index_MultiLevelSkipListReader_ +#define _lucene_index_MultiLevelSkipListReader_ + +#include "CLucene/store/IndexInput.h" +//#include "CLucene/util/Array.h" + +CL_NS_DEF(index) + +/** + * This abstract class reads skip lists with multiple levels. + * + * See {@link MultiLevelSkipListWriter} for the information about the encoding + * of the multi level skip lists. + * + * Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)} + * which defines the actual format of the skip data. + */ +class MultiLevelSkipListReader : LUCENE_BASE { +private: + // the maximum number of skip levels possible for this index + int32_t maxNumberOfSkipLevels; + +protected: + // number of levels in this skip list + int32_t numberOfSkipLevels; + +private: + // Expert: defines the number of top skip levels to buffer in memory. + // Reducing this number results in less memory usage, but possibly + // slower performance due to more random I/Os. + // Please notice that the space each level occupies is limited by + // the skipInterval. The top level can not contain more than + // skipLevel entries, the second top level can not contain more + // than skipLevel^2 entries and so forth. 
+ int32_t numberOfLevelsToBuffer; + + int32_t docCount; + bool haveSkipped; + + CL_NS(store)::IndexInput** skipStream; // skipStream for each level + int64_t* skipPointer; // the start pointer of each skip level + int32_t* skipInterval; // skipInterval of each level + int32_t* numSkipped; // number of docs skipped per level + + int32_t* skipDoc; // doc id of current skip entry per level + int32_t lastDoc; // doc id of last read skip entry with docId <= target + int64_t* childPointer; // child pointer of current skip entry per level + int64_t lastChildPointer; // childPointer of last read skip entry with docId <= target + + bool inputIsBuffered; + +public: + MultiLevelSkipListReader(CL_NS(store)::IndexInput* _skipStream, const int32_t maxSkipLevels, const int32_t _skipInterval); + virtual ~MultiLevelSkipListReader(); + + /** Returns the id of the doc to which the last call of {@link #skipTo(int)} + * has skipped. */ + int32_t getDoc() const; + + /** Skips entries to the first beyond the current whose document number is + * greater than or equal to <i>target</i>. Returns the current doc count. + */ + int32_t skipTo(const int32_t target); + +private: + bool loadNextSkip(const int32_t level); + +protected: + /** Seeks the skip entry on the given level */ + virtual void seekChild(const int32_t level); + + void close(); + + /** initializes the reader */ + void init(const int64_t _skipPointer, const int32_t df); + +private: + /** Loads the skip levels */ + void loadSkipLevels(); + +protected: + /** + * Subclasses must implement the actual skip data encoding in this method. 
+ * + * @param level the level skip data shall be read from + * @param skipStream the skip stream to read from + */ + virtual int32_t readSkipData(const int32_t level, CL_NS(store)::IndexInput* skipStream) = 0; + + /** Copies the values of the last read skip entry on this level */ + virtual void setLastSkipData(const int32_t level); + +protected: + /** used to buffer the top skip levels */ + class SkipBuffer : public CL_NS(store)::IndexInput { + private: + uint8_t* data; + int64_t pointer; + int32_t pos; + size_t _datalength; + + public: + SkipBuffer(CL_NS(store)::IndexInput* input, const int32_t length); + virtual ~SkipBuffer(); + + private: + void close(); + + int64_t getFilePointer() const; + + int64_t length() const; + + uint8_t readByte(); + + void readBytes(uint8_t* b, const int32_t len); + + void seek(const int64_t _pos); + + SkipBuffer(const SkipBuffer& other); + CL_NS(store)::IndexInput* clone() const; + const char* getObjectName(); + const char* getDirectoryType() const; + }; +}; +CL_NS_END +#endif Modified: branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -103,7 +103,7 @@ _CLDELETE(internal); } -bool MultiReader::getTermFreqVectors(int32_t n, Array<TermFreqVector*>& result){ +bool MultiReader::getTermFreqVectors(int32_t n, ObjectArray<TermFreqVector>& result){ int32_t i = readerIndex(n); // find segment num return subReaders[i]->getTermFreqVectors(n - starts[i], result); // dispatch to segment } Modified: branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h 2009-04-12 12:53:18 UTC (rev 2984) +++ 
branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h 2009-04-12 13:21:07 UTC (rev 2985) @@ -58,7 +58,7 @@ * in a given vectorized field. * If no such fields existed, the method returns null. */ - bool getTermFreqVectors(int32_t n, CL_NS(util)::Array<TermFreqVector*>& result); + bool getTermFreqVectors(int32_t n, CL_NS(util)::ObjectArray<TermFreqVector>& result); TermFreqVector* getTermFreqVector(int32_t n, const TCHAR* field); Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -307,9 +307,9 @@ if (reader->isDeleted(docNum)) continue; - ObjectArray<TermFreqVector*> tmp; - if ( reader->getTermFreqVectors(docNum, (Array<TermFreqVector*>&)tmp) ) - termVectorsWriter->addAllDocVectors((Array<TermFreqVector*>&)tmp); + ObjectArray<TermFreqVector> tmp; + if ( reader->getTermFreqVectors(docNum, (ObjectArray<TermFreqVector>&)tmp) ) + termVectorsWriter->addAllDocVectors((ObjectArray<TermFreqVector>&)tmp); tmp.deleteValues(); } } Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -817,7 +817,7 @@ return termVectorsReader->get(docNumber, field); } - bool SegmentReader::getTermFreqVectors(int32_t docNumber, Array<TermFreqVector*>& result) { + bool SegmentReader::getTermFreqVectors(int32_t docNumber, ObjectArray<TermFreqVector>& result) { if (termVectorsReaderOrig == NULL) return false; @@ -825,7 +825,8 @@ if (termVectorsReader == NULL) return false; - return termVectorsReader->get(docNumber, 
result); + result = (*termVectorsReader->get(docNumber)); + return true; } CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentTermDocs.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentTermDocs.cpp 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentTermDocs.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -13,40 +13,15 @@ CL_NS_DEF(index) - SegmentTermDocs::SegmentTermDocs(const SegmentReader* _parent){ - //Func - Constructor - //Pre - Paren != NULL - //Post - The instance has been created - - CND_PRECONDITION(_parent != NULL,"Parent is NULL"); - - parent = _parent; - deletedDocs = parent->deletedDocs; - - _doc = 0; - _freq = 0; - count = 0; - df = 0; - - skipInterval=0; - numSkips=0; - skipCount=0; - skipStream=NULL; - skipDoc=0; - freqPointer=0; - proxPointer=0; - skipPointer=0; - haveSkipped=false; - - freqStream = parent->freqStream->clone(); - skipInterval = parent->tis->getSkipInterval(); + SegmentTermDocs::SegmentTermDocs(const SegmentReader* _parent) : parent(_parent),freqStream(_parent->freqStream->clone()), + count(0),df(0),deletedDocs(_parent->deletedDocs),_doc(0),_freq(0),skipInterval(_parent->tis->getSkipInterval()), + maxSkipLevels(_parent->tis->getMaxSkipLevels()),skipListReader(NULL),freqBasePointer(0),proxBasePointer(0), + skipPointer(0),haveSkipped(false) + { + CND_CONDITION(_parent != NULL,"Parent is NULL"); } SegmentTermDocs::~SegmentTermDocs() { - //Func - Destructor - //Pre - true - //Post - The instance has been destroyed - close(); } @@ -56,52 +31,47 @@ void SegmentTermDocs::seek(Term* term) { TermInfo* ti = parent->tis->get(term); - seek(ti); + seek(ti, term); _CLDELETE(ti); } void SegmentTermDocs::seek(TermEnum* termEnum){ TermInfo* ti=NULL; + Term* term = NULL; // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs if ( termEnum->getObjectName() == 
SegmentTermEnum::getClassName() && ((SegmentTermEnum*)termEnum)->fieldInfos == parent->fieldInfos ){ - ti = ((SegmentTermEnum*)termEnum)->getTermInfo(); - }else{ - ti = parent->tis->get(termEnum->term(false)); + SegmentTermEnum* segmentTermEnum = ((SegmentTermEnum*) termEnum); + term = segmentTermEnum->term(false); + ti = segmentTermEnum->getTermInfo(); + }else{ + term = termEnum->term(false); + ti = parent->tis->get(term); } - seek(ti); + seek(ti,term); _CLDELETE(ti); } - void SegmentTermDocs::seek(const TermInfo* ti) { - count = 0; - if (ti == NULL) { - df = 0; - } else { - df = ti->docFreq; - _doc = 0; - skipDoc = 0; - skipCount = 0; - numSkips = df / skipInterval; - freqPointer = ti->freqPointer; - proxPointer = ti->proxPointer; - skipPointer = freqPointer + ti->skipOffset; - freqStream->seek(freqPointer); - haveSkipped = false; - } + void SegmentTermDocs::seek(const TermInfo* ti,Term* term) { + count = 0; + FieldInfo* fi = parent->fieldInfos->fieldInfo(term->field()); + currentFieldStoresPayloads = (fi != NULL) ? 
fi->storePayloads : false; + if (ti == NULL) { + df = 0; + } else { // punt case + df = ti->docFreq; + _doc = 0; + freqBasePointer = ti->freqPointer; + proxBasePointer = ti->proxPointer; + skipPointer = freqBasePointer + ti->skipOffset; + freqStream->seek(freqBasePointer); + haveSkipped = false; + } } void SegmentTermDocs::close() { - - //Check if freqStream still exists - if (freqStream != NULL){ - freqStream->close(); //todo: items like these can probably be delete, because deleting the object also closes it...do everywhere - _CLDELETE( freqStream ); - } - if (skipStream != NULL){ - skipStream->close(); - _CLDELETE( skipStream ); - } + _CLDELETE( freqStream ); + _CLDELETE( skipListReader ); } int32_t SegmentTermDocs::doc()const { @@ -132,76 +102,51 @@ } int32_t SegmentTermDocs::read(int32_t* docs, int32_t* freqs, int32_t length) { - int32_t i = 0; -//todo: one optimization would be to get the pointer buffer for ram or mmap dirs -//and iterate over them instead of using readByte() intensive functions. - while (i<length && count < df) { - uint32_t docCode = freqStream->readVInt(); - _doc += docCode >> 1; - if ((docCode & 1) != 0) // if low bit is set - _freq = 1; // _freq is one - else - _freq = freqStream->readVInt(); // else read _freq - count++; + int32_t i = 0; + //todo: one optimization would be to get the pointer buffer for ram or mmap dirs + //and iterate over them instead of using readByte() intensive functions. 
+ while (i<length && count < df) { + // manually inlined call to next() for speed + uint32_t docCode = freqStream->readVInt(); + _doc += docCode >> 1; + if ((docCode & 1) != 0) // if low bit is set + _freq = 1; // _freq is one + else + _freq = freqStream->readVInt(); // else read _freq + count++; - if (deletedDocs == NULL || (_doc >= 0 && !deletedDocs->get(_doc))) { - docs[i] = _doc; - freqs[i] = _freq; - i++; - } - } - return i; + if (deletedDocs == NULL || (_doc >= 0 && !deletedDocs->get(_doc))) { + docs[i] = _doc; + freqs[i] = _freq; + i++; + } + } + return i; } bool SegmentTermDocs::skipTo(const int32_t target){ assert(count <= df ); if (df >= skipInterval) { // optimized case - if (skipStream == NULL) - skipStream = freqStream->clone(); // lazily clone + if (skipListReader == NULL) + skipListReader = _CLNEW DefaultSkipListReader(freqStream->clone(), maxSkipLevels, skipInterval); // lazily clone - if (!haveSkipped) { // lazily seek skip stream - skipStream->seek(skipPointer); - haveSkipped = true; - } + if (!haveSkipped) { // lazily initialize skip stream + skipListReader->init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads); + haveSkipped = true; + } - // scan skip data - int32_t lastSkipDoc = skipDoc; - int64_t lastFreqPointer = freqStream->getFilePointer(); - int64_t lastProxPointer = -1; - int32_t numSkipped = -1 - (count % skipInterval); + int32_t newCount = skipListReader->skipTo(target); + if (newCount > count) { + freqStream->seek(skipListReader->getFreqPointer()); + skipProx(skipListReader->getProxPointer(), skipListReader->getPayloadLength()); + + _doc = skipListReader->getDoc(); + count = newCount; + } + } - while (target > skipDoc) { - lastSkipDoc = skipDoc; - lastFreqPointer = freqPointer; - lastProxPointer = proxPointer; - - if (skipDoc != 0 && skipDoc >= _doc) - numSkipped += skipInterval; - - if(skipCount >= numSkips) - break; - - skipDoc += skipStream->readVInt(); - freqPointer += skipStream->readVInt(); - 
proxPointer += skipStream->readVInt(); - - skipCount++; - } - - // if we found something to skip, then skip it - if (lastFreqPointer > freqStream->getFilePointer()) { - freqStream->seek(lastFreqPointer); - skipProx(lastProxPointer); - - _doc = lastSkipDoc; - count += numSkipped; - } - - } - // done skipping, now just scan - do { if (!next()) return false; Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -13,25 +13,13 @@ CL_NS_DEF(index) SegmentTermPositions::SegmentTermPositions(const SegmentReader* _parent): - SegmentTermDocs(_parent){ -//Func - Constructor -//Pre - Parent != NULL -//Post - The instance has been created - - CND_PRECONDITION(_parent != NULL, "Parent is NULL"); - - proxStream = _parent->proxStream->clone(); - - CND_CONDITION(proxStream != NULL,"proxStream is NULL"); - - position = 0; - proxCount = 0; + SegmentTermDocs(_parent), proxStream(NULL)// the proxStream will be cloned lazily when nextPosition() is called for the first time + ,lazySkipPointer(-1), lazySkipProxCount(0) +{ + CND_CONDITION(_parent != NULL, "Parent is NULL"); } SegmentTermPositions::~SegmentTermPositions() { -//Func - Destructor -//Pre - true -//Post - The intance has been closed close(); } @@ -42,43 +30,61 @@ return (TermPositions*) this; } -void SegmentTermPositions::seek(const TermInfo* ti) { - SegmentTermDocs::seek(ti); +void SegmentTermPositions::seek(const TermInfo* ti, Term* term) { + SegmentTermDocs::seek(ti, term); if (ti != NULL) - //lazySkipPointer = ti->proxPointer; - proxStream->seek(ti->proxPointer); + lazySkipPointer = ti->proxPointer; - //lazySkipDocCount = 0; + lazySkipProxCount = 0; proxCount = 0; + payloadLength = 0; + needToLoadPayload = false; } 
void SegmentTermPositions::close() { -//Func - Frees the resources -//Pre - true -//Post - The resources have been freed - SegmentTermDocs::close(); //Check if proxStream still exists if(proxStream){ - proxStream->close(); + proxStream->close(); _CLDELETE( proxStream ); } } int32_t SegmentTermPositions::nextPosition() { - /* DSR:CL_BUG: Should raise exception if proxCount == 0 at the + /* todo: DSR:CL_BUG: Should raise exception if proxCount == 0 at the ** beginning of this method, as in ** if (--proxCount == 0) throw ...; ** The JavaDocs for TermPositions.nextPosition declare this constraint, ** but CLucene doesn't enforce it. */ - //lazySkip(); + lazySkip(); proxCount--; - return position += proxStream->readVInt(); + return position += readDeltaPosition(); } +int32_t SegmentTermPositions::readDeltaPosition() { + int32_t delta = proxStream->readVInt(); + if (currentFieldStoresPayloads) { + // if the current field stores payloads then + // the position delta is shifted one bit to the left. + // if the LSB is set, then we have to read the current + // payload length + if ((delta & 1) != 0) { + payloadLength = proxStream->readVInt(); + } + delta = (int32_t)((uint32_t)delta >> (uint32_t)1); + needToLoadPayload = true; + } + return delta; +} + +void SegmentTermPositions::skippingDoc() { + lazySkipProxCount += _freq; +} + bool SegmentTermPositions::next() { - for (int32_t f = proxCount; f > 0; f--) // skip unread positions - proxStream->readVInt(); + // we remember to skip the remaining positions of the current + // document lazily + lazySkipProxCount += proxCount; if (SegmentTermDocs::next()) { // run super proxCount = _freq; // note frequency @@ -89,35 +95,78 @@ } int32_t SegmentTermPositions::read(int32_t* docs, int32_t* freqs, int32_t length) { - _CLTHROWA(CL_ERR_InvalidState,"TermPositions does not support processing multiple documents in one call. 
Use TermDocs instead."); + _CLTHROWA(CL_ERR_UnsupportedOperation,"TermPositions does not support processing multiple documents in one call. Use TermDocs instead."); } -void SegmentTermPositions::skippingDoc() { - for (int32_t f = _freq; f > 0; f--) // skip all positions - proxStream->readVInt(); -// lazySkipDocCount += _freq; +void SegmentTermPositions::skipProx(const int64_t proxPointer, const int32_t _payloadLength){ + // we save the pointer, we might have to skip there lazily + lazySkipPointer = proxPointer; + lazySkipProxCount = 0; + proxCount = 0; + this->payloadLength = _payloadLength; + needToLoadPayload = false; } -void SegmentTermPositions::skipProx(int64_t proxPointer){ - proxStream->seek(proxPointer); -// lazySkipPointer = proxPointer; -// lazySkipDocCount = 0; - proxCount = 0; +void SegmentTermPositions::skipPositions(int32_t n) { + for ( int32_t f = n; f > 0; f-- ) { // skip unread positions + readDeltaPosition(); + skipPayload(); + } } -void SegmentTermPositions::skipPositions(int32_t n) { - for ( int32_t f = n; f > 0; f-- ) - proxStream->readVInt(); +void SegmentTermPositions::skipPayload() { + if (needToLoadPayload && payloadLength > 0) { + proxStream->seek(proxStream->getFilePointer() + payloadLength); + } + needToLoadPayload = false; } void SegmentTermPositions::lazySkip() { - if ( lazySkipPointer != 0 ) { - proxStream->seek( lazySkipPointer ); - lazySkipPointer = 0; + if (proxStream == NULL) { + // clone lazily + proxStream = parent->proxStream->clone(); + } + + // we might have to skip the current payload + // if it was not read yet + skipPayload(); + + if (lazySkipPointer != -1) { + proxStream->seek(lazySkipPointer); + lazySkipPointer = -1; + } + + if (lazySkipProxCount != 0) { + skipPositions(lazySkipProxCount); + lazySkipProxCount = 0; + } +} + +int32_t SegmentTermPositions::getPayloadLength() const { return payloadLength; } + +uint8_t* SegmentTermPositions::getPayload(uint8_t* data, const int32_t offset) { + if (!needToLoadPayload) { + 
_CLTHROWA(CL_ERR_IO, "Payload cannot be loaded more than once for the same term position."); } - if ( lazySkipDocCount != 0 ) { - skipPositions( lazySkipDocCount ); - lazySkipDocCount = 0; + + // read payloads lazily + uint8_t* retArray; + int32_t retOffset; + // TODO: Complete length logic ( possibly using ValueArray ? ) + if (data == NULL /*|| data.length - offset < payloadLength*/) { + // the array is too small to store the payload data, + // so we allocate a new one + _CLDELETE_ARRAY(data); + retArray = _CL_NEWARRAY(uint8_t, payloadLength); + retOffset = 0; + } else { + retArray = data; + retOffset = offset; } -} + proxStream->readBytes(retArray + retOffset, payloadLength); + needToLoadPayload = false; + return retArray; +} +bool SegmentTermPositions::isPayloadAvailable() const { return needToLoadPayload && (payloadLength > 0); } + CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentTermVector.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentTermVector.cpp 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentTermVector.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -13,20 +13,18 @@ CL_NS_USE(util) CL_NS_DEF(index) -Array<int32_t> SegmentTermPositionVector::EMPTY_TERM_POS; +ValueArray<int32_t> SegmentTermPositionVector::EMPTY_TERM_POS; -SegmentTermVector::SegmentTermVector(const TCHAR* field, TCHAR** terms, Array<int32_t>* termFreqs) { - this->field = STRDUP_TtoT(field); - this->terms = terms; - this->termsLen = -1; //lazily get the size of the terms - this->termFreqs = termFreqs; +SegmentTermVector::SegmentTermVector(const TCHAR* _field, TCHAR** _terms, ValueArray<int32_t>* _termFreqs) { + this->field = STRDUP_TtoT(_field); // TODO: Try and avoid this dup (using intern'ing perhaps?) 
+ this->terms = _terms; + this->termsLen = -1; //lazily get the size of the terms array + this->termFreqs = _termFreqs; } SegmentTermVector::~SegmentTermVector(){ - _CLDELETE_CARRAY(field); - _CLDELETE_CARRAY_ALL(terms); - - _CLDELETE_ARRAY(termFreqs->values); + _CLDELETE_LCARRAY(field); + _CLDELETE_LCARRAY_ALL(terms); _CLDELETE(termFreqs); } TermPositionVector* SegmentTermVector::__asTermPositionVector(){ @@ -34,45 +32,45 @@ } const TCHAR* SegmentTermVector::getField() { -return field; + return field; } TCHAR* SegmentTermVector::toString() const{ -StringBuffer sb; -sb.appendChar('{'); -sb.append(field); -sb.append(_T(": ")); + StringBuffer sb; + sb.appendChar('{'); + sb.append(field); + sb.append(_T(": ")); -int32_t i=0; -while ( terms && terms[i] != NULL ){ - if (i>0) - sb.append(_T(", ")); - sb.append(terms[i]); - sb.appendChar('/'); + int32_t i=0; + while ( terms && terms[i] != NULL ){ + if (i>0) + sb.append(_T(", ")); + sb.append(terms[i]); + sb.appendChar('/'); - sb.appendInt((*termFreqs)[i]); + sb.appendInt((*termFreqs)[i]); + } + sb.appendChar('}'); + return sb.toString(); } -sb.appendChar('}'); -return sb.toString(); -} int32_t SegmentTermVector::size() { -if ( terms == NULL ) - return 0; + if ( terms == NULL ) + return 0; -if ( termsLen == -1 ){ - termsLen=0; - while ( terms[termsLen] != 0 ) - termsLen++; + if ( termsLen == -1 ){ + termsLen=0; + while ( terms[termsLen] != 0 ) + termsLen++; + } + return termsLen; } -return termsLen; -} const TCHAR** SegmentTermVector::getTerms() { return (const TCHAR**)terms; } -const Array<int32_t>* SegmentTermVector::getTermFrequencies() { +const ValueArray<int32_t>* SegmentTermVector::getTermFrequencies() { return termFreqs; } @@ -103,92 +101,63 @@ return res >= 0 ? 
res : -1; } -void SegmentTermVector::indexesOf(const TCHAR** termNumbers, const int32_t start, const int32_t len, Array<int32_t>& ret) { +ValueArray<int32_t>* SegmentTermVector::indexesOf(const TCHAR** termNumbers, const int32_t start, const int32_t len) { // TODO: there must be a more efficient way of doing this. // At least, we could advance the lower bound of the terms array // as we find valid indexes. Also, it might be possible to leverage // this even more by starting in the middle of the termNumbers array // and thus dividing the terms array maybe in half with each found index. - ret.length = len; - ret.values = _CL_NEWARRAY(int32_t,len); + ValueArray<int32_t>* ret = _CLNEW ValueArray<int32_t>(len); for (int32_t i=0; i<len; ++i) { - ret.values[i] = indexOf(termNumbers[start+ i]); + ret->values[i] = indexOf(termNumbers[start+ i]); } + return ret; } +void SegmentTermVector::indexesOf(const TCHAR** terms, const int32_t start, const int32_t len, ValueArray<int32_t>& ret){ + ret = *indexesOf(terms,start,len); +} - -SegmentTermPositionVector::SegmentTermPositionVector(const TCHAR* field, TCHAR** terms, Array<int32_t>* termFreqs, Array< Array<int32_t> >* positions, Array< Array<TermVectorOffsetInfo> >* offsets): - SegmentTermVector(field,terms,termFreqs) +SegmentTermPositionVector::SegmentTermPositionVector(const TCHAR* field, TCHAR** terms, ValueArray<int32_t>* termFreqs, ObjectArray< ValueArray<int32_t> >* _positions, ObjectArray< ObjectArray<TermVectorOffsetInfo> >* _offsets) + : SegmentTermVector(field,terms,termFreqs),offsets(_offsets),positions(_positions) { - this->offsets = offsets; - this->positions = positions; } - -void SegmentTermPositionVector::indexesOf(const TCHAR** termNumbers, const int32_t start, const int32_t len, CL_NS(util)::Array<int32_t>& ret) - { SegmentTermVector::indexesOf(termNumbers, start, len, ret); } - - SegmentTermPositionVector::~SegmentTermPositionVector(){ - if ( offsets ){ - for (size_t i=0;i<offsets->length;i++){ - if ( 
offsets->values != NULL ){ - Array<TermVectorOffsetInfo>& offs = offsets->values[i]; - for ( size_t j=0;j<offs.length;j++ ){ - _CLDELETE_ARRAY(offs.values); - } - } - } - _CLDELETE_ARRAY(offsets->values); - _CLDELETE(offsets); - } - if ( positions ){ - for (size_t i=0;i<positions->length;i++){ - if ( positions->values != NULL ){ - Array<int32_t>& pos = positions->values[i]; - for ( size_t j=0;j<pos.length;j++ ){ - _CLDELETE_ARRAY(pos.values); - } - } - } - _CLDELETE_ARRAY(positions->values); - _CLDELETE(positions); - } + _CLLDELETE(offsets); + _CLLDELETE(positions); } +ValueArray<int32_t>* SegmentTermPositionVector::indexesOf(const TCHAR** termNumbers, const int32_t start, const int32_t len) + { return SegmentTermVector::indexesOf(termNumbers, start, len); } + TermPositionVector* SegmentTermPositionVector::__asTermPositionVector(){ return this; } -/** -* Returns an array of TermVectorOffsetInfo in which the term is found. -* -* @param index The position in the array to get the offsets from -* @return An array of TermVectorOffsetInfo objects or the empty list -* @see org.apache.lucene.analysis.Token -*/ -Array<TermVectorOffsetInfo>* SegmentTermPositionVector::getOffsets(const size_t index) { + +ObjectArray<TermVectorOffsetInfo>* SegmentTermPositionVector::getOffsets(const size_t index) { if(offsets == NULL) return NULL; if (index >=0 && index < offsets->length) - return &offsets->values[index]; + return offsets->values[index]; else return &TermVectorOffsetInfo::EMPTY_OFFSET_INFO; } -/** -* Returns an array of positions in which the term is found. -* Terms are identified by the index at which its number appears in the -* term String array obtained from the <code>indexOf</code> method. 
-*/ -Array<int32_t>* SegmentTermPositionVector::getTermPositions(const size_t index) { +ValueArray<int32_t>* SegmentTermPositionVector::getTermPositions(const size_t index) { if(positions == NULL) return NULL; if (index >=0 && index < positions->length) - return &positions->values[index]; + return positions->values[index]; else return &EMPTY_TERM_POS; } + +void SegmentTermPositionVector::indexesOf(const TCHAR** termNumbers, const int32_t start, const int32_t len, CL_NS(util)::ValueArray<int32_t>& ret) +{ + ret = *indexesOf(termNumbers,start,len); +} + CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/index/TermVector.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/TermVector.h 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/TermVector.h 2009-04-12 13:21:07 UTC (rev 2985) @@ -17,10 +17,10 @@ struct TermVectorOffsetInfo; class TermPositionVector; -/** Provides access to stored term vector of - * a document field. The vector consists of the name of the field, an array of the terms tha occur in the field of the - * {@link org.apache.lucene.document.Document} and a parallel array of frequencies. Thus, getTermFrequencies()[5] corresponds with the - * frequency of getTerms()[5], assuming there are at least 5 terms in the Document. +/** Provides access to stored term vector of + * a document field. The vector consists of the name of the field, an array of the terms tha occur in the field of the + * {@link org.apache.lucene.document.Document} and a parallel array of frequencies. Thus, getTermFrequencies()[5] corresponds with the + * frequency of getTerms()[5], assuming there are at least 5 terms in the Document. */ class CLUCENE_EXPORT TermFreqVector:LUCENE_BASE { public: @@ -28,7 +28,7 @@ } /** - * The {@link org.apache.lucene.document.Fieldable} name. + * The Field name. * @return The name of the field this vector is associated with. 
* */ @@ -53,7 +53,7 @@ * The size of the returned array is size() * @memory Returning a pointer to internal data. Do not delete. */ - virtual const CL_NS(util)::Array<int32_t>* getTermFrequencies() = 0; + virtual const CL_NS(util)::ValueArray<int32_t>* getTermFrequencies() = 0; /** Return an index in the term numbers array returned from @@ -73,7 +73,7 @@ * @param start index in the array where the list of terms starts * @param len the number of terms in the list */ - virtual void indexesOf(const TCHAR** terms, const int32_t start, const int32_t len, CL_NS(util)::Array<int32_t>& ret) = 0; + virtual void indexesOf(const TCHAR** terms, const int32_t start, const int32_t len, CL_NS(util)::ValueArray<int32_t>& ret) = 0; /** Solve the diamond inheritence problem by providing a reinterpret function. * No dynamic casting is required and no RTTI data is needed to do this @@ -82,19 +82,45 @@ }; - +/** +* The TermVectorOffsetInfo class holds information pertaining to a Term in a {@link TermPositionVector}'s +* offset information. This offset information is the character offset as set during the Analysis phase (and thus may not be the actual offset in the +* original content). +*/ struct CLUCENE_EXPORT TermVectorOffsetInfo { +public: + /** + * Convenience declaration when creating a {@link org.apache.lucene.index.TermPositionVector} that stores only position information. 
+ */ +private: int startOffset; int endOffset; -public: - static CL_NS(util)::Array<TermVectorOffsetInfo> EMPTY_OFFSET_INFO; +public: // TODO: Remove after TermVectorWriter has been ported + static CL_NS(util)::ObjectArray<TermVectorOffsetInfo> EMPTY_OFFSET_INFO; TermVectorOffsetInfo(); ~TermVectorOffsetInfo(); TermVectorOffsetInfo(int32_t startOffset, int32_t endOffset); + + /** + * The accessor for the ending offset for the term + * @return The offset + */ int32_t getEndOffset() const; - void setEndOffset(int32_t endOffset); + void setEndOffset(const int32_t _endOffset); + + /** + * The accessor for the starting offset of the term. + * + * @return The offset + */ int32_t getStartOffset() const; - void setStartOffset(int32_t startOffset); + void setStartOffset(const int32_t _startOffset); + + /** + * Two TermVectorOffsetInfos are equals if both the start and end offsets are the same + * @param o The comparison Object + * @return true if both {@link #getStartOffset()} and {@link #getEndOffset()} are the same for both objects. + */ bool equals(TermVectorOffsetInfo* o); size_t hashCode() const; }; @@ -112,7 +138,7 @@ * term String array obtained from the <code>indexOf</code> method. * May return null if positions have not been stored. */ - virtual CL_NS(util)::Array<int32_t>* getTermPositions(const size_t index) = 0; + virtual CL_NS(util)::ValueArray<int32_t>* getTermPositions(const size_t index) = 0; /** * Returns an array of TermVectorOffsetInfo in which the term is found. 
@@ -123,7 +149,7 @@ * @param index The position in the array to get the offsets from * @return An array of TermVectorOffsetInfo objects or the empty list */ - virtual CL_NS(util)::Array<TermVectorOffsetInfo>* getOffsets(const size_t index) = 0; + virtual CL_NS(util)::ObjectArray<TermVectorOffsetInfo>* getOffsets(const size_t index) = 0; virtual ~TermPositionVector(){ } Modified: branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp 2009-04-12 12:53:18 UTC (rev 2984) +++ branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp 2009-04-12 13:21:07 UTC (rev 2985) @@ -40,14 +40,14 @@ tvf = d->openInput(fbuf, readBufferSize); tvfFormat = checkValidFormat(tvf); if (-1 == docStoreOffset) { - //this->docStoreOffset = 0; - this->_size = static_cast<int32_t>(tvx->length() >> 3); + this->docStoreOffset = 0; + this->_size = static_cast<int64_t>(tvx->length() >> 3); } else { this->docStoreOffset = docStoreOffset; this->_size = size; // Verify the file is long enough to hold all of our // docs - CND_CONDITION( ((int32_t) (tvx->length() / 8)) >= size + docStoreOffset , "file is not ling enought to hold all our docs"); + CND_CONDITION( ((int64_t) (tvx->length() / 8)) >= size + docStoreOffset , "file is not long enough to hold all of our docs"); } } @@ -63,34 +63,6 @@ close(); } }); -/* - char fbuf[CL_MAX_NAME]; - strcpy(fbuf,segment); - char* fpbuf=fbuf+strlen(fbuf); - - strcpy(fpbuf, TermVectorsWriter::LUCENE_TVX_EXTENSION); - if (d->fileExists(fbuf)) { - tvx = d->openInput(fbuf); - checkValidFormat(tvx); - - strcpy(fpbuf, TermVectorsWriter::LUCENE_TVD_EXTENSION); - tvd = d->openInput(fbuf); - tvdFormat = checkValidFormat(tvd); - - strcpy(fpbuf, TermVectorsWriter::LUCENE_TVF_EXTENSION); - tvf = d->openInput(fbuf); - tvfFormat = checkValidFormat(tvf); - - _size = tvx->length() / 8; - }else{ - tvx = NULL; - tvd = NULL; - tvf = 
NULL; - _size = 0; - } - - this->fieldInfos = fieldInfos; -*/ } TermVectorsReader::TermVectorsReader(const TermVectorsReader& copy) @@ -115,16 +87,32 @@ close(); } +int32_t TermVectorsReader::checkValidFormat(CL_NS(store)::IndexInput* in){ + int32_t format = in->readInt(); + if (format > TermVectorsWriter::FORMAT_VERSION) + { + CL_NS(util)::StringBuffer err; + err.append(_T("Incompatible format version: ")); + err.appendInt(format); + err.append(_T(" expected ")); + err.appendInt(TermVectorsWriter::FORMAT_VERSION); + err.append(_T(" or less")); + _CLTHROWT(CL_ERR_CorruptIndex,err.getBuffer()); + } + return format; +} + void TermVectorsReader::close(){ - // why don't we trap the exception and at least make sure that + // make all effort to close up. Keep the first exception + // and throw it as a new one. + // todo: why don't we trap the exception and at least make sure that // all streams that we can close are closed? CLuceneError keep; bool thrown = false; if (tvx != NULL){ - try{ - tvx->close(); - }catch(CLuceneError& err){ + try{tvx->close();} + catch(CLuceneError& err){ if ( err.number() == CL_ERR_IO ){ keep = err; thrown = true; @@ -134,9 +122,8 @@ _CLDELETE(tvx);//delete even if error thrown } if (tvd != NULL){ - try{ - tvd->close(); - }catch(CLuceneError& err){ + try{tvd->close();} + catch(CLuceneError& err){ if ( err.number() == CL_ERR_IO ){ keep = err; thrown = true; @@ -146,9 +133,8 @@ _CLDELETE(tvd); } if (tvf != NULL){ - try{ - tvf->close(); - }catch(CLuceneError& err){ + try{tvf->close();} + catch(CLuceneError& err){ if ( err.number() == CL_ERR_IO ){ keep = err; thrown = true; @@ -162,16 +148,18 @@ throw keep; } -TermFreqVector* TermVectorsReader::get(const int32_t docNum, const TCHAR* field){ - // Check if no term vectors are available for this segment at all - int32_t fieldNumber = fieldInfos->fieldNumber(field); - TermFreqVector* result = NULL; - if (tvx != NULL) { +int64_t TermVectorsReader::size() const{ + return _size; +} + +void 
TermVectorsReader::get(const int32_t docNum, const TCHAR* field, TermVectorMapper* mapper){ + if (tvx != NULL) { + int32_t fieldNumber = fieldInfos->fieldNumber(field); //We need to account for the FORMAT_SIZE at when seeking in the tvx //We don't need to do this in other seeks because we already have the // file pointer //that was written in another file - tvx->seek(((docNum + docStoreOffset) * 8L) + TermVectorsWriter::FORMAT_SIZE); + tvx->seek(((docNum + docStoreOffset) * 8L) + FORMAT_SIZE); int64_t position = tvx->readLong(); tvd->seek(position); @@ -182,10 +170,11 @@ int32_t number = 0; int32_t found = -1; for (int32_t i = 0; i < fieldCount; ++i) { - if(tvdFormat == TermVectorsWriter::FORMAT_VERSION) + if(tvdFormat == FORMAT_VERSION) number = tvd->readVInt(); else number += tvd->readVInt(); + if (number == fieldNumber) found = i; } @@ -195,20 +184,34 @@... [truncated message content] |
From: <syn...@us...> - 2009-04-12 12:53:22
|
Revision: 2984 http://clucene.svn.sourceforge.net/clucene/?rev=2984&view=rev Author: synhershko Date: 2009-04-12 12:53:18 +0000 (Sun, 12 Apr 2009) Log Message: ----------- Tweaking CuAssertStrEquals Modified Paths: -------------- branches/lucene2_3_2/src/test/CuTest.cpp branches/lucene2_3_2/src/test/CuTest.h Modified: branches/lucene2_3_2/src/test/CuTest.cpp =================================================================== --- branches/lucene2_3_2/src/test/CuTest.cpp 2009-04-08 11:32:26 UTC (rev 2983) +++ branches/lucene2_3_2/src/test/CuTest.cpp 2009-04-12 12:53:18 UTC (rev 2984) @@ -245,10 +245,13 @@ CuFail(tc, _T("assert failed")); } -void CuAssertStrEquals(CuTest* tc, const TCHAR* preMessage, const TCHAR* expected, const TCHAR* actual) +void CuAssertStrEquals(CuTest* tc, const TCHAR* preMessage, const TCHAR* expected, const TCHAR* actual, bool bDelActual) { CuString* message; - if (_tcscmp(expected, actual) == 0) return; + if (_tcscmp(expected, actual) == 0) { + if (bDelActual) _CLDELETE_LCARRAY(actual); + return; + } message = CuStringNew(); if (preMessage) { CuStringAppend(message, preMessage); @@ -258,6 +261,7 @@ CuStringAppend(message, expected); CuStringAppend(message, _T("\n<----\nbut saw\n---->\n")); CuStringAppend(message, actual); + if (bDelActual) _CLDELETE_LCARRAY(actual); CuStringAppend(message, _T("\n<----")); CuFail(tc, message->buffer); CuStringFree(message); Modified: branches/lucene2_3_2/src/test/CuTest.h =================================================================== --- branches/lucene2_3_2/src/test/CuTest.h 2009-04-08 11:32:26 UTC (rev 2983) +++ branches/lucene2_3_2/src/test/CuTest.h 2009-04-12 12:53:18 UTC (rev 2984) @@ -66,7 +66,7 @@ void CuNotImpl(CuTest* tc, const TCHAR* message); void CuAssert(CuTest* tc, const TCHAR* message, int condition); void CuAssertTrue(CuTest* tc, int condition); -void CuAssertStrEquals(CuTest* tc, const TCHAR* preMessage, const TCHAR* expected, const TCHAR* actual); +void CuAssertStrEquals(CuTest* tc, 
const TCHAR* preMessage, const TCHAR* expected, const TCHAR* actual, bool bDelActual = false); void CuAssertIntEquals(CuTest* tc, const TCHAR* preMessage, int expected, int actual); void CuAssertPtrEquals(CuTest* tc, const TCHAR* preMessage, const void* expected, const void* actual); void CuAssertPtrNotNull(CuTest* tc, const TCHAR* preMessage, const void* pointer); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-08 11:32:42
|
Revision: 2983 http://clucene.svn.sourceforge.net/clucene/?rev=2983&view=rev Author: synhershko Date: 2009-04-08 11:32:26 +0000 (Wed, 08 Apr 2009) Log Message: ----------- Added shortcut macro for deletion of array and its contents without NULLifying the pointer (mainly used in destructors) Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/debug/mem.h Modified: branches/lucene2_3_2/src/core/CLucene/debug/mem.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/debug/mem.h 2009-04-06 14:14:43 UTC (rev 2982) +++ branches/lucene2_3_2/src/core/CLucene/debug/mem.h 2009-04-08 11:32:26 UTC (rev 2983) @@ -50,6 +50,7 @@ //a shortcut for deleting a carray and all its contents #define _CLDELETE_CARRAY_ALL(x) {if ( x!=NULL ){ for(int xcda=0;x[xcda]!=NULL;xcda++)_CLDELETE_CARRAY(x[xcda]);}_CLDELETE_ARRAY(x)}; +#define _CLDELETE_LCARRAY_ALL(x) {if ( x!=NULL ){ for(int xcda=0;x[xcda]!=NULL;xcda++)_CLDELETE_LCARRAY(x[xcda]);}_CLDELETE_LARRAY(x)}; #define _CLDELETE_CaARRAY_ALL(x) {if ( x!=NULL ){ for(int xcda=0;x[xcda]!=NULL;xcda++)_CLDELETE_CaARRAY(x[xcda]);}_CLDELETE_ARRAY(x)}; #define _CLDELETE_ARRAY_ALL(x) {if ( x!=NULL ){ for(int xcda=0;x[xcda]!=NULL;xcda++)_CLDELETE(x[xcda]);}_CLDELETE_ARRAY(x)}; #ifndef _CLDELETE_CaARRAY This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2009-04-06 14:14:47
|
Revision: 2982 http://clucene.svn.sourceforge.net/clucene/?rev=2982&view=rev Author: synhershko Date: 2009-04-06 14:14:43 +0000 (Mon, 06 Apr 2009) Log Message: ----------- Fix mem leak Modified Paths: -------------- branches/lucene2_3_2/src/test/document/TestNumberTools.cpp Modified: branches/lucene2_3_2/src/test/document/TestNumberTools.cpp =================================================================== --- branches/lucene2_3_2/src/test/document/TestNumberTools.cpp 2009-04-06 12:51:42 UTC (rev 2981) +++ branches/lucene2_3_2/src/test/document/TestNumberTools.cpp 2009-04-06 14:14:43 UTC (rev 2982) @@ -46,7 +46,9 @@ void testMin(CuTest *tc) { // make sure the constants convert to their equivelents CuAssertTrue(tc, LUCENE_INT64_MIN_SHOULDBE == NumberTools::stringToLong(const_cast<TCHAR*>(NumberTools::MIN_STRING_VALUE))); - CuAssertStrEquals(tc, _T("Min value"), NumberTools::MIN_STRING_VALUE, NumberTools::longToString(LUCENE_INT64_MIN_SHOULDBE)); + TCHAR* actual = NumberTools::longToString(LUCENE_INT64_MIN_SHOULDBE); + CuAssertStrEquals(tc, _T("Min value"), NumberTools::MIN_STRING_VALUE, actual); + _CLDELETE_LCARRAY(actual); // test near MIN, too for (int64_t l = LUCENE_INT64_MIN_SHOULDBE; l < LUCENE_INT64_MIN_SHOULDBE + 10000; l++) { @@ -57,7 +59,9 @@ void testMax(CuTest *tc) { // make sure the constants convert to their equivelents CuAssertTrue(tc, LUCENE_INT64_MAX_SHOULDBE == NumberTools::stringToLong(const_cast<TCHAR*>(NumberTools::MAX_STRING_VALUE))); - CuAssertStrEquals(tc, _T("Max value"), NumberTools::MAX_STRING_VALUE, NumberTools::longToString(LUCENE_INT64_MAX_SHOULDBE)); + TCHAR* actual = NumberTools::longToString(LUCENE_INT64_MAX_SHOULDBE); + CuAssertStrEquals(tc, _T("Max value"), NumberTools::MAX_STRING_VALUE, actual); + _CLDELETE_LCARRAY(actual); // test near MAX, too for (int64_t l = LUCENE_INT64_MAX_SHOULDBE; l > LUCENE_INT64_MAX_SHOULDBE - 10000; l--) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open 
Source development site. |