You can subscribe to this list here.
2004 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(25) |
Dec
(67) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2005 |
Jan
(125) |
Feb
(23) |
Mar
(167) |
Apr
(2) |
May
|
Jun
(19) |
Jul
(304) |
Aug
(181) |
Sep
(189) |
Oct
(145) |
Nov
(110) |
Dec
(44) |
2006 |
Jan
(303) |
Feb
(40) |
Mar
(2) |
Apr
(143) |
May
|
Jun
(74) |
Jul
(31) |
Aug
(7) |
Sep
(21) |
Oct
(33) |
Nov
(102) |
Dec
(36) |
2007 |
Jan
|
Feb
(16) |
Mar
(38) |
Apr
(34) |
May
(3) |
Jun
(4) |
Jul
(4) |
Aug
(13) |
Sep
(5) |
Oct
|
Nov
|
Dec
|
2008 |
Jan
(2) |
Feb
|
Mar
(13) |
Apr
|
May
(18) |
Jun
(48) |
Jul
(136) |
Aug
(45) |
Sep
(21) |
Oct
(32) |
Nov
|
Dec
(9) |
2009 |
Jan
(4) |
Feb
|
Mar
(33) |
Apr
(23) |
May
(6) |
Jun
(3) |
Jul
(11) |
Aug
|
Sep
(5) |
Oct
|
Nov
|
Dec
|
2010 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(3) |
Sep
|
Oct
|
Nov
|
Dec
|
From: <ust...@us...> - 2008-10-06 20:06:44
|
Revision: 2934 http://clucene.svn.sourceforge.net/clucene/?rev=2934&view=rev Author: ustramooner Date: 2008-10-06 19:41:07 +0000 (Mon, 06 Oct 2008) Log Message: ----------- finalised fixes for ThreadLocal. This included a fix in FSDirectory::SharedHandle. Removed THIS_LOCK from VoidList, to limit mutex construction to places its only needed Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h branches/lucene2_3_2/src/core/CLucene/search/CachingWrapperFilter.cpp branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.cpp branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.h branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/Lock.cpp branches/lucene2_3_2/src/core/CLucene/store/LockFactory.cpp branches/lucene2_3_2/src/core/CLucene/store/LockFactory.h branches/lucene2_3_2/src/core/CLucene/store/_Lock.h branches/lucene2_3_2/src/core/CLucene/util/VoidList.h branches/lucene2_3_2/src/core/CLucene/util/VoidMap.h Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp 2008-10-06 19:41:07 UTC (rev 2934) @@ -774,7 +774,7 @@ //Pre - true //Post - All the norms have been destroyed - SCOPED_LOCK_MUTEX(_norms.THIS_LOCK) + SCOPED_LOCK_MUTEX(_norms_LOCK) //Create an interator initialized at the beginning of norms NormsType::iterator itr = _norms.begin(); //Iterate through all the norms Modified: branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp 
2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp 2008-10-06 19:41:07 UTC (rev 2934) @@ -118,7 +118,7 @@ void TermVectorsReader::close(){ // why don't we trap the exception and at least make sure that // all streams that we can close are closed? - CLuceneError keep(0,"",false); + CLuceneError keep; bool thrown = false; if (tvx != NULL){ Modified: branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h 2008-10-06 19:41:07 UTC (rev 2934) @@ -200,6 +200,7 @@ //Holds all norms for all fields in the segment typedef CL_NS(util)::CLHashtable<const TCHAR*,Norm*,CL_NS(util)::Compare::TChar, CL_NS(util)::Equals::TChar> NormsType; NormsType _norms; + DEFINE_MUTEX(_norms_LOCK) uint8_t* ones; uint8_t* fakeNorms(); Modified: branches/lucene2_3_2/src/core/CLucene/search/CachingWrapperFilter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/CachingWrapperFilter.cpp 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/search/CachingWrapperFilter.cpp 2008-10-06 19:41:07 UTC (rev 2934) @@ -38,6 +38,7 @@ CL_NS(util)::Deletor::Object<BitSetHolder> > CacheType; CacheType cache; + DEFINE_MUTEX(cache_LOCK) Internal(): cache(false,true) { @@ -57,7 +58,7 @@ } BitSet* AbstractCachingFilter::bits(IndexReader* reader){ - SCOPED_LOCK_MUTEX(internal->cache.THIS_LOCK) + SCOPED_LOCK_MUTEX(internal->cache_LOCK) BitSetHolder* cached = internal->cache.get(reader); if ( cached != NULL ) return cached->bits; @@ -67,7 +68,7 @@ return bs; } void AbstractCachingFilter::closeCallback(CL_NS(index)::IndexReader* reader, void*){ - SCOPED_LOCK_MUTEX(internal->cache.THIS_LOCK) + SCOPED_LOCK_MUTEX(internal->cache_LOCK) 
internal->cache.remove(reader); } Modified: branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.cpp 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.cpp 2008-10-06 19:41:07 UTC (rev 2934) @@ -54,6 +54,7 @@ }; hitqueueCacheType* FieldSortedHitQueue::Comparators = _CLNEW hitqueueCacheType(false,true); +DEFINE_MUTEX(FieldSortedHitQueue::Comparators_LOCK) void FieldSortedHitQueue::_shutdown(){ Comparators->clear(); @@ -209,7 +210,7 @@ : _CLNEW FieldCacheImpl::FileEntry (field, type); { - SCOPED_LOCK_MUTEX(Comparators->THIS_LOCK) + SCOPED_LOCK_MUTEX(Comparators_LOCK) hitqueueCacheReaderType* readerCache = Comparators->get(reader); if (readerCache == NULL){ _CLDELETE(entry); @@ -223,7 +224,7 @@ } void FieldSortedHitQueue::closeCallback(CL_NS(index)::IndexReader* reader, void*){ - SCOPED_LOCK_MUTEX(Comparators->THIS_LOCK) + SCOPED_LOCK_MUTEX(Comparators_LOCK) Comparators->remove(reader); } @@ -234,7 +235,7 @@ : _CLNEW FieldCacheImpl::FileEntry (field, type); { - SCOPED_LOCK_MUTEX(Comparators->THIS_LOCK) + SCOPED_LOCK_MUTEX(Comparators_LOCK) hitqueueCacheReaderType* readerCache = Comparators->get(reader); if (readerCache == NULL) { readerCache = _CLNEW hitqueueCacheReaderType(true); Modified: branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.h 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.h 2008-10-06 19:41:07 UTC (rev 2934) @@ -44,6 +44,7 @@ * caches comparators instead of term values. 
*/ static hitqueueCacheType* Comparators; + STATIC_DEFINE_MUTEX(Comparators_LOCK) /** Cleanup static data */ static CLUCENE_LOCAL void _shutdown(); Modified: branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp 2008-10-06 19:41:07 UTC (rev 2934) @@ -41,6 +41,7 @@ * synchronize access between readers and writers. */ static CL_NS(util)::CLHashMap<const char*,FSDirectory*,CL_NS(util)::Compare::Char,CL_NS(util)::Equals::Char> DIRECTORIES(false,false); + STATIC_DEFINE_MUTEX(DIRECTORIES_LOCK) bool FSDirectory::disableLocks=false; @@ -57,9 +58,9 @@ int32_t fhandle; int64_t _length; int64_t _fpos; - DEFINE_MUTEX(THIS_LOCK) + DEFINE_MUTEX(*THIS_LOCK) char path[CL_MAX_DIR]; //todo: this is only used for cloning, better to get information from the fhandle - SharedHandle(); + SharedHandle(const char* path); ~SharedHandle(); }; SharedHandle* handle; @@ -116,8 +117,7 @@ if ( __bufferSize == -1 ) __bufferSize = CL_NS(store)::BufferedIndexOutput::BUFFER_SIZE; - SharedHandle* handle = _CLNEW SharedHandle(); - strcpy(handle->path,path); + SharedHandle* handle = _CLNEW SharedHandle(path); //Open the file handle->fhandle = ::_cl_open(path, _O_BINARY | O_RDONLY | _O_RANDOM, _S_IREAD ); @@ -156,16 +156,20 @@ if ( other.handle == NULL ) _CLTHROWA(CL_ERR_NullPointer, "other handle is null"); - SCOPED_LOCK_MUTEX(other.handle->THIS_LOCK) + SCOPED_LOCK_MUTEX(*other.handle->THIS_LOCK) handle = _CL_POINTER(other.handle); _pos = other.handle->_fpos; //note where we are currently... 
} - FSDirectory::FSIndexInput::SharedHandle::SharedHandle(){ + FSDirectory::FSIndexInput::SharedHandle::SharedHandle(const char* path){ fhandle = 0; _length = 0; _fpos = 0; - path[0]=0; + strcpy(this->path,path); + +#ifdef _LUCENE_THREADMUTEX + THIS_LOCK = new _LUCENE_THREADMUTEX; +#endif } FSDirectory::FSIndexInput::SharedHandle::~SharedHandle() { if ( fhandle >= 0 ){ @@ -191,7 +195,32 @@ } void FSDirectory::FSIndexInput::close() { BufferedIndexInput::close(); +#ifdef _LUCENE_THREADMUTEX + if ( handle != NULL ){ + //here we have a bit of a problem... we need to lock the handle to ensure that we can + //safely delete the handle... but if we delete the handle, then the scoped unlock, + //won't be able to unlock the mutex... + + //take a reference of the lock object... + _LUCENE_THREADMUTEX* mutex = handle->THIS_LOCK; + //lock the mutex + mutex->lock(); + + //determine if we are about to delete the handle... + bool dounlock = ( handle->__cl_refcount > 1 ); + //decdelete (deletes if refcount is down to 0 + _CLDECDELETE(handle); + + //printf("handle=%d\n", handle->__cl_refcount); + if ( dounlock ){ + mutex->unlock(); + }else{ + delete mutex; + } + } +#else _CLDECDELETE(handle); +#endif } void FSDirectory::FSIndexInput::seekInternal(const int64_t position) { @@ -201,9 +230,9 @@ /** IndexInput methods */ void FSDirectory::FSIndexInput::readInternal(uint8_t* b, const int32_t len) { - SCOPED_LOCK_MUTEX(handle->THIS_LOCK) CND_PRECONDITION(handle!=NULL,"shared file handle has closed"); CND_PRECONDITION(handle->fhandle>=0,"file is not open"); + SCOPED_LOCK_MUTEX(*handle->THIS_LOCK) if ( handle->_fpos != _pos ){ if ( fileSeek(handle->fhandle,_pos,SEEK_SET) != _pos ){ @@ -469,7 +498,7 @@ if ( !file || !*file ) _CLTHROWA(CL_ERR_IO,"Invalid directory"); - SCOPED_LOCK_MUTEX(DIRECTORIES.THIS_LOCK) + SCOPED_LOCK_MUTEX(DIRECTORIES_LOCK) dir = DIRECTORIES.get(file); if ( dir == NULL ){ dir = _CLNEW FSDirectory(file,_create,lockFactory); @@ -563,7 +592,7 @@ } void FSDirectory::close(){ 
- SCOPED_LOCK_MUTEX(DIRECTORIES.THIS_LOCK) + SCOPED_LOCK_MUTEX(DIRECTORIES_LOCK) { SCOPED_LOCK_MUTEX(THIS_LOCK) Modified: branches/lucene2_3_2/src/core/CLucene/store/Lock.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/Lock.cpp 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/store/Lock.cpp 2008-10-06 19:41:07 UTC (rev 2934) @@ -60,21 +60,22 @@ - SingleInstanceLock::SingleInstanceLock( LocksType* locks, const char* lockName ) + SingleInstanceLock::SingleInstanceLock( LocksType* locks, _LUCENE_THREADMUTEX* locks_LOCK, const char* lockName ) { this->locks = locks; + this->locks_LOCK = locks_LOCK; this->lockName = lockName; } bool SingleInstanceLock::obtain() { - SCOPED_LOCK_MUTEX(locks->THIS_LOCK); + SCOPED_LOCK_MUTEX(*locks_LOCK); return locks->insert( lockName ).second; } void SingleInstanceLock::release() { - SCOPED_LOCK_MUTEX(locks->THIS_LOCK); + SCOPED_LOCK_MUTEX(*locks_LOCK); LocksType::iterator itr = locks->find( lockName ); if ( itr != locks->end() ) { locks->remove(itr, true); @@ -83,7 +84,7 @@ bool SingleInstanceLock::isLocked() { - SCOPED_LOCK_MUTEX(locks->THIS_LOCK); + SCOPED_LOCK_MUTEX(*locks_LOCK); return locks->find( lockName ) == locks->end(); } Modified: branches/lucene2_3_2/src/core/CLucene/store/LockFactory.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/LockFactory.cpp 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/store/LockFactory.cpp 2008-10-06 19:41:07 UTC (rev 2934) @@ -55,12 +55,12 @@ LuceneLock* SingleInstanceLockFactory::makeLock( const char* lockName ) { - return _CLNEW SingleInstanceLock( locks, lockName ); + return _CLNEW SingleInstanceLock( locks, &locks_LOCK, lockName ); } void SingleInstanceLockFactory::clearLock( const char* lockName ) { - SCOPED_LOCK_MUTEX(locks->THIS_LOCK); + SCOPED_LOCK_MUTEX(locks_LOCK); LocksType::iterator itr = 
locks->find( lockName ); if ( itr != locks->end() ) { locks->remove( itr ); Modified: branches/lucene2_3_2/src/core/CLucene/store/LockFactory.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/LockFactory.h 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/store/LockFactory.h 2008-10-06 19:41:07 UTC (rev 2934) @@ -37,6 +37,7 @@ class CLUCENE_EXPORT SingleInstanceLockFactory: public LockFactory { private: LocksType* locks; + DEFINE_MUTEX(locks_LOCK); public: SingleInstanceLockFactory(); Modified: branches/lucene2_3_2/src/core/CLucene/store/_Lock.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/_Lock.h 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/store/_Lock.h 2008-10-06 19:41:07 UTC (rev 2934) @@ -28,9 +28,10 @@ private: const char* lockName; LocksType* locks; + DEFINE_MUTEX(*locks_LOCK); public: - SingleInstanceLock( LocksType* locks, const char* lockName ); + SingleInstanceLock( LocksType* locks, _LUCENE_THREADMUTEX* locks_LOCK, const char* lockName ); bool obtain(); void release(); Modified: branches/lucene2_3_2/src/core/CLucene/util/VoidList.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/VoidList.h 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/util/VoidList.h 2008-10-06 19:41:07 UTC (rev 2934) @@ -22,8 +22,6 @@ bool dv; typedef _base base; public: - DEFINE_MUTEX(THIS_LOCK) - typedef typename _base::const_iterator const_iterator; typedef typename _base::iterator iterator; Modified: branches/lucene2_3_2/src/core/CLucene/util/VoidMap.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/VoidMap.h 2008-10-05 00:51:25 UTC (rev 2933) +++ branches/lucene2_3_2/src/core/CLucene/util/VoidMap.h 2008-10-06 19:41:07 UTC 
(rev 2934) @@ -37,8 +37,6 @@ bool dv; typedef _base base; public: - DEFINE_MUTEX(THIS_LOCK) - typedef typename _base::iterator iterator; typedef typename _base::const_iterator const_iterator; typedef CL_NS_STD(pair)<_kt, _vt> _pair; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-06 19:43:03
|
Revision: 2935 http://clucene.svn.sourceforge.net/clucene/?rev=2935&view=rev Author: ustramooner Date: 2008-10-06 19:42:01 +0000 (Mon, 06 Oct 2008) Log Message: ----------- changelog Modified Paths: -------------- branches/lucene2_3_2/ChangeLog Modified: branches/lucene2_3_2/ChangeLog =================================================================== --- branches/lucene2_3_2/ChangeLog 2008-10-06 19:41:07 UTC (rev 2934) +++ branches/lucene2_3_2/ChangeLog 2008-10-06 19:42:01 UTC (rev 2935) @@ -1,3 +1,5 @@ +Removed jstreams namespace. Sorry, I couldn't think of a way to nicely deprecate jstreams. + version 0.9.23: Changes: * Static object fields have been changed to method accessors (SortField::FIELDDOC now should be accessed as SortField::FIELDDOC(), for example). Classes changed: FieldCache, ScoreDocComparator, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2008-10-05 00:54:07
|
Revision: 2933 http://clucene.svn.sourceforge.net/clucene/?rev=2933&view=rev Author: synhershko Date: 2008-10-05 00:51:25 +0000 (Sun, 05 Oct 2008) Log Message: ----------- - Fixes [ 2088975 ] MultiFieldQueryParser Crash when searching for stopwords - Commented out ThreadLocal tests, which are not yet compiling well on MSVC - Added a new test suite for MultiFieldQueryParser. It still doesn't pass anything, and not everything is included yet (expect one F in the test-all). - Added non-static parse() method to MultiFieldQueryParser - cl_test mem-leak fix for when a failure happens in assertQueryEquals Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h branches/lucene2_3_2/src/test/CMakeLists.txt branches/lucene2_3_2/src/test/debug/TestError.cpp branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp branches/lucene2_3_2/src/test/test.h branches/lucene2_3_2/src/test/tests.cpp Added Paths: ----------- branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp 2008-10-05 00:32:05 UTC (rev 2932) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp 2008-10-05 00:51:25 UTC (rev 2933) @@ -35,12 +35,17 @@ { BooleanQuery* bQuery = _CLNEW BooleanQuery( true ); int32_t i = 0; - while ( fields[i] != NULL ){ - Query* q = QueryParser::parse(query, fields[i], analyzer); + while ( fields[i] != NULL ){ + Query* q = QueryParser::parse(query, fields[i], analyzer); + if (q && (q->getQueryName()!="BooleanQuery" || ((BooleanQuery*)q)->getClauseCount() > 0)) { + //todo: Move to using BooleanClause::Occur bQuery->add(q, true, false, false); + } else { + _CLDELETE(q); + } - i++; - } + i++; 
+ } return bQuery; } @@ -52,25 +57,34 @@ while ( fields[i] != NULL ) { Query* q = QueryParser::parse(query, fields[i], analyzer); - uint8_t flag = flags[i]; - switch (flag) - { + if (q && (q->getQueryName()!="BooleanQuery" || ((BooleanQuery*)q)->getClauseCount() > 0)) { + uint8_t flag = flags[i]; + switch (flag) + { + //todo: Move to using BooleanClause::Occur case MultiFieldQueryParser::REQUIRED_FIELD: - bQuery->add(q, true, true, false); - break; - case MultiFieldQueryParser::PROHIBITED_FIELD: - bQuery->add(q, true, false, true); - break; - default: - bQuery->add(q, true, false, false); - break; - } + bQuery->add(q, true, true, false); + break; + case MultiFieldQueryParser::PROHIBITED_FIELD: + bQuery->add(q, true, false, true); + break; + default: + bQuery->add(q, true, false, false); + break; + } + } else { + _CLDELETE(q); + } i++; } return bQuery; } +//not static +CL_NS(search)::Query* MultiFieldQueryParser::parse(const TCHAR* query) { + return parse(query, this->fields, this->analyzer); +} Query* MultiFieldQueryParser::GetFieldQuery(const TCHAR* field, TCHAR* queryText, int32_t slop){ if (field == NULL) { Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h 2008-10-05 00:32:05 UTC (rev 2932) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h 2008-10-05 00:51:25 UTC (rev 2933) @@ -88,10 +88,10 @@ * <p><pre> * Usage: * <code> - * String[] fields = {"filename", "contents", "description"}; - * int32_t[] flags = {MultiFieldQueryParser.NORMAL FIELD, - * MultiFieldQueryParser.REQUIRED FIELD, - * MultiFieldQueryParser.PROHIBITED FIELD,}; + * TCHAR** fields = {"filename", "contents", "description"}; + * int8_t* flags = {MultiFieldQueryParser::NORMAL FIELD, + * MultiFieldQueryParser::REQUIRED FIELD, + * MultiFieldQueryParser::PROHIBITED FIELD}; * parse(query, fields, 
flags, analyzer); * </code> * </pre> @@ -112,8 +112,9 @@ */ static CL_NS(search)::Query* parse(const TCHAR* query, const TCHAR** fields, const uint8_t* flags, CL_NS(analysis)::Analyzer* analyzer); + // non-static version of the above + CL_NS(search)::Query* parse(const TCHAR* query); - protected: CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, TCHAR* queryText); CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, TCHAR* queryText, int32_t slop); Modified: branches/lucene2_3_2/src/test/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/test/CMakeLists.txt 2008-10-05 00:32:05 UTC (rev 2932) +++ branches/lucene2_3_2/src/test/CMakeLists.txt 2008-10-05 00:51:25 UTC (rev 2933) @@ -22,6 +22,7 @@ ./CuTest.cpp ./testall.cpp ./queryParser/TestQueryParser.cpp +./queryParser/TestMultiFieldQueryParser.cpp ./analysis/TestAnalysis.cpp ./analysis/TestAnalyzers.cpp ./debug/TestError.cpp Modified: branches/lucene2_3_2/src/test/debug/TestError.cpp =================================================================== --- branches/lucene2_3_2/src/test/debug/TestError.cpp 2008-10-05 00:32:05 UTC (rev 2932) +++ branches/lucene2_3_2/src/test/debug/TestError.cpp 2008-10-05 00:51:25 UTC (rev 2933) @@ -5,7 +5,7 @@ * the GNU Lesser General Public License, as specified in the COPYING file. 
------------------------------------------------------------------------------*/ #include "test.h" -#include "CLucene/util/_ThreadLocal.h" +//#include "CLucene/util/_ThreadLocal.h" #include "CLucene/util/Equators.h" void testError ( CuTest *tc ) @@ -34,6 +34,7 @@ } } +/* typedef CL_NS(util)::ThreadLocal<char*, CL_NS(util)::Deletor::acArray> tlTest; struct Data{ tlTest* tl; @@ -87,13 +88,13 @@ for ( i=0;i<threadsCount;i++ ) _LUCENE_THREAD_JOIN ( threads[i] ); } - +*/ CuSuite *testdebug ( void ) { CuSuite *suite = CuSuiteNew ( _T ( "CLucene Debug Test" ) ); - //SUITE_ADD_TEST ( suite, testError ); - SUITE_ADD_TEST ( suite, testThreadLocal ); + SUITE_ADD_TEST ( suite, testError ); + //SUITE_ADD_TEST ( suite, testThreadLocal ); return suite; } Added: branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp (rev 0) +++ branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp 2008-10-05 00:51:25 UTC (rev 2933) @@ -0,0 +1,166 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +#include "test.h" + +class QPTestFilter: public TokenFilter { +public: + + bool inPhrase; + int32_t savedStart, savedEnd; + + /** + * Filter which discards the token 'stop' and which expands the + * token 'phrase' into 'phrase1 phrase2' + */ + QPTestFilter(TokenStream* in): + TokenFilter(in,true), + inPhrase(false), + savedStart(0), + savedEnd(0) + { + } + + bool next(Token* token) { + if (inPhrase) { + inPhrase = false; + token->set( _T("phrase2"), savedStart, savedEnd); + return true; + }else{ + while( input->next(token) ){ + if ( _tcscmp(token->termBuffer(), _T("phrase")) == 0 ) { + inPhrase = true; + savedStart = token->startOffset(); + savedEnd = token->endOffset(); + token->set( _T("phrase1"), savedStart, savedEnd); + return true; + }else if ( _tcscmp(token->termBuffer(), _T("stop") ) !=0 ){ + return true; + } + } + } + return false; + } +}; + +class QPTestAnalyzer: public Analyzer { +public: + QPTestAnalyzer() { + } + + /** Filters LowerCaseTokenizer with StopFilter. 
*/ + TokenStream* tokenStream(const TCHAR* fieldName, Reader* reader) { + return _CLNEW QPTestFilter(_CLNEW LowerCaseTokenizer(reader)); + } +}; + +void assertEquals(CuTest *tc,const TCHAR* result, Query* q) { + if ( q == NULL ) + return; + + const TCHAR* s = q->toString(); + int ret = _tcscmp(s,result); + _CLDELETE_LCARRAY(s); + _CLDELETE(q); + if ( ret != 0 ) { + CuFail(tc, _T("FAILED Query yielded /%s/, expecting /%s/\n"), s, result); + } +} + +// verify parsing of query using a stopping analyzer +void assertStopQueryEquals(CuTest *tc, TCHAR* qtxt, TCHAR* expectedRes) { + const TCHAR* fields[] = {_T("b"), _T("t"), NULL }; + //Occur occur[] = {Occur.SHOULD, Occur.SHOULD}; + QPTestAnalyzer *a = _CLNEW QPTestAnalyzer(); + MultiFieldQueryParser mfqp(fields, a); + + Query *q = mfqp.parse(qtxt); + assertEquals(tc, expectedRes, q); + + q = MultiFieldQueryParser::parse(qtxt, fields, a); + assertEquals(tc, expectedRes, q); + _CLDELETE(a); +} + +/** test stop words arsing for both the non static form, and for the +* corresponding static form (qtxt, fields[]). 
*/ +void tesStopwordsParsing(CuTest *tc) { + assertStopQueryEquals(tc, _T("one"), _T("b:one t:one")); + assertStopQueryEquals(tc, _T("one stop"), _T("b:one t:one")); + assertStopQueryEquals(tc, _T("one (stop)"), _T("b:one t:one")); + assertStopQueryEquals(tc, _T("one ((stop))"), _T("b:one t:one")); + assertStopQueryEquals(tc, _T("stop"), _T("")); + assertStopQueryEquals(tc, _T("(stop)"), _T("")); + assertStopQueryEquals(tc, _T("((stop))"), _T("")); +} + +void testMFQPSimple(CuTest *tc) { + const TCHAR* fields[] = {_T("b"), _T("t"), NULL}; + Analyzer* a = _CLNEW StandardAnalyzer(); + MultiFieldQueryParser mfqp(fields, a); + + Query *q = mfqp.parse(_T("one")); + assertEquals(tc, _T("b:one t:one"), q); + + q = mfqp.parse(_T("one two")); + assertEquals(tc, _T("(b:one t:one) (b:two t:two)"),q); + + q = mfqp.parse(_T("+one +two")); + assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); + + q = mfqp.parse(_T("+one -two -three")); + assertEquals(tc, _T("+(b:one t:one) -(b:two t:two) -(b:three t:three)"), q); + + q = mfqp.parse(_T("one^2 two")); + assertEquals(tc, _T("((b:one t:one)^2.0) (b:two t:two)"), q); + + q = mfqp.parse(_T("one~ two")); + assertEquals(tc, _T("(b:one~0.5 t:one~0.5) (b:two t:two)"), q); + + q = mfqp.parse(_T("one~0.8 two^2")); + assertEquals(tc, _T("(b:one~0.8 t:one~0.8) ((b:two t:two)^2.0)"), q); + + q = mfqp.parse(_T("one* two*")); + assertEquals(tc, _T("(b:one* t:one*) (b:two* t:two*)"), q); + + q = mfqp.parse(_T("[a TO c] two")); + assertEquals(tc, _T("(b:[a TO c] t:[a TO c]) (b:two t:two)"), q); + + q = mfqp.parse(_T("w?ldcard")); + assertEquals(tc, _T("b:w?ldcard t:w?ldcard"), q); + + q = mfqp.parse(_T("\"foo bar\"")); + assertEquals(tc, _T("b:\"foo bar\" t:\"foo bar\""), q); + + q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); + assertEquals(tc, _T("(b:\"aa bb cc\" t:\"aa bb cc\") (b:\"dd ee\" t:\"dd ee\")"), q); + + q = mfqp.parse(_T("\"foo bar\"~4")); + assertEquals(tc, _T("b:\"foo bar\"~4 t:\"foo bar\"~4"), q); + + // make sure that terms 
which have a field are not touched: + q = mfqp.parse(_T("one f:two")); + assertEquals(tc, _T("(b:one t:one) f:two"), q); + + // AND mode: + mfqp.setDefaultOperator(QueryParser::AND_OPERATOR); + q = mfqp.parse(_T("one two")); + assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); + q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); + assertEquals(tc, _T("+(b:\"aa bb cc\" t:\"aa bb cc\") +(b:\"dd ee\" t:\"dd ee\")"), q); + + _CLDELETE(a); +} + +CuSuite *testMultiFieldQueryParser(void) +{ + CuSuite *suite = CuSuiteNew(_T("CLucene Multi-Field QP Test")); + + SUITE_ADD_TEST(suite, tesStopwordsParsing); + SUITE_ADD_TEST(suite, testMFQPSimple); + + return suite; +} \ No newline at end of file Modified: branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2008-10-05 00:32:05 UTC (rev 2932) +++ branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2008-10-05 00:51:25 UTC (rev 2933) @@ -86,12 +86,13 @@ return; const TCHAR* s = q->toString(_T("field")); - if ( _tcscmp(s,result) != 0 ) { + int ret = _tcscmp(s,result); + _CLDELETE_CARRAY(s); + _CLDELETE(q); + if ( ret != 0 ) { CuFail(tc, _T("FAILED Query /%s/ yielded /%s/, expecting /%s/\n"), query, s, result); } - _CLDELETE_CARRAY(s); - _CLDELETE(q); } Modified: branches/lucene2_3_2/src/test/test.h =================================================================== --- branches/lucene2_3_2/src/test/test.h 2008-10-05 00:32:05 UTC (rev 2932) +++ branches/lucene2_3_2/src/test/test.h 2008-10-05 00:51:25 UTC (rev 2933) @@ -17,6 +17,7 @@ #include "CLucene/store/RAMDirectory.h" #include "CLucene/store/Lock.h" #include "CLucene/index/TermVector.h" +#include "CLucene/queryParser/MultiFieldQueryParser.h" CL_NS_USE(index) @@ -38,6 +39,7 @@ CuSuite *testhighlight(void); CuSuite *testpriorityqueue(void); CuSuite *testQueryParser(void); +CuSuite *testMultiFieldQueryParser(void); CuSuite 
*testqueries(void); CuSuite *testsearch(void); CuSuite *testtermvector(void); Modified: branches/lucene2_3_2/src/test/tests.cpp =================================================================== --- branches/lucene2_3_2/src/test/tests.cpp 2008-10-05 00:32:05 UTC (rev 2932) +++ branches/lucene2_3_2/src/test/tests.cpp 2008-10-05 00:51:25 UTC (rev 2933) @@ -15,6 +15,7 @@ {"highfreq", testhighfreq}, {"priorityqueue", testpriorityqueue}, {"queryparser", testQueryParser}, + {"mfqueryparser", testMultiFieldQueryParser}, {"search", testsearch}, {"queries", testqueries}, {"termvector",testtermvector}, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2008-10-05 00:33:14
|
Revision: 2932 http://clucene.svn.sourceforge.net/clucene/?rev=2932&view=rev Author: synhershko Date: 2008-10-05 00:32:05 +0000 (Sun, 05 Oct 2008) Log Message: ----------- Fixes [ 2088975 ] MultiFieldQueryParser Crash when searching for stopwords in trunk Modified Paths: -------------- trunk/src/CLucene/queryParser/MultiFieldQueryParser.cpp Modified: trunk/src/CLucene/queryParser/MultiFieldQueryParser.cpp =================================================================== --- trunk/src/CLucene/queryParser/MultiFieldQueryParser.cpp 2008-10-04 23:19:23 UTC (rev 2931) +++ trunk/src/CLucene/queryParser/MultiFieldQueryParser.cpp 2008-10-05 00:32:05 UTC (rev 2932) @@ -36,7 +36,10 @@ int32_t i = 0; while ( fields[i] != NULL ){ Query* q = QueryParser::parse(query, fields[i], analyzer); - bQuery->add(q, true, false, false); + if (q && (q->getQueryName()!=_T("BooleanQuery") || ((BooleanQuery*)q)->getClauseCount() > 0)) { + bQuery->add(q, true, false, false); + } else + _CLDELETE(q); i++; } @@ -51,19 +54,23 @@ while ( fields[i] != NULL ) { Query* q = QueryParser::parse(query, fields[i], analyzer); - uint8_t flag = flags[i]; - switch (flag) - { + if (q && (q->getQueryName()!=_T("BooleanQuery") || ((BooleanQuery*)q)->getClauseCount() > 0)) { + uint8_t flag = flags[i]; + switch (flag) + { case MultiFieldQueryParser::REQUIRED_FIELD: - bQuery->add(q, true, true, false); - break; - case MultiFieldQueryParser::PROHIBITED_FIELD: - bQuery->add(q, true, false, true); - break; - default: - bQuery->add(q, true, false, false); - break; - } + bQuery->add(q, true, true, false); + break; + case MultiFieldQueryParser::PROHIBITED_FIELD: + bQuery->add(q, true, false, true); + break; + default: + bQuery->add(q, true, false, false); + break; + } + } else { + _CLDELETE(q); + } i++; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <syn...@us...> - 2008-10-04 23:23:44
|
Revision: 2931 http://clucene.svn.sourceforge.net/clucene/?rev=2931&view=rev Author: synhershko Date: 2008-10-04 23:19:23 +0000 (Sat, 04 Oct 2008) Log Message: ----------- MSVC fix Modified Paths: -------------- branches/lucene2_3_2/src/test/debug/TestError.cpp Modified: branches/lucene2_3_2/src/test/debug/TestError.cpp =================================================================== --- branches/lucene2_3_2/src/test/debug/TestError.cpp 2008-10-03 16:09:23 UTC (rev 2930) +++ branches/lucene2_3_2/src/test/debug/TestError.cpp 2008-10-04 23:19:23 UTC (rev 2931) @@ -67,7 +67,7 @@ void testThreadLocal ( CuTest *tc ) { - int threadsCount = 10; + const int threadsCount = 10; //read using multiple threads... _LUCENE_THREADID_TYPE threads[threadsCount]; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-03 16:09:39
|
Revision: 2930 http://clucene.svn.sourceforge.net/clucene/?rev=2930&view=rev Author: ustramooner Date: 2008-10-03 16:09:23 +0000 (Fri, 03 Oct 2008) Log Message: ----------- notes on max_path... this needs more research. this is so convoluted!!! how do people choose the right macro? Modified Paths: -------------- branches/lucene2_3_2/src/shared/cmake/DefineMAXPATHValue.cmake Modified: branches/lucene2_3_2/src/shared/cmake/DefineMAXPATHValue.cmake =================================================================== --- branches/lucene2_3_2/src/shared/cmake/DefineMAXPATHValue.cmake 2008-10-03 16:08:04 UTC (rev 2929) +++ branches/lucene2_3_2/src/shared/cmake/DefineMAXPATHValue.cmake 2008-10-03 16:09:23 UTC (rev 2930) @@ -1,6 +1,12 @@ #checks if snprintf have bugs MACRO ( DEFINE_MAXPATH_VALUE MaxPathValue ) +# also check for MAXPATHLEN +#or this: +#path_max = pathconf (path, _PC_PATH_MAX); + #if (path_max <= 0) + #path_max = 4096; + #use CHOOSE_SYMBOL mechanism to determine which variable to use... #CHOOSE_SYMBOL (_CL_MAX_PATH "PATH_MAX;MAX_PATH;_MAX_PATH;_POSIX_PATH_MAX" DefineMaxPathValue) #IF ( DefineMaxPathValue ) @@ -20,5 +26,5 @@ #HACK!!! #todo: fix this - SET( ${MaxPathValue} "#define CL_MAX_PATH 260") + SET( ${MaxPathValue} "#define CL_MAX_PATH 4096") ENDMACRO ( DEFINE_MAXPATH_VALUE ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-03 16:08:07
|
Revision: 2929 http://clucene.svn.sourceforge.net/clucene/?rev=2929&view=rev Author: ustramooner Date: 2008-10-03 16:08:04 +0000 (Fri, 03 Oct 2008) Log Message: ----------- fixed delete[]/delete mismatch Modified Paths: -------------- branches/lucene2_3_2/src/test/search/TestSort.cpp Modified: branches/lucene2_3_2/src/test/search/TestSort.cpp =================================================================== --- branches/lucene2_3_2/src/test/search/TestSort.cpp 2008-10-03 15:36:59 UTC (rev 2928) +++ branches/lucene2_3_2/src/test/search/TestSort.cpp 2008-10-03 16:08:04 UTC (rev 2929) @@ -259,7 +259,7 @@ if ( diff>m ) CuAssert(tc,_T("sort_getScores(multi or incorrect) f1!=f2"),false); - _CLDELETE_ARRAY_ALL(v); + _CLDELETE_CARRAY_ALL(v); }else{ scoreMap->insert ( scorePair(v[0], hits->score(i)) ); _CLDELETE_ARRAY(v); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-03 15:37:09
|
Revision: 2928 http://clucene.svn.sourceforge.net/clucene/?rev=2928&view=rev Author: ustramooner Date: 2008-10-03 15:36:59 +0000 (Fri, 03 Oct 2008) Log Message: ----------- fixed a memory leak Modified Paths: -------------- branches/lucene2_3_2/src/test/testall.cpp Modified: branches/lucene2_3_2/src/test/testall.cpp =================================================================== --- branches/lucene2_3_2/src/test/testall.cpp 2008-10-03 15:34:14 UTC (rev 2927) +++ branches/lucene2_3_2/src/test/testall.cpp 2008-10-03 15:36:59 UTC (rev 2928) @@ -195,9 +195,9 @@ if ( times ) printf("Tests run in %dms\n\n",CL_NS(util)::Misc::currentTimeMillis()-startTime); - CuSuiteListDelete(alltests); - exit_point: + if ( alltests != NULL ) + CuSuiteListDelete(alltests); CuStringFree(output); _CLDELETE_CaARRAY(cl_tempDir) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-03 15:34:25
|
Revision: 2927 http://clucene.svn.sourceforge.net/clucene/?rev=2927&view=rev Author: ustramooner Date: 2008-10-03 15:34:14 +0000 (Fri, 03 Oct 2008) Log Message: ----------- threadlocal fix progress Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/TermInfosReader.cpp branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp Modified: branches/lucene2_3_2/src/core/CLucene/index/TermInfosReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/TermInfosReader.cpp 2008-10-03 14:55:06 UTC (rev 2926) +++ branches/lucene2_3_2/src/core/CLucene/index/TermInfosReader.cpp 2008-10-03 15:34:14 UTC (rev 2927) @@ -152,7 +152,7 @@ //Delete the IndexInput _CLDELETE(is); } - enumerators.set(NULL); + enumerators.setNull(); } int64_t TermInfosReader::size() const{ Modified: branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp 2008-10-03 14:55:06 UTC (rev 2926) +++ branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp 2008-10-03 15:34:14 UTC (rev 2927) @@ -26,10 +26,10 @@ #define INIT_THREAD(ret) ret=true extern "C"{ #ifndef _WINBASE_ - __declspec(dllimport) _cl_dword_t __stdcall TlsAlloc( ); + /*__declspec(dllimport) _cl_dword_t __stdcall TlsAlloc( ); __declspec(dllimport) void* __stdcall TlsGetValue(_cl_dword_t dwTlsIndex ); __declspec(dllimport) bool __stdcall TlsSetValue( _cl_dword_t dwTlsIndex, void* lpTlsValue ); - __declspec(dllimport) bool __stdcall TlsFree( _cl_dword_t dwTlsIndex ); + __declspec(dllimport) bool __stdcall TlsFree( _cl_dword_t dwTlsIndex );*/ #define DLL_THREAD_DETACH 3 #endif //_WINBASE_ @@ -38,10 +38,10 @@ _cl_dword_t fdwReason, // reason called void*) // reserved { - if ( fdwReason == DLL_THREAD_DETACH ) - _ThreadLocal::UnregisterCurrentThread(); + if ( fdwReason == DLL_THREAD_DETACH ) + 
_ThreadLocal::UnregisterCurrentThread(); - return TRUE; + return true; } } #elif defined(_CL_HAVE_PTHREAD) @@ -80,7 +80,7 @@ typedef CL_NS ( util ) ::CLMultiMap<const _LUCENE_THREADID_TYPE, ThreadLocals*, CL_NS ( util ) ::CLuceneThreadIdCompare, CL_NS ( util ) ::Deletor::ConstNullVal<_LUCENE_THREADID_TYPE>, - CL_NS ( util ) ::Deletor::ConstNullVal<ThreadLocals*> > ThreadDataType; + CL_NS ( util ) ::Deletor::Object<ThreadLocals> > ThreadDataType; static ThreadDataType* threadData; #ifdef _LUCENE_THREADMUTEX @@ -156,6 +156,10 @@ void _ThreadLocal::set ( void* t ) { + if ( t == NULL ){ + setNull(); + return; + } //make sure we have a threadlocal context (for cleanup) bool ret; INIT_THREAD(ret); @@ -174,12 +178,13 @@ SCOPED_LOCK_MUTEX ( *threadData_LOCK ); if ( threadData == NULL ) - threadData = _CLNEW ThreadDataType ( false,false ); + threadData = _CLNEW ThreadDataType ( false, true ); ThreadLocals* threadLocals = threadData->get(id); if ( threadLocals == NULL ){ threadLocals = _CLNEW ThreadLocals; threadData->put(id,threadLocals); + printf("starting thread %d\n",(int)id); } threadLocals->add(this); } @@ -207,13 +212,18 @@ _LUCENE_THREADID_TYPE id = _LUCENE_CURRTHREADID; SCOPED_LOCK_MUTEX ( *threadData_LOCK ); - ThreadLocals* threadLocals = threadData->get(id); - threadLocals->UnregisterThread(); + ThreadDataType::iterator itr = threadData->find(id); + if ( itr != threadData->end() ){ + ThreadLocals* threadLocals = itr->second; + threadLocals->UnregisterThread(); + threadData->removeitr(itr); + } } void _ThreadLocal::_shutdown() { - + _CLDELETE(threadData_LOCK); + _CLDELETE(threadData); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-03 14:55:15
|
Revision: 2926 http://clucene.svn.sourceforge.net/clucene/?rev=2926&view=rev Author: ustramooner Date: 2008-10-03 14:55:06 +0000 (Fri, 03 Oct 2008) Log Message: ----------- new simple test for ThreadLocals several memory leak fixes start of ThreadLocal changes - with auto thread end detection. needs some work still Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/StdHeader.cpp branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.cpp branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp branches/lucene2_3_2/src/core/CLucene/util/_ThreadLocal.h branches/lucene2_3_2/src/test/debug/TestError.cpp branches/lucene2_3_2/src/test/testall.cpp Modified: branches/lucene2_3_2/src/core/CLucene/StdHeader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/StdHeader.cpp 2008-10-02 21:25:07 UTC (rev 2925) +++ branches/lucene2_3_2/src/core/CLucene/StdHeader.cpp 2008-10-03 14:55:06 UTC (rev 2926) @@ -13,6 +13,7 @@ #include "CLucene/search/FieldSortedHitQueue.h" #include "CLucene/store/LockFactory.h" #include "CLucene/util/_StringIntern.h" +#include "CLucene/util/_ThreadLocal.h" #if defined(_MSC_VER) && defined(_DEBUG) #define CRTDBG_MAP_ALLOC @@ -35,4 +36,6 @@ CLStringIntern::_shutdown(); NoLockFactory::_shutdown(); + +_ThreadLocal::_shutdown(); } Modified: branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.cpp 2008-10-02 21:25:07 UTC (rev 2925) +++ branches/lucene2_3_2/src/core/CLucene/search/FieldSortedHitQueue.cpp 2008-10-03 14:55:06 UTC (rev 2926) @@ -57,6 +57,7 @@ void FieldSortedHitQueue::_shutdown(){ Comparators->clear(); + _CLDELETE(Comparators); } FieldSortedHitQueue::FieldSortedHitQueue (IndexReader* reader, SortField** _fields, int32_t size): Modified: branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp 
=================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp 2008-10-02 21:25:07 UTC (rev 2925) +++ branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp 2008-10-03 14:55:06 UTC (rev 2926) @@ -1,162 +1,238 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" #include "CLucene/LuceneThreads.h" #include "_ThreadLocal.h" #include "CLucene/config/_threads.h" +#include <assert.h> -CL_NS_DEF(util) +CL_NS_DEF ( util ) -//todo: using http://en.wikipedia.org/wiki/Thread-local_storage#Pthreads_implementation -//would work better... but lots of testing would be needed first... -typedef CL_NS(util)::CLMultiMap<const _LUCENE_THREADID_TYPE, _ThreadLocal*, - CL_NS(util)::CLuceneThreadIdCompare, - CL_NS(util)::Deletor::ConstNullVal<_LUCENE_THREADID_TYPE>, - CL_NS(util)::Deletor::ConstNullVal<_ThreadLocal*> > ThreadLocalsType; +/* +* The concept of ThreadLocal is that a ThreadLocal class stores specific values for each unique thread. +* Several thread-end detection techniques are used to delete the thread data if the thread dies before the ThreadLocal class is shut. +* +* The class->thread data mapping is stored in the _ThreadLocal class. +* The thread->datas mapping is in ThreadData. +*/ -typedef CL_NS(util)::CLSetList<_ThreadLocal::ShutdownHook*, - CL_NS(util)::Compare::Void<_ThreadLocal::ShutdownHook>, - CL_NS(util)::Deletor::ConstNullVal<_ThreadLocal::ShutdownHook*> > ShutdownHooksType; - + +//predefine for the shared code... 
+#if defined(_CL_HAVE_WIN32_THREADS) + #define INIT_THREAD(ret) ret=true + extern "C"{ + #ifndef _WINBASE_ + __declspec(dllimport) _cl_dword_t __stdcall TlsAlloc( ); + __declspec(dllimport) void* __stdcall TlsGetValue(_cl_dword_t dwTlsIndex ); + __declspec(dllimport) bool __stdcall TlsSetValue( _cl_dword_t dwTlsIndex, void* lpTlsValue ); + __declspec(dllimport) bool __stdcall TlsFree( _cl_dword_t dwTlsIndex ); + #define DLL_THREAD_DETACH 3 + #endif //_WINBASE_ + + //todo: move this to StdHeader and make it usable by other functions... + bool __stdcall DllMain( unsigned short hinstDLL, // DLL module handle + _cl_dword_t fdwReason, // reason called + void*) // reserved + { + if ( fdwReason == DLL_THREAD_DETACH ) + _ThreadLocal::UnregisterCurrentThread(); + + return TRUE; + } + } +#elif defined(_CL_HAVE_PTHREAD) + pthread_key_t pthread_threadlocal_key; + pthread_once_t pthread_threadlocal_key_once = PTHREAD_ONCE_INIT; + #define INIT_THREAD(ret) \ + pthread_once(&pthread_threadlocal_key_once, pthread_threadlocal_make_key); \ + if (pthread_getspecific(pthread_threadlocal_key) == NULL) { pthread_setspecific(pthread_threadlocal_key, (void*)1); } \ + ret = true; + + //the function that is called when the thread shutsdown + void pthread_threadlocal_destructor(void* _holder){ + _ThreadLocal::UnregisterCurrentThread(); + } + //the key initialiser function + void pthread_threadlocal_make_key() + { + printf("pthread_threadlocal_make_key\n"); + (void) pthread_key_create(&pthread_threadlocal_key, &pthread_threadlocal_destructor); + } +#endif + +class _ThreadLocal; + +/** +* List that holds the list of ThreadLocals that this thread has data in. 
+*/ +class ThreadLocals: private CL_NS ( util ) ::CLVector<_ThreadLocal*, + CL_NS ( util ) ::Deletor::ConstNullVal<_ThreadLocal*> >{ +public: + void UnregisterThread(); + void add(_ThreadLocal* thread); +}; + +//map of thread<>ThreadLocals +typedef CL_NS ( util ) ::CLMultiMap<const _LUCENE_THREADID_TYPE, ThreadLocals*, + CL_NS ( util ) ::CLuceneThreadIdCompare, + CL_NS ( util ) ::Deletor::ConstNullVal<_LUCENE_THREADID_TYPE>, + CL_NS ( util ) ::Deletor::ConstNullVal<ThreadLocals*> > ThreadDataType; +static ThreadDataType* threadData; + #ifdef _LUCENE_THREADMUTEX - //the lock for locking ThreadLocalBase_threadLocals - //we don't use STATIC_DEFINE_MUTEX, because then the initialisation order will be undefined. - static _LUCENE_THREADMUTEX *ThreadLocalBase_LOCK = NULL; + //the lock for locking ThreadData + //we don't use STATIC_DEFINE_MUTEX, because then the initialisation order will be undefined. + static _LUCENE_THREADMUTEX *threadData_LOCK = NULL; #endif -static ThreadLocalsType* ThreadLocalBase_threadLocals; //list of thread locals -//todo: make shutdown hooks generic -static ShutdownHooksType* ThreadLocalBase_shutdownHooks; //list of shutdown hooks. 
+class _ThreadLocal::Internal +{ + public: + typedef CL_NS ( util ) ::CLSet<_LUCENE_THREADID_TYPE, void*, + CL_NS ( util ) ::CLuceneThreadIdCompare, + CL_NS ( util ) ::Deletor::ConstNullVal<_LUCENE_THREADID_TYPE>, + CL_NS ( util ) ::Deletor::ConstNullVal<void*> > LocalsType; + LocalsType locals; + DEFINE_MUTEX ( locals_LOCK ) + AbstractDeletor* _deletor; -class _ThreadLocal::Internal{ -public: - typedef CL_NS(util)::CLSet<_LUCENE_THREADID_TYPE, void*, - CL_NS(util)::CLuceneThreadIdCompare, - CL_NS(util)::Deletor::ConstNullVal<_LUCENE_THREADID_TYPE>, - CL_NS(util)::Deletor::ConstNullVal<void*> > LocalsType; - LocalsType locals; - DEFINE_MUTEX(locals_LOCK) - AbstractDeletor* _deletor; - - Internal(AbstractDeletor* _deletor): - locals(false,false){ + Internal ( AbstractDeletor* _deletor ) : + locals ( false,false ) + { this->_deletor = _deletor; - } - ~Internal(){ - //remove all the thread local data for this object - LocalsType::iterator itr = locals.begin(); - while ( itr != locals.end() ){ - void* val = itr->second; - locals.removeitr(itr); - _deletor->Delete(val); - itr = locals.begin(); } - - delete _deletor; - } + ~Internal() + { + //remove all the thread local data for this object + LocalsType::iterator itr = locals.begin(); + while ( itr != locals.end() ) + { + void* val = itr->second; + locals.removeitr ( itr ); + _deletor->Delete ( val ); + itr = locals.begin(); + } + + delete _deletor; + } }; -_ThreadLocal::_ThreadLocal(CL_NS(util)::AbstractDeletor* _deletor): - internal(_CLNEW Internal(_deletor)) +_ThreadLocal::_ThreadLocal ( CL_NS ( util ) ::AbstractDeletor* _deletor ) : + internal ( _CLNEW Internal ( _deletor ) ) { - - //add this object to the base's list of ThreadLocalBase_threadLocals to be - //notified in case of UnregisterThread() - _LUCENE_THREADID_TYPE id = _LUCENE_CURRTHREADID; - - #ifdef _LUCENE_THREADMUTEX - //slightly un-usual way of initialising mutex, because otherwise our initialisation order would be undefined - if ( ThreadLocalBase_LOCK == 
NULL ){ - ThreadLocalBase_LOCK = _CLNEW _LUCENE_THREADMUTEX; - } - #endif - - if ( ThreadLocalBase_threadLocals == NULL ){ - ThreadLocalBase_threadLocals = _CLNEW ThreadLocalsType(false,false); - } - SCOPED_LOCK_MUTEX(*ThreadLocalBase_LOCK) - ThreadLocalBase_threadLocals->put( id, this ); } -_ThreadLocal::~_ThreadLocal(){ - //remove this object from the ThreadLocalBase threadLocal list - _LUCENE_THREADID_TYPE id = _LUCENE_CURRTHREADID; - SCOPED_LOCK_MUTEX(*ThreadLocalBase_LOCK) - - ThreadLocalsType::iterator itr = ThreadLocalBase_threadLocals->lower_bound(id); - ThreadLocalsType::iterator end = ThreadLocalBase_threadLocals->upper_bound(id); - while ( itr != end ){ - if ( itr->second == this){ - ThreadLocalBase_threadLocals->erase(itr); - break; - } - ++itr; - } - delete internal; +_ThreadLocal::~_ThreadLocal() +{ + delete internal; } -void* _ThreadLocal::get(){ - return internal->locals.get(_LUCENE_CURRTHREADID); +void* _ThreadLocal::get() +{ + SCOPED_LOCK_MUTEX(internal->locals_LOCK) + return internal->locals.get ( _LUCENE_CURRTHREADID ); } -void _ThreadLocal::setNull(){ - set(NULL); +void _ThreadLocal::setNull() +{ + //just delete this thread from the locals list + _LUCENE_THREADID_TYPE id = _LUCENE_CURRTHREADID; + SCOPED_LOCK_MUTEX(internal->locals_LOCK) + Internal::LocalsType::iterator itr = internal->locals.find ( id ); + if ( itr != internal->locals.end() ) + { + void* val = itr->second; + internal->locals.removeitr ( itr ); + internal->_deletor->Delete ( val ); + } } -void _ThreadLocal::set(void* t){ +void _ThreadLocal::set ( void* t ) +{ + //make sure we have a threadlocal context (for cleanup) + bool ret; + INIT_THREAD(ret); + assert(ret); + _LUCENE_THREADID_TYPE id = _LUCENE_CURRTHREADID; - Internal::LocalsType::iterator itr = internal->locals.find(id); - if ( itr != internal->locals.end() ){ - void* val = itr->second; - internal->locals.removeitr(itr); - internal->_deletor->Delete(val); + + //drop a reference to this ThreadLocal in ThreadData + { +#ifdef 
_LUCENE_THREADMUTEX + //slightly un-usual way of initialising mutex, + //because otherwise our initialisation order would be undefined + if ( threadData_LOCK == NULL ) + threadData_LOCK = _CLNEW _LUCENE_THREADMUTEX; +#endif + SCOPED_LOCK_MUTEX ( *threadData_LOCK ); + + if ( threadData == NULL ) + threadData = _CLNEW ThreadDataType ( false,false ); + + ThreadLocals* threadLocals = threadData->get(id); + if ( threadLocals == NULL ){ + threadLocals = _CLNEW ThreadLocals; + threadData->put(id,threadLocals); + } + threadLocals->add(this); } + + { + SCOPED_LOCK_MUTEX(internal->locals_LOCK) + Internal::LocalsType::iterator itr = internal->locals.find ( id ); + if ( itr != internal->locals.end() ) + { + void* val = itr->second; + internal->locals.removeitr ( itr ); + internal->_deletor->Delete ( val ); + } - if ( t != NULL ) - internal->locals.put( id, t ); + if ( t != NULL ) + internal->locals.put ( id, t ); + } + } -void _ThreadLocal::UnregisterCurrentThread(){ +void _ThreadLocal::UnregisterCurrentThread() +{ + if ( threadData == NULL ) + return; _LUCENE_THREADID_TYPE id = _LUCENE_CURRTHREADID; - SCOPED_LOCK_MUTEX(*ThreadLocalBase_LOCK) + SCOPED_LOCK_MUTEX ( *threadData_LOCK ); + + ThreadLocals* threadLocals = threadData->get(id); + threadLocals->UnregisterThread(); +} + +void _ThreadLocal::_shutdown() +{ - ThreadLocalsType::iterator itr = ThreadLocalBase_threadLocals->lower_bound(id); - ThreadLocalsType::iterator end = ThreadLocalBase_threadLocals->upper_bound(id); - while ( itr != end ){ - itr->second->setNull(); - ++itr; - } } -void _ThreadLocal::_shutdown(){ - SCOPED_LOCK_MUTEX(*ThreadLocalBase_LOCK) - - ThreadLocalsType::iterator itr = ThreadLocalBase_threadLocals->begin(); - while ( itr != ThreadLocalBase_threadLocals->end() ){ - itr->second->setNull(); - ++itr; + + + +void ThreadLocals::UnregisterThread() +{ + //this should only be accessed from its own thread... if this changes, then this access has to be locked. 
+ while ( !this->empty() ) + { + _ThreadLocal* tl = this->back(); + this->pop_back(); + printf ( "shutting down thread %d, object: %d, we have %d objects\n", ( int ) _LUCENE_CURRTHREADID, tl, this->size() ); + tl->setNull(); } - if ( ThreadLocalBase_shutdownHooks != NULL ){ - ShutdownHooksType::iterator itr2 = ThreadLocalBase_shutdownHooks->begin(); - while ( itr2 != ThreadLocalBase_shutdownHooks->end() ){ - ShutdownHook* hook = *itr2; - hook(false); - } - } } -void _ThreadLocal::registerShutdownHook(ShutdownHook* hook){ - SCOPED_LOCK_MUTEX(*ThreadLocalBase_LOCK) - if ( ThreadLocalBase_shutdownHooks == NULL ) - ThreadLocalBase_shutdownHooks = _CLNEW ShutdownHooksType(false); - ThreadLocalBase_shutdownHooks->insert(hook); +void ThreadLocals::add(_ThreadLocal* thread){ + //this should only be accessed from its own thread... if this changes, then this access has to be locked. + this->push_back(thread); } - CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/util/_ThreadLocal.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/_ThreadLocal.h 2008-10-02 21:25:07 UTC (rev 2925) +++ branches/lucene2_3_2/src/core/CLucene/util/_ThreadLocal.h 2008-10-03 14:55:06 UTC (rev 2926) @@ -1,83 +1,86 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #ifndef _lucene_util_ThreadLocal_H #define _lucene_util_ThreadLocal_H +CL_NS_DEF ( util ) -//#include "CLucene/util/VoidMap.h" -CL_NS_DEF(util) +/** +* A class which holds thread specific data. 
Calls to get() or set() or to the data kept in the _ThreadLocal +* is invalid after _ThreadLocal has been destroyed. +*/ +class _ThreadLocal +{ + private: + class Internal; + Internal* internal; + public: + _ThreadLocal ( CL_NS ( util ) ::AbstractDeletor* _deletor ); + void* get(); -class _ThreadLocal{ -private: - class Internal; - Internal* internal; - DEFINE_MUTEX(locals_LOCK) -public: - _ThreadLocal(CL_NS(util)::AbstractDeletor* _deletor); - void* get(); - - /** - * Call this function to clear the local thread data for this - * ThreadLocal. Calling set(NULL) does the same thing, except - * this function is virtual and can be called without knowing - * the template. - */ - void setNull(); - void set(void* t); - virtual ~_ThreadLocal(); + /** + * Call this function to clear the local thread data for this + * ThreadLocal. Calling set(NULL) does the same thing, except + * this function is virtual and can be called without knowing + * the template. + */ + void setNull(); + void set ( void* t ); + virtual ~_ThreadLocal(); - /** - * If you want to clean up thread specific memory, then you should - * make sure this thread is called when the thread is not going to be used - * again. This will clean up threadlocal data which can contain quite a lot - * of data, so if you are creating lots of new threads, then it is a good idea - * to use this function, otherwise there will be many memory leaks. - */ - static void UnregisterCurrentThread(); + /** + * For early cleanup of thread data, call this function. It will clear out any + * thread specific data. Useful if you have a long running thread that doesn't + * need to access clucene anymore. + * The thread local code tries to call this automatically when a thread ends. + * Some implementations may be impossible (or not implemented) to detect thread + * endings... then you would have to run this function yourself. 
+ */ + static void UnregisterCurrentThread(); - /** - * Call this function to shutdown CLucene - */ - static CLUCENE_LOCAL void _shutdown(); + /** + * Call this function to shutdown CLucene + */ + static CLUCENE_LOCAL void _shutdown(); - /** - * A hook called when CLucene is starting or shutting down, - * this can be used for setting up and tearing down static - * variables - */ - typedef void ShutdownHook(bool startup); - - /** - * Add this function to the shutdown hook list. This function will be called - * when CLucene is shutdown. - */ - static void registerShutdownHook(ShutdownHook* hook); + /** + * A hook called when CLucene is starting or shutting down, + * this can be used for setting up and tearing down static + * variables + */ + typedef void ShutdownHook ( bool startup ); }; + +/** +* A templated class of _ThreadLocal +* @see _ThreadLocal +*/ template<typename T,typename _deletor> -class ThreadLocal: public _ThreadLocal{ -public: - ThreadLocal(): - _ThreadLocal(_CLNEW _deletor) - { - - } - virtual ~ThreadLocal(){ - } - T get(){ - return (T)_ThreadLocal::get(); - } - void setNull(){ - _ThreadLocal::set((T)NULL); - } - void set(T t){ - _ThreadLocal::set( (T) t); - } +class ThreadLocal: public _ThreadLocal +{ + public: + ThreadLocal() : + _ThreadLocal ( _CLNEW _deletor ) + { + + } + virtual ~ThreadLocal() + { + } + T get() + { + return ( T ) _ThreadLocal::get(); + } + void set ( T t ) + { + _ThreadLocal::set ( ( T ) t ); + } }; CL_NS_END #endif Modified: branches/lucene2_3_2/src/test/debug/TestError.cpp =================================================================== --- branches/lucene2_3_2/src/test/debug/TestError.cpp 2008-10-02 21:25:07 UTC (rev 2925) +++ branches/lucene2_3_2/src/test/debug/TestError.cpp 2008-10-03 14:55:06 UTC (rev 2926) @@ -1,38 +1,100 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the 
Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #include "test.h" +#include "CLucene/util/_ThreadLocal.h" +#include "CLucene/util/Equators.h" - void testError(CuTest *tc){ - const char* msg = "test"; - CLuceneError err(0,msg,false); - CLuceneError err2 = err; - CLuceneError* err3 = &err; - CuAssert(tc,_T("Error did not copy properly"),err.what()!=err2.what()); - CuAssert(tc,_T("Error values did not correspond"),strcmp(err.what(),err2.what())==0); - - IndexReader* reader = NULL; - try{ - RAMDirectory dir; - reader = IndexReader::open(&dir,true); - }catch(CLuceneError&){ - _CLDELETE(reader); - }catch(...){ - _CLDELETE(reader); - CuAssert(tc,_T("Error did not catch properly"),false); - } +void testError ( CuTest *tc ) +{ + const char* msg = "test"; + CLuceneError err ( 0,msg,false ); + CLuceneError err2 = err; + CLuceneError* err3 = &err; + CuAssert ( tc,_T ( "Error did not copy properly" ),err.what() !=err2.what() ); + CuAssert ( tc,_T ( "Error values did not correspond" ),strcmp ( err.what(),err2.what() ) ==0 ); + + IndexReader* reader = NULL; + try + { + RAMDirectory dir; + reader = IndexReader::open ( &dir,true ); } - + catch ( CLuceneError& ) + { + _CLDELETE ( reader ); + } + catch ( ... 
) + { + _CLDELETE ( reader ); + CuAssert ( tc,_T ( "Error did not catch properly" ),false ); + } +} -CuSuite *testdebug(void) +typedef CL_NS(util)::ThreadLocal<char*, CL_NS(util)::Deletor::acArray> tlTest; +struct Data{ + tlTest* tl; + CuTest *tc; +}; +_LUCENE_THREAD_FUNC ( threadLocalTest, arg ) { - CuSuite *suite = CuSuiteNew(_T("CLucene Debug Test")); + Data* data = (Data*)arg; + CuTest *tc = data->tc; + tlTest* tl = data->tl; - SUITE_ADD_TEST(suite, testError); + char* val = tl->get(); - return suite; + CLUCENE_ASSERT(val == NULL); + + tl->set(STRDUP_AtoA("test")); + tl->setNull(); + + val = _CL_NEWARRAY(char, 50); + _snprintf(val, 50, "hello from thread %d", (int)_LUCENE_CURRTHREADID); + + tl->set(val); + + CLUCENE_ASSERT(tl->get() != NULL); + + //wait a bit until thread local deleted our data... + Misc::sleep(1000); + + CLUCENE_ASSERT(tl->get() == NULL); } +void testThreadLocal ( CuTest *tc ) +{ + + int threadsCount = 10; + + //read using multiple threads... + _LUCENE_THREADID_TYPE threads[threadsCount]; + + Data data; + data.tc = tc; + data.tl = _CLNEW tlTest; + + int i; + for ( i=0;i<threadsCount;i++ ) + threads[i] = _LUCENE_THREAD_CREATE ( &threadLocalTest, &data ); + + CL_NS ( util ) ::Misc::sleep ( 500 ); + + _CLDELETE (data.tl); + + for ( i=0;i<threadsCount;i++ ) + _LUCENE_THREAD_JOIN ( threads[i] ); +} + +CuSuite *testdebug ( void ) +{ + CuSuite *suite = CuSuiteNew ( _T ( "CLucene Debug Test" ) ); + + //SUITE_ADD_TEST ( suite, testError ); + SUITE_ADD_TEST ( suite, testThreadLocal ); + + return suite; +} // EOF Modified: branches/lucene2_3_2/src/test/testall.cpp =================================================================== --- branches/lucene2_3_2/src/test/testall.cpp 2008-10-02 21:25:07 UTC (rev 2925) +++ branches/lucene2_3_2/src/test/testall.cpp 2008-10-03 14:55:06 UTC (rev 2926) @@ -11,7 +11,7 @@ #include <stdlib.h> #include <crtdbg.h> #endif -#endif +#endif #include "test.h" @@ -196,11 +196,11 @@ printf("Tests run in 
%dms\n\n",CL_NS(util)::Misc::currentTimeMillis()-startTime); CuSuiteListDelete(alltests); + +exit_point: CuStringFree(output); _CLDELETE_CaARRAY(cl_tempDir) -exit_point: - _lucene_shutdown(); //clears all static memory //print lucenebase debug This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 21:27:52
|
Revision: 2925 http://clucene.svn.sourceforge.net/clucene/?rev=2925&view=rev Author: ustramooner Date: 2008-10-02 21:25:07 +0000 (Thu, 02 Oct 2008) Log Message: ----------- jstreams changes. now that we have size() we can make a better guess about how much to read. in the event our stream doesn't support size(), we take a 10mb of data 'guess'... we should make that configurable and editable (or at least warn the developer) Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp Modified: branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp 2008-10-02 21:23:16 UTC (rev 2924) +++ branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp 2008-10-02 21:25:07 UTC (rev 2925) @@ -8,7 +8,7 @@ #include "_FieldsWriter.h" //#include "CLucene/util/VoidMap.h" -#include "CLucene/util/Reader.h" +#include "CLucene/util/CLStreams.h" #include "CLucene/util/Misc.h" #include "CLucene/store/Directory.h" #include "CLucene/store/_RAMDirectory.h" @@ -159,11 +159,17 @@ if (field->isBinary()) { //todo: since we currently don't support static length vints, we have to //read the entire stream into memory first.... ugly! - jstreams::StreamBase<char>* stream = field->streamValue(); - const char* sd; + InputStream* stream = field->streamValue(); + const signed char* sd; + + int32_t sz = stream->size(); + if ( sz < 0 ) + sz = 10000000; //todo: we should warn the developer here.... + //how do wemake sure we read the entire index in now??? - //todo: we need to have a max amount, and guarantee its all in or throw an error... - int32_t rl = stream->read(sd,10000000,0); + //todo: we need to have a max amount, and guarantee its all in or throw an error.. + //todo: make this value configurable.... 
+ int32_t rl = stream->read(sd, sz, 0); if ( rl < 0 ){ fieldsStream->writeVInt(0); //todo: could we detect this earlier and not actually write the field?? @@ -181,7 +187,7 @@ //read the entire string const TCHAR* rv; - int64_t rl = r->read(rv, LUCENE_INT32_MAX_SHOULDBE); + int64_t rl = r->read(rv, LUCENE_INT32_MAX_SHOULDBE, 0); if ( rl > LUCENE_INT32_MAX_SHOULDBE ) _CLTHROWA(CL_ERR_Runtime,"Field length too long"); else if ( rl < 0 ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 21:25:56
|
Revision: 2924 http://clucene.svn.sourceforge.net/clucene/?rev=2924&view=rev Author: ustramooner Date: 2008-10-02 21:23:16 +0000 (Thu, 02 Oct 2008) Log Message: ----------- cleanups Modified Paths: -------------- branches/lucene2_3_2/src/contribs/CLucene/analysis/LanguageBasedAnalyzer.cpp Modified: branches/lucene2_3_2/src/contribs/CLucene/analysis/LanguageBasedAnalyzer.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/CLucene/analysis/LanguageBasedAnalyzer.cpp 2008-10-02 21:22:04 UTC (rev 2923) +++ branches/lucene2_3_2/src/contribs/CLucene/analysis/LanguageBasedAnalyzer.cpp 2008-10-02 21:23:16 UTC (rev 2924) @@ -6,12 +6,13 @@ #include "CLucene/analysis/standard/StandardFilter.h" #include "CLucene/snowball/SnowballFilter.h" -CL_NS_DEF(analysis) CL_NS_USE(util) CL_NS_USE2(analysis,cjk) CL_NS_USE2(analysis,standard) CL_NS_USE2(analysis,snowball) +CL_NS_DEF(analysis) + LanguageBasedAnalyzer::LanguageBasedAnalyzer(const TCHAR* language, bool stem) { _tcsncpy(lang,language,100); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 21:24:51
|
Revision: 2923 http://clucene.svn.sourceforge.net/clucene/?rev=2923&view=rev Author: ustramooner Date: 2008-10-02 21:22:04 +0000 (Thu, 02 Oct 2008) Log Message: ----------- cleanups and linux fixes Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp Modified: branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp 2008-10-02 21:20:17 UTC (rev 2922) +++ branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp 2008-10-02 21:22:04 UTC (rev 2923) @@ -120,7 +120,7 @@ strcpy(handle->path,path); //Open the file - handle->fhandle = _cl_open(path, _O_BINARY | O_RDONLY | _O_RANDOM, _S_IREAD ); + handle->fhandle = ::_cl_open(path, _O_BINARY | O_RDONLY | _O_RANDOM, _S_IREAD ); //Check if a valid handle was retrieved if (handle->fhandle >= 0){ Modified: branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp 2008-10-02 21:20:17 UTC (rev 2922) +++ branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp 2008-10-02 21:22:04 UTC (rev 2923) @@ -8,8 +8,8 @@ #include "IndexInput.h" #include "IndexOutput.h" +CL_NS_DEF(store) CL_NS_USE(util) -CL_NS_DEF(store) IndexInput::IndexInput() { Modified: branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp 2008-10-02 21:20:17 UTC (rev 2922) +++ branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp 2008-10-02 21:22:04 UTC (rev 2923) @@ -6,6 +6,7 @@ ------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" #include "CLStreams.h" +#include 
"CLucene/util/Misc.h" #include <fcntl.h> #ifdef _CL_HAVE_IO_H @@ -89,7 +90,7 @@ int32_t fillBuffer(signed char* start, int32_t space){ if (fhandle == 0) return -1; // read into the buffer - int32_t nwritten = _read(fhandle, start, space); + int32_t nwritten = ::_read(fhandle, start, space); // check the file stream status if (nwritten == -1 ) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 21:21:17
|
Revision: 2922 http://clucene.svn.sourceforge.net/clucene/?rev=2922&view=rev Author: ustramooner Date: 2008-10-02 21:20:17 +0000 (Thu, 02 Oct 2008) Log Message: ----------- fixed pthread recursive check and pthread lib linkage Modified Paths: -------------- branches/lucene2_3_2/src/shared/CMakeLists.txt branches/lucene2_3_2/src/shared/cmake/CheckPthread.cmake Modified: branches/lucene2_3_2/src/shared/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/shared/CMakeLists.txt 2008-10-02 18:30:38 UTC (rev 2921) +++ branches/lucene2_3_2/src/shared/CMakeLists.txt 2008-10-02 21:20:17 UTC (rev 2922) @@ -32,9 +32,11 @@ INCLUDE (CheckHashmaps) INCLUDE (CheckNamespace) INCLUDE (CheckSnprintf) -INCLUDE (CheckPthread) INCLUDE (CheckStdCallFunctionExists) +find_package(Threads REQUIRED) +INCLUDE (CheckPthread) + ######################################################################## # test for headers ######################################################################## @@ -198,10 +200,8 @@ ######################################################################## #check for pthreads -find_package(Threads REQUIRED) IF ( CMAKE_USE_WIN32_THREADS_INIT ) SET ( _CL_HAVE_WIN32_THREADS 1 ) - SET ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_THREAD_LIBS_INIT}" ) ENDIF ( CMAKE_USE_WIN32_THREADS_INIT ) IF ( CMAKE_USE_PTHREADS_INIT ) SET ( _CL_HAVE_PTHREAD 1 ) @@ -209,6 +209,7 @@ IF ( CMAKE_USE_SPROC_INIT OR CMAKE_HP_PTHREADS_INIT ) MESSAGE(FATAL_ERROR "Threads library not implemented") ENDIF( CMAKE_USE_SPROC_INIT OR CMAKE_HP_PTHREADS_INIT ) +SET ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_THREAD_LIBS_INIT}" ) #define if we have pthreads with recusrive capabilities CHECK_PTHREAD_RECURSIVE(_CL_HAVE_PTHREAD _CL_HAVE_PTHREAD_MUTEX_RECURSIVE) Modified: branches/lucene2_3_2/src/shared/cmake/CheckPthread.cmake =================================================================== --- 
branches/lucene2_3_2/src/shared/cmake/CheckPthread.cmake 2008-10-02 18:30:38 UTC (rev 2921) +++ branches/lucene2_3_2/src/shared/cmake/CheckPthread.cmake 2008-10-02 21:20:17 UTC (rev 2922) @@ -1,7 +1,10 @@ #define if we have pthreads with recusrive capabilities MACRO ( CHECK_PTHREAD_RECURSIVE ifpthread result) + IF ( ${ifpthread} ) + SET ( CMAKE_REQUIRED_FLAGS "${CMAKE_THREAD_LIBS_INIT}") + CHECK_CXX_SOURCE_RUNS(" #include <sys/types.h> #include <pthread.h> @@ -17,5 +20,8 @@ } " ${result} ) #NOTE: pthread_mutexattr_setkind_np is the deprecated name for pthread_mutexattr_settype. old compilers might need it + + + SET ( CMAKE_REQUIRED_FLAGS) ENDIF ( ${ifpthread} ) ENDMACRO ( CHECK_PTHREAD_RECURSIVE ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 18:31:37
|
Revision: 2921 http://clucene.svn.sourceforge.net/clucene/?rev=2921&view=rev Author: ustramooner Date: 2008-10-02 18:30:38 +0000 (Thu, 02 Oct 2008) Log Message: ----------- define _REENTRANT. this is required for gcc Modified Paths: -------------- branches/lucene2_3_2/cmake/DefineOptions.cmake Modified: branches/lucene2_3_2/cmake/DefineOptions.cmake =================================================================== --- branches/lucene2_3_2/cmake/DefineOptions.cmake 2008-10-02 16:04:24 UTC (rev 2920) +++ branches/lucene2_3_2/cmake/DefineOptions.cmake 2008-10-02 18:30:38 UTC (rev 2921) @@ -10,6 +10,8 @@ IF(DISABLE_MULTITHREADING) SET (${extraOptions} "${${extraOptions}} -D_CL_DISABLE_MULTITHREADING") + ELSE(DISABLE_MULTITHREADING) + SET(${extraOptions} "${${extraOptions}} -D_REENTRANT") ENDIF(DISABLE_MULTITHREADING) IF(ENABLE_ASCII_MODE) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 16:14:37
|
Revision: 2910 http://clucene.svn.sourceforge.net/clucene/?rev=2910&view=rev Author: ustramooner Date: 2008-10-02 14:19:05 +0000 (Thu, 02 Oct 2008) Log Message: ----------- The locking mechanism was using openInput and throwing errors. I think it's bad to throw errors when no error occurred. A new overload allows the result to be returned without any real exception being thrown. Backwards compatible with an overload in Directory. Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentInfos.cpp branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h branches/lucene2_3_2/src/core/CLucene/store/Directory.cpp branches/lucene2_3_2/src/core/CLucene/store/Directory.h branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.h branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.h Modified: branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp 2008-10-02 14:18:47 UTC (rev 2909) +++ branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp 2008-10-02 14:19:05 UTC (rev 2910) @@ -191,11 +191,13 @@ } } -IndexInput* CompoundFileReader::openInput(const char* id){ - SCOPED_LOCK_MUTEX(THIS_LOCK) +bool CompoundFileReader::openInput(const char * id, CL_NS(store)::IndexInput *& ret, CLuceneError& error, int32_t bufferSize){ + SCOPED_LOCK_MUTEX(THIS_LOCK) - if (stream == NULL) - _CLTHROWA(CL_ERR_IO,"Stream closed"); + if (stream == NULL){ + error.set(CL_ERR_IO,"Stream closed"); + return false; + } const ReaderFileEntry* entry = entries->get(id); if (entry == NULL){ @@ -203,9 +205,11 @@ strcpy(buf,"No sub-file with id "); strncat(buf,id,CL_MAX_PATH); strcat(buf," found"); - _CLTHROWA(CL_ERR_IO,buf); + error.set(CL_ERR_IO,buf); + 
return false; } - return _CLNEW CSIndexInput(stream, entry->offset, entry->length); + ret = _CLNEW CSIndexInput(stream, entry->offset, entry->length); + return true; } void CompoundFileReader::list(vector<string>* names) const{ Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentInfos.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentInfos.cpp 2008-10-02 14:18:47 UTC (rev 2909) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentInfos.cpp 2008-10-02 14:19:05 UTC (rev 2910) @@ -790,11 +790,10 @@ // getting the right generation. int64_t genB = -1; if (directory != NULL) { + CLuceneError e; for(int32_t i=0;i<defaultGenFileRetryCount;i++) { IndexInput* genInput = NULL; - try { - genInput = directory->openInput(IndexFileNames::SEGMENTS_GEN); - } catch (CLuceneError &e) { + if ( ! directory->openInput(IndexFileNames::SEGMENTS_GEN, genInput, e) ){ //if (e.number == CL_ERR_FileNotFound) { // FileNotFound not yet exists... 
// CL_TRACE("segments.gen open: FileNotFoundException %s", e); // _CLLDELETE(genInput); Modified: branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h 2008-10-02 14:18:47 UTC (rev 2909) +++ branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h 2008-10-02 14:19:05 UTC (rev 2910) @@ -48,7 +48,7 @@ const char* getName() const; void close(); - CL_NS(store)::IndexInput* openInput(const char* id); + bool openInput(const char * name, CL_NS(store)::IndexInput *& ret, CLuceneError& error, int32_t bufferSize=1); /** Returns an array of strings, one for each file in the directory-> */ void list(std::vector<std::string>* names) const; Modified: branches/lucene2_3_2/src/core/CLucene/store/Directory.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/Directory.cpp 2008-10-02 14:18:47 UTC (rev 2909) +++ branches/lucene2_3_2/src/core/CLucene/store/Directory.cpp 2008-10-02 14:19:05 UTC (rev 2910) @@ -56,7 +56,11 @@ return ret; } IndexInput* Directory::openInput(const char* name, int32_t bufferSize){ - return openInput(name); //implementation didnt overload the bufferSize + IndexInput* ret; + CLuceneError err; + if ( ! openInput(name, ret, err, bufferSize) ) + throw err; + return ret; } char** Directory:: list() const{ vector<string> names; Modified: branches/lucene2_3_2/src/core/CLucene/store/Directory.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/Directory.h 2008-10-02 14:18:47 UTC (rev 2909) +++ branches/lucene2_3_2/src/core/CLucene/store/Directory.h 2008-10-02 14:19:05 UTC (rev 2910) @@ -57,9 +57,11 @@ // Returns the length of a file in the directory. virtual int64_t fileLength(const char* name) const = 0; + // An advanced overload to avoid throwing an error. 
if result is false, error is filled with the reason + virtual bool openInput(const char* name, IndexInput*& ret, CLuceneError& error, int32_t bufferSize = -1) = 0; + // Returns a stream reading an existing file. - virtual IndexInput* openInput(const char* name) = 0; - virtual IndexInput* openInput(const char* name, int32_t bufferSize); + IndexInput* openInput(const char* name, int32_t bufferSize=-1); /// Set the modified time of an existing file to now. */ virtual void touchFile(const char* name) = 0; Modified: branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp 2008-10-02 14:18:47 UTC (rev 2909) +++ branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp 2008-10-02 14:19:05 UTC (rev 2910) @@ -64,10 +64,16 @@ }; SharedHandle* handle; int64_t _pos; + FSIndexInput(SharedHandle* handle, int32_t __bufferSize): + BufferedIndexInput(__bufferSize) + { + this->_pos = 0; + this->handle = handle; + }; protected: FSIndexInput(const FSIndexInput& clone); public: - FSIndexInput(const char* path, int32_t bufferSize=CL_NS(store)::BufferedIndexOutput::BUFFER_SIZE); + static bool open(const char* path, IndexInput*& ret, CLuceneError& error, int32_t bufferSize=-1); ~FSIndexInput(); IndexInput* clone() const; @@ -100,9 +106,7 @@ int64_t length() const; }; - FSDirectory::FSIndexInput::FSIndexInput(const char* path, int32_t __bufferSize): - BufferedIndexInput(__bufferSize) - { + bool FSDirectory::FSIndexInput::open(const char* path, IndexInput*& ret, CLuceneError& error, int32_t __bufferSize ) { //Func - Constructor. 
// Opens the file named path //Pre - path != NULL @@ -110,29 +114,38 @@ CND_PRECONDITION(path != NULL, "path is NULL"); - handle = _CLNEW SharedHandle(); + if ( __bufferSize == -1 ) + __bufferSize = CL_NS(store)::BufferedIndexOutput::BUFFER_SIZE; + SharedHandle* handle = _CLNEW SharedHandle(); strcpy(handle->path,path); //Open the file handle->fhandle = _cl_open(path, _O_BINARY | O_RDONLY | _O_RANDOM, _S_IREAD ); //Check if a valid handle was retrieved - if (handle->fhandle < 0){ + if (handle->fhandle >= 0){ + //Store the file length + handle->_length = fileSize(handle->fhandle); + if ( handle->_length == -1 ) + error.set( CL_ERR_IO,"fileStat error" ); + else{ + handle->_fpos = 0; + ret = _CLNEW FSIndexInput(handle, __bufferSize); + return true; + } + }else{ int err = errno; if ( err == ENOENT ) - _CLTHROWA(CL_ERR_IO, "File does not exist"); + error.set(CL_ERR_IO, "File does not exist"); else if ( err == EACCES ) - _CLTHROWA(CL_ERR_IO, "File Access denied"); + error.set(CL_ERR_IO, "File Access denied"); else if ( err == EMFILE ) - _CLTHROWA(CL_ERR_IO, "Too many open files"); + error.set(CL_ERR_IO, "Too many open files"); + else + error.set(CL_ERR_IO, "Could not open file"); } - - //Store the file length - handle->_length = fileSize(handle->fhandle); - if ( handle->_length == -1 ) - _CLTHROWA( CL_ERR_IO,"fileStat error" ); - handle->_fpos = 0; - this->_pos = 0; + _CLDELETE(handle); + return false; } FSDirectory::FSIndexInput::FSIndexInput(const FSIndexInput& other): BufferedIndexInput(other){ @@ -519,10 +532,6 @@ else return buf.st_size; } - - IndexInput* FSDirectory::openInput(const char* name ) { - return openInput(name, CL_NS(store)::BufferedIndexOutput::BUFFER_SIZE); - } IndexInput* FSDirectory::openMMapFile(const char* name, int32_t bufferSize){ #ifdef LUCENE_FS_MMAP @@ -537,7 +546,8 @@ #endif } - IndexInput* FSDirectory::openInput(const char* name, int32_t bufferSize ){ + bool FSDirectory::openInput(const char * name, lucene::store::IndexInput *& ret, 
CLuceneError& error, int32_t bufferSize) + { CND_PRECONDITION(directory[0]!=0,"directory is not open") char fl[CL_MAX_DIR]; priv_getFN(fl, name); @@ -546,10 +556,10 @@ //is >2gb, then some system cannot mmap the file //also some file systems mmap will fail?? could detect here too if ( useMMap && Misc::file_Size(fl) < LUCENE_INT32_MAX_SHOULDBE ) //todo: would this be bigger on 64bit systems?. i suppose it would be...test first - return _CLNEW MMapIndexInput( fl ); + return MMapIndexInput( fl, ret, error, bufferSize ); else #endif - return _CLNEW FSIndexInput( fl, bufferSize ); + return FSIndexInput::open( fl, ret, error, bufferSize ); } void FSDirectory::close(){ Modified: branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.h 2008-10-02 14:18:47 UTC (rev 2909) +++ branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.h 2008-10-02 14:19:05 UTC (rev 2910) @@ -99,8 +99,7 @@ int64_t fileLength(const char* name) const; /// Returns a stream reading an existing file. 
- IndexInput* openInput(const char* name); - IndexInput* openInput(const char* name, int32_t bufferSize); + bool openInput(const char* name, IndexInput*& ret, CLuceneError& err, int32_t bufferSize=-1); IndexInput* openMMapFile(const char* name, int32_t bufferSize=LUCENE_STREAM_BUFFER_SIZE); Modified: branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.cpp 2008-10-02 14:18:47 UTC (rev 2909) +++ branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.cpp 2008-10-02 14:19:05 UTC (rev 2910) @@ -444,13 +444,15 @@ } - IndexInput* RAMDirectory::openInput(const char* name) { + bool RAMDirectory::openInput(const char* name, IndexInput*& ret, CLuceneError& error, int32_t bufferSize) { SCOPED_LOCK_MUTEX(files_mutex); RAMFile* file = files->get(name); if (file == NULL) { /* DSR:PROPOSED: Better error checking. */ - _CLTHROWA(CL_ERR_IO,"[RAMDirectory::open] The requested file does not exist."); + error.set(CL_ERR_IO, "[RAMDirectory::open] The requested file does not exist."); + return false; } - return _CLNEW RAMIndexInput( file ); + ret = _CLNEW RAMIndexInput( file ); + return true; } void RAMDirectory::close(){ Modified: branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.h 2008-10-02 14:18:47 UTC (rev 2909) +++ branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.h 2008-10-02 14:19:05 UTC (rev 2910) @@ -43,7 +43,7 @@ void _copyFromDir(Directory* dir, bool closeDir); FileMap* files; // unlike the java Hashtable, FileMap is not synchronized, and all access must be protected by a lock public: - int64_t sizeInBytes; + int64_t sizeInBytes; //todo DEFINE_MUTABLE_MUTEX(files_mutex) // mutable: const methods must also be able to synchronize properly @@ -87,7 +87,7 @@ virtual IndexOutput* createOutput(const 
char* name); /// Returns a stream reading an existing file. - IndexInput* openInput(const char* name); + bool openInput(const char* name, IndexInput*& ret, CLuceneError& error, int32_t bufferSize = -1); virtual void close(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 16:04:36
|
Revision: 2920 http://clucene.svn.sourceforge.net/clucene/?rev=2920&view=rev Author: ustramooner Date: 2008-10-02 16:04:24 +0000 (Thu, 02 Oct 2008) Log Message: ----------- various cleanups Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp Modified: branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp 2008-10-02 16:03:05 UTC (rev 2919) +++ branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp 2008-10-02 16:04:24 UTC (rev 2920) @@ -7,10 +7,9 @@ #include "CLucene/_ApiHeader.h" #include <assert.h> -////#include "CLucene/util/VoidMap.h" #include "CLucene/util/Misc.h" -#include "CLucene/util/subinputstream.h" #include "CLucene/util/_StringIntern.h" +#include "CLucene/util/CLStreams.h" #include "CLucene/store/Directory.h" #include "CLucene/store/IndexInput.h" #include "CLucene/document/Document.h" @@ -18,6 +17,7 @@ #include "_FieldInfos.h" #include "_FieldsWriter.h" #include "_FieldsReader.h" +#include "CLucene/analysis/AnalysisHeader.h" CL_NS_USE(store) CL_NS_USE(document) @@ -25,19 +25,63 @@ CL_NS_DEF(index) -class FieldsReader::FieldsStreamHolder: public jstreams::StreamBase<char>{ +class FieldsReader::FieldsStreamHolder: public InputStream{ CL_NS(store)::IndexInput* indexInput; - CL_NS(store)::IndexInputStream* indexInputStream; - jstreams::SubInputStream<char>* subStream; + signed char* buffer; + size_t bufferLen; + int32_t subLength; + int32_t pos; + void growBuffer(int32_t to){ + this->bufferLen = to; + this->buffer = (signed char*)realloc(buffer,bufferLen); + } public: - FieldsStreamHolder(CL_NS(store)::IndexInput* indexInput, int32_t subLength); - ~FieldsStreamHolder(); - int32_t read(const char*& start, int32_t _min, int32_t _max); - int64_t skip(int64_t ntoskip); - int64_t reset(int64_t pos); - jstreams::SubInputStream<char>* getStream() const; + 
FieldsStreamHolder(CL_NS(store)::IndexInput* indexInput, int32_t subLength, int32_t bufferLength=4096){ + this->indexInput = indexInput->clone(); + this->subLength = subLength; + this->bufferLen = cl_min(bufferLength,subLength); + this->pos = 0; + this->buffer = (signed char*)malloc(bufferLen); + } + ~FieldsStreamHolder(){ + indexInput->close(); + _CLDELETE(indexInput); + if ( buffer != NULL ) + free(buffer); + } + int32_t read(const signed char*& start, int32_t min, int32_t _max){ + int32_t max = cl_max(min,_max); + int32_t ret = cl_min(max, this->subLength-this->pos); + if ( ret > bufferLen ) + growBuffer(ret); + + start = this->buffer; + if ( ret == 0 ) + return -1; + indexInput->readBytes((uint8_t*)this->buffer, ret); + pos += ret; + return ret; + } + int64_t skip(int64_t ntoskip){ + int32_t origPos = this->pos; + int32_t r; + while ( ntoskip > 0 && pos < subLength ){ + r = (int32_t)cl_min3(ntoskip, this->bufferLen, this->subLength-this->pos); + indexInput->readBytes((uint8_t*)this->buffer, r); + this->pos += r; + ntoskip -= r; + } + return this->pos-origPos; + } + int64_t position(){ + return this->pos; + } + size_t size(){ + return this->subLength; + } }; + FieldsReader::FieldsReader(Directory* d, const char* segment, FieldInfos* fn, int32_t _readBufferSize, int32_t _docStoreOffset, int32_t size): fieldInfos(fn), closed(false) { @@ -52,42 +96,42 @@ bool success = false; try { - const char* buf = Misc::segmentname(segment,".fdt"); - cloneableFieldsStream = d->openInput( buf, _readBufferSize ); - fieldsStream = cloneableFieldsStream->clone(); + const char* buf = Misc::segmentname(segment,".fdt"); + cloneableFieldsStream = d->openInput( buf, _readBufferSize ); + fieldsStream = cloneableFieldsStream->clone(); _CLDELETE_LCaARRAY( buf ); buf = Misc::segmentname(segment,".fdx"); indexStream = d->openInput( buf, _readBufferSize ); _CLDELETE_LCaARRAY( buf ); - if (_docStoreOffset != -1) { - // We read only a slice out of this shared fields file - this->docStoreOffset 
= _docStoreOffset; - this->_size = size; - - // Verify the file is long enough to hold all of our - // docs - CND_CONDITION(((int32_t) (indexStream.length() / 8)) >= size + this->docStoreOffset, - "the file is not long enough to hold all of our docs"); - } else { - this->docStoreOffset = 0; - this->_size = (int32_t) (indexStream->length() >> 3); - } - - //_size = (int32_t)indexStream->length()/8; - - numTotalDocs = (int32_t) (indexStream->length() >> 3); + if (_docStoreOffset != -1) { + // We read only a slice out of this shared fields file + this->docStoreOffset = _docStoreOffset; + this->_size = size; + + // Verify the file is long enough to hold all of our + // docs + CND_CONDITION(((int32_t) (indexStream.length() / 8)) >= size + this->docStoreOffset, + "the file is not long enough to hold all of our docs"); + } else { + this->docStoreOffset = 0; + this->_size = (int32_t) (indexStream->length() >> 3); + } + + //_size = (int32_t)indexStream->length()/8; + + numTotalDocs = (int32_t) (indexStream->length() >> 3); success = true; - } _CLFINALLY ({ - // With lock-less commits, it's entirely possible (and - // fine) to hit a FileNotFound exception above. In - // this case, we want to explicitly close any subset - // of things that were opened so that we don't have to - // wait for a GC to do so. - if (!success) { - close(); - } + } _CLFINALLY ({ + // With lock-less commits, it's entirely possible (and + // fine) to hit a FileNotFound exception above. In + // this case, we want to explicitly close any subset + // of things that were opened so that we don't have to + // wait for a GC to do so. 
+ if (!success) { + close(); + } }); } @@ -99,10 +143,10 @@ close(); } -void FieldsReader::ensureOpen() { - if (closed) { - _CLTHROWA(CL_ERR_IllegalState, "this FieldsReader is closed"); - } +void FieldsReader::ensureOpen() { + if (closed) { + _CLTHROWA(CL_ERR_IllegalState, "this FieldsReader is closed"); + } } void FieldsReader::close() { @@ -120,11 +164,11 @@ _CLDELETE(indexStream); } /* - CL_NS(store)::IndexInput* localFieldsStream = fieldsStreamTL.get(); - if (localFieldsStream != NULL) { - localFieldsStream->close(); - fieldsStreamTL->set(NULL); - }*/ + CL_NS(store)::IndexInput* localFieldsStream = fieldsStreamTL.get(); + if (localFieldsStream != NULL) { + localFieldsStream->close(); + fieldsStreamTL->set(NULL); + }*/ closed = true; } } @@ -133,7 +177,7 @@ return _size; } -bool FieldsReader::doc(int32_t n, Document* doc, CL_NS(document)::FieldSelector* fieldSelector) { +bool FieldsReader::doc(int32_t n, Document& doc, CL_NS(document)::FieldSelector* fieldSelector) { if ( (n + docStoreOffset) * 8L > indexStream->length() ) return false; indexStream->seek((n + docStoreOffset) * 8L); @@ -146,41 +190,41 @@ FieldInfo* fi = fieldInfos->fieldInfo(fieldNumber); if ( fi == NULL ) _CLTHROWA(CL_ERR_IO, "Field stream is invalid"); - FieldSelectorResult acceptField = (fieldSelector == NULL) ? LOAD : fieldSelector->accept(fi->name); + FieldSelector::FieldSelectorResult acceptField = (fieldSelector == NULL) ? 
FieldSelector::LOAD : fieldSelector->accept(fi->name); uint8_t bits = fieldsStream->readByte(); - CND_CONDITION(bits <= FieldsWriter::FIELD_IS_COMPRESSED + FieldsWriter::FIELD_IS_TOKENIZED + FieldsWriter::FIELD_IS_BINARY, - "invalid field bits"); - + CND_CONDITION(bits <= FieldsWriter::FIELD_IS_COMPRESSED + FieldsWriter::FIELD_IS_TOKENIZED + FieldsWriter::FIELD_IS_BINARY, + "invalid field bits"); + const bool compressed = (bits & FieldsWriter::FIELD_IS_COMPRESSED) != 0; const bool tokenize = (bits & FieldsWriter::FIELD_IS_TOKENIZED) != 0; - const bool binary = (bits & FieldsWriter::FIELD_IS_BINARY) != 0; - + const bool binary = (bits & FieldsWriter::FIELD_IS_BINARY) != 0; + //TODO: Find an alternative approach here if this list continues to grow beyond the //list of 5 or 6 currently here. See Lucene 762 for discussion - if (acceptField = LOAD) { + if (acceptField = FieldSelector::LOAD) { addField(doc, fi, binary, compressed, tokenize); } - else if (acceptField = LOAD_FOR_MERGE) { + else if (acceptField = FieldSelector::LOAD_FOR_MERGE) { addFieldForMerge(doc, fi, binary, compressed, tokenize); } - else if (acceptField = LOAD_AND_BREAK){ + else if (acceptField = FieldSelector::LOAD_AND_BREAK){ addField(doc, fi, binary, compressed, tokenize); break;//Get out of this loop } - else if (acceptField = LAZY_LOAD) { + else if (acceptField = FieldSelector::LAZY_LOAD) { addFieldLazy(doc, fi, binary, compressed, tokenize); } - else if (acceptField = SIZE){ + else if (acceptField = FieldSelector::SIZE){ skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed)); } - else if (acceptField = SIZE_AND_BREAK){ + else if (acceptField = FieldSelector::SIZE_AND_BREAK){ addFieldSize(doc, fi, binary, compressed); break; } else { skipField(binary, compressed); - } + } /* if ((bits & FieldsWriter::FIELD_IS_BINARY) != 0) { @@ -300,17 +344,17 @@ } } -void FieldsReader::addFieldLazy(CL_NS(document)::Document* doc, const FieldInfo* fi, const bool binary, +void 
FieldsReader::addFieldLazy(CL_NS(document)::Document& doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize) { if (binary) { int32_t toRead = fieldsStream->readVInt(); int64_t pointer = fieldsStream->getFilePointer(); if (compressed) { //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS)); - doc->add(*_CLNEW LazyField(this, fi->name, Field::STORE_COMPRESS, toRead, pointer)); + doc.add(*_CLNEW LazyField(this, fi->name, Field::STORE_COMPRESS, toRead, pointer)); } else { //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES)); - doc->add(*_CLNEW LazyField(this, fi->name, Field::STORE_YES, toRead, pointer)); + doc.add(*_CLNEW LazyField(this, fi->name, Field::STORE_YES, toRead, pointer)); } //Need to move the pointer ahead by toRead positions fieldsStream->seek(pointer + toRead); @@ -331,44 +375,39 @@ f = _CLNEW LazyField(this, fi->name, Field::STORE_YES | getIndexType(fi, tokenize) | getTermVectorType(fi), length, pointer); f->setOmitNorms(fi->omitNorms); } - doc->add(*f); + doc.add(*f); } } // in merge mode we don't uncompress the data of a compressed field -void FieldsReader::addFieldForMerge(CL_NS(document)::Document* doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize) { +void FieldsReader::addFieldForMerge(CL_NS(document)::Document& doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize) { void* data; Field::ValueType v; if ( binary || compressed) { int32_t toRead = fieldsStream->readVInt(); - FieldsReader::FieldsStreamHolder* subStream = new FieldsReader::FieldsStreamHolder(fieldsStream, toRead); - //final byte[] b = new byte[toRead]; - //fieldsStream->readBytes(b, toRead); - data = subStream->getStream(); + data = _CLNEW FieldsReader::FieldsStreamHolder(fieldsStream, toRead); v = Field::VALUE_STREAM; } else { data = fieldsStream->readString(); v = Field::VALUE_STRING; } - doc->add(*_CLNEW FieldForMerge(data, v, fi, binary, 
compressed, tokenize)); + doc.add(*_CLNEW FieldForMerge(data, v, fi, binary, compressed, tokenize)); } -void FieldsReader::addField(CL_NS(document)::Document* doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize) { +void FieldsReader::addField(CL_NS(document)::Document& doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize) { //we have a binary stored field, and it may be compressed if (binary) { const int32_t toRead = fieldsStream->readVInt(); - FieldsReader::FieldsStreamHolder* subStream = new FieldsReader::FieldsStreamHolder(fieldsStream, toRead); - //final byte[] b = new byte[toRead]; - //fieldsStream->readBytes(b, 0, b.length); + FieldsReader::FieldsStreamHolder* subStream = _CLNEW FieldsReader::FieldsStreamHolder(fieldsStream, toRead); if (compressed) { // we still do not support compressed fields - doc->add(* _CLNEW Field(fi->name, subStream->getStream(), Field::STORE_COMPRESS)); // todo: uncompress(subStream->getStream()) + doc.add(* _CLNEW Field(fi->name, subStream, Field::STORE_COMPRESS)); // todo: uncompress(subStream->getStream()) } else - doc->add(* _CLNEW Field(fi->name, subStream->getStream(), Field::STORE_YES)); + doc.add(* _CLNEW Field(fi->name, subStream, Field::STORE_YES)); } else { uint8_t bits = 0; @@ -381,14 +420,12 @@ const int32_t toRead = fieldsStream->readVInt(); FieldsStreamHolder* subStream = new FieldsStreamHolder(fieldsStream, toRead); - //final byte[] b = new byte[toRead]; - //fieldsStream.readBytes(b, 0, b.length); //todo: we dont have gzip inputstream available, must alert user //to somehow use a gzip inputstream f = _CLNEW Field(fi->name, // field name //todo: new String(uncompress(subStream->getStream()), "UTF-8"), // uncompress the value and add as string - subStream->getStream(), bits); + subStream, bits); f->setOmitNorms(fi->omitNorms); } else { bits |= Field::STORE_YES; @@ -397,11 +434,11 @@ bits, false); f->setOmitNorms(fi->omitNorms); } - doc->add(*f); + 
doc.add(*f); } } -int32_t FieldsReader::addFieldSize(CL_NS(document)::Document* doc, const FieldInfo* fi, const bool binary, const bool compressed) { +int32_t FieldsReader::addFieldSize(CL_NS(document)::Document& doc, const FieldInfo* fi, const bool binary, const bool compressed) { const int32_t size = fieldsStream->readVInt(); const int32_t bytesize = binary || compressed ? size : 2*size; /* @@ -443,47 +480,7 @@ return Field::INDEX_NO; } -FieldsReader::FieldsStreamHolder::FieldsStreamHolder(IndexInput* indexInput, int32_t subLength){ - this->indexInput = indexInput->clone(); - this->indexInputStream = new IndexInputStream(this->indexInput); - this->subStream = new jstreams::SubInputStream<char>(indexInputStream, subLength); - this->size = subStream->getSize(); - this->position = subStream->getPosition(); - this->error = subStream->getError(); - this->status = subStream->getStatus(); -} -FieldsReader::FieldsStreamHolder::~FieldsStreamHolder(){ - delete subStream; - delete indexInputStream; - - indexInput->close(); - _CLDELETE(indexInput); -} -int32_t FieldsReader::FieldsStreamHolder::read(const char*& start, int32_t _min, int32_t _max){ - int32_t ret = subStream->read(start,_min,_max); - this->position = subStream->getPosition(); - this->error = subStream->getError(); - this->status = subStream->getStatus(); - return ret; -} -int64_t FieldsReader::FieldsStreamHolder::skip(int64_t ntoskip){ - int64_t ret = subStream->skip(ntoskip); - this->position = subStream->getPosition(); - this->error = subStream->getError(); - this->status = subStream->getStatus(); - return ret; -} -int64_t FieldsReader::FieldsStreamHolder::reset(int64_t pos){ - int64_t ret = subStream->reset(pos); - this->position = subStream->getPosition(); - this->error = subStream->getError(); - this->status = subStream->getStatus(); - return ret; -} -jstreams::SubInputStream<char>* FieldsReader::FieldsStreamHolder::getStream() const { return subStream; } - - 
FieldsReader::LazyField::LazyField(FieldsReader* _parent, const TCHAR* _name, int config, const int32_t _toRead, const int64_t _pointer) : Field(_name, config), parent(_parent) { @@ -502,23 +499,24 @@ return localFieldsStream; } -jstreams::StreamBase<char>* FieldsReader::LazyField::streamValue() { +CL_NS(util)::InputStream* FieldsReader::LazyField::streamValue() { parent->ensureOpen(); if (fieldsData == NULL) { //uint8_t* b = _CL_NEWARRAY(uint8_t, toRead); CL_NS(store)::IndexInput* localFieldsStream = getFieldStream(); localFieldsStream->seek(pointer); - FieldsReader::FieldsStreamHolder* subStream = new FieldsReader::FieldsStreamHolder(localFieldsStream, toRead); + FieldsReader::FieldsStreamHolder* subStream = _CLNEW FieldsReader::FieldsStreamHolder(localFieldsStream, toRead); //localFieldsStream->readBytes(b, toRead); if (isCompressed()) { //fieldsData = uncompress(b); + //todo: ... what happens here? } else { - fieldsData = subStream->getStream(); + fieldsData = subStream; } valueType = VALUE_STREAM; } - return static_cast<jstreams::StreamBase<char>*>(fieldsData); + return static_cast<CL_NS(util)::InputStream*>(fieldsData); } CL_NS(util)::Reader* FieldsReader::LazyField::readerValue() const { @@ -587,8 +585,8 @@ return NULL; } -jstreams::StreamBase<char>* FieldsReader::FieldForMerge::streamValue() const { - return (valueType & VALUE_STREAM) ? static_cast<jstreams::StreamBase<char>*>(fieldsData) : NULL; +CL_NS(util)::InputStream* FieldsReader::FieldForMerge::streamValue() const { + return (valueType & VALUE_STREAM) ? static_cast<CL_NS(util)::InputStream*>(fieldsData) : NULL; } CL_NS(analysis)::TokenStream* FieldsReader::FieldForMerge::tokenStreamValue() const { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 16:03:21
|
Revision: 2919 http://clucene.svn.sourceforge.net/clucene/?rev=2919&view=rev Author: ustramooner Date: 2008-10-02 16:03:05 +0000 (Thu, 02 Oct 2008) Log Message: ----------- jstreams does a better job now. no need to move using mark, setMinBufSize is always rewindable Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/util/FastCharStream.cpp Modified: branches/lucene2_3_2/src/core/CLucene/util/FastCharStream.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/FastCharStream.cpp 2008-10-02 15:56:49 UTC (rev 2918) +++ branches/lucene2_3_2/src/core/CLucene/util/FastCharStream.cpp 2008-10-02 16:03:05 UTC (rev 2919) @@ -7,13 +7,13 @@ #include "CLucene/_ApiHeader.h" #include "_FastCharStream.h" -#include "CLucene/util/Reader.h" +#include "CLucene/util/CLStreams.h" CL_NS_DEF(util) const int32_t FastCharStream::maxRewindSize = LUCENE_MAX_WORD_LEN*2; - FastCharStream::FastCharStream(Reader* reader): + FastCharStream::FastCharStream(BufferedReader* reader): pos(0), rewindPos(0), resetPos(0), @@ -21,9 +21,9 @@ line(1), input(reader) { - input->mark(maxRewindSize); + input->setMinBufSize(maxRewindSize); } - FastCharStream::~FastCharStream(){ + FastCharStream::~FastCharStream(){ } void FastCharStream::readChar(TCHAR &c) { try{ @@ -39,7 +39,6 @@ } int FastCharStream::GetNext() { - // printf("getnext\n"); if (input == 0 ) // end of file { _CLTHROWA(CL_ERR_IO,"warning : FileReader.GetNext : Read TCHAR over EOS."); @@ -48,14 +47,6 @@ // implementing the functions from the java version of // charstream will be much more efficient. 
++pos; - if ( pos > resetPos + maxRewindSize && rewindPos == 0) { - // move the marker one position (~expensive) - resetPos = pos-(maxRewindSize/2); - if ( resetPos != input->reset(resetPos) ) - _CLTHROWA(CL_ERR_IO,"Unexpected reset() result"); - input->mark(maxRewindSize); - input->skip((maxRewindSize/2) - 1); - } TCHAR ch; readChar(ch); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 16:02:15
|
Revision: 2909 http://clucene.svn.sourceforge.net/clucene/?rev=2909&view=rev Author: ustramooner Date: 2008-10-02 14:18:47 +0000 (Thu, 02 Oct 2008) Log Message: ----------- fixed memory leak Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp Modified: branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2008-10-02 14:18:10 UTC (rev 2908) +++ branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2008-10-02 14:18:47 UTC (rev 2909) @@ -22,6 +22,7 @@ CL_NS(util)::Deletor::Object<TokenStream> >; } Analyzer::~Analyzer(){ + _CLDELETE(internal->tokenStreams); delete internal; } TokenStream* Analyzer::getPreviousTokenStream() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 15:57:04
|
Revision: 2918 http://clucene.svn.sourceforge.net/clucene/?rev=2918&view=rev Author: ustramooner Date: 2008-10-02 15:56:49 +0000 (Thu, 02 Oct 2008) Log Message: ----------- deprecated cleanups memory leak fixes Modified Paths: -------------- branches/lucene2_3_2/src/test/document/TestDocument.cpp branches/lucene2_3_2/src/test/index/TestReuters.cpp branches/lucene2_3_2/src/test/index/TestUtf8.cpp branches/lucene2_3_2/src/test/search/TestQueries.cpp branches/lucene2_3_2/src/test/search/TestSort.cpp branches/lucene2_3_2/src/test/test.h Modified: branches/lucene2_3_2/src/test/document/TestDocument.cpp =================================================================== --- branches/lucene2_3_2/src/test/document/TestDocument.cpp 2008-10-02 15:53:33 UTC (rev 2917) +++ branches/lucene2_3_2/src/test/document/TestDocument.cpp 2008-10-02 15:56:49 UTC (rev 2918) @@ -6,6 +6,49 @@ ------------------------------------------------------------------------------*/ #include "test.h" +//an in memory input stream for testing binary data +class MemReader: public CL_NS(util)::InputStream{ + signed char* value; + size_t len; + int64_t pos; +public: + MemReader ( const char* value, const int32_t length = -1 ){ + if ( length >= 0 ) + this->len = length; + else + this->len = strlen(value); + this->pos = 0; + this->value = _CL_NEWARRAY(signed char, this->len); + memcpy(this->value, value, this->len); + } + virtual ~MemReader(){ + _CLDELETE_ARRAY(this->value); + } + + int32_t read(const signed char*& start, int32_t min, int32_t max){ + start = this->value + pos; + int32_t r = max>min?max:min; + if ( len-pos < r ) + r = len-pos; + pos += r; + return r; + } + int64_t position(){ + return pos; + } + int64_t skip(int64_t ntoskip){ + int64_t s = ntoskip; + if ( len-pos < s ) + s = len-pos; + + this->pos += s; + return s; + } + size_t size(){ + return len; + } +}; + void TestFields(CuTest *tc){ Field *f = _CLNEW Field(_T("test"), _T("value"), Field::INDEX_TOKENIZED); CLUCENE_ASSERT(f->isIndexed() && 
f->isTokenized()); @@ -43,10 +86,10 @@ Document doc; Field* f; - const char* _as; - const char* _as2; + const signed char* _as; + const signed char* _as2; const TCHAR* _ts; - jstreams::StreamBase<char>* strm; + CL_NS(util)::InputStream* strm; RAMDirectory ram; const char* areaderString = "a string reader field"; @@ -57,7 +100,7 @@ IndexWriter writer(&ram,&an,true); //no analyzer needed since we are not indexing... //use binary utf8 - doc.add( *_CLNEW Field(_T("utf8Field"), new jstreams::StringReader<char>(areaderString), + doc.add( *_CLNEW Field(_T("utf8Field"), _CLNEW MemReader(areaderString), Field::TERMVECTOR_NO | Field::STORE_YES | Field::INDEX_NO) ); writer.addDocument(&doc); doc.clear(); @@ -72,7 +115,7 @@ writer.optimize(); //use big file - doc.add( *_CLNEW Field(_T("fileField"), new jstreams::FileInputStream(factbook), + doc.add( *_CLNEW Field(_T("fileField"), _CLNEW CL_NS(util)::FileInputStream(factbook), Field::TERMVECTOR_NO | Field::STORE_YES | Field::INDEX_NO) ); writer.addDocument(&doc); doc.clear(); @@ -84,7 +127,7 @@ IndexReader* reader = IndexReader::open(&ram); //now check binary stream - reader->document(0, &doc); + reader->document(0, doc); f = doc.getField(_T("utf8Field")); strm = f->streamValue(); @@ -97,7 +140,7 @@ //and check reader stream - reader->document(1, &doc); + reader->document(1, doc); f = doc.getField(_T("readerField")); _ts = f->stringValue(); CLUCENE_ASSERT(_tcscmp(treaderString,_ts)==0); @@ -105,10 +148,10 @@ //now check file stream - reader->document(2, &doc); + reader->document(2, doc); f = doc.getField(_T("fileField")); strm = f->streamValue(); - jstreams::FileInputStream fbStream(factbook); + FileInputStream fbStream(factbook); do{ int32_t rd = fbStream.read(_as2,1,1); Modified: branches/lucene2_3_2/src/test/index/TestReuters.cpp =================================================================== --- branches/lucene2_3_2/src/test/index/TestReuters.cpp 2008-10-02 15:53:33 UTC (rev 2917) +++ 
branches/lucene2_3_2/src/test/index/TestReuters.cpp 2008-10-02 15:56:49 UTC (rev 2918) @@ -1,6 +1,6 @@ #include "test.h" #include "CLucene/util/dirent.h" -#include "CLucene/util/Reader.h" +#include "CLucene/util/CLStreams.h" #include "CLucene/LuceneThreads.h" #ifdef _CL_HAVE_SYS_STAT_H @@ -300,15 +300,16 @@ ); } _LUCENE_THREAD_FUNC(threadedSearcherTest, arg){ - IndexSearcher* searcher = (IndexSearcher*)arg; + IndexSearcher* searcher = (IndexSearcher*)arg; printf("thread started :-)...\n"); for ( int i=0;i<100;i++ ){ threadSearch(searcher, _T("test") ); threadSearch(searcher, _T("reuters") ); threadSearch(searcher, _T("data") ); - } + } printf ("done...\n"); + _LUCENE_THREAD_FUNC_RETURN(0); } void testThreaded(CuTest* tc){ Modified: branches/lucene2_3_2/src/test/index/TestUtf8.cpp =================================================================== --- branches/lucene2_3_2/src/test/index/TestUtf8.cpp 2008-10-02 15:53:33 UTC (rev 2917) +++ branches/lucene2_3_2/src/test/index/TestUtf8.cpp 2008-10-02 15:56:49 UTC (rev 2918) @@ -1,6 +1,6 @@ #include "test.h" #include "CLucene/util/dirent.h" -#include "CLucene/util/Reader.h" +#include "CLucene/util/CLStreams.h" CL_NS_USE(util) @@ -88,12 +88,12 @@ size_t p, p1, p2; p = p1 = p2 = 0; while(true){ - s = utf8.read(buf1, readLen); + s = utf8.read(buf1, 1, readLen); if ( s == -1 ) break; p1+=s; - s = unicode.read(buf2, readLen); + s = unicode.read(buf2, 1, readLen); if (s == -1) break; p2+=s; @@ -145,8 +145,8 @@ FileReader utf8(utf8text,"UTF-8"); FileReader unicode(unicodetext, "UCS-2LE"); - utf8.mark(10000); - unicode.mark(10000); + utf8.setMinBufSize(10000); + unicode.setMinBufSize(10000); doReadChars(tc,utf8,unicode); //now try reading with read, instead of readChar Modified: branches/lucene2_3_2/src/test/search/TestQueries.cpp =================================================================== --- branches/lucene2_3_2/src/test/search/TestQueries.cpp 2008-10-02 15:53:33 UTC (rev 2917) +++ 
branches/lucene2_3_2/src/test/search/TestQueries.cpp 2008-10-02 15:56:49 UTC (rev 2918) @@ -28,6 +28,7 @@ CLUCENE_ASSERT(3 == hits->length()); // All documents in /Computers category and below _CLDELETE(query); _CLDELETE(t); + _CLDELETE(hits); t = _CLNEW Term(_T("category"), _T("/Computers/Mac")); query = _CLNEW PrefixQuery(t); Modified: branches/lucene2_3_2/src/test/search/TestSort.cpp =================================================================== --- branches/lucene2_3_2/src/test/search/TestSort.cpp 2008-10-02 15:53:33 UTC (rev 2917) +++ branches/lucene2_3_2/src/test/search/TestSort.cpp 2008-10-02 15:56:49 UTC (rev 2918) @@ -236,7 +236,7 @@ } sortScores* sort_getScores (CuTest* tc, Hits* hits, bool deleteHits=true){ - sortScores* scoreMap = _CLNEW sortScores(_CLNEW TCharCompare); + sortScores* scoreMap = _CLNEW sortScores(true); int n = hits->length(); float_t m=pow(10.0,-8); Modified: branches/lucene2_3_2/src/test/test.h =================================================================== --- branches/lucene2_3_2/src/test/test.h 2008-10-02 15:53:33 UTC (rev 2917) +++ branches/lucene2_3_2/src/test/test.h 2008-10-02 15:56:49 UTC (rev 2918) @@ -106,7 +106,7 @@ } } - void put(_K k, _T v){ + void add(_K k, _T v){ this->insert ( std::pair<_K,_T>(k,v) ); } }; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 15:53:51
|
Revision: 2917 http://clucene.svn.sourceforge.net/clucene/?rev=2917&view=rev Author: ustramooner Date: 2008-10-02 15:53:33 +0000 (Thu, 02 Oct 2008) Log Message: ----------- BufferedReader is a reader which can be reset. Readers don't have to be resetable. Classes which need reset ability now ask for a BufferedReader Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h branches/lucene2_3_2/src/core/CLucene/util/CLStreams.h branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.cpp 2008-10-02 14:34:50 UTC (rev 2916) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.cpp 2008-10-02 15:53:33 UTC (rev 2917) @@ -8,7 +8,7 @@ #include "StandardAnalyzer.h" ////#include "CLucene/util/VoidMap.h" -#include "CLucene/util/Reader.h" +#include "CLucene/util/CLStreams.h" #include "CLucene/analysis/AnalysisHeader.h" #include "CLucene/analysis/Analyzers.h" #include "StandardFilter.h" @@ -52,7 +52,13 @@ TokenStream* StandardAnalyzer::tokenStream(const TCHAR* fieldName, Reader* reader) { - TokenStream* ret = _CLNEW StandardTokenizer(reader); + BufferedReader* bufferedReader = reader->__asBufferedReader(); + TokenStream* ret; + + if ( bufferedReader == NULL ) + ret = _CLNEW StandardTokenizer( _CLNEW FilteredBufferedReader(reader, false), true ); + else + ret = _CLNEW StandardTokenizer(bufferedReader); ret = _CLNEW StandardFilter(ret,true); ret = _CLNEW LowerCaseFilter(ret,true); ret = _CLNEW StopFilter(ret,true, stopSet); Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp 
=================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp 2008-10-02 14:34:50 UTC (rev 2916) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp 2008-10-02 15:53:33 UTC (rev 2917) @@ -8,6 +8,7 @@ #include "StandardTokenizer.h" #include "CLucene/util/StringBuffer.h" #include "CLucene/util/_FastCharStream.h" +#include "CLucene/util/CLStreams.h" CL_NS_USE(analysis) CL_NS_USE(util) @@ -92,17 +93,21 @@ #define CONTAINS_ANY(sb, ofThese) (_tcscspn(sb.getBuffer(), _T(ofThese)) != static_cast<size_t>(sb.len)) - StandardTokenizer::StandardTokenizer(BufferedReader* reader): + StandardTokenizer::StandardTokenizer(BufferedReader* reader, bool deleteReader): /* rdPos is zero-based. It starts at -1, and will advance to the first ** position when readChar() is first called. */ rdPos(-1), tokenStart(-1), rd(_CLNEW FastCharStream(reader)) { + this->reader = reader; + this->deleteReader = deleteReader; } StandardTokenizer::~StandardTokenizer() { _CLDELETE(rd); + if ( this->deleteReader ) + _CLDELETE(reader) } int StandardTokenizer::readChar() { Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h 2008-10-02 14:34:50 UTC (rev 2916) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h 2008-10-02 15:53:33 UTC (rev 2917) @@ -49,11 +49,13 @@ bool ReadDotted(CL_NS(util)::StringBuffer* str, TokenTypes forcedType,Token* t); + CL_NS(util)::BufferedReader* reader; + bool deleteReader; + CL_NS(util)::FastCharStream* rd; public: - CL_NS(util)::FastCharStream* rd; // Constructs a tokenizer for this Reader. 
- StandardTokenizer(CL_NS(util)::BufferedReader* reader); + StandardTokenizer(CL_NS(util)::BufferedReader* reader, bool deleteReader=false); ~StandardTokenizer(); Modified: branches/lucene2_3_2/src/core/CLucene/util/CLStreams.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/CLStreams.h 2008-10-02 14:34:50 UTC (rev 2916) +++ branches/lucene2_3_2/src/core/CLucene/util/CLStreams.h 2008-10-02 15:53:33 UTC (rev 2917) @@ -102,6 +102,7 @@ template <class T> class CLUCENE_EXPORT BufferedStream{ public: + ~BufferedStream(){} /** * @brief Repositions this stream to a given position. * @@ -137,6 +138,7 @@ class BufferedReader; class CLUCENE_EXPORT Reader: public CLStream<TCHAR>{ public: + ~Reader(){} virtual BufferedReader* __asBufferedReader(){ return NULL; } }; class CLUCENE_EXPORT BufferedReader: public Reader, public BufferedStream<TCHAR>{ @@ -145,12 +147,35 @@ this->setMinBufSize(readAheadlimit); return this->position(); } + ~BufferedReader(){} BufferedReader* __asBufferedReader(){ return this; } }; -class CLUCENE_EXPORT InputStream: public CLStream<signed char>{}; -class CLUCENE_EXPORT BufferedInputStream: public InputStream, public BufferedStream<signed char>{}; +class CLUCENE_EXPORT InputStream: public CLStream<signed char>{ +public: + ~InputStream(){} +}; +class CLUCENE_EXPORT BufferedInputStream: public InputStream, public BufferedStream<signed char>{ +public: + ~BufferedInputStream(){} +}; + +class FilteredBufferedReader: public BufferedReader{ + class Internal; + Internal* internal; +public: + FilteredBufferedReader(Reader* reader, bool deleteReader); + virtual ~FilteredBufferedReader(); + + int32_t read(const TCHAR*& start, int32_t min, int32_t max); + int64_t position(); + int64_t reset(int64_t); + int64_t skip(int64_t ntoskip); + size_t size(); + void setMinBufSize(int32_t minbufsize); +}; + class CLUCENE_EXPORT StringReader: public BufferedReader{ TCHAR* value; bool ownValue; Modified: 
branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp 2008-10-02 14:34:50 UTC (rev 2916) +++ branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp 2008-10-02 15:53:33 UTC (rev 2917) @@ -340,4 +340,64 @@ internal->jsbuffer->_setMinBufSize(minbufsize); } +class FilteredBufferedReader::Internal{ +public: + class JStreamsFilteredBuffer: public jstreams::BufferedReader{ + Reader* input; + bool deleteInput; + protected: + int32_t fillBuffer(TCHAR* start, int32_t space){ + const TCHAR* buffer; + int32_t r = input->read(buffer, 1, space); + if ( r > 0 ) + _tcsncpy(start, buffer, r); + return r; + } + public: + JStreamsFilteredBuffer(Reader* input, bool deleteInput){ + this->input = input; + this->deleteInput = deleteInput; + } + ~JStreamsFilteredBuffer(){ + if ( deleteInput ) + _CLDELETE(input); + } + void _setMinBufSize(int32_t min){ + this->setMinBufSize(min); + } + }; + JStreamsFilteredBuffer* jsbuffer; + + Internal(Reader* reader, bool deleteReader){ + this->jsbuffer = new JStreamsFilteredBuffer(reader, deleteReader); + } + ~Internal(){ + delete jsbuffer; + } +}; +FilteredBufferedReader::FilteredBufferedReader(Reader* reader, bool deleteReader){ + internal = new Internal(reader, deleteReader); +} +FilteredBufferedReader::~FilteredBufferedReader(){ + delete internal; +} +int32_t FilteredBufferedReader::read(const TCHAR*& start, int32_t min, int32_t max){ + return internal->jsbuffer->read(start,min,max); +} +int64_t FilteredBufferedReader::position(){ + return internal->jsbuffer->position(); +} +int64_t FilteredBufferedReader::reset(int64_t p){ + return internal->jsbuffer->reset(p); +} +int64_t FilteredBufferedReader::skip(int64_t ntoskip){ + return internal->jsbuffer->skip(ntoskip); +} +size_t FilteredBufferedReader::size(){ + return internal->jsbuffer->size(); +} +void FilteredBufferedReader::setMinBufSize(int32_t minbufsize){ + return 
internal->jsbuffer->_setMinBufSize(minbufsize); +} + CL_NS_END This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 15:49:49
|
Revision: 2915 http://clucene.svn.sourceforge.net/clucene/?rev=2915&view=rev Author: ustramooner Date: 2008-10-02 14:33:51 +0000 (Thu, 02 Oct 2008) Log Message: ----------- some cleanups for threads Modified Paths: -------------- branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h branches/lucene2_3_2/src/shared/CLucene/config/_threads.h branches/lucene2_3_2/src/shared/CLucene/config/threads.cpp Modified: branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h 2008-10-02 14:30:11 UTC (rev 2914) +++ branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h 2008-10-02 14:33:51 UTC (rev 2915) @@ -19,7 +19,8 @@ #define _LUCENE_CURRTHREADID 1 #define _LUCENE_THREADID_TYPE char #define _LUCENE_THREAD_FUNC(name, argName) int name(void* argName) - #define _LUCENE_THREAD_CREATE(value, func, arg) func(arg) + #define _LUCENE_THREAD_FUNC_RETURN(val) return (int)val; + #define _LUCENE_THREAD_CREATE(func, arg) (*func)(arg) #define _LUCENE_THREAD_JOIN(value) //nothing to do... 
#else @@ -29,6 +30,7 @@ #if defined(_CL_HAVE_PTHREAD) #define _LUCENE_THREADID_TYPE pthread_t #define _LUCENE_THREAD_FUNC(name, argName) void* name(void* argName) //< use this macro to correctly define the thread start routine + #define _LUCENE_THREAD_FUNC_RETURN(val) return (int)val; typedef void* (luceneThreadStartRoutine)(void* lpThreadParameter ); class CLUCENE_SHARED_EXPORT mutex_thread { @@ -49,6 +51,7 @@ #elif defined(_CL_HAVE_WIN32_THREADS) #define _LUCENE_THREADID_TYPE uint64_t #define _LUCENE_THREAD_FUNC(name, argName) void __stdcall name(void* argName) //< use this macro to correctly define the thread start routine + #define _LUCENE_THREAD_FUNC_RETURN(val) mutex_thread::_exitThread(val) typedef void (__stdcall luceneThreadStartRoutine)(void* lpThreadParameter ); class CLUCENE_SHARED_EXPORT mutex_thread { @@ -61,6 +64,7 @@ ~mutex_thread(); void lock(); void unlock(); + static void _exitThread(int ret); static _LUCENE_THREADID_TYPE _GetCurrentThreadId(); static _LUCENE_THREADID_TYPE CreateThread(luceneThreadStartRoutine* func, void* arg); static void JoinThread(_LUCENE_THREADID_TYPE id); Modified: branches/lucene2_3_2/src/shared/CLucene/config/_threads.h =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/config/_threads.h 2008-10-02 14:30:11 UTC (rev 2914) +++ branches/lucene2_3_2/src/shared/CLucene/config/_threads.h 2008-10-02 14:33:51 UTC (rev 2915) @@ -31,6 +31,7 @@ __declspec(dllimport) void __stdcall EnterCriticalSection(CRITICAL_SECTION *); __declspec(dllimport) void __stdcall LeaveCriticalSection(CRITICAL_SECTION *); __declspec(dllimport) void __stdcall DeleteCriticalSection(CRITICAL_SECTION *); + __declspec(dllimport) void __stdcall ExitThread(_cl_dword_t); __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(); typedef struct _SECURITY_ATTRIBUTES Modified: branches/lucene2_3_2/src/shared/CLucene/config/threads.cpp 
=================================================================== --- branches/lucene2_3_2/src/shared/CLucene/config/threads.cpp 2008-10-02 14:30:11 UTC (rev 2914) +++ branches/lucene2_3_2/src/shared/CLucene/config/threads.cpp 2008-10-02 14:33:51 UTC (rev 2915) @@ -58,6 +58,9 @@ _LUCENE_THREADID_TYPE mutex_thread::_GetCurrentThreadId(){ return GetCurrentThreadId(); } + void mutex_thread::_exitThread(int val){ + ExitThread(val); + } _LUCENE_THREADID_TYPE mutex_thread::CreateThread(luceneThreadStartRoutine* func, void* arg){ return (_LUCENE_THREADID_TYPE) ::_beginthread (func, 0, arg); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 15:49:42
|
Revision: 2916 http://clucene.svn.sourceforge.net/clucene/?rev=2916&view=rev Author: ustramooner Date: 2008-10-02 14:34:50 +0000 (Thu, 02 Oct 2008) Log Message: ----------- jstreams changes... Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp branches/lucene2_3_2/src/core/CLucene/util/Reader.h Modified: branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp 2008-10-02 14:33:51 UTC (rev 2915) +++ branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp 2008-10-02 14:34:50 UTC (rev 2916) @@ -5,56 +5,193 @@ * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" -#include "Reader.h" +#include "CLStreams.h" +#include <fcntl.h> +#ifdef _CL_HAVE_IO_H + #include <io.h> +#endif +#ifdef _CL_HAVE_SYS_STAT_H + #include <sys/stat.h> +#endif +#ifdef _CL_HAVE_UNISTD_H + #include <unistd.h> +#endif +#ifdef _CL_HAVE_DIRECT_H + #include <direct.h> +#endif +#include <errno.h> + +#include "_bufferedstream.h" + CL_NS_DEF(util) -StringReader::StringReader ( const TCHAR* value ): - Reader(NULL,true){ - reader = new jstreams::StringReader<TCHAR>(value); +StringReader::StringReader ( TCHAR* value, const int32_t length, bool copyData ) +{ + this->m_size = length; + this->pos = 0; + if ( copyData ){ + this->value = _CL_NEWARRAY(TCHAR, this->m_size); + _tcsncpy(this->value, value, this->m_size); + }else{ + this->value = value; + } + this->ownValue = copyData; } -StringReader::StringReader ( const TCHAR* value, const int32_t length ): - Reader(NULL,true){ - reader = new jstreams::StringReader<TCHAR>(value,length); + +StringReader::StringReader ( const TCHAR* value, const int32_t length ){ + if ( length >= 0 ) + this->m_size = length; + else + this->m_size = _tcslen(value); + this->pos = 0; + this->value = 
_CL_NEWARRAY(TCHAR, this->m_size); + _tcsncpy(this->value, value, this->m_size); + this->ownValue = true; } -StringReader::StringReader ( const TCHAR* value, const int32_t length, bool copyData ): - Reader(NULL,true){ - reader = new jstreams::StringReader<TCHAR>(value,length, copyData); -} StringReader::~StringReader(){ + if ( ownValue ) + _CLDELETE_ARRAY(this->value); } +size_t StringReader::size(){ + return m_size; +} +int32_t StringReader::read(const TCHAR*& start, int32_t min, int32_t max){ + if ( m_size == pos ) + return -1; + start = this->value + pos; + int32_t r = (int32_t)cl_min(cl_max(min,max),m_size-pos); + pos += r; + return r; +} +int64_t StringReader::position(){ + return pos; +} +void StringReader::setMinBufSize(int32_t s){ +} +int64_t StringReader::reset(int64_t pos){ + if ( pos >= 0 && pos < this->m_size ) + this->pos = pos; + return this->pos; +} +int64_t StringReader::skip(int64_t ntoskip){ + int64_t s = cl_min(ntoskip, m_size-pos); + this->pos += s; + return s; +} -FileReader::FileReader ( const char* path, const char* enc, - const int32_t cachelen, const int32_t /*cachebuff*/ ): - Reader(NULL, true) +class FileInputStream::Internal{ +public: + class JStreamsBuffer: public jstreams::BufferedInputStream{ + int32_t fhandle; + protected: + int32_t fillBuffer(signed char* start, int32_t space){ + if (fhandle == 0) return -1; + // read into the buffer + int32_t nwritten = _read(fhandle, start, space); + + // check the file stream status + if (nwritten == -1 ) { + m_error = "Could not read from file"; + m_status = jstreams::Error; + if ( fhandle > 0 ){ + ::_close(fhandle); + fhandle = 0; + } + return -1; + }else if ( nwritten == 0 ) { + ::_close(fhandle); + fhandle = 0; + } + return nwritten; + } + public: + int encoding; + + JStreamsBuffer(int32_t fhandle, int32_t buffersize){ + this->fhandle = fhandle; + + m_size = fileSize(fhandle); // no need to know the file length... 
+ + // allocate memory in the buffer + int32_t bufsize = (int32_t)((m_size <= buffersize) ?m_size+1 :buffersize); + setMinBufSize(bufsize); + } + void _setMinBufSize(int32_t bufsize){ + this->setMinBufSize(bufsize); + } + + ~JStreamsBuffer(){ + if ( fhandle > 0 ){ + if ( ::_close(fhandle) != 0 ) + _CLTHROWA(CL_ERR_IO, "File IO Close error"); + } + } + }; + + JStreamsBuffer* jsbuffer; + + Internal(const char* path, int32_t buffersize){ + int32_t fhandle = _cl_open(path, _O_BINARY | O_RDONLY | _O_RANDOM, _S_IREAD ); + + //Check if a valid handle was retrieved + if (fhandle < 0){ + int err = errno; + if ( err == ENOENT ) + _CLTHROWA(CL_ERR_IO, "File does not exist"); + else if ( err == EACCES ) + _CLTHROWA(CL_ERR_IO, "File Access denied"); + else if ( err == EMFILE ) + _CLTHROWA(CL_ERR_IO, "Too many open files"); + else + _CLTHROWA(CL_ERR_IO, "Could not open file"); + } + jsbuffer = new JStreamsBuffer(fhandle, buffersize); + + } + ~Internal(){ + delete jsbuffer; + } +}; + + +FileInputStream::FileInputStream ( const char* path, int32_t buflen ) { - this->input = new jstreams::FileInputStream(path, cachelen); - this->reader = new SimpleInputStreamReader(this->input,enc); //(this is a jstream object) + if ( buflen == -1 ) + buflen = DEFAULT_BUFFER_SIZE; + internal = new Internal(path, buflen); } -FileReader::~FileReader (){ - if (input) - delete input; +size_t FileInputStream::size(){ + return internal->jsbuffer->size(); } -int32_t FileReader::read(const TCHAR*& start, int32_t _min, int32_t _max) { - return reader->read(start, _min, _max); + +FileInputStream::~FileInputStream () +{ + delete internal; } -int64_t FileReader::mark(int32_t readlimit) { - return reader->mark(readlimit); + +int32_t FileInputStream::read(const signed char*& start, int32_t min, int32_t max){ + return internal->jsbuffer->read(start,min,max); } -int64_t FileReader::reset(int64_t newpos) { - return reader->reset(newpos); +int64_t FileInputStream::position(){ + return internal->jsbuffer->position(); 
} +int64_t FileInputStream::reset(int64_t to){ + return internal->jsbuffer->reset(to); +} +int64_t FileInputStream::skip(int64_t ntoskip){ + return internal->jsbuffer->skip(ntoskip); +} +void FileInputStream::setMinBufSize(int32_t minbufsize){ + internal->jsbuffer->_setMinBufSize(minbufsize); +} - -SimpleInputStreamReader::SimpleInputStreamReader(jstreams::StreamBase<char> *i, const char* enc) +FileReader::FileReader(const char *path, const char *enc, int32_t buflen) { - finishedDecoding = false; - input = i; - charbuf.setSize(262); - + int encoding; if ( strcmp(enc,"ASCII")==0 ) encoding = ASCII; #ifdef _UCS2 @@ -65,122 +202,142 @@ #endif else _CLTHROWA(CL_ERR_IllegalArgument,"Unsupported encoding, use jstreams iconv based instead"); - - mark(262); - charsLeft = 0; + init( _CLNEW FileInputStream(path, buflen), encoding); } -SimpleInputStreamReader::~SimpleInputStreamReader(){ - input = NULL; +FileReader::FileReader(const char *path, int encoding, int32_t buflen) +{ + init(_CLNEW FileInputStream(path, buflen), encoding); } -int32_t SimpleInputStreamReader::decode(TCHAR* start, int32_t space){ - // decode from charbuf - const char *inbuf = charbuf.readPos; - const char *inbufend = charbuf.readPos + charbuf.avail; - TCHAR *outbuf = start; - const TCHAR *outbufend = outbuf + space; +FileReader::~FileReader(){ +} - if ( encoding == ASCII ){ - while ( outbuf<outbufend && inbuf<inbufend ){ - *outbuf = *inbuf; - outbuf++; - inbuf++; - } - -#ifdef _UCS2 - } - else if ( encoding == UCS2_LE ){ - while ( outbuf<outbufend && (inbuf+1)<inbufend ){ - uint8_t c1 = *inbuf; - uint8_t c2 = *(inbuf+1); - unsigned short c = c1 | (c2<<8); - - #ifdef _UCS2 - *outbuf = c; - #else - *outbuf = LUCENE_OOR_CHAR(c); - #endif - outbuf++; - inbuf+=2; - } +class SimpleInputStreamReader::Internal{ +public: + + class JStreamsBuffer: public jstreams::BufferedReader{ + InputStream* input; + char utf8buf[6]; //< buffer used for converting utf8 characters + protected: + int readChar(){ + const signed 
char* buf; + if ( encoding == ASCII ){ + int32_t ret = this->input->read(buf, 1, 1) ; + if ( ret == 1 ){ + return buf[0]; + }else + return -1; - }else if ( encoding == UTF8 ){ - while ( outbuf<outbufend && inbuf<inbufend ){ - size_t utflen = lucene_utf8charlen(inbuf); - if ( utflen==0 ){ - error = "Invalid multibyte sequence."; - status = jstreams::Error; - return -1; - }else if ( inbuf+utflen > inbufend ){ - break; //character incomplete - }else{ - size_t rd = lucene_utf8towc(outbuf,inbuf,inbufend-inbuf); - if ( rd == 0 ){ - error = "Invalid multibyte sequence."; - status = jstreams::Error; + }else if ( encoding == UCS2_LE ){ + int32_t ret = this->input->read(buf, 2, 2); + if ( ret < 0 ) return -1; + else if ( ret == 1 ){ + return buf[0]; }else{ - inbuf+=rd; - outbuf++; + uint8_t c1 = *buf; + uint8_t c2 = *(buf+1); + return c1 | (c2<<8); } + }else if ( encoding == UTF8 ){ + int32_t ret = this->input->read(buf, 1, 1); + + if ( ret == 1 ){ + size_t len = lucene_utf8charlen(buf[0]); + if ( len > 1 ){ + *utf8buf = buf[0]; + ret = this->input->read(buf, len-1, len-1); + }else + return buf[0]; + + if ( ret >= 0 ){ + if ( ret == len-1 ){ + memcpy(utf8buf+1,buf,ret); + wchar_t wcbuf=0; + lucene_utf8towc(wcbuf, utf8buf); + return wcbuf; + } + } + }else if ( ret == -1 ) + return -1; + this->m_error = "Invalid multibyte sequence."; + this->m_status = jstreams::Error; + }else{ + this->m_error = "Unexpected encoding"; + this->m_status = jstreams::Error; } + return -1; } -#endif //_UCS2 - }else - _CLTHROWA(CL_ERR_Runtime,"Unexpected encoding"); + int32_t fillBuffer(TCHAR* start, int32_t space){ + if ( input == NULL ) return -1; - if ( outbuf < outbufend ) { - //we had enough room to convert the entire input - if ( inbuf < inbufend ) { - // last character is incomplete - // move from inbuf to the end to the start of - // the buffer - memmove(charbuf.start, inbuf, inbufend-inbuf); - charbuf.readPos = charbuf.start; - charbuf.avail = inbufend-inbuf; - } else if ( outbuf < 
outbufend ) { //input sequence was completely converted - charbuf.readPos = charbuf.start; - charbuf.avail = 0; - if (input == NULL) { - finishedDecoding = true; + int c; + int32_t i; + for(i=0;i<space;i++){ + c = readChar(); + if ( c == -1 ){ + if ( this->m_status == jstreams::Ok ){ + if ( i == 0 ) + return -1; + break; + } + return -1; + } + start[i] = c; } + return i; } - } else { - charbuf.readPos += charbuf.avail - (inbufend-inbuf); - charbuf.avail = inbufend-inbuf; + public: + int encoding; + + JStreamsBuffer(InputStream* input, int encoding){ + this->input = input; + this->encoding = encoding; + } + ~JStreamsBuffer(){ + _CLDELETE(input); + } + void _setMinBufSize(int32_t min){ + this->setMinBufSize(min); + } + }; + + JStreamsBuffer* jsbuffer; + + Internal(InputStream* input, int encoding){ + jsbuffer = new JStreamsBuffer(input, encoding); } - return outbuf-start; + ~Internal(){ + delete jsbuffer; + } +}; + +SimpleInputStreamReader::SimpleInputStreamReader(){ + internal = NULL; } +void SimpleInputStreamReader::init(InputStream *i, int encoding){ + internal = new Internal(i, encoding); +} +SimpleInputStreamReader::~SimpleInputStreamReader(){ + delete internal; +} -int32_t SimpleInputStreamReader::fillBuffer(TCHAR* start, int32_t space) { - // fill up charbuf - if (input && charbuf.readPos == charbuf.start) { - const char *begin; - int32_t numRead; - numRead = input->read(begin, 1, charbuf.size - charbuf.avail); - //printf("filled up charbuf\n"); - if (numRead < -1) { - error = input->getError(); - status = jstreams::Error; - input = 0; - return numRead; - } - if (numRead < 1) { - // signal end of input buffer - input = 0; - if (charbuf.avail) { - error = "stream ends on incomplete character"; - status = jstreams::Error; - } - return -1; - } - // copy data into other buffer - memmove( charbuf.start + charbuf.avail, begin, numRead * sizeof(char)); - charbuf.avail = numRead + charbuf.avail; - } - // decode - int32_t n = decode(start, space); - //printf("decoded 
%i\n", n); - return n; +int32_t SimpleInputStreamReader::read(const TCHAR*& start, int32_t min, int32_t max){ + return internal->jsbuffer->read(start, min, max); } +int64_t SimpleInputStreamReader::position(){ + return internal->jsbuffer->position(); +} +int64_t SimpleInputStreamReader::reset(int64_t to){ + return internal->jsbuffer->reset(to); +} +int64_t SimpleInputStreamReader::skip(int64_t ntoskip){ + return internal->jsbuffer->skip(ntoskip); +} +size_t SimpleInputStreamReader::size(){ + return internal->jsbuffer->size(); +} +void SimpleInputStreamReader::setMinBufSize(int32_t minbufsize){ + internal->jsbuffer->_setMinBufSize(minbufsize); +} CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/util/Reader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/Reader.h 2008-10-02 14:33:51 UTC (rev 2915) +++ branches/lucene2_3_2/src/core/CLucene/util/Reader.h 2008-10-02 14:34:50 UTC (rev 2916) @@ -7,148 +7,13 @@ #ifndef _lucene_util_Reader_ #define _lucene_util_Reader_ - -#include "streambase.h" -#include "stringreader.h" -#include "fileinputstream.h" -#include "bufferedstream.h" - +#include "CLucene/util/CLStreams.h" CL_NS_DEF(util) -/** -* An inline wrapper that reads from Jos van den Oever's jstreams -*/ -class CLUCENE_EXPORT Reader:LUCENE_BASE { -typedef jstreams::StreamBase<TCHAR> jsReader; -public: - bool deleteReader; - jsReader* reader; - Reader(jsReader* reader, bool deleteReader){ - this->reader = reader; - this->deleteReader = deleteReader; - } - virtual ~Reader(){ - if ( deleteReader ) - delete reader; - reader = NULL; - } - inline int read(){ - const TCHAR*b; - const int32_t nread = reader->read(b, 1,1); - if ( nread < -1 ) //if not eof - _CLTHROWA(CL_ERR_IO,reader->getError() ); - else if ( nread == -1 ) - return -1; - else - return b[0]; - } +#error Reader has been refactored. It is recommended that you use strigi streams +#error for all input into CLucene. 
If, however, you don't want to use that dependency, +#error then you'll have to refactor your current code. The jstreams namespace +#error was completely removed - // Read one line, return the length of the line read - inline int32_t readLine(TCHAR* buffer){ - int32_t i = 0; - while (true) { - int32_t b = read(); - if (b < 1) - break; - if (b == '\n' || b == '\r') { - if (i > 0) - break; - else - continue; - } - buffer[i++] = b; - } - buffer[i] = 0; - return i; - } - /** - * Read at least 1 character, and as much as is conveniently available - */ - inline int32_t read(const TCHAR*& start){ - int32_t nread = reader->read(start,1,0); - if ( nread < -1 ) //if not eof - _CLTHROWA(CL_ERR_IO,reader->getError()); - else - return nread; - } - inline int32_t read(const TCHAR*& start, int32_t len){ - int32_t nread = reader->read(start, len, len); - if ( nread < -1 ) //if not eof - _CLTHROWA(CL_ERR_IO,reader->getError()); - else - return nread; - } - inline int64_t skip(int64_t ntoskip){ - int64_t skipped = reader->skip(ntoskip); - if ( skipped < 0 ) - _CLTHROWA(CL_ERR_IO,reader->getError()); - else - return skipped; - } - inline int64_t mark(int32_t readAheadlimit){ - int64_t pos = reader->mark(readAheadlimit); - if ( pos < 0 ) - _CLTHROWA(CL_ERR_IO,reader->getError()); - else - return pos; - } - int64_t reset(int64_t pos){ - int64_t r = reader->reset(pos); - if ( r < 0 ) - _CLTHROWA(CL_ERR_IO,reader->getError()); - else - return r; - } -}; - -///A helper class which constructs a the jstreams StringReader. -class CLUCENE_EXPORT StringReader: public Reader{ -public: - StringReader ( const TCHAR* value ); - StringReader ( const TCHAR* value, const int32_t length ); - StringReader ( const TCHAR* value, const int32_t length, bool copyData ); - virtual ~StringReader(); -}; - -/** A very simple inputstreamreader implementation. 
For a -* more complete InputStreamReader, use the jstreams version -* located in the contrib package -*/ -class CLUCENE_EXPORT SimpleInputStreamReader: public jstreams::BufferedInputStream<TCHAR>{ - int32_t decode(TCHAR* start, int32_t space); - int encoding; - enum{ - ASCII=1, - UTF8=2, - UCS2_LE=3 - }; - bool finishedDecoding; - jstreams::StreamBase<char>* input; - int32_t charsLeft; - - jstreams::InputStreamBuffer<char> charbuf; - int32_t fillBuffer(TCHAR* start, int32_t space); -public: - SimpleInputStreamReader(jstreams::StreamBase<char> *i, const char* encoding); - virtual ~SimpleInputStreamReader(); -}; - -/** -* A helper class which constructs a FileReader with a specified -* simple encodings, or a given inputstreamreader -*/ -class CLUCENE_EXPORT FileReader: public Reader{ - jstreams::FileInputStream* input; -public: - FileReader ( const char* path, const char* enc, - const int32_t cachelen = 13, - const int32_t cachebuff = 14 ); //todo: optimise these cache values - virtual ~FileReader (); - - int32_t read(const TCHAR*& start, int32_t _min, int32_t _max); - int64_t mark(int32_t readlimit); - int64_t reset(int64_t); -}; - CL_NS_END #endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 14:32:31
|
Revision: 2913 http://clucene.svn.sourceforge.net/clucene/?rev=2913&view=rev Author: ustramooner Date: 2008-10-02 14:29:11 +0000 (Thu, 02 Oct 2008) Log Message: ----------- - Changed document(Document*) overload to document(Document&). This makes more sense to describe the function as copying data into the variable. Backwards compatibility is maintained through the IndexReader implementing the original prototype. - Changing jstreams namespace. Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp branches/lucene2_3_2/src/core/CLucene/index/_FieldsReader.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h Modified: branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp 2008-10-02 14:28:52 UTC (rev 2912) +++ branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp 2008-10-02 14:29:11 UTC (rev 2913) @@ -297,13 +297,13 @@ } TCHAR** terms = _CL_NEWARRAY(TCHAR*,numTerms+1); - Array<int32_t>* termFreqs = _CLNEW Array<int32_t>(numTerms); + ValueArray<int32_t>* termFreqs = _CLNEW ValueArray<int32_t>(numTerms); // we may not need these, but declare them Array< Array<int32_t> >* positions = NULL; Array< Array<TermVectorOffsetInfo> >* offsets = NULL; if(storePositions){ - Array<int32_t>* tmp = _CL_NEWARRAY(Array<int32_t>,numTerms); + Array<int32_t>* tmp = (Array<int32_t>*)_CL_NEWARRAY(ValueArray<int32_t>,numTerms); positions = _CLNEW Array< Array<int32_t> >(tmp, numTerms); } if(storeOffsets){ @@ -369,9 +369,9 @@ terms[numTerms]=NULL; //null terminate terms array if (storePositions || storeOffsets){ - return _CLNEW SegmentTermPositionVector(field, terms, termFreqs, positions, offsets); + return _CLNEW SegmentTermPositionVector(field, terms, (Array<int32_t>*)termFreqs, positions, offsets); }else { - return _CLNEW SegmentTermVector(field, terms, termFreqs); + return _CLNEW 
SegmentTermVector(field, terms, (Array<int32_t>*)termFreqs); } } Modified: branches/lucene2_3_2/src/core/CLucene/index/_FieldsReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_FieldsReader.h 2008-10-02 14:28:52 UTC (rev 2912) +++ branches/lucene2_3_2/src/core/CLucene/index/_FieldsReader.h 2008-10-02 14:29:11 UTC (rev 2913) @@ -71,7 +71,7 @@ int32_t size() const; /** Loads the fields from n'th document into doc. returns true on success. */ - bool doc(int32_t n, CL_NS(document)::Document* doc, CL_NS(document)::FieldSelector* fieldSelector = NULL); + bool doc(int32_t n, CL_NS(document)::Document& doc, CL_NS(document)::FieldSelector* fieldSelector = NULL); protected: /** Returns the length in bytes of each raw document in a @@ -88,18 +88,18 @@ void skipField(const bool binary, const bool compressed); void skipField(const bool binary, const bool compressed, const int32_t toRead); - void addFieldLazy(CL_NS(document)::Document* doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize); + void addFieldLazy(CL_NS(document)::Document& doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize); /** Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes) * Read just the size -- caller must skip the field content to continue reading fields * Return the size in bytes or chars, depending on field type */ - int32_t addFieldSize(CL_NS(document)::Document* doc, const FieldInfo* fi, const bool binary, const bool compressed); + int32_t addFieldSize(CL_NS(document)::Document& doc, const FieldInfo* fi, const bool binary, const bool compressed); // in merge mode we don't uncompress the data of a compressed field - void addFieldForMerge(CL_NS(document)::Document* doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize); + void 
addFieldForMerge(CL_NS(document)::Document& doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize); - void addField(CL_NS(document)::Document* doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize); + void addField(CL_NS(document)::Document& doc, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize); CL_NS(document)::Field::TermVector getTermVectorType(const FieldInfo* fi); CL_NS(document)::Field::Index getIndexType(const FieldInfo* fi, const bool tokenize); @@ -125,7 +125,7 @@ /** The value of the field in Binary, or null. If null, the Reader value, * String value, or TokenStream value is used. Exactly one of stringValue(), * readerValue(), binaryValue(), and tokenStreamValue() must be set. */ - jstreams::StreamBase<char>* streamValue(); + CL_NS(util)::InputStream* streamValue(); /** The value of the field as a Reader, or null. If null, the String value, * binary value, or TokenStream value is used. 
Exactly one of stringValue(), @@ -148,6 +148,7 @@ int32_t getToRead() const; void setToRead(const int32_t _toRead); }; + friend class LazyField; // Instances of this class hold field properties and data // for merge @@ -155,7 +156,7 @@ public: const TCHAR* stringValue() const; CL_NS(util)::Reader* readerValue() const; - jstreams::StreamBase<char>* streamValue() const; + CL_NS(util)::InputStream* streamValue() const; CL_NS(analysis)::TokenStream* tokenStreamValue() const; FieldForMerge(void* _value, ValueType _type, const FieldInfo* fi, const bool binary, const bool compressed, const bool tokenize); Modified: branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h 2008-10-02 14:28:52 UTC (rev 2912) +++ branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h 2008-10-02 14:29:11 UTC (rev 2913) @@ -276,7 +276,7 @@ TermEnum* terms(const Term* t) const; ///Gets the document identified by n - bool document(int32_t n, CL_NS(document)::Document* doc); + bool document(int32_t n, CL_NS(document)::Document& doc); ///Checks if the n-th document has been marked deleted bool isDeleted(const int32_t n); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2008-10-02 14:30:40
|
Revision: 2914 http://clucene.svn.sourceforge.net/clucene/?rev=2914&view=rev Author: ustramooner Date: 2008-10-02 14:30:11 +0000 (Thu, 02 Oct 2008) Log Message: ----------- fixed some memory leaks relating to Array Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp branches/lucene2_3_2/src/core/CLucene/util/Array.h branches/lucene2_3_2/src/test/search/TestSort.cpp branches/lucene2_3_2/src/test/search/TestTermVector.cpp Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2008-10-02 14:29:11 UTC (rev 2913) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2008-10-02 14:30:11 UTC (rev 2914) @@ -274,7 +274,7 @@ //Check if the j-th document has been deleted, if so skip it if (!reader->isDeleted(j)){ //Get the document - if ( reader->document(j, &doc) ){ + if ( reader->document(j, doc) ){ //Add the document to the new FieldsWriter fieldsWriter->addDocument( &doc ); docCount++; @@ -307,10 +307,10 @@ if (reader->isDeleted(docNum)) continue; - Array<TermFreqVector*> tmp; - if ( reader->getTermFreqVectors(docNum, tmp) ) - termVectorsWriter->addAllDocVectors(tmp); - tmp.deleteAll(); + ObjectArray<TermFreqVector*> tmp; + if ( reader->getTermFreqVectors(docNum, (Array<TermFreqVector*>&)tmp) ) + termVectorsWriter->addAllDocVectors((Array<TermFreqVector*>&)tmp); + tmp.deleteValues(); } } }_CLFINALLY( _CLDELETE(termVectorsWriter); ); Modified: branches/lucene2_3_2/src/core/CLucene/util/Array.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/Array.h 2008-10-02 14:29:11 UTC (rev 2913) +++ branches/lucene2_3_2/src/core/CLucene/util/Array.h 2008-10-02 14:30:11 UTC (rev 2914) @@ -73,7 +73,7 @@ }; /** -* An array of objects +* An array of objects. _CLDELETE is called on every containing object. 
*/ template<typename T> class CLUCENE_EXPORT ObjectArray: public ArrayBase<T>{ @@ -86,7 +86,7 @@ if ( this->values == NULL ) return; for (size_t i=0;i<this->length;i++){ - this->values[i]; + _CLLDELETE(this->values[i]); } this->deleteArray(); } @@ -96,14 +96,14 @@ }; /** -* An array of objects +* Legacy code... don't use, remove all instances of this! */ template<typename T> class CLUCENE_EXPORT Array: public ArrayBase<T>{ public: - /*_CL_DEPRECATED(ObjectArray or ValueArray)*/ Array():ArrayBase<T>(){} - /*_CL_DEPRECATED(ObjectArray or ValueArray)*/ Array(T* values, size_t length):ArrayBase<T>(values,length){} - /*_CL_DEPRECATED(ObjectArray or ValueArray)*/ Array(size_t length):ArrayBase<T>(length){} + _CL_DEPRECATED(ObjectArray or ValueArray) Array():ArrayBase<T>(){} + _CL_DEPRECATED(ObjectArray or ValueArray) Array(T* values, size_t length):ArrayBase<T>(values,length){} + _CL_DEPRECATED(ObjectArray or ValueArray) Array(size_t length):ArrayBase<T>(length){} void deleteValues(){ if ( this->values == NULL ) return; Modified: branches/lucene2_3_2/src/test/search/TestSort.cpp =================================================================== --- branches/lucene2_3_2/src/test/search/TestSort.cpp 2008-10-02 14:29:11 UTC (rev 2913) +++ branches/lucene2_3_2/src/test/search/TestSort.cpp 2008-10-02 14:30:11 UTC (rev 2914) @@ -236,8 +236,10 @@ } sortScores* sort_getScores (CuTest* tc, Hits* hits, bool deleteHits=true){ - sortScores* scoreMap = _CLNEW sortScores(true); + sortScores* scoreMap = _CLNEW sortScores(_CLNEW TCharCompare); int n = hits->length(); + float_t m=pow(10.0,-8); + for (int i=0; i<n; ++i) { Document& doc = hits->doc(i); TCHAR** v = doc.getValues( _T("tracer")); @@ -248,8 +250,20 @@ CuAssertIntEquals (tc, _T("tracer values"), vLength, 1); - scoreMap->insert ( scorePair(v[0], hits->score(i)) ); - _CLDELETE_ARRAY(v); + if ( scoreMap->find(v[0]) != scoreMap->end () ){ + //this (should) be a multi search... 
the document will be double, so here we check that + //the existing value is the same as this value... and then delete and ignore it. + float_t diff = scoreMap->find(v[0])->second - hits->score(i); + if ( diff < 0 ) + diff *= -1; + if ( diff>m ) + CuAssert(tc,_T("sort_getScores(multi or incorrect) f1!=f2"),false); + + _CLDELETE_ARRAY_ALL(v); + }else{ + scoreMap->insert ( scorePair(v[0], hits->score(i)) ); + _CLDELETE_ARRAY(v); + } } if ( deleteHits ) _CLDELETE(hits); Modified: branches/lucene2_3_2/src/test/search/TestTermVector.cpp =================================================================== --- branches/lucene2_3_2/src/test/search/TestTermVector.cpp 2008-10-02 14:29:11 UTC (rev 2913) +++ branches/lucene2_3_2/src/test/search/TestTermVector.cpp 2008-10-02 14:30:11 UTC (rev 2914) @@ -21,10 +21,10 @@ for (int32_t i = 0; i < hits->length(); i++) { - Array<TermFreqVector*> vector; - CLUCENE_ASSERT(tv_searcher->getReader()->getTermFreqVectors(hits->id(i), vector)); + ObjectArray<TermFreqVector*> vector; + CLUCENE_ASSERT(tv_searcher->getReader()->getTermFreqVectors(hits->id(i), (Array<TermFreqVector*>&)vector)); CLUCENE_ASSERT(vector.length== 1); - vector.deleteAll(); + vector.deleteValues(); } _CLDELETE(hits); @@ -48,10 +48,10 @@ for (int32_t i = 0; i < hits->length(); i++) { - Array<TermFreqVector*> vector; - CLUCENE_ASSERT(tv_searcher->getReader()->getTermFreqVectors(hits->id(i), vector)); + ObjectArray<TermFreqVector*> vector; + CLUCENE_ASSERT(tv_searcher->getReader()->getTermFreqVectors(hits->id(i), (Array<TermFreqVector*>&)vector)); CLUCENE_ASSERT(vector.length == 1); - vector.deleteAll(); + vector.deleteValues(); } //test mem leaks with vectors @@ -118,18 +118,18 @@ const TCHAR* test4 = _T("eating chocolate with a chocolate lab in an old chocolate colored computer lab"); //13 terms typedef StringMap<const TCHAR*, int32_t> test4MapType; - test4MapType test4Map(false); - test4Map.put(_T("chocolate"), 3); - test4Map.put(_T("lab"), 2); - 
test4Map.put(_T("eating"), 1); - test4Map.put(_T("computer"), 1); - test4Map.put(_T("with"), 1); - test4Map.put(_T("a"), 1); - test4Map.put(_T("colored"), 1); - test4Map.put(_T("in"), 1); - test4Map.put(_T("an"), 1); - test4Map.put(_T("computer"), 1); - test4Map.put(_T("old"), 1); + test4MapType test4Map(NULL); + test4Map.add(_T("chocolate"), 3); + test4Map.add(_T("lab"), 2); + test4Map.add(_T("eating"), 1); + test4Map.add(_T("computer"), 1); + test4Map.add(_T("with"), 1); + test4Map.add(_T("a"), 1); + test4Map.add(_T("colored"), 1); + test4Map.add(_T("in"), 1); + test4Map.add(_T("an"), 1); + test4Map.add(_T("computer"), 1); + test4Map.add(_T("old"), 1); Document testDoc1; setupDoc(testDoc1, test1); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |