From: <ust...@us...> - 2006-10-10 21:08:18
|
Revision: 2330 http://svn.sourceforge.net/clucene/?rev=2330&view=rev Author: ustramooner Date: 2006-10-10 14:08:10 -0700 (Tue, 10 Oct 2006) Log Message: ----------- jlucene 2.0 changes Modified Paths: -------------- trunk/src/CLucene/index/IndexReader.cpp trunk/src/CLucene/index/IndexReader.h trunk/src/CLucene/index/IndexWriter.cpp trunk/src/CLucene/index/IndexWriter.h trunk/src/CLucene/index/MultiReader.cpp trunk/src/CLucene/index/MultiReader.h trunk/src/CLucene/index/SegmentHeader.h Modified: trunk/src/CLucene/index/IndexReader.cpp =================================================================== --- trunk/src/CLucene/index/IndexReader.cpp 2006-10-10 21:00:50 UTC (rev 2329) +++ trunk/src/CLucene/index/IndexReader.cpp 2006-10-10 21:08:10 UTC (rev 2330) @@ -6,6 +6,7 @@ ------------------------------------------------------------------------------*/ #include "CLucene/StdHeader.h" #include "IndexReader.h" +#include "IndexWriter.h" #include "CLucene/store/Directory.h" #include "CLucene/store/FSDirectory.h" @@ -176,7 +177,17 @@ } int64_t IndexReader::getCurrentVersion(Directory* directory) { - return SegmentInfos::readCurrentVersion(directory); + SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync + LuceneLock* commitLock=directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); + bool locked=false; + try { + locked=commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT); + return SegmentInfos::readCurrentVersion(directory); + }_CLFINALLY( + if (locked) { + commitLock->release(); + } + ) } @@ -187,7 +198,24 @@ _CLDECDELETE(dir); return version; } - + int64_t IndexReader::getVersion() { + return segmentInfos->getVersion(); + } + + bool IndexReader::isCurrent() { + SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync + LuceneLock* commitLock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); + bool locked=false; + try { + locked=commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT); + return SegmentInfos::readCurrentVersion(directory) == segmentInfos->getVersion(); + } _CLFINALLY( + if (locked) { + commitLock->release(); + } + ) + } + uint64_t IndexReader::lastModified(const Directory* directory) { //Func - Static method // Returns the time the index in this directory was last modified. @@ -209,7 +237,7 @@ CND_PRECONDITION(directory != NULL, "directory is NULL"); //Create a buffer of length CL_MAXDIR - char f[CL_MAX_PATH+10]; //add 10 in case that directory is already 260 long + char f[CL_MAX_PATH]; //todo: potential buffer overflow //Copy the directory string to the buffer strcpy(f,directory); //Cat the name of the segments to buffer @@ -233,7 +261,7 @@ if (writeLock == NULL) { LuceneLock* writeLock = directory->makeLock("write.lock"); - if (!writeLock->obtain(LUCENE_WRITE_LOCK_TIMEOUT)) // obtain write lock + if (!writeLock->obtain(IndexWriter::WRITE_LOCK_TIMEOUT)) // obtain write lock _CLTHROWA(CL_ERR_IO,"Index locked for write"); // + writeLock this->writeLock = writeLock; @@ -460,7 +488,7 @@ CND_PRECONDITION(directory != NULL, "directory is NULL"); //Create a buffer of length CL_MAXDIR - char f[CL_MAX_PATH+12]; //add 12 in case that directory is already 260 long + char f[CL_MAX_PATH]; //todo: potential buffer overflow //Copy the directory string to the buffer strcpy(f,directory); //Cat the name of the write.lock file to buffer @@ -473,13 +501,20 @@ return ret; } + +/** Returns true if there are norms stored for this field. */ +bool IndexReader::hasNorms(const TCHAR* field) { + // backward compatible implementation. + // SegmentReader has an efficient implementation. + return norms(field) != NULL; +} - void IndexReader::unlock(const char* path){ - FSDirectory* dir = FSDirectory::getDirectory(path,false); - unlock(dir); - dir->close(); - _CLDECDELETE(dir); - } +void IndexReader::unlock(const char* path){ + FSDirectory* dir = FSDirectory::getDirectory(path,false); + unlock(dir); + dir->close(); + _CLDECDELETE(dir); +} void IndexReader::unlock(Directory* directory){ //Func - Static method // Forcibly unlocks the index in the named directory-> @@ -499,6 +534,57 @@ _CLDELETE(lock); } +bool IndexReader::isLuceneFile(const char* filename){ + if ( !filename ) + return false; + size_t len = strlen(filename); + if ( len < 6 ) //need at least x.frx + return false; + const char* ext = filename + len - 4; + + if ( strcmp(ext, ".cfs") == 0 ) + return true; + else if ( strcmp(ext, ".fnm") == 0 ) + return true; + else if ( strcmp(ext, ".fdx") == 0 ) + return true; + else if ( strcmp(ext, ".fdt") == 0 ) + return true; + else if ( strcmp(ext, ".tii") == 0 ) + return true; + else if ( strcmp(ext, ".tis") == 0 ) + return true; + else if ( strcmp(ext, ".frq") == 0 ) + return true; + else if ( strcmp(ext, ".prx") == 0 ) + return true; + else if ( strcmp(ext, ".del") == 0 ) + return true; + else if ( strcmp(ext, ".tvx") == 0 ) + return true; + else if ( strcmp(ext, ".tvd") == 0 ) + return true; + else if ( strcmp(ext, ".tvf") == 0 ) + return true; + else if ( strcmp(ext, ".tvp") == 0 ) + return true; + + else if ( strcmp(filename, "segments") == 0 ) + return true; + else if ( strcmp(filename, "segments.new") == 0 ) + return true; + else if ( strcmp(filename, "deletable") == 0 ) + return true; + + else if ( strncmp(ext,".f",2)==0 ){ + const char* n = ext+2; + if ( *n && _istdigit(*n) ) + return true; + } + + return false; +} + void IndexReader::addCloseCallback(CloseCallback callback, void* parameter){ closeCallbacks.put(callback, parameter); } @@ -506,13 +592,13 @@ //Constructor IndexReader::IndexReaderLockWith::IndexReaderLockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir): - CL_NS(store)::LuceneLockWith(lock,LUCENE_COMMIT_LOCK_TIMEOUT) + CL_NS(store)::LuceneLockWith(lock,IndexWriter::COMMIT_LOCK_TIMEOUT) { this->directory = dir; } //Constructor IndexReader::IndexReaderCommitLockWith::IndexReaderCommitLockWith( CL_NS(store)::LuceneLock* lock, IndexReader* r ): - CL_NS(store)::LuceneLockWith(lock,LUCENE_COMMIT_LOCK_TIMEOUT), + CL_NS(store)::LuceneLockWith(lock,IndexWriter::COMMIT_LOCK_TIMEOUT), reader(r) { } Modified: trunk/src/CLucene/index/IndexReader.h =================================================================== --- trunk/src/CLucene/index/IndexReader.h 2006-10-10 21:00:50 UTC (rev 2329) +++ trunk/src/CLucene/index/IndexReader.h 2006-10-10 21:08:10 UTC (rev 2330) @@ -28,377 +28,445 @@ so that any subclass which implements it is searchable. <p> Concrete subclasses of IndexReader are usually constructed with a call to - the static method {@link #open}. + one of the static <code>open()</code> methods, e.g. {@link #open(String)}. <p> For efficiency, in this API documents are often referred to via <i>document numbers</i>, non-negative integers which each name a unique document in the index. These document numbers are ephemeral--they may change as documents are added to and deleted from an index. Clients should thus not rely on a given document having the same number between sessions. + + <p> An IndexReader can be opened on a directory for which an IndexWriter is + opened already, but it cannot be used to delete documents from the index then. +*/ +class IndexReader :LUCENE_BASE{ +public: + //Callback for classes that need to know if IndexReader is closing. + typedef void (*CloseCallback)(IndexReader*, void*); -*/ - class IndexReader :LUCENE_BASE{ + class CloseCallbackCompare:public CL_NS(util)::Compare::_base{ public: - //Callback for classes that need to know if IndexReader is closing. - typedef void (*CloseCallback)(IndexReader*, void*); + bool operator()( CloseCallback t1, CloseCallback t2 ) const{ + return t1 > t2; + } + static void doDelete(CloseCallback dummy){ + } + }; + + + enum FieldOption { + // all fields + ALL = 1, + // all indexed fields + INDEXED = 2, + // all fields which are not indexed + UNINDEXED = 4, + // all fields which are indexed with termvectors enables + INDEXED_WITH_TERMVECTOR = 8, + // all fields which are indexed but don't have termvectors enabled + INDEXED_NO_TERMVECTOR = 16, + // all fields where termvectors are enabled. Please note that only standard termvector fields are returned + TERMVECTOR = 32, + // all field with termvectors wiht positions enabled + TERMVECTOR_WITH_POSITION = 64, + // all fields where termvectors with offset position are set + TERMVECTOR_WITH_OFFSET = 128, + // all fields where termvectors with offset and position values set + TERMVECTOR_WITH_POSITION_OFFSET = 256 + }; - class CloseCallbackCompare:public CL_NS(util)::Compare::_base{ - public: - bool operator()( CloseCallback t1, CloseCallback t2 ) const{ - return t1 > t2; - } - static void doDelete(CloseCallback dummy){ - } - }; - private: - CL_NS(store)::LuceneLock* writeLock; +private: + CL_NS(store)::LuceneLock* writeLock; - bool directoryOwner; - bool stale; - bool hasChanges; - bool closeDirectory; + bool directoryOwner; + bool stale; + bool hasChanges; + bool closeDirectory; - CL_NS(store)::Directory* directory; - typedef CL_NS(util)::CLSet<CloseCallback, void*, - CloseCallbackCompare, - CloseCallbackCompare> CloseCallbackMap; - CloseCallbackMap closeCallbacks; - - /** - * Trys to acquire the WriteLock on this directory. - * this method is only valid if this IndexReader is directory owner. - * - * @throws IOException If WriteLock cannot be acquired. - */ - void aquireWriteLock(); - protected: - /** - * Constructor used if IndexReader is not owner of its directory. - * This is used for IndexReaders that are used within other IndexReaders that take care or locking directories. - * - * @param directory Directory where IndexReader files reside. - */ - IndexReader(CL_NS(store)::Directory* dir); + CL_NS(store)::Directory* directory; + typedef CL_NS(util)::CLSet<CloseCallback, void*, + CloseCallbackCompare, + CloseCallbackCompare> CloseCallbackMap; + CloseCallbackMap closeCallbacks; + + /** + * Tries to acquire the WriteLock on this directory. + * this method is only valid if this IndexReader is directory owner. + * + * @throws IOException If WriteLock cannot be acquired. + */ + void aquireWriteLock(); +protected: + /** + * Constructor used if IndexReader is not owner of its directory. + * This is used for IndexReaders that are used within other IndexReaders that take care or locking directories. + * + * @param directory Directory where IndexReader files reside. + */ + IndexReader(CL_NS(store)::Directory* dir); - /** - * Constructor used if IndexReader is owner of its directory. - * If IndexReader is owner of its directory, it locks its directory in case of write operations. - * - * @param directory Directory where IndexReader files reside. - * @param segmentInfos Used for write-l - * @param closeDirectory - */ - IndexReader(CL_NS(store)::Directory* directory, SegmentInfos* segmentInfos, bool closeDirectory); - + /** + * Constructor used if IndexReader is owner of its directory. + * If IndexReader is owner of its directory, it locks its directory in case of write operations. + * + * @param directory Directory where IndexReader files reside. + * @param segmentInfos Used for write-l + * @param closeDirectory + */ + IndexReader(CL_NS(store)::Directory* directory, SegmentInfos* segmentInfos, bool closeDirectory); + - /// Implements close. - virtual void doClose() = 0; + /// Implements close. + virtual void doClose() = 0; - /** Implements setNorm in subclass.*/ - virtual void doSetNorm(int32_t doc, const TCHAR* field, uint8_t value) = 0; - - /** Implements actual undeleteAll() in subclass. */ - virtual void doUndeleteAll() = 0; - - - /** Implements deletion of the document numbered <code>docNum</code>. - * Applications should call {@link #delete(int32_t)} or {@link #delete(Term)}. - */ - virtual void doDelete(const int32_t docNum) = 0; - - public: + /** Implements setNorm in subclass.*/ + virtual void doSetNorm(int32_t doc, const TCHAR* field, uint8_t value) = 0; - DEFINE_MUTEX(THIS_LOCK) + /** Implements actual undeleteAll() in subclass. */ + virtual void doUndeleteAll() = 0; - ///Do not access this directly, only public so that MultiReader can access it - virtual void commit(); + /** Implements deletion of the document numbered <code>docNum</code>. + * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. + */ + virtual void doDelete(const int32_t docNum) = 0; - /** Undeletes all documents currently marked as deleted in this index.*/ - void undeleteAll(); +public: - /** - * Returns a list of all unique field names that exist in the index pointed - * to by this IndexReader. - * @memory All memory must be cleaned by caller - * @return Collection of Strings indicating the names of the fields - * @throws IOException if there is a problem with accessing the index - */ - virtual TCHAR** getFieldNames() = 0; + DEFINE_MUTEX(THIS_LOCK) + + ///Do not access this directly, only public so that MultiReader can access it + virtual void commit(); + + + /** Undeletes all documents currently marked as deleted in this index.*/ + void undeleteAll(); - /** - * Returns a list of all unique field names that exist in the index pointed - * to by this IndexReader. The boolean argument specifies whether the fields - * returned are indexed or not. - * @memory All memory must be cleaned by caller - * @param indexed <code>true</code> if only indexed fields should be returned; - * <code>false</code> if only unindexed fields should be returned. - * @return Collection of Strings indicating the names of the fields - * @throws IOException if there is a problem with accessing the index - */ - virtual TCHAR** getFieldNames(bool indexed) = 0; + /** + * Get a list of unique field names that exist in this index and have the specified + * field option information. + * @param fldOption specifies which field option should be available for the returned fields + * @return Collection of Strings indicating the names of the fields. + * @see IndexReader.FieldOption + */ + virtual void getFieldNames(FieldOption fldOption, CL_NS(util)::StringArrayWithDeletor& retarray) = 0; - /** - * - * @memory All memory must be cleaned by caller - * @param storedTermVector if true, returns only Indexed fields that have term vector info, - * else only indexed fields without term vector info - * @return Collection of Strings indicating the names of the fields - */ - virtual TCHAR** getIndexedFieldNames(bool storedTermVector) = 0; + /** Returns the byte-encoded normalization factor for the named field of + * every document. This is used by the search code to score documents. + * + * The number of bytes returned is the size of the IndexReader->maxDoc() + * MEMORY: The values are cached, so don't delete the returned byte array. + * @see Field#setBoost(float_t) + */ + virtual uint8_t* norms(const TCHAR* field) = 0; + + + /** Reads the byte-encoded normalization factor for the named field of every + * document. This is used by the search code to score documents. + * + * @see Field#setBoost(float_t) + */ + virtual void norms(const TCHAR* field, uint8_t* bytes, const int32_t offset) = 0; + /** Expert: Resets the normalization factor for the named field of the named + * document. + * + * @see #norms(String) + * @see Similarity#decodeNorm(byte) + */ + void setNorm(int32_t doc, const TCHAR* field, float_t value); + + /** Expert: Resets the normalization factor for the named field of the named + * document. The norm represents the product of the field's {@link + * Field#setBoost(float_t) boost} and its {@link Similarity#lengthNorm(String, + * int32_t) length normalization}. Thus, to preserve the length normalization + * values when resetting this, one should base the new value upon the old. + * + * @see #norms(String) + * @see Similarity#decodeNorm(byte) + */ + void setNorm(int32_t doc, const TCHAR* field, uint8_t value); - /** Returns the byte-encoded normalization factor for the named field of - * every document. This is used by the search code to score documents. - * - * The number of bytes returned is the size of the IndexReader->maxDoc() - * MEMORY: The values are cached, so don't delete the returned byte array. - * @see Field#setBoost(float_t) - */ - virtual uint8_t* norms(const TCHAR* field) = 0; + /// Release the write lock, if needed. + virtual ~IndexReader(); + /// Returns an IndexReader reading the index in an FSDirectory in the named path. + static IndexReader* open(const char* path); - /** Reads the byte-encoded normalization factor for the named field of every - * document. This is used by the search code to score documents. - * - * @see Field#setBoost(float_t) - */ - virtual void norms(const TCHAR* field, uint8_t* bytes) = 0; + /// Returns an IndexReader reading the index in the given Directory. + static IndexReader* open( CL_NS(store)::Directory* directory, bool closeDirectory=false); + /** + * Returns the time the index in the named directory was last modified. + * Do not use this to check whether the reader is still up-to-date, use + * {@link #isCurrent()} instead. + */ + static uint64_t lastModified(const char* directory); + /** + * Returns the time the index in the named directory was last modified. + * Do not use this to check whether the reader is still up-to-date, use + * {@link #isCurrent()} instead. + */ + static uint64_t lastModified(const CL_NS(store)::Directory* directory); - /** Expert: Resets the normalization factor for the named field of the named - * document. - * - * @see #norms(String) - * @see Similarity#decodeNorm(byte) - */ - void setNorm(int32_t doc, const TCHAR* field, float_t value); - - /** Expert: Resets the normalization factor for the named field of the named - * document. The norm represents the product of the field's {@link - * Field#setBoost(float_t) boost} and its {@link Similarity#lengthNorm(String, - * int32_t) length normalization}. Thus, to preserve the length normalization - * values when resetting this, one should base the new value upon the old. - * - * @see #norms(String) - * @see Similarity#decodeNorm(byte) - */ - void setNorm(int32_t doc, const TCHAR* field, uint8_t value); - - /// Release the write lock, if needed. - virtual ~IndexReader(); - - /// Returns an IndexReader reading the index in an FSDirectory in the named path. - static IndexReader* open(const char* path); - - /// Returns an IndexReader reading the index in the given Directory. - static IndexReader* open( CL_NS(store)::Directory* directory, bool closeDirectory=false); - - /** - * Returns the time the index in the named directory was last modified. - * - * <p>Synchronization of IndexReader and IndexWriter instances is - * no longer done via time stamps of the segments file since the time resolution - * depends on the hardware platform. Instead, a version number is maintained - * within the segments file, which is incremented everytime when the index is - * changed.</p> - * - * @deprecated Replaced by {@link #getCurrentVersion(String)} - */ - static uint64_t lastModified(const char* directory); - - /** - * Returns the time the index in the named directory was last modified. - * - * <p>Synchronization of IndexReader and IndexWriter instances is - * no longer done via time stamps of the segments file since the time resolution - * depends on the hardware platform. Instead, a version number is maintained - * within the segments file, which is incremented everytime when the index is - * changed.</p> - * - * @deprecated Replaced by {@link #getCurrentVersion(Directory)} - * */ - static uint64_t lastModified(const CL_NS(store)::Directory* directory); - - - /** - * Reads version number from segments files. The version number counts the - * number of changes of the index. + + /** + * Reads version number from segments files. The version number is + * initialized with a timestamp and then increased by one for each change of + * the index. + * + * @param directory where the index resides. + * @return version number. + * @throws IOException if segments file cannot be read + */ + static int64_t getCurrentVersion(CL_NS(store)::Directory* directory); + + /** + * Reads version number from segments files. The version number is + * initialized with a timestamp and then increased by one for each change of + * the index. * * @param directory where the index resides. * @return version number. - * @throws IOException if segments file cannot be read. + * @throws IOException if segments file cannot be read */ - static int64_t getCurrentVersion(CL_NS(store)::Directory* directory); - + static int64_t getCurrentVersion(const char* directory); + /** - * Reads version number from segments files. The version number counts the - * number of changes of the index. + * Version number when this IndexReader was opened. + */ + int64_t getVersion(); + + /** + * Check whether this IndexReader still works on a current version of the index. + * If this is not the case you will need to re-open the IndexReader to + * make sure you see the latest changes made to the index. * - * @param directory where the index resides. - * @return version number. - * @throws IOException if segments file cannot be read + * @throws IOException */ - static int64_t getCurrentVersion(const char* directory); + bool isCurrent(); - - /** Return an array of term frequency vectors for the specified document. - * The array contains a vector for each vectorized field in the document. - * Each vector contains terms and frequencies for all terms - * in a given vectorized field. - * If no such fields existed, the method returns null. - * - * @see Field#isTermVectorStored() - */ - virtual TermFreqVector** getTermFreqVectors(int32_t docNumber) =0; - /** Return a term frequency vector for the specified document and field. The - * vector returned contains terms and frequencies for those terms in - * the specified field of this document, if the field had storeTermVector - * flag set. If the flag was not set, the method returns null. - * - * @see Field#isTermVectorStored() - */ - virtual TermFreqVector* getTermFreqVector(int32_t docNumber, const TCHAR* field) = 0; - - ///Checks if an index exists in the named directory - static bool indexExists(const char* directory); + /** + * Return an array of term frequency vectors for the specified document. + * The array contains a vector for each vectorized field in the document. + * Each vector contains terms and frequencies for all terms in a given vectorized field. + * If no such fields existed, the method returns null. The term vectors that are + * returned my either be of type TermFreqVector or of type TermPositionsVector if + * positions or offsets have been stored. + * + * @param docNumber document for which term frequency vectors are returned + * @return array of term frequency vectors. May be null if no term vectors have been + * stored for the specified document. + * @throws IOException if index cannot be accessed + * @see org.apache.lucene.document.Field.TermVector + */ + virtual TermFreqVector** getTermFreqVectors(int32_t docNumber) =0; + + /** + * Return a term frequency vector for the specified document and field. The + * returned vector contains terms and frequencies for the terms in + * the specified field of this document, if the field had the storeTermVector + * flag set. If termvectors had been stored with positions or offsets, a + * TermPositionsVector is returned. + * + * @param docNumber document for which the term frequency vector is returned + * @param field field for which the term frequency vector is returned. + * @return term frequency vector May be null if field does not exist in the specified + * document or term vector was not stored. + * @throws IOException if index cannot be accessed + * @see org.apache.lucene.document.Field.TermVector + */ + virtual TermFreqVector* getTermFreqVector(int32_t docNumber, const TCHAR* field) = 0; + + /** + * Returns <code>true</code> if an index exists at the specified directory. + * If the directory does not exist or if there is no index in it. + * @param directory the directory to check for an index + * @return <code>true</code> if an index exists; <code>false</code> otherwise + */ + static bool indexExists(const char* directory); - //Checks if an index exists in the directory - static bool indexExists(const CL_NS(store)::Directory* directory); + /** + * Returns <code>true</code> if an index exists at the specified directory. + * If the directory does not exist or if there is no index in it. + * @param directory the directory to check for an index + * @return <code>true</code> if an index exists; <code>false</code> otherwise + * @throws IOException if there is a problem with accessing the index + */ + static bool indexExists(const CL_NS(store)::Directory* directory); - ///Returns the number of documents in this index. - virtual int32_t numDocs() = 0; + /** Returns the number of documents in this index. */ + virtual int32_t numDocs() = 0; - ///Returns one greater than the largest possible document number. - ///This may be used to, e.g., determine how big to allocate an array which - ///will have an element for every document number in an index. - virtual int32_t maxDoc() const = 0; + /** Returns one greater than the largest possible document number. + * This may be used to, e.g., determine how big to allocate an array which + * will have an element for every document number in an index. + */ + virtual int32_t maxDoc() const = 0; - ///Returns the stored fields of the n-th Document in this index. - virtual CL_NS(document)::Document* document(const int32_t n) =0; + /** Returns the stored fields of the <code>n</code><sup>th</sup> + <code>Document</code> in this index. */ + virtual CL_NS(document)::Document* document(const int32_t n) =0; - ///Returns true if document n has been deleted - virtual bool isDeleted(const int32_t n) = 0; + /** Returns true if document <i>n</i> has been deleted */ + virtual bool isDeleted(const int32_t n) = 0; - /** Returns true if any documents have been deleted */ - virtual bool hasDeletions() = 0; + /** Returns true if any documents have been deleted */ + virtual bool hasDeletions() = 0; - ///Returns an enumeration of all the terms in the index. - ///The enumeration is ordered by Term.compareTo(). Each term - ///is greater than all that precede it in the enumeration. - virtual TermEnum* terms() const =0; + /** Returns true if there are norms stored for this field. */ + virtual bool hasNorms(const TCHAR* field); - ///Returns an enumeration of all terms after a given term. - ///The enumeration is ordered by Term.compareTo(). Each term - ///is greater than all that precede it in the enumeration. - virtual TermEnum* terms(const Term* t) const = 0; + /** Returns an enumeration of all the terms in the index. + * The enumeration is ordered by Term.compareTo(). Each term + * is greater than all that precede it in the enumeration. + */ + virtual TermEnum* terms() const =0; - ///Returns the number of documents containing the term t. - virtual int32_t docFreq(const Term* t) const = 0; + /** Returns an enumeration of all terms after a given term. + * The enumeration is ordered by Term.compareTo(). Each term + * is greater than all that precede it in the enumeration. + */ + virtual TermEnum* terms(const Term* t) const = 0; - /// Returns an unpositioned TermPositions enumerator. - virtual TermPositions* termPositions() const = 0; - - //Returns an enumeration of all the documents which contain term. For each - //document, in addition to the document number and frequency of the term in - //that document, a list of all of the ordinal positions of the term in the document - //is available. - TermPositions* termPositions(Term* term) const; + /** Returns the number of documents containing the term <code>t</code>. */ + virtual int32_t docFreq(const Term* t) const = 0; - /// Returns an unpositioned TermDocs enumerator. - virtual TermDocs* termDocs() const = 0; + /// Returns an unpositioned TermPositions enumerator. + virtual TermPositions* termPositions() const = 0; + + /** Returns an enumeration of all the documents which contain + * <code>term</code>. For each document, in addition to the document number + * and frequency of the term in that document, a list of all of the ordinal + * positions of the term in the document is available. Thus, this method + * implements the mapping: + * + * <p><ul> + * Term => <docNum, freq, + * <pos<sub>1</sub>, pos<sub>2</sub>, ... + * pos<sub>freq-1</sub>> + * ><sup>*</sup> + * </ul> + * <p> This positional information faciliates phrase and proximity searching. + * <p>The enumeration is ordered by document number. Each document number is + * greater than all that precede it in the enumeration. + */ + TermPositions* termPositions(Term* term) const; - ///Returns an enumeration of all the documents which contain term. - TermDocs* termDocs(Term* term) const; + /** Returns an unpositioned {@link TermDocs} enumerator. */ + virtual TermDocs* termDocs() const = 0; - ///Deletes the document numbered docNum. Once a document is deleted it will not appear - ///in TermDocs or TermPostitions enumerations. Attempts to read its field with the document - ///method will result in an error. The presence of this document may still be reflected in - ///the docFreq statistic, though this will be corrected eventually as the index is further modified. - ///Note: API renamed, because delete is a reserved word in c++. - void deleteDocument(const int32_t docNum); + /** Returns an enumeration of all the documents which contain + * <code>term</code>. For each document, the document number, the frequency of + * the term in that document is also provided, for use in search scoring. + * Thus, this method implements the mapping: + * <p><ul> + * Term => <docNum, freq><sup>*</sup> + * </ul> + * <p>The enumeration is ordered by document number. Each document number + * is greater than all that precede it in the enumeration. + */ + TermDocs* termDocs(Term* term) const; - ///@Deprecated. Use deleteDocument instead. - void deleteDoc(const int32_t docNum){ deleteDocument(docNum); } + /** Deletes the document numbered <code>docNum</code>. Once a document is + * deleted it will not appear in TermDocs or TermPostitions enumerations. + * Attempts to read its field with the {@link #document} + * method will result in an error. The presence of this document may still be + * reflected in the {@link #docFreq} statistic, though + * this will be corrected eventually as the index is further modified. + */ + void deleteDocument(const int32_t docNum); - ///Deletes all documents containing term. Returns the number of deleted documents - int32_t deleteDocuments(Term* term); + ///@Deprecated. Use deleteDocument instead. + void deleteDoc(const int32_t docNum){ deleteDocument(docNum); } - ///@Deprecated. Use deleteDocuments instead. - int32_t deleteTerm(Term* term){ return deleteDocuments(term); } + /** Deletes all documents containing <code>term</code>. + * This is useful if one uses a document field to hold a unique ID string for + * the document. Then to delete such a document, one merely constructs a + * term with the appropriate field and the unique ID string as its text and + * passes it to this method. + * See {@link #deleteDocument(int)} for information about when this deletion will + * become effective. + * @return the number of documents deleted + */ + int32_t deleteDocuments(Term* term); - /** - * Closes files associated with this index and also saves any new deletions to disk. - * No other methods should be called after this has been called. - */ - void close(); + ///@Deprecated. Use deleteDocuments instead. + int32_t deleteTerm(Term* term){ return deleteDocuments(term); } - ///Checks if the index in the named directory is currently locked. - static bool isLocked(CL_NS(store)::Directory* directory); + /** + * Closes files associated with this index and also saves any new deletions to disk. + * No other methods should be called after this has been called. + */ + void close(); - ///Checks if the index in the named directory is currently locked. - static bool isLocked(const char* directory); + ///Checks if the index in the named directory is currently locked. + static bool isLocked(CL_NS(store)::Directory* directory); + + ///Checks if the index in the named directory is currently locked. + static bool isLocked(const char* directory); - ///Forcibly unlocks the index in the named directory. - ///Caution: this should only be used by failure recovery code, - ///when it is known that no other process nor thread is in fact - ///currently accessing this index. - static void unlock(CL_NS(store)::Directory* directory); - static void unlock(const char* path); + ///Forcibly unlocks the index in the named directory. + ///Caution: this should only be used by failure recovery code, + ///when it is known that no other process nor thread is in fact + ///currently accessing this index. + static void unlock(CL_NS(store)::Directory* directory); + static void unlock(const char* path); - /** Returns the directory this index resides in. */ - CL_NS(store)::Directory* getDirectory() { return directory; } + /** Returns the directory this index resides in. */ + CL_NS(store)::Directory* getDirectory() { return directory; } + /** Returns true if the file is a lucene filename (based on extension or filename) */ + static bool isLuceneFile(const char* filename); - #ifndef LUCENE_HIDE_INTERNAL - //this should be protected, but MSVC 6 does not allow access - //to these fuctions in the protected classes IndexReaderLockWith - //which is wrong, since they themselves are members of the class!! + //this should be protected, but MSVC 6 does not allow access + //to these fuctions in the protected classes IndexReaderLockWith + //which is wrong, since they themselves are members of the class!! - ///for internal use. Public so that lock class can access it - SegmentInfos* segmentInfos; - - /** Internal use. Implements commit. Public so that lock class can access it*/ - virtual void doCommit() = 0; + ///for internal use. Public so that lock class can access it + SegmentInfos* segmentInfos; + + /** Internal use. Implements commit. Public so that lock class can access it*/ + virtual void doCommit() = 0; #endif - /** - * For classes that need to know when the IndexReader closes (such as caches, etc), - * should pass their callback function to this. - */ - void addCloseCallback(CloseCallback callback, void* parameter); + /** + * For classes that need to know when the IndexReader closes (such as caches, etc), + * should pass their callback function to this. + */ + void addCloseCallback(CloseCallback callback, void* parameter); - protected: - class IndexReaderLockWith:public CL_NS(store)::LuceneLockWith{ - public: - CL_NS(store)::Directory* directory; - IndexReader* indexReader; +protected: + class IndexReaderLockWith:public CL_NS(store)::LuceneLockWith{ + public: + CL_NS(store)::Directory* directory; + IndexReader* indexReader; - //Constructor - IndexReaderLockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir); + //Constructor + IndexReaderLockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir); - //Reads the segmentinfo file and depending on the number of segments found - //it returns a MultiReader or a SegmentReader - void* doBody(); + //Reads the segmentinfo file and depending on the number of segments found + //it returns a MultiReader or a SegmentReader + void* doBody(); - }; + }; - class IndexReaderCommitLockWith:public CL_NS(store)::LuceneLockWith{ - private: - IndexReader* reader; - public: - - //Constructor - IndexReaderCommitLockWith( CL_NS(store)::LuceneLock* lock, IndexReader* r ); - void* doBody(); - }; + class IndexReaderCommitLockWith:public CL_NS(store)::LuceneLockWith{ + private: + IndexReader* reader; + public: + + //Constructor + IndexReaderCommitLockWith( CL_NS(store)::LuceneLock* lock, IndexReader* r ); + void* doBody(); }; - +}; + CL_NS_END #endif Modified: trunk/src/CLucene/index/IndexWriter.cpp =================================================================== --- trunk/src/CLucene/index/IndexWriter.cpp 2006-10-10 21:00:50 UTC (rev 2329) +++ trunk/src/CLucene/index/IndexWriter.cpp 2006-10-10 21:08:10 UTC (rev 2330) @@ -21,11 +21,15 @@ CL_NS_USE(analysis) CL_NS_DEF(index) + + const char* IndexWriter::WRITE_LOCK_NAME = "write.lock"; + const char* IndexWriter::COMMIT_LOCK_NAME = "commit.lock"; + IndexWriter::IndexWriter(const char* path, Analyzer* a, const bool create, const bool _closeDir): directory( FSDirectory::getDirectory(path, create) ), analyzer(a), segmentInfos (_CLNEW SegmentInfos), - closeDir(_closeDir){ + closeDir(_closeDir){ //Func - Constructor // Constructs an IndexWriter for the index in path. //Pre - path != NULL and contains a named directory path @@ -62,11 +66,10 @@ //Func - Initialises the instances //Pre - create indicates if the indexWriter must create a new index located at path or just open it //Post - - maxFieldLength = IndexWriter::DEFAULT_MAX_FIELD_LENGTH; - similarity = CL_NS(search)::Similarity::getDefault(); + similarity = CL_NS(search)::Similarity::getDefault(); - useCompoundFile = true; + useCompoundFile = true; //Create a ramDirectory ramDirectory = _CLNEW TransactionalRAMDirectory; @@ -75,23 +78,24 @@ //Initialize the writeLock to writeLock = NULL; - //Initialize the mergeFactor to 10 indicating that a merge will occur after 10 documents - //have been added to the index managed by this IndexWriter - mergeFactor = 10; - //Initialize maxMergeDocs to INT_MAX - maxMergeDocs = INT_MAX; + + //initialise the settings... + maxFieldLength = DEFAULT_MAX_FIELD_LENGTH; + mergeFactor = DEFAULT_MERGE_FACTOR; + maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; + writeLockTimeout = WRITE_LOCK_TIMEOUT; + commitLockTimeout = COMMIT_LOCK_TIMEOUT; + minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS; + termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; - //initialise to LUCENE_INDEXWRITER_DEFAULT_MIN_MERGE_DOCS - minMergeDocs = LUCENE_INDEXWRITER_DEFAULT_MIN_MERGE_DOCS; - //Create a new lock using the name "write.lock" - LuceneLock* newLock = directory->makeLock("write.lock"); + LuceneLock* newLock = directory->makeLock(IndexWriter::WRITE_LOCK_NAME); //Condition check to see if newLock has been allocated properly CND_CONDITION(newLock != NULL, "No memory could be allocated for LuceneLock newLock"); //Try to obtain a write lock - if (!newLock->obtain(LUCENE_WRITE_LOCK_TIMEOUT)){ + if (!newLock->obtain(writeLockTimeout)){ //Write lock could not be obtained so delete it _CLDELETE(newLock); //Reset the instance @@ -101,16 +105,15 @@ } //The Write Lock has been obtained so save it for later use - writeLock = newLock; + this->writeLock = newLock; //Create a new lock using the name "commit.lock" - LuceneLock* lock = directory->makeLock("commit.lock"); + LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); //Condition check to see if lock has been allocated properly CND_CONDITION(lock != NULL, "No memory could be allocated for LuceneLock lock"); - IndexWriterLockWith with ( lock,LUCENE_WRITE_LOCK_TIMEOUT,this,create ); - + LockWith2 with ( lock,commitLockTimeout,this, NULL, create ); { SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync with.run(); @@ -153,37 +156,6 @@ } - void* IndexWriterLockWith::doBody() { - //Func - Writes segmentInfos to or reads segmentInfos from disk - //Pre - writer != NULL - //Post - if create is true then segementInfos has been written to disk otherwise - // segmentInfos has been read from disk - - CND_PRECONDITION(writer != NULL, "writer is NULL"); - - if (create) - writer->segmentInfos->write(writer->getDirectory()); - else - writer->segmentInfos->read(writer->getDirectory()); - - return NULL; - } - - void* IndexWriterLockWith2::doBody(){ - //Func - Writes the segmentInfos to Disk and deletes unused segments - //Pre - writer != NULL - //Post - segmentInfos have been written to disk and unused segments have been deleted - - CND_PRECONDITION(writer != NULL, "writer is NULL"); - - //commit before deleting - writer->segmentInfos->write(writer->getDirectory()); - //delete now-unused segments - writer->deleteSegments(segmentsToDelete); - - return NULL; - } - void IndexWriter::close( ) { //Func - Flushes all changes to an index, closes all associated files, and closes // the directory that the index is stored in. @@ -248,7 +220,7 @@ if ( analyzer == NULL ) analyzer = this->analyzer; - + ramDirectory->transStart(); try { char* segmentName = newSegmentName(); @@ -257,7 +229,7 @@ //Create the DocumentWriter using a ramDirectory and analyzer // supplied by the IndexWriter (this). DocumentWriter* dw = _CLNEW DocumentWriter( - ramDirectory, analyzer, similarity, maxFieldLength ); + ramDirectory, analyzer, this ); CND_CONDITION(dw != NULL, "dw is NULL"); try { //Add the client-supplied document to the new segment. @@ -385,15 +357,18 @@ } } + void IndexWriter::mergeSegments(const uint32_t minSegment) { + mergeSegments(minSegment, segmentInfos->size()); + } - void IndexWriter::mergeSegments(const uint32_t minSegment) { + void IndexWriter::mergeSegments(const uint32_t minSegment, const uint32_t end) { CLVector<SegmentReader*> segmentsToDelete(false); const char* mergedName = newSegmentName(); #ifdef _CL_DEBUG_INFO fprintf(_CL_DEBUG_INFO, "merging segments\n"); #endif - SegmentMerger merger(directory, mergedName, useCompoundFile); - for (int32_t i = minSegment; i < segmentInfos->size(); i++) { + SegmentMerger merger(this, mergedName); + for (size_t i = minSegment; i < end; i++) { SegmentInfo* si = segmentInfos->info(i); #ifdef _CL_DEBUG_INFO fprintf(_CL_DEBUG_INFO, " %s (%d docs)\n",si->name,si->docCount); @@ -402,7 +377,7 @@ merger.add(reader); if ((reader->getDirectory() == this->directory) || // if we own the directory (reader->getDirectory() == this->ramDirectory)){ - segmentsToDelete.push_back((SegmentReader*)reader); // queue segment for deletion + segmentsToDelete.push_back(reader); // queue segment for deletion } } @@ -411,76 +386,91 @@ #ifdef _CL_DEBUG_INFO fprintf(_CL_DEBUG_INFO,"\n into %s (%d docs)\n",mergedName, mergedDocCount); #endif - - segmentInfos->clearto(minSegment); // pop old infos & add new - segmentInfos->add( _CLNEW SegmentInfo(mergedName, mergedDocCount, directory)); + segmentInfos->clearto(minSegment);// remove old infos & add new + segmentInfos->add( _CLNEW SegmentInfo(mergedName, mergedDocCount, directory) ); // close readers before we attempt to delete now-obsolete segments merger.closeReaders(); - LuceneLock* lock = directory->makeLock("commit.lock"); - IndexWriterLockWith2 with ( lock,LUCENE_COMMIT_LOCK_TIMEOUT,this,&segmentsToDelete ); + LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); + LockWith2 with ( lock, commitLockTimeout,this, &segmentsToDelete, true ); { SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync with.run(); } + _CLDELETE( lock ); - _CLDELETE( lock ); + + + if (useCompoundFile) { + char cmpdTmpName[CL_MAX_PATH]; + strcpy(cmpdTmpName,mergedName); + strcat(cmpdTmpName,".tmp"); + + AStringArrayWithDeletor filesToDelete; + merger.createCompoundFile(cmpdTmpName, filesToDelete); + + LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); + LockWithCFS with ( lock,commitLockTimeout,directory, this, mergedName, &filesToDelete); + { + SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync + with.run(); + } + _CLDELETE(lock); + } + _CLDELETE_CaARRAY( mergedName ); //ADD: } void IndexWriter::deleteSegments(CLVector<SegmentReader*>* segments) { - AStringArrayConstWithDeletor deletable; + AStringArrayWithDeletor deletable; - AStringArrayConstWithDeletor* deleteArray = readDeleteableFiles(); - deleteFiles(deleteArray, &deletable); // try to delete deleteable - _CLDELETE(deleteArray); + {//scope delete deleteArray object + AStringArrayWithDeletor deleteArray; + readDeleteableFiles(deleteArray); + deleteFiles(deleteArray, deletable); // try to delete deleteable + } + AStringArrayWithDeletor files; for (uint32_t i = 0; i < segments->size(); i++) { SegmentReader* reader = (*segments)[i]; - AStringArrayConstWithDeletor* files = reader->files(); + files.clear(); + reader->files(files); if (reader->getDirectory() == this->directory) - deleteFiles(files, &deletable); // try to delete our files + deleteFiles(files, deletable); // try to delete our files else deleteFiles(files, reader->getDirectory()); // delete, eg, RAM files - - _CLDELETE(files); } - writeDeleteableFiles(&deletable); // note files we can't delete + writeDeleteableFiles(deletable); // note files we can't delete } - AStringArrayConstWithDeletor* IndexWriter::readDeleteableFiles() { - AStringArrayConstWithDeletor* result = _CLNEW AStringArrayConstWithDeletor; - + void IndexWriter::readDeleteableFiles(AStringArrayWithDeletor& result) { if (!directory->fileExists("deletable")) - return result; + return; IndexInput* input = directory->openInput("deletable"); try { TCHAR tname[CL_MAX_PATH]; for (int32_t i = input->readInt(); i > 0; i--){ // read file names input->readString(tname,CL_MAX_PATH); - result->push_back(STRDUP_TtoA(tname)); + result.push_back(STRDUP_TtoA(tname)); } } _CLFINALLY( input->close(); _CLDELETE(input); ); - - - return result; } - void IndexWriter::writeDeleteableFiles(AStringArrayConstWithDeletor* files) { + void IndexWriter::writeDeleteableFiles(AStringArrayWithDeletor& files) { IndexOutput* output = directory->createOutput("deleteable.new"); try { - output->writeInt(files->size()); + output->writeInt(files.size()); TCHAR tfile[CL_MAX_PATH]; //temporary space for tchar file name - for (uint32_t i = 0; i < files->size(); i++){ - STRCPY_AtoT(tfile,(*files)[i],CL_MAX_PATH); + for (uint32_t i = 0; i < files.size(); i++){ + STRCPY_AtoT(tfile,files[i],CL_MAX_PATH); output->writeString( tfile, _tcslen(tfile) ); } } _CLFINALLY( @@ -491,33 +481,38 @@ directory->renameFile("deleteable.new", "deletable"); } - void IndexWriter::deleteFiles(AStringArrayConstWithDeletor* files, Directory* directory) { - AStringArrayConstWithDeletor::const_iterator itr = files->begin(); - while ( itr != files->end() ){ - directory->deleteFile( *itr ); + void IndexWriter::deleteFiles(AStringArrayWithDeletor& files){ + AStringArrayWithDeletor deletable; + AStringArrayWithDeletor currDeletable; + readDeleteableFiles(currDeletable); + deleteFiles(currDeletable, deletable); // try to delete deleteable + deleteFiles(files, deletable); // try to delete our files + writeDeleteableFiles(deletable); // note files we can't delete + } + + void IndexWriter::deleteFiles(AStringArrayWithDeletor& files, Directory* directory) { + AStringArrayWithDeletor::iterator itr = files.begin(); + while ( itr != files.end() ){ + directory->deleteFile( *itr, true ); ++itr; } } - void IndexWriter::deleteFiles(AStringArrayConstWithDeletor* files, AStringArrayConstWithDeletor* deletable) { - AStringArrayConstWithDeletor::const_iterator itr=files->begin(); - while ( itr != files->end() ){ + void IndexWriter::deleteFiles(AStringArrayWithDeletor& files, AStringArrayWithDeletor& deletable) { + AStringArrayWithDeletor::iterator itr=files.begin(); + while ( itr != files.end() ){ const char* file = *itr; - try { - if ( directory->fileExists(file) ) - directory->deleteFile(file); // try to delete each file - } catch (CLuceneError& err) { // if delete fails - if ( err.number() != CL_ERR_IO ) - throw err; //not an IO err... re-throw - - if (directory->fileExists(file)) { - #ifdef _CL_DEBUG_INFO - fprintf(_CL_DEBUG_INFO,"%s; Will re-try later.\n", err.what()); - #endif - deletable->push_back(STRDUP_AtoA(file)); // add to deletable + if ( getDirectory()->fileExists(file) ){ + if ( !getDirectory()->deleteFile(file, false) ){ + if (directory->fileExists(file)) { + #ifdef _CL_DEBUG_INFO + fprintf(_CL_DEBUG_INFO,"%s; Will re-try later.\n", err.what()); + #endif + deletable.push_back(STRDUP_AtoA(file)); // add to deletable + } } } - ++itr; + ++itr; } } @@ -537,61 +532,135 @@ // start with zero or 1 seg so optimize the current optimize(); + + int32_t start = segmentInfos->size(); //Iterate through the directories - int32_t i = 0; + int32_t i = 0; while ( dirs[i] != NULL ) { // DSR: Changed SegmentInfos constructor arg (see bug discussion below). SegmentInfos sis(false); sis.read( dirs[i]); for (int32_t j = 0; j < sis.size(); j++) { - /* DSR:CL_BUG: - ** In CLucene 0.8.11, the next call placed a pointer to a SegmentInfo - ** object from stack variable $sis into the vector this->segmentInfos. - ** Then, when the call to optimize() is made just before exiting this - ** function, $sis had already been deallocated (and has deleted its - ** member objects), leaving dangling pointers in this->segmentInfos. - ** I added a SegmentInfos constructor that allowed me to order it not - ** to delete its members, invoked the new constructor form above for - ** $sis, and the problem was solved. */ - segmentInfos->add(sis.info(j)); // add each info + segmentInfos->add(sis.info(j)); // add each info } i++; } + + // merge newly added segments in log(n) passes + while (segmentInfos->size() > start+mergeFactor) { + for (int32_t base = start; base < segmentInfos->size(); base++) { + int32_t end = min(segmentInfos->size(), base+mergeFactor); + if (end-base > 1) + mergeSegments(base, end); + } + } + optimize(); // cleanup } void IndexWriter::addIndexes(IndexReader** readers){ - SCOPED_LOCK_MUTEX(THIS_LOCK) + SCOPED_LOCK_MUTEX(THIS_LOCK) optimize(); // start with zero or 1 seg char* mergedName = newSegmentName(); - SegmentMerger* merger = _CLNEW SegmentMerger(directory, mergedName, false); + SegmentMerger merger(this, mergedName); - if (segmentInfos->size() == 1) // add existing index, if any - merger->add(_CLNEW SegmentReader(segmentInfos->info(0))); + CLVector<SegmentReader*> segmentsToDelete; + SegmentReader* sReader = NULL; + if (segmentInfos->size() == 1){ // add existing index, if any + sReader = _CLNEW SegmentReader(segmentInfos->info(0)); + merger.add(sReader); + segmentsToDelete.push_back(sReader); // queue segment for deletion + } int32_t readersLength = 0; while ( readers[readersLength] != NULL ) - merger->add((SegmentReader*) readers[readersLength++]); + merger.add(readers[readersLength++]); - int32_t docCount = merger->merge(); // merge 'em + int32_t docCount = merger.merge(); // merge 'em // pop old infos & add new segmentInfos->clearto(0); segmentInfos->add(_CLNEW SegmentInfo(mergedName, docCount, directory)); - LuceneLock* lock = directory->makeLock("commit.lock"); - IndexWriterLockWith with ( lock,LUCENE_COMMIT_LOCK_TIMEOUT,this,true); + if ( sReader != NULL ){ + sReader->close(); + _CLDELETE(sReader); + } + LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); + LockWith2 with ( lock,commitLockTimeout,this, &segmentsToDelete, true); { SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync with.run(); } + _CLDELETE(lock); - _CLDELETE(lock); - } + if (useCompoundFile) { + char cmpdTmpName[CL_MAX_PATH]; + strcpy(cmpdTmpName,mergedName); + strcat(cmpdTmpName,".tmp"); + AStringArrayWithDeletor filesToDelete; + merger.createCompoundFile(cmpdTmpName, filesToDelete); + + LuceneLock* cfslock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME); + LockWithCFS with ( lock,commitLockTimeout,directory, this, mergedName, &filesToDelete); + { + SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync + with.run(); + } + _CLDELETE(cfslock); + } + } + + + + + void* IndexWriter::LockWith2::doBody() { + //Func - Writes segmentInfos to or reads segmentInfos from disk + //Pre - writer != NULL + //Post - if create is true then segementInfos has been written to disk otherwise + // segmentInfos has been read from disk + + CND_PRECONDITION(writer != NULL, "writer is NULL"); + + if (create){ + writer->segmentInfos->write(writer->getDirectory()); + if ( segmentsToDelete != NULL ) + writer->deleteSegments(segmentsToDelete); // delete now-unused segments + }else + writer->segmentInfos->read(writer->getDirectory()); + + return NULL; + } + + void* IndexWriter::LockWithCFS::doBody() { + //Func - Writes segmentInfos to or reads segmentInfos from disk + //Pre - writer != NULL + //Post - if create is true then segementInfos has been written to disk otherwise + // segmentInfos has been read from disk + + CND_PRECONDITION(directory != NULL, "directory is NULL"); + CND_PRECONDITION(segName != NULL, "mergedName is NULL"); + + char from[CL_MAX_PATH]; + char nu[CL_MAX_PATH]; + + strcpy(from,segName); + strcat(from,".tmp"); + strcpy(nu,segName); + strcat(nu,".cfs"); + + // make compound file visible for SegmentReaders + directory->renameFile(from, nu); + // delete now unused files of segment + writer->deleteFiles(*filesToDelete); + + return NULL; + } + CL_NS_END Modified: trunk/src/CLucene/index/IndexWriter.h =================================================================== --- trunk/src/CLucene/index/IndexWriter.h 2006-10-10 21:00:50 UTC (rev 2329) +++ trunk/src/CLucene/index/IndexWriter.h 2006-10-10 21:08:10 UTC (rev 2330) @@ -21,278 +21,403 @@ CL_NS_DEF(index) - /// An IndexWriter creates and maintains an index. - /// - /// The third argument to the <a href="#IndexWriter"><b>constructor</b></a> - /// determines whether a new index is created, or whether an existing index is - /// opened for the addition of new documents. - /// - /// In either case, documents are added with the <a - /// href="#addDocument"><b>addDocument</b></a> method. When finished adding - /// documents, <a href="#close"><b>close</b></a> should be called. - /// - /// If an index will not have more documents added for a while and optimal search - /// performance is desired, then the <a href="#optimize"><b>optimize</b></a> - /// method should be called before the index is closed. - class IndexWriter:LUCENE_BASE { - private: - // where this index resides - CL_NS(store)::Directory* directory; - // how to analyze text - CL_NS(analysis)::Analyzer* analyzer; +/** +An IndexWriter creates and maintains an index. + +The third argument to the +<a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a> +determines whether a new index is created, or whether an existing index is +opened for the addition of new documents. + +In either case, documents are added with the <a +href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a> method. +When finished adding documents, <a href="#close()"><b>close</b></a> should be called. + +<p>If an index will not have more documents added for a while and optimal search +performance is desired, then the <a href="#optimize()"><b>optimize</b></a> +method should be called before the index is closed. + +<p>Opening an IndexWriter creates a lock file for the directory in use. Trying to open +another IndexWriter on the same directory will lead to an IOException. The IOException +is also thrown if an IndexReader on the same directory is used to delete documents +from the index. + +@see IndexModifier IndexModifier supports the important methods of IndexWriter plus deletion +*/ +class IndexWriter:LUCENE_BASE { + class LockWith2:public CL_NS(store)::LuceneLockWith{ public: - // Release the write lock, if needed. - SegmentInfos* segmentInfos; - private: - bool closeDir; + CL_NS(util)::CLVector<SegmentReader*>* segmentsToDelete; + IndexWriter* writer; + bool create; + void* doBody(); + LockWith2(CL_NS(store)::LuceneLock* lock, int64_t lockWaitTimeout, + IndexWriter* wr, + CL_NS(util)::CLVector<SegmentReader*>* std, + bool create): + CL_NS(store)::LuceneLockWith(lock,lockWaitTimeout) + { + this->writer = wr; + this->segmentsToDelete = std; + this->create = create; + } + ~LockWith2(){ + } + }; + class LockWithCFS:public CL_NS(store)::LuceneLockWith{ + public: + CL_NS(store)::Directory* directory; + IndexWriter* writer; + const char* segName; + CL_NS(util)::AStringArrayWithDeletor* filesToDelete; + void* doBody(); + LockWithCFS(CL_NS(store)::LuceneLock* lock, int64_t lockWaitTimeout, + CL_NS(store)::Directory* dir, + IndexWriter* wr, + const char* segName, + CL_NS(util)::AStringArrayWithDeletor* ftd): + CL_NS(store)::LuceneLockWith(lock,lockWaitTimeout) + { + this->segName = segName; + this->directory = dir; + this->writer = wr; + this->filesToDelete = ftd; + } + ~LockWithCFS(){ + } + }; - bool isOpen; //indicates if the writers is open - this way close can be called multiple times - CL_NS(search)::Similarity* similarity; // how to normalize + bool isOpen; //indicates if the writers is open - this way close can be called multiple times - /** Use compound file setting. Defaults to true, minimizing the number of - * files used. Setting this to false may improve indexing performance, but - * may also cause file handle problems. - */ - bool useCompoundFile; + // how to analyze text + CL_NS(analysis)::Analyzer* analyzer; - CL_NS(store)::TransactionalRAMDirectory* ramDirectory; // for temp segs + CL_NS(search)::Similarity* similarity; // how to normalize - CL_NS(store)::LuceneLock* writeLock; + /** Use compound file setting. Defaults to true, minimizing the number of + * files used. Setting this to false may improve indexing performance, but + * may also cause file handle problems. + */ + bool useCompoundFile; + bool closeDir; - void _IndexWriter(const bool create); + CL_NS(store)::TransactionalRAMDirectory* ramDirec... [truncated message content] |