[Clucene-cvs] SF.net SVN: clucene: [2330] trunk/src/CLucene/index

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Revision: 2330
          http://svn.sourceforge.net/clucene/?rev=2330&view=rev
Author:   ustramooner
Date:     2006-10-10 14:08:10 -0700 (Tue, 10 Oct 2006)

Log Message:
-----------
jlucene 2.0 changes

Modified Paths:
--------------
    trunk/src/CLucene/index/IndexReader.cpp
    trunk/src/CLucene/index/IndexReader.h
    trunk/src/CLucene/index/IndexWriter.cpp
    trunk/src/CLucene/index/IndexWriter.h
    trunk/src/CLucene/index/MultiReader.cpp
    trunk/src/CLucene/index/MultiReader.h
    trunk/src/CLucene/index/SegmentHeader.h

Modified: trunk/src/CLucene/index/IndexReader.cpp
===================================================================

--- trunk/src/CLucene/index/IndexReader.cpp	2006-10-10 21:00:50 UTC (rev 2329)
+++ trunk/src/CLucene/index/IndexReader.cpp	2006-10-10 21:08:10 UTC (rev 2330)
@@ -6,6 +6,7 @@
 ------------------------------------------------------------------------------*/
 #include "CLucene/StdHeader.h"
 #include "IndexReader.h"
+#include "IndexWriter.h"
 
 #include "CLucene/store/Directory.h"
 #include "CLucene/store/FSDirectory.h"
@@ -176,7 +177,17 @@
   }
 
   int64_t IndexReader::getCurrentVersion(Directory* directory) {
-     return SegmentInfos::readCurrentVersion(directory);
+	SCOPED_LOCK_MUTEX(directory->THIS_LOCK)                 // in- & inter-process sync
+	LuceneLock* commitLock=directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
+	bool locked=false;
+	try {
+		locked=commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT);
+		return SegmentInfos::readCurrentVersion(directory);
+	}_CLFINALLY(
+		if (locked) {
+			commitLock->release();
+		}
+	)
   }
 
 
@@ -187,7 +198,24 @@
       _CLDECDELETE(dir);
       return version;
    }
-    
+    int64_t IndexReader::getVersion() {
+		return segmentInfos->getVersion();
+	}
+	
+	bool IndexReader::isCurrent() {
+		SCOPED_LOCK_MUTEX(directory->THIS_LOCK)                 // in- & inter-process sync
+		LuceneLock* commitLock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
+		bool locked=false;
+		try {
+			locked=commitLock->obtain(IndexWriter::COMMIT_LOCK_TIMEOUT);
+			return SegmentInfos::readCurrentVersion(directory) == segmentInfos->getVersion();
+		} _CLFINALLY(
+			if (locked) {
+				commitLock->release();
+			}
+		)
+	}
+
   uint64_t IndexReader::lastModified(const Directory* directory) {
   //Func - Static method
   //       Returns the time the index in this directory was last modified. 
@@ -209,7 +237,7 @@
        CND_PRECONDITION(directory != NULL, "directory is NULL");
 
 	   //Create a buffer of length CL_MAXDIR
-       char f[CL_MAX_PATH+10]; //add 10 in case that directory is already 260 long
+       char f[CL_MAX_PATH]; //todo: potential buffer overflow
 	   //Copy the directory string to the buffer
        strcpy(f,directory);
 	   //Cat the name of the segments to buffer
@@ -233,7 +261,7 @@
 
     if (writeLock == NULL) {
       LuceneLock* writeLock = directory->makeLock("write.lock");
-      if (!writeLock->obtain(LUCENE_WRITE_LOCK_TIMEOUT)) // obtain write lock
+      if (!writeLock->obtain(IndexWriter::WRITE_LOCK_TIMEOUT)) // obtain write lock
        _CLTHROWA(CL_ERR_IO,"Index locked for write"); // + writeLock
       this->writeLock = writeLock;
 
@@ -460,7 +488,7 @@
       CND_PRECONDITION(directory != NULL, "directory is NULL");
 
 	  //Create a buffer of length CL_MAXDIR
-      char f[CL_MAX_PATH+12]; //add 12 in case that directory is already 260 long
+      char f[CL_MAX_PATH]; //todo: potential buffer overflow
 	  //Copy the directory string to the buffer
       strcpy(f,directory);
 	  //Cat the name of the write.lock file to buffer
@@ -473,13 +501,20 @@
 
 	  return ret;
   }
+  
+/** Returns true if there are norms stored for this field. */
+bool IndexReader::hasNorms(const TCHAR* field) {
+	// backward compatible implementation.
+	// SegmentReader has an efficient implementation.
+	return norms(field) != NULL;
+}
 
-  void IndexReader::unlock(const char* path){
-	  FSDirectory* dir = FSDirectory::getDirectory(path,false);
-	  unlock(dir);
-	  dir->close();
-	  _CLDECDELETE(dir);
-  }
+void IndexReader::unlock(const char* path){
+	FSDirectory* dir = FSDirectory::getDirectory(path,false);
+	unlock(dir);
+	dir->close();
+	_CLDECDELETE(dir);
+}
   void IndexReader::unlock(Directory* directory){
   //Func - Static method
   //       Forcibly unlocks the index in the named directory->
@@ -499,6 +534,57 @@
       _CLDELETE(lock);
   }
 
+bool IndexReader::isLuceneFile(const char* filename){
+	if ( !filename )
+		return false;
+	size_t len = strlen(filename);
+	if ( len < 6 ) //need at least x.frx
+		return false;
+	const char* ext = filename + len - 4;
+
+	if ( strcmp(ext, ".cfs") == 0 )
+		return true;
+	else if ( strcmp(ext, ".fnm") == 0 )
+		return true;
+	else if ( strcmp(ext, ".fdx") == 0 )
+		return true;
+	else if ( strcmp(ext, ".fdt") == 0 )
+		return true;
+	else if ( strcmp(ext, ".tii") == 0 )
+		return true;
+	else if ( strcmp(ext, ".tis") == 0 )
+		return true;
+	else if ( strcmp(ext, ".frq") == 0 )
+		return true;
+	else if ( strcmp(ext, ".prx") == 0 )
+		return true;
+	else if ( strcmp(ext, ".del") == 0 )
+		return true;
+	else if ( strcmp(ext, ".tvx") == 0 )
+		return true;
+	else if ( strcmp(ext, ".tvd") == 0 )
+		return true;
+	else if ( strcmp(ext, ".tvf") == 0 )
+		return true;
+	else if ( strcmp(ext, ".tvp") == 0 )
+		return true;
+
+	else if ( strcmp(filename, "segments") == 0 )
+		return true;
+	else if ( strcmp(filename, "segments.new") == 0 )
+		return true;
+	else if ( strcmp(filename, "deletable") == 0 )
+		return true;
+
+	else if ( strncmp(ext,".f",2)==0 ){
+		const char* n = ext+2;
+		if ( *n && _istdigit(*n) )
+			return true;	
+	}
+
+	return false;
+}
+
 	void IndexReader::addCloseCallback(CloseCallback callback, void* parameter){
 		closeCallbacks.put(callback, parameter);	
 	}
@@ -506,13 +592,13 @@
 
 	//Constructor	
     IndexReader::IndexReaderLockWith::IndexReaderLockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir):
-		CL_NS(store)::LuceneLockWith(lock,LUCENE_COMMIT_LOCK_TIMEOUT)
+		CL_NS(store)::LuceneLockWith(lock,IndexWriter::COMMIT_LOCK_TIMEOUT)
 	{
 		this->directory = dir;
 	}	
 	//Constructor	
 	IndexReader::IndexReaderCommitLockWith::IndexReaderCommitLockWith( CL_NS(store)::LuceneLock* lock, IndexReader* r ):
-		CL_NS(store)::LuceneLockWith(lock,LUCENE_COMMIT_LOCK_TIMEOUT),
+		CL_NS(store)::LuceneLockWith(lock,IndexWriter::COMMIT_LOCK_TIMEOUT),
 		reader(r)
 	{
 	}

Modified: trunk/src/CLucene/index/IndexReader.h
===================================================================
--- trunk/src/CLucene/index/IndexReader.h	2006-10-10 21:00:50 UTC (rev 2329)
+++ trunk/src/CLucene/index/IndexReader.h	2006-10-10 21:08:10 UTC (rev 2330)
@@ -28,377 +28,445 @@
  so that any subclass which implements it is searchable.
 
  <p> Concrete subclasses of IndexReader are usually constructed with a call to
- the static method {@link #open}.
+ one of the static <code>open()</code> methods, e.g. {@link #open(String)}.
 
  <p> For efficiency, in this API documents are often referred to via
  <i>document numbers</i>, non-negative integers which each name a unique
  document in the index.  These document numbers are ephemeral--they may change
  as documents are added to and deleted from an index.  Clients should thus not
  rely on a given document having the same number between sessions.
+ 
+ <p> An IndexReader can be opened on a directory for which an IndexWriter is
+ opened already, but it cannot be used to delete documents from the index then.
+*/
+class IndexReader :LUCENE_BASE{
+public:
+	//Callback for classes that need to know if IndexReader is closing.
+	typedef void (*CloseCallback)(IndexReader*, void*);
 
-*/
-	class IndexReader :LUCENE_BASE{
+	class CloseCallbackCompare:public CL_NS(util)::Compare::_base{
 	public:
-		//Callback for classes that need to know if IndexReader is closing.
-		typedef void (*CloseCallback)(IndexReader*, void*);
+		bool operator()( CloseCallback t1, CloseCallback t2 ) const{
+			return t1 > t2;
+		}
+		static void doDelete(CloseCallback dummy){
+		}
+	};
+	
+	
+	enum FieldOption {
+		// all fields
+		ALL = 1,
+		// all indexed fields
+		INDEXED = 2,
+		// all fields which are not indexed
+		UNINDEXED = 4,
+		// all fields which are indexed with termvectors enabled
+		INDEXED_WITH_TERMVECTOR = 8,
+		// all fields which are indexed but don't have termvectors enabled
+		INDEXED_NO_TERMVECTOR = 16,
+		// all fields where termvectors are enabled. Please note that only standard termvector fields are returned
+		TERMVECTOR = 32,
+		// all fields with termvectors with positions enabled
+		TERMVECTOR_WITH_POSITION = 64,
+		// all fields where termvectors with offset position are set
+		TERMVECTOR_WITH_OFFSET = 128,
+		// all fields where termvectors with offset and position values set
+		TERMVECTOR_WITH_POSITION_OFFSET = 256
+	};
 
-		class CloseCallbackCompare:public CL_NS(util)::Compare::_base{
-		public:
-			bool operator()( CloseCallback t1, CloseCallback t2 ) const{
-				return t1 > t2;
-			}
-			static void doDelete(CloseCallback dummy){
-			}
-		};
 
-	private:
-		CL_NS(store)::LuceneLock* writeLock;
+private:
+	CL_NS(store)::LuceneLock* writeLock;
 
-        bool directoryOwner;
-        bool stale;
-        bool hasChanges;
-        bool closeDirectory;
+    bool directoryOwner;
+    bool stale;
+    bool hasChanges;
+    bool closeDirectory;
 
-        CL_NS(store)::Directory* directory;
-		typedef CL_NS(util)::CLSet<CloseCallback, void*, 
-			CloseCallbackCompare,
-			CloseCallbackCompare> CloseCallbackMap;
-		CloseCallbackMap closeCallbacks;
-	  
-        /**
-        * Trys to acquire the WriteLock on this directory.
-        * this method is only valid if this IndexReader is directory owner.
-        * 
-        * @throws IOException If WriteLock cannot be acquired.
-        */
-        void aquireWriteLock();
-	protected:
-        /**
-        * Constructor used if IndexReader is not owner of its directory. 
-        * This is used for IndexReaders that are used within other IndexReaders that take care or locking directories.
-        * 
-        * @param directory Directory where IndexReader files reside.
-        */
-	    IndexReader(CL_NS(store)::Directory* dir);
+    CL_NS(store)::Directory* directory;
+	typedef CL_NS(util)::CLSet<CloseCallback, void*, 
+		CloseCallbackCompare,
+		CloseCallbackCompare> CloseCallbackMap;
+	CloseCallbackMap closeCallbacks;
+  
+    /**
+	* Tries to acquire the WriteLock on this directory.
+	* this method is only valid if this IndexReader is directory owner.
+	* 
+	* @throws IOException If WriteLock cannot be acquired.
+	*/
+    void aquireWriteLock();
+protected:
+    /**
+    * Constructor used if IndexReader is not owner of its directory. 
+    * This is used for IndexReaders that are used within other IndexReaders that take care of locking directories.
+    * 
+    * @param directory Directory where IndexReader files reside.
+    */
+    IndexReader(CL_NS(store)::Directory* dir);
 
-	    /**
-        * Constructor used if IndexReader is owner of its directory.
-        * If IndexReader is owner of its directory, it locks its directory in case of write operations.
-        * 
-        * @param directory Directory where IndexReader files reside.
-        * @param segmentInfos Used for write-l
-        * @param closeDirectory
-        */
-		IndexReader(CL_NS(store)::Directory* directory, SegmentInfos* segmentInfos, bool closeDirectory);
-		
+    /**
+    * Constructor used if IndexReader is owner of its directory.
+    * If IndexReader is owner of its directory, it locks its directory in case of write operations.
+    * 
+    * @param directory Directory where IndexReader files reside.
+    * @param segmentInfos Used for write-l
+    * @param closeDirectory
+    */
+	IndexReader(CL_NS(store)::Directory* directory, SegmentInfos* segmentInfos, bool closeDirectory);
+	
 
-		/// Implements close. 
-		virtual void doClose() = 0;
+	/// Implements close. 
+	virtual void doClose() = 0;
 
-        /** Implements setNorm in subclass.*/
-        virtual void doSetNorm(int32_t doc, const TCHAR* field, uint8_t value) = 0;
-          
-        /** Implements actual undeleteAll() in subclass. */
-        virtual void doUndeleteAll() = 0;
-
-
-        /** Implements deletion of the document numbered <code>docNum</code>.
-        * Applications should call {@link #delete(int32_t)} or {@link #delete(Term)}.
-        */
-	    virtual void doDelete(const int32_t docNum) = 0;
-
-	public:
+    /** Implements setNorm in subclass.*/
+    virtual void doSetNorm(int32_t doc, const TCHAR* field, uint8_t value) = 0;
       
-      DEFINE_MUTEX(THIS_LOCK)
+    /** Implements actual undeleteAll() in subclass. */
+    virtual void doUndeleteAll() = 0;
 
-      ///Do not access this directly, only public so that MultiReader can access it
-      virtual void commit();
 
+    /** Implements deletion of the document numbered <code>docNum</code>.
+	* Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}.
+	*/
+    virtual void doDelete(const int32_t docNum) = 0;
 
-  		/** Undeletes all documents currently marked as deleted in this index.*/
-      void undeleteAll();
+public:
 
-      /**
-      * Returns a list of all unique field names that exist in the index pointed
-      * to by this IndexReader.
-      * @memory All memory must be cleaned by caller
-      * @return Collection of Strings indicating the names of the fields
-      * @throws IOException if there is a problem with accessing the index
-      */
-      virtual TCHAR** getFieldNames() = 0;
+	DEFINE_MUTEX(THIS_LOCK)
+	
+	///Do not access this directly, only public so that MultiReader can access it
+	virtual void commit();
+	
+	
+	/** Undeletes all documents currently marked as deleted in this index.*/
+	void undeleteAll();
 
-      /**
-      * Returns a list of all unique field names that exist in the index pointed
-      * to by this IndexReader.  The boolean argument specifies whether the fields
-      * returned are indexed or not.
-      * @memory All memory must be cleaned by caller
-      * @param indexed <code>true</code> if only indexed fields should be returned;
-      *                <code>false</code> if only unindexed fields should be returned.
-      * @return Collection of Strings indicating the names of the fields
-      * @throws IOException if there is a problem with accessing the index
-      */
-      virtual TCHAR** getFieldNames(bool indexed) = 0;
+	/**
+	* Get a list of unique field names that exist in this index and have the specified
+	* field option information.
+	* @param fldOption specifies which field option should be available for the returned fields
+	* @param retarray receives the names of the matching fields.
+	* @see IndexReader.FieldOption
+	*/
+	virtual void getFieldNames(FieldOption fldOption, CL_NS(util)::StringArrayWithDeletor& retarray) = 0;
 
-      /**
-      * 
-      * @memory All memory must be cleaned by caller
-      * @param storedTermVector if true, returns only Indexed fields that have term vector info, 
-      *                        else only indexed fields without term vector info 
-      * @return Collection of Strings indicating the names of the fields
-      */ 
-      virtual TCHAR** getIndexedFieldNames(bool storedTermVector) = 0;
+	/** Returns the byte-encoded normalization factor for the named field of
+	* every document.  This is used by the search code to score documents.
+	*
+	* The number of bytes returned is the size of the IndexReader->maxDoc()
+	* MEMORY: The values are cached, so don't delete the returned byte array.
+	* @see Field#setBoost(float_t)
+	*/
+	virtual uint8_t* norms(const TCHAR* field) = 0;
+	
+	
+	/** Reads the byte-encoded normalization factor for the named field of every
+	*  document.  This is used by the search code to score documents.
+	*
+	* @see Field#setBoost(float_t)
+	*/
+	virtual void norms(const TCHAR* field, uint8_t* bytes, const int32_t offset) = 0;
 
+    /** Expert: Resets the normalization factor for the named field of the named
+    * document.
+    *
+    * @see #norms(String)
+    * @see Similarity#decodeNorm(byte)
+    */
+    void setNorm(int32_t doc, const TCHAR* field, float_t value);
+  
+    /** Expert: Resets the normalization factor for the named field of the named
+    * document.  The norm represents the product of the field's {@link
+    * Field#setBoost(float_t) boost} and its {@link Similarity#lengthNorm(String,
+    * int32_t) length normalization}.  Thus, to preserve the length normalization
+    * values when resetting this, one should base the new value upon the old.
+    *
+    * @see #norms(String)
+    * @see Similarity#decodeNorm(byte)
+    */
+    void setNorm(int32_t doc, const TCHAR* field, uint8_t value);
 
-      /** Returns the byte-encoded normalization factor for the named field of
-      * every document.  This is used by the search code to score documents.
-      *
-	  * The number of bytes returned is the size of the IndexReader->maxDoc()
-	  * MEMORY: The values are cached, so don't delete the returned byte array.
-      * @see Field#setBoost(float_t)
-      */
-		virtual uint8_t* norms(const TCHAR* field) = 0;
+	/// Release the write lock, if needed. 
+    virtual ~IndexReader();
 
+	/// Returns an IndexReader reading the index in an FSDirectory in the named path. 
+	static IndexReader* open(const char* path);
 
-      /** Reads the byte-encoded normalization factor for the named field of every
-      *  document.  This is used by the search code to score documents.
-      *
-      * @see Field#setBoost(float_t)
-      */
-      virtual void norms(const TCHAR* field, uint8_t* bytes) = 0;
+	/// Returns an IndexReader reading the index in the given Directory. 
+	static IndexReader* open( CL_NS(store)::Directory* directory, bool closeDirectory=false);
 
+	/** 
+	* Returns the time the index in the named directory was last modified.
+	* Do not use this to check whether the reader is still up-to-date, use
+	* {@link #isCurrent()} instead. 
+	*/
+	static uint64_t lastModified(const char* directory);
 
+	/** 
+	* Returns the time the index in the named directory was last modified. 
+	* Do not use this to check whether the reader is still up-to-date, use
+	* {@link #isCurrent()} instead. 
+	*/
+	static uint64_t lastModified(const CL_NS(store)::Directory* directory);
 
-        /** Expert: Resets the normalization factor for the named field of the named
-        * document.
-        *
-        * @see #norms(String)
-        * @see Similarity#decodeNorm(byte)
-        */
-        void setNorm(int32_t doc, const TCHAR* field, float_t value);
-      
-	    /** Expert: Resets the normalization factor for the named field of the named
-	    * document.  The norm represents the product of the field's {@link
-	    * Field#setBoost(float_t) boost} and its {@link Similarity#lengthNorm(String,
-	    * int32_t) length normalization}.  Thus, to preserve the length normalization
-	    * values when resetting this, one should base the new value upon the old.
-	    *
-	    * @see #norms(String)
-	    * @see Similarity#decodeNorm(byte)
-	    */
-        void setNorm(int32_t doc, const TCHAR* field, uint8_t value);
-
-		/// Release the write lock, if needed. 
-	    virtual ~IndexReader();
-
-		/// Returns an IndexReader reading the index in an FSDirectory in the named path. 
-		static IndexReader* open(const char* path);
-
-		/// Returns an IndexReader reading the index in the given Directory. 
-		static IndexReader* open( CL_NS(store)::Directory* directory, bool closeDirectory=false);
-
-		/** 
-        * Returns the time the index in the named directory was last modified. 
-        * 
-        * <p>Synchronization of IndexReader and IndexWriter instances is 
-        * no longer done via time stamps of the segments file since the time resolution 
-        * depends on the hardware platform. Instead, a version number is maintained
-        * within the segments file, which is incremented everytime when the index is
-        * changed.</p>
-        * 
-        * @deprecated  Replaced by {@link #getCurrentVersion(String)}
-        */
-		static uint64_t lastModified(const char* directory);
-
-		/** 
-      * Returns the time the index in the named directory was last modified. 
-      * 
-      * <p>Synchronization of IndexReader and IndexWriter instances is 
-      * no longer done via time stamps of the segments file since the time resolution 
-      * depends on the hardware platform. Instead, a version number is maintained
-      * within the segments file, which is incremented everytime when the index is
-      * changed.</p>
-      * 
-      * @deprecated  Replaced by {@link #getCurrentVersion(Directory)}
-      * */
-		static uint64_t lastModified(const CL_NS(store)::Directory* directory);
-
-
-  /**
-   * Reads version number from segments files. The version number counts the
-   * number of changes of the index.
+	
+	/**
+	* Reads version number from segments files. The version number is
+	* initialized with a timestamp and then increased by one for each change of
+	* the index.
+	* 
+	* @param directory where the index resides.
+	* @return version number.
+	* @throws IOException if segments file cannot be read
+	*/
+	static int64_t getCurrentVersion(CL_NS(store)::Directory* directory);
+	
+	/**
+   * Reads version number from segments files. The version number is
+   * initialized with a timestamp and then increased by one for each change of
+   * the index.
    * 
    * @param directory where the index resides.
    * @return version number.
-   * @throws IOException if segments file cannot be read.
+   * @throws IOException if segments file cannot be read
    */
-      static int64_t getCurrentVersion(CL_NS(store)::Directory* directory);
-      	
+	static int64_t getCurrentVersion(const char* directory);
+	
 	/**
-	* Reads version number from segments files. The version number counts the
-	* number of changes of the index.
+	* Version number when this IndexReader was opened.
+	*/
+	int64_t getVersion();
+	
+	/**
+	* Check whether this IndexReader still works on a current version of the index.
+	* If this is not the case you will need to re-open the IndexReader to
+	* make sure you see the latest changes made to the index.
 	* 
-	* @param directory where the index resides.
-	* @return version number.
-	* @throws IOException if segments file cannot be read
+	* @throws IOException
 	*/
-      static int64_t getCurrentVersion(const char* directory);
+	bool isCurrent();
 
-      
-      /** Return an array of term frequency vectors for the specified document.
-      *  The array contains a vector for each vectorized field in the document.
-      *  Each vector contains terms and frequencies for all terms
-      *  in a given vectorized field.
-      *  If no such fields existed, the method returns null.
-      *
-      * @see Field#isTermVectorStored()
-      */
-      virtual TermFreqVector** getTermFreqVectors(int32_t docNumber) =0;
 
-      /** Return a term frequency vector for the specified document and field. The
-      *  vector returned contains terms and frequencies for those terms in
-      *  the specified field of this document, if the field had storeTermVector
-      *  flag set.  If the flag was not set, the method returns null.
-      *
-      * @see Field#isTermVectorStored()
-      */
-      virtual TermFreqVector* getTermFreqVector(int32_t docNumber, const TCHAR* field) = 0;
-		
-		///Checks if an index exists in the named directory
-		static bool indexExists(const char* directory);
+	/**
+	*  Return an array of term frequency vectors for the specified document.
+	*  The array contains a vector for each vectorized field in the document.
+	*  Each vector contains terms and frequencies for all terms in a given vectorized field.
+	*  If no such fields existed, the method returns null. The term vectors that are
+	* returned may either be of type TermFreqVector or of type TermPositionsVector if
+	* positions or offsets have been stored.
+	* 
+	* @param docNumber document for which term frequency vectors are returned
+	* @return array of term frequency vectors. May be null if no term vectors have been
+	*  stored for the specified document.
+	* @throws IOException if index cannot be accessed
+	* @see org.apache.lucene.document.Field.TermVector
+	*/
+	virtual TermFreqVector** getTermFreqVectors(int32_t docNumber) =0;
+	
+	/**
+	*  Return a term frequency vector for the specified document and field. The
+	*  returned vector contains terms and frequencies for the terms in
+	*  the specified field of this document, if the field had the storeTermVector
+	*  flag set. If termvectors had been stored with positions or offsets, a 
+	*  TermPositionsVector is returned.
+	* 
+	* @param docNumber document for which the term frequency vector is returned
+	* @param field field for which the term frequency vector is returned.
+	* @return term frequency vector May be null if field does not exist in the specified
+	* document or term vector was not stored.
+	* @throws IOException if index cannot be accessed
+	* @see org.apache.lucene.document.Field.TermVector
+	*/
+	virtual TermFreqVector* getTermFreqVector(int32_t docNumber, const TCHAR* field) = 0;
+	
+	/**
+	* Returns <code>true</code> if an index exists at the specified directory.
+	* Returns <code>false</code> if the directory does not exist or if there is no index in it.
+	* @param  directory the directory to check for an index
+	* @return <code>true</code> if an index exists; <code>false</code> otherwise
+	*/
+	static bool indexExists(const char* directory);
 
-        //Checks if an index exists in the directory
-		static bool indexExists(const CL_NS(store)::Directory* directory);
+    /**
+	* Returns <code>true</code> if an index exists at the specified directory.
+	* Returns <code>false</code> if the directory does not exist or if there is no index in it.
+	* @param  directory the directory to check for an index
+	* @return <code>true</code> if an index exists; <code>false</code> otherwise
+	* @throws IOException if there is a problem with accessing the index
+	*/
+	static bool indexExists(const CL_NS(store)::Directory* directory);
 
-		///Returns the number of documents in this index. 
-		virtual int32_t numDocs() = 0;
+	/** Returns the number of documents in this index. */
+  	virtual int32_t numDocs() = 0;
 
-		///Returns one greater than the largest possible document number.
-		///This may be used to, e.g., determine how big to allocate an array which
-		///will have an element for every document number in an index.
-		virtual int32_t maxDoc() const = 0;
+	/** Returns one greater than the largest possible document number.
+	* This may be used to, e.g., determine how big to allocate an array which
+	* will have an element for every document number in an index.
+	*/
+	virtual int32_t maxDoc() const = 0;
 
-		///Returns the stored fields of the n-th Document in this index. 
-		virtual CL_NS(document)::Document* document(const int32_t n) =0;
+	/** Returns the stored fields of the <code>n</code><sup>th</sup>
+   	<code>Document</code> in this index. */ 
+	virtual CL_NS(document)::Document* document(const int32_t n) =0;
 
-		///Returns true if document n has been deleted 
-		virtual bool isDeleted(const int32_t n) = 0;
+	/** Returns true if document <i>n</i> has been deleted */
+  	virtual bool isDeleted(const int32_t n) = 0;
 
-		/** Returns true if any documents have been deleted */
-		virtual bool hasDeletions() = 0;
+	/** Returns true if any documents have been deleted */
+	virtual bool hasDeletions() = 0;
 
-		///Returns an enumeration of all the terms in the index.
-		///The enumeration is ordered by Term.compareTo().  Each term
-		///is greater than all that precede it in the enumeration.
-		virtual TermEnum* terms() const =0;
+	/** Returns true if there are norms stored for this field. */
+	virtual bool hasNorms(const TCHAR* field);
 
-		///Returns an enumeration of all terms after a given term.
-		///The enumeration is ordered by Term.compareTo().  Each term
-		///is greater than all that precede it in the enumeration.
-		virtual TermEnum* terms(const Term* t) const = 0;
+	/** Returns an enumeration of all the terms in the index.
+	* The enumeration is ordered by Term.compareTo().  Each term
+	* is greater than all that precede it in the enumeration.
+	*/
+	virtual TermEnum* terms() const =0;
 
-		///Returns the number of documents containing the term t. 
-		virtual int32_t docFreq(const Term* t) const = 0;
+	/** Returns an enumeration of all terms after a given term.
+	* The enumeration is ordered by Term.compareTo().  Each term
+	* is greater than all that precede it in the enumeration.
+	*/
+	virtual TermEnum* terms(const Term* t) const = 0;
 
-		/// Returns an unpositioned TermPositions enumerator. 
-		virtual TermPositions* termPositions() const = 0;
-		
-        //Returns an enumeration of all the documents which contain  term. For each 
-        //document, in addition to the document number and frequency of the term in 
-        //that document, a list of all of the ordinal positions of the term in the document 
-        //is available.		
-		TermPositions* termPositions(Term* term) const;
+	/** Returns the number of documents containing the term <code>t</code>. */
+	virtual int32_t docFreq(const Term* t) const = 0;
 
-		/// Returns an unpositioned TermDocs enumerator. 
-		virtual TermDocs* termDocs() const = 0;
+	/// Returns an unpositioned TermPositions enumerator. 
+	virtual TermPositions* termPositions() const = 0;
+	
+    /** Returns an enumeration of all the documents which contain
+	* <code>term</code>.  For each document, in addition to the document number
+	* and frequency of the term in that document, a list of all of the ordinal
+	* positions of the term in the document is available.  Thus, this method
+	* implements the mapping:
+	*
+	* <p><ul>
+	* Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
+	* &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
+	* pos<sub>freq-1</sub>&gt;
+	* &gt;<sup>*</sup>
+	* </ul>
+	* <p> This positional information facilitates phrase and proximity searching.
+	* <p>The enumeration is ordered by document number.  Each document number is
+	* greater than all that precede it in the enumeration.
+	*/
+	TermPositions* termPositions(Term* term) const;
 
-		///Returns an enumeration of all the documents which contain term. 
-		TermDocs* termDocs(Term* term) const;
+	/** Returns an unpositioned {@link TermDocs} enumerator. */
+	virtual TermDocs* termDocs() const = 0;
 
-		///Deletes the document numbered docNum.  Once a document is deleted it will not appear 
-        ///in TermDocs or TermPostitions enumerations. Attempts to read its field with the document 
-        ///method will result in an error.  The presence of this document may still be reflected in 
-        ///the docFreq statistic, though this will be corrected eventually as the index is further modified.  
-        ///Note: API renamed, because delete is a reserved word in c++.
-		void deleteDocument(const int32_t docNum);
+	/** Returns an enumeration of all the documents which contain
+	* <code>term</code>. For each document, the document number and the frequency of
+	* the term in that document are provided, for use in search scoring.
+	* Thus, this method implements the mapping:
+	* <p><ul>
+	* Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
+	* </ul>
+	* <p>The enumeration is ordered by document number.  Each document number
+	* is greater than all that precede it in the enumeration.
+	*/
+	TermDocs* termDocs(Term* term) const;
 
-		///@Deprecated. Use deleteDocument instead.
-		void deleteDoc(const int32_t docNum){ deleteDocument(docNum); }
+	/** Deletes the document numbered <code>docNum</code>.  Once a document is
+	* deleted it will not appear in TermDocs or TermPositions enumerations.
+	* Attempts to read its field with the {@link #document}
+	* method will result in an error.  The presence of this document may still be
+	* reflected in the {@link #docFreq} statistic, though
+	* this will be corrected eventually as the index is further modified.
+	*/
+	void deleteDocument(const int32_t docNum);
 
-		///Deletes all documents containing term. Returns the number of deleted documents
-		int32_t deleteDocuments(Term* term);
+	///@Deprecated. Use deleteDocument instead.
+	void deleteDoc(const int32_t docNum){ deleteDocument(docNum); }
 
-		///@Deprecated. Use deleteDocuments instead.
-		int32_t deleteTerm(Term* term){ return deleteDocuments(term); }
+	/** Deletes all documents containing <code>term</code>.
+	* This is useful if one uses a document field to hold a unique ID string for
+	* the document.  Then to delete such a document, one merely constructs a
+	* term with the appropriate field and the unique ID string as its text and
+	* passes it to this method.
+	* See {@link #deleteDocument(int)} for information about when this deletion will 
+	* become effective.
+	* @return the number of documents deleted
+	*/
+	int32_t deleteDocuments(Term* term);
 
-		/** 
-		* Closes files associated with this index and also saves any new deletions to disk.
-        * No other methods should be called after this has been called.
-        */
-		void close();
+	///@Deprecated. Use deleteDocuments instead.
+	int32_t deleteTerm(Term* term){ return deleteDocuments(term); }
 
-      ///Checks if the index in the named directory is currently locked.       
-      static bool isLocked(CL_NS(store)::Directory* directory);
+	/** 
+	* Closes files associated with this index and also saves any new deletions to disk.
+    * No other methods should be called after this has been called.
+    */
+	void close();
 
-      ///Checks if the index in the named directory is currently locked.       
-		static bool isLocked(const char* directory);
+	///Checks if the index in the named directory is currently locked.       
+	static bool isLocked(CL_NS(store)::Directory* directory);
+	
+	///Checks if the index in the named directory is currently locked.       
+	static bool isLocked(const char* directory);
 
 
-		///Forcibly unlocks the index in the named directory.
-		///Caution: this should only be used by failure recovery code,
-		///when it is known that no other process nor thread is in fact
-		///currently accessing this index.
-		static void unlock(CL_NS(store)::Directory* directory);
-		static void unlock(const char* path);
+	///Forcibly unlocks the index in the named directory.
+	///Caution: this should only be used by failure recovery code,
+	///when it is known that no other process nor thread is in fact
+	///currently accessing this index.
+	static void unlock(CL_NS(store)::Directory* directory);
+	static void unlock(const char* path);
 
-		 /** Returns the directory this index resides in. */
-		CL_NS(store)::Directory* getDirectory() { return directory; }
+	 /** Returns the directory this index resides in. */
+	CL_NS(store)::Directory* getDirectory() { return directory; }
 
+	/** Returns true if the file is a lucene filename (based on extension or filename) */
+	static bool isLuceneFile(const char* filename);
 
-
 #ifndef LUCENE_HIDE_INTERNAL
-		//this should be protected, but MSVC 6 does not allow access
-		//to these fuctions in the protected classes IndexReaderLockWith
-		//which is wrong, since they themselves are members of the class!!
+	//this should be protected, but MSVC 6 does not allow access
+	//to these functions in the protected classes IndexReaderLockWith
+	//which is wrong, since they themselves are members of the class!!
 
-		///for internal use. Public so that lock class can access it
-		SegmentInfos* segmentInfos;
-      
-        /** Internal use. Implements commit. Public so that lock class can access it*/
-        virtual void doCommit() = 0;
+	///for internal use. Public so that lock class can access it
+	SegmentInfos* segmentInfos;
+  
+    /** Internal use. Implements commit. Public so that lock class can access it*/
+    virtual void doCommit() = 0;
 #endif
 
-		/**
-		* For classes that need to know when the IndexReader closes (such as caches, etc),
-		* should pass their callback function to this.
-		*/
-		void addCloseCallback(CloseCallback callback, void* parameter);
+	/**
+	* For classes that need to know when the IndexReader closes (such as caches, etc),
+	* should pass their callback function to this.
+	*/
+	void addCloseCallback(CloseCallback callback, void* parameter);
 
-	  protected:
-		class IndexReaderLockWith:public CL_NS(store)::LuceneLockWith{
-		public:
-			CL_NS(store)::Directory* directory;
-			IndexReader* indexReader;
+protected:
+	class IndexReaderLockWith:public CL_NS(store)::LuceneLockWith{
+	public:
+		CL_NS(store)::Directory* directory;
+		IndexReader* indexReader;
 
-			//Constructor	
-			IndexReaderLockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir);
+		//Constructor	
+		IndexReaderLockWith(CL_NS(store)::LuceneLock* lock, CL_NS(store)::Directory* dir);
 
-			//Reads the segmentinfo file and depending on the number of segments found
-			//it returns a MultiReader or a SegmentReader
-			void* doBody();
+		//Reads the segmentinfo file and depending on the number of segments found
+		//it returns a MultiReader or a SegmentReader
+		void* doBody();
 
-		};
+	};
 
-	    class IndexReaderCommitLockWith:public CL_NS(store)::LuceneLockWith{
-	    private:
-		    IndexReader* reader;
-		public:
-    			
-		    //Constructor	
-		    IndexReaderCommitLockWith( CL_NS(store)::LuceneLock* lock, IndexReader* r );
-		    void* doBody();
-		};
+    class IndexReaderCommitLockWith:public CL_NS(store)::LuceneLockWith{
+    private:
+	    IndexReader* reader;
+	public:
+			
+	    //Constructor	
+	    IndexReaderCommitLockWith( CL_NS(store)::LuceneLock* lock, IndexReader* r );
+	    void* doBody();
 	};
-	
+};
+
 CL_NS_END
 #endif
 

Modified: trunk/src/CLucene/index/IndexWriter.cpp
===================================================================
--- trunk/src/CLucene/index/IndexWriter.cpp	2006-10-10 21:00:50 UTC (rev 2329)
+++ trunk/src/CLucene/index/IndexWriter.cpp	2006-10-10 21:08:10 UTC (rev 2330)
@@ -21,11 +21,15 @@
 CL_NS_USE(analysis)
 CL_NS_DEF(index)
 
+
+  const char* IndexWriter::WRITE_LOCK_NAME = "write.lock";
+  const char* IndexWriter::COMMIT_LOCK_NAME = "commit.lock";
+
   IndexWriter::IndexWriter(const char* path, Analyzer* a, const bool create, const bool _closeDir):
 		directory( FSDirectory::getDirectory(path, create) ),
 		analyzer(a),
 		segmentInfos (_CLNEW SegmentInfos),
-    closeDir(_closeDir){
+		closeDir(_closeDir){
   //Func - Constructor
   //       Constructs an IndexWriter for the index in path.
   //Pre  - path != NULL and contains a named directory path
@@ -62,11 +66,10 @@
   //Func - Initialises the instances
   //Pre  - create indicates if the indexWriter must create a new index located at path or just open it
   //Post -
-	  maxFieldLength = IndexWriter::DEFAULT_MAX_FIELD_LENGTH;
 
-   similarity = CL_NS(search)::Similarity::getDefault();
+	similarity = CL_NS(search)::Similarity::getDefault();
 
-   useCompoundFile = true;
+	useCompoundFile = true;
 
 	//Create a ramDirectory
 	ramDirectory = _CLNEW TransactionalRAMDirectory;
@@ -75,23 +78,24 @@
 
 	//Initialize the writeLock to
 	writeLock  = NULL;
-	//Initialize the mergeFactor to 10 indicating that a merge will occur after 10 documents
-	//have been added to the index managed by this IndexWriter
-	mergeFactor = 10;
-	//Initialize maxMergeDocs to INT_MAX
-	maxMergeDocs = INT_MAX;
+	
+	//initialise the settings...
+	maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
+	mergeFactor = DEFAULT_MERGE_FACTOR;
+	maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+	writeLockTimeout = WRITE_LOCK_TIMEOUT;
+	commitLockTimeout = COMMIT_LOCK_TIMEOUT;
+	minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS;
+	termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
 
-   //initialise to LUCENE_INDEXWRITER_DEFAULT_MIN_MERGE_DOCS
-   minMergeDocs = LUCENE_INDEXWRITER_DEFAULT_MIN_MERGE_DOCS;
-
 	//Create a new lock using the name "write.lock"
-	LuceneLock* newLock = directory->makeLock("write.lock");
+	LuceneLock* newLock = directory->makeLock(IndexWriter::WRITE_LOCK_NAME);
 
 	//Condition check to see if newLock has been allocated properly
 	CND_CONDITION(newLock != NULL, "No memory could be allocated for LuceneLock newLock");
 
 	//Try to obtain a write lock
-	if (!newLock->obtain(LUCENE_WRITE_LOCK_TIMEOUT)){
+	if (!newLock->obtain(writeLockTimeout)){
 		//Write lock could not be obtained so delete it
 		_CLDELETE(newLock);
 		//Reset the instance
@@ -101,16 +105,15 @@
 	}
 
 	//The Write Lock has been obtained so save it for later use
-	writeLock = newLock;
+	this->writeLock = newLock;
 
 	//Create a new lock using the name "commit.lock"
-	LuceneLock* lock = directory->makeLock("commit.lock");
+	LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
 
 	//Condition check to see if lock has been allocated properly
 	CND_CONDITION(lock != NULL, "No memory could be allocated for LuceneLock lock");
 
-	IndexWriterLockWith with ( lock,LUCENE_WRITE_LOCK_TIMEOUT,this,create );
-
+	LockWith2 with ( lock,commitLockTimeout,this, NULL, create );
 	{
 		SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
 		with.run();
@@ -153,37 +156,6 @@
   }
 
 
-  void* IndexWriterLockWith::doBody() {
-  //Func - Writes segmentInfos to or reads  segmentInfos from disk
-  //Pre  - writer != NULL
-  //Post - if create is true then segementInfos has been written to disk otherwise
-  //       segmentInfos has been read from disk
-
-	  CND_PRECONDITION(writer != NULL, "writer is NULL");
-
-	  if (create)
-		  writer->segmentInfos->write(writer->getDirectory());
-	  else
-		  writer->segmentInfos->read(writer->getDirectory());
-
-	  return NULL;
-  }
-
-  void* IndexWriterLockWith2::doBody(){
-  //Func - Writes the segmentInfos to Disk and deletes unused segments
-  //Pre  - writer != NULL
-  //Post - segmentInfos have been written to disk and unused segments have been deleted
-
-	  CND_PRECONDITION(writer != NULL, "writer is NULL");
-
-	  //commit before deleting
-	  writer->segmentInfos->write(writer->getDirectory());
-	  //delete now-unused segments
-	  writer->deleteSegments(segmentsToDelete);
-
-	  return NULL;
-  }
-
   void IndexWriter::close( ) {
   //Func - Flushes all changes to an index, closes all associated files, and closes
   //       the directory that the index is stored in.
@@ -248,7 +220,7 @@
 
 	if ( analyzer == NULL )
 		analyzer = this->analyzer;
-		
+
 	ramDirectory->transStart();
 	try {
 		char* segmentName = newSegmentName();
@@ -257,7 +229,7 @@
 			//Create the DocumentWriter using a ramDirectory and analyzer
 			// supplied by the IndexWriter (this).
 			DocumentWriter* dw = _CLNEW DocumentWriter(
-				ramDirectory, analyzer, similarity, maxFieldLength );
+				ramDirectory, analyzer, this );
 			CND_CONDITION(dw != NULL, "dw is NULL");
 			try {
 				//Add the client-supplied document to the new segment.
@@ -385,15 +357,18 @@
 	}
   }
 
+  void IndexWriter::mergeSegments(const uint32_t minSegment) {
+    mergeSegments(minSegment, segmentInfos->size()); // forward with end = current number of segments
+  }
 
-  void IndexWriter::mergeSegments(const uint32_t minSegment) {
+  void IndexWriter::mergeSegments(const uint32_t minSegment, const uint32_t end) {
     CLVector<SegmentReader*> segmentsToDelete(false);
     const char* mergedName = newSegmentName();
 #ifdef _CL_DEBUG_INFO
 	fprintf(_CL_DEBUG_INFO, "merging segments\n");
 #endif
-    SegmentMerger merger(directory, mergedName, useCompoundFile);
-    for (int32_t i = minSegment; i < segmentInfos->size(); i++) {
+    SegmentMerger merger(this, mergedName);
+    for (size_t i = minSegment; i < end; i++) {
       SegmentInfo* si = segmentInfos->info(i);
 #ifdef _CL_DEBUG_INFO
 	  fprintf(_CL_DEBUG_INFO, " %s (%d docs)\n",si->name,si->docCount);
@@ -402,7 +377,7 @@
       merger.add(reader);
       if ((reader->getDirectory() == this->directory) || // if we own the directory
 		(reader->getDirectory() == this->ramDirectory)){
-        segmentsToDelete.push_back((SegmentReader*)reader);	  // queue segment for deletion
+        segmentsToDelete.push_back(reader);	  // queue segment for deletion
 	  }
     }
 
@@ -411,76 +386,91 @@
 #ifdef _CL_DEBUG_INFO
 	 fprintf(_CL_DEBUG_INFO,"\n into %s (%d docs)\n",mergedName, mergedDocCount);
 #endif
-	  
-	segmentInfos->clearto(minSegment); // pop old infos & add new
-    segmentInfos->add( _CLNEW SegmentInfo(mergedName, mergedDocCount, directory));
 
+	segmentInfos->clearto(minSegment);// remove old infos & add new
+    segmentInfos->add( _CLNEW SegmentInfo(mergedName, mergedDocCount, directory) );
 
     // close readers before we attempt to delete now-obsolete segments
     merger.closeReaders();
 
-    LuceneLock* lock = directory->makeLock("commit.lock");
-    IndexWriterLockWith2 with ( lock,LUCENE_COMMIT_LOCK_TIMEOUT,this,&segmentsToDelete );
+	LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
+	LockWith2 with ( lock, commitLockTimeout,this, &segmentsToDelete, true );
 
     {
     	SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
     	with.run();
     }
+    _CLDELETE( lock );
 
-    _CLDELETE( lock );
+
+	
+    if (useCompoundFile) {
+		char cmpdTmpName[CL_MAX_PATH];
+		strcpy(cmpdTmpName,mergedName);
+		strcat(cmpdTmpName,".tmp");
+
+		AStringArrayWithDeletor filesToDelete;
+		merger.createCompoundFile(cmpdTmpName, filesToDelete);
+
+		LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
+		LockWithCFS with ( lock,commitLockTimeout,directory, this, mergedName, &filesToDelete);
+		{
+			SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
+			with.run();
+		}
+		_CLDELETE(lock);
+    }
+
     _CLDELETE_CaARRAY( mergedName ); //ADD:
   }
 
   void IndexWriter::deleteSegments(CLVector<SegmentReader*>* segments) {
-    AStringArrayConstWithDeletor deletable;
+    AStringArrayWithDeletor deletable;
 
-    AStringArrayConstWithDeletor* deleteArray = readDeleteableFiles();
-    deleteFiles(deleteArray, &deletable); // try to delete deleteable
-    _CLDELETE(deleteArray);
+	{//scope delete deleteArray object
+		AStringArrayWithDeletor deleteArray;
+		readDeleteableFiles(deleteArray);
+		deleteFiles(deleteArray, deletable); // try to delete deleteable
+	}
 
+	AStringArrayWithDeletor files;
     for (uint32_t i = 0; i < segments->size(); i++) {
       SegmentReader* reader = (*segments)[i];
-      AStringArrayConstWithDeletor* files = reader->files();
+      files.clear();
+	  reader->files(files);
       if (reader->getDirectory() == this->directory)
-        deleteFiles(files, &deletable);	  // try to delete our files
+        deleteFiles(files, deletable);	  // try to delete our files
       else
         deleteFiles(files, reader->getDirectory()); // delete, eg, RAM files
-
-      _CLDELETE(files);
     }
 
-    writeDeleteableFiles(&deletable);		  // note files we can't delete
+    writeDeleteableFiles(deletable);		  // note files we can't delete
   }
 
-  AStringArrayConstWithDeletor* IndexWriter::readDeleteableFiles() {
-    AStringArrayConstWithDeletor* result = _CLNEW AStringArrayConstWithDeletor;
-
+  void IndexWriter::readDeleteableFiles(AStringArrayWithDeletor& result) {
     if (!directory->fileExists("deletable"))
-      return result;
+      return;
 
     IndexInput* input = directory->openInput("deletable");
     try {
 		TCHAR tname[CL_MAX_PATH];
 		for (int32_t i = input->readInt(); i > 0; i--){	  // read file names
 			input->readString(tname,CL_MAX_PATH);
-			result->push_back(STRDUP_TtoA(tname));
+			result.push_back(STRDUP_TtoA(tname));
 		}
     } _CLFINALLY(
         input->close();
         _CLDELETE(input);
     );
-
-
-    return result;
   }
 
-  void IndexWriter::writeDeleteableFiles(AStringArrayConstWithDeletor* files) {
+  void IndexWriter::writeDeleteableFiles(AStringArrayWithDeletor& files) {
     IndexOutput* output = directory->createOutput("deleteable.new");
     try {
-      output->writeInt(files->size());
+      output->writeInt(files.size());
 	  TCHAR tfile[CL_MAX_PATH]; //temporary space for tchar file name
-	  for (uint32_t i = 0; i < files->size(); i++){
-		STRCPY_AtoT(tfile,(*files)[i],CL_MAX_PATH);
+	  for (uint32_t i = 0; i < files.size(); i++){
+		STRCPY_AtoT(tfile,files[i],CL_MAX_PATH);
         output->writeString( tfile, _tcslen(tfile) );
 	  }
     } _CLFINALLY(
@@ -491,33 +481,38 @@
     directory->renameFile("deleteable.new", "deletable");
   }
 
-  void IndexWriter::deleteFiles(AStringArrayConstWithDeletor* files, Directory* directory) {
-	AStringArrayConstWithDeletor::const_iterator itr = files->begin();
-	while ( itr != files->end() ){
-		directory->deleteFile( *itr );
+  void IndexWriter::deleteFiles(AStringArrayWithDeletor& files){
+	AStringArrayWithDeletor deletable;      // collects the names that could not be deleted in this pass
+	AStringArrayWithDeletor currDeletable;  // names read back from the "deletable" file, if it exists
+	readDeleteableFiles(currDeletable);
+	deleteFiles(currDeletable, deletable); // retry the files recorded as undeletable earlier
+	deleteFiles(files, deletable);     // try to delete the caller's files
+	writeDeleteableFiles(deletable);        // record the files we still can't delete
+  }
+
+  void IndexWriter::deleteFiles(AStringArrayWithDeletor& files, Directory* directory) {
+	AStringArrayWithDeletor::iterator itr = files.begin();
+	while ( itr != files.end() ){
+		directory->deleteFile( *itr, true );
 		++itr;
 	}
   }
 
-  void IndexWriter::deleteFiles(AStringArrayConstWithDeletor* files, AStringArrayConstWithDeletor* deletable) {
-	  AStringArrayConstWithDeletor::const_iterator itr=files->begin();
-	  while ( itr != files->end() ){
+  void IndexWriter::deleteFiles(AStringArrayWithDeletor& files, AStringArrayWithDeletor& deletable) {
+	  AStringArrayWithDeletor::iterator itr=files.begin();
+	  while ( itr != files.end() ){
 		const char* file = *itr;
-		try {
-			if ( directory->fileExists(file) )
-				directory->deleteFile(file);		  // try to delete each file
-		} catch (CLuceneError& err) {			  // if delete fails
-		    if ( err.number() != CL_ERR_IO )
-		        throw err; //not an IO err... re-throw
-
-			if (directory->fileExists(file)) {
-	#ifdef _CL_DEBUG_INFO
-				fprintf(_CL_DEBUG_INFO,"%s; Will re-try later.\n", err.what());
-	#endif
-			deletable->push_back(STRDUP_AtoA(file));		  // add to deletable
+		if ( getDirectory()->fileExists(file) ){
+			if ( !getDirectory()->deleteFile(file, false) ){ // delete reported failure
+				if (directory->fileExists(file)) { // still there: log by file name, no exception object exists on this path
+					#ifdef _CL_DEBUG_INFO
+					fprintf(_CL_DEBUG_INFO,"Could not delete %s; Will re-try later.\n", file);
+					#endif
+					deletable.push_back(STRDUP_AtoA(file));		  // add a copy of the name to deletable
+				}
 			}
 		}
-	  ++itr;
+		++itr;
 	 }
   }
 
@@ -537,61 +532,135 @@
 
 	  // start with zero or 1 seg so optimize the current
 	  optimize();
+	  
+	  int32_t start = segmentInfos->size();
 
 	  //Iterate through the directories
-     int32_t i = 0;
+      int32_t i = 0;
 	  while ( dirs[i] != NULL ) {
 		  // DSR: Changed SegmentInfos constructor arg (see bug discussion below).
 		  SegmentInfos sis(false);
 		  sis.read( dirs[i]);
 
 		  for (int32_t j = 0; j < sis.size(); j++) {
-		   /* DSR:CL_BUG:
-		   ** In CLucene 0.8.11, the next call placed a pointer to a SegmentInfo
-		   ** object from stack variable $sis into the vector this->segmentInfos.
-		   ** Then, when the call to optimize() is made just before exiting this
-		   ** function, $sis had already been deallocated (and has deleted its
-		   ** member objects), leaving dangling pointers in this->segmentInfos.
-		   ** I added a SegmentInfos constructor that allowed me to order it not
-		   ** to delete its members, invoked the new constructor form above for
-		   ** $sis, and the problem was solved. */
-		   segmentInfos->add(sis.info(j));	  // add each info
+			segmentInfos->add(sis.info(j));	  // add each info
 		  }
         i++;
 	}
+	
+	// merge newly added segments in log(n) passes
+    while (segmentInfos->size() > start+mergeFactor) {
+      for (int32_t base = start; base < segmentInfos->size(); base++) {
+        int32_t end = min(segmentInfos->size(), base+mergeFactor);
+        if (end-base > 1)
+          mergeSegments(base, end);
+      }
+    }
+
 	optimize();					  // cleanup
   }
 
 
   void IndexWriter::addIndexes(IndexReader** readers){
-	 SCOPED_LOCK_MUTEX(THIS_LOCK)
+	SCOPED_LOCK_MUTEX(THIS_LOCK)
     optimize();					  // start with zero or 1 seg
 
     char* mergedName = newSegmentName();
-    SegmentMerger* merger = _CLNEW SegmentMerger(directory, mergedName, false);
+    SegmentMerger merger(this, mergedName);
 
-    if (segmentInfos->size() == 1)                 // add existing index, if any
-      merger->add(_CLNEW SegmentReader(segmentInfos->info(0)));
+    CLVector<SegmentReader*> segmentsToDelete;
+	SegmentReader* sReader = NULL;
+    if (segmentInfos->size() == 1){ // add existing index, if any
+        sReader = _CLNEW SegmentReader(segmentInfos->info(0));
+		merger.add(sReader);
+        segmentsToDelete.push_back(sReader);   // queue segment for deletion
+    }
 
     int32_t readersLength = 0;
     while ( readers[readersLength] != NULL )
-      merger->add((SegmentReader*) readers[readersLength++]);
+      merger.add(readers[readersLength++]);
 
-    int32_t docCount = merger->merge();                // merge 'em
+    int32_t docCount = merger.merge();                // merge 'em
 
     // pop old infos & add new
 	segmentInfos->clearto(0);
     segmentInfos->add(_CLNEW SegmentInfo(mergedName, docCount, directory));
 
-    LuceneLock* lock = directory->makeLock("commit.lock");
-    IndexWriterLockWith with ( lock,LUCENE_COMMIT_LOCK_TIMEOUT,this,true);
+	if ( sReader != NULL )
+		sReader->close(); // close only: it is queued in segmentsToDelete and still used by deleteSegments() below
 
+	LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
+    LockWith2 with ( lock,commitLockTimeout,this, &segmentsToDelete, true);
 	{
 		SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
 	   	with.run();
 	}
+    _CLDELETE(lock);
+	if ( sReader != NULL ){ _CLDELETE(sReader); } // the commit above was its last user
 
-      _CLDELETE(lock);
-   }
+	if (useCompoundFile) {
+		char cmpdTmpName[CL_MAX_PATH];
+		strcpy(cmpdTmpName,mergedName);
+		strcat(cmpdTmpName,".tmp");
 
+		AStringArrayWithDeletor filesToDelete;
+		merger.createCompoundFile(cmpdTmpName, filesToDelete);
+
+		LuceneLock* cfslock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);
+		LockWithCFS with ( cfslock,commitLockTimeout,directory, this, mergedName, &filesToDelete); // cfslock, not the already released 'lock'
+		{
+			SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
+			with.run();
+		}
+		_CLDELETE(cfslock);
+    }
+	_CLDELETE_CaARRAY( mergedName ); // free the segment name, as mergeSegments() does
+  }
+
+
+
+  
+  void* IndexWriter::LockWith2::doBody() {
+  //Func - Writes segmentInfos to disk (then deletes the queued segments) or reads segmentInfos from disk
+  //Pre  - writer != NULL
+  //Post - if create is true then segmentInfos has been written to disk and, when segmentsToDelete is
+  //       not NULL, writer->deleteSegments has been called; otherwise segmentInfos has been read from disk
+
+	  CND_PRECONDITION(writer != NULL, "writer is NULL");
+
+	  if (create){
+		  writer->segmentInfos->write(writer->getDirectory());
+		  if ( segmentsToDelete != NULL )
+			writer->deleteSegments(segmentsToDelete);  // delete now-unused segments
+	  }else
+		  writer->segmentInfos->read(writer->getDirectory());
+
+	  return NULL;
+  }
+
+  void* IndexWriter::LockWithCFS::doBody() {
+  //Func - Renames the temporary compound file "<segName>.tmp" to "<segName>.cfs" and
+  //       then hands the files listed in filesToDelete to the writer for deletion
+  //Pre  - directory != NULL and segName != NULL
+  //Post - the rename has been requested and writer->deleteFiles(*filesToDelete) has been called
+
+		CND_PRECONDITION(directory != NULL, "directory is NULL");
+		CND_PRECONDITION(segName != NULL, "mergedName is NULL");
+
+		char from[CL_MAX_PATH];
+		char nu[CL_MAX_PATH];
+
+		strcpy(from,segName);
+		strcat(from,".tmp");
+		strcpy(nu,segName);
+		strcat(nu,".cfs");
+
+		// make compound file visible for SegmentReaders
+		directory->renameFile(from, nu);
+		// delete now unused files of segment 
+		writer->deleteFiles(*filesToDelete);   // NOTE(review): writer and filesToDelete are dereferenced without a precondition check
+
+		return NULL;
+  }
+
 CL_NS_END

Modified: trunk/src/CLucene/index/IndexWriter.h
===================================================================
--- trunk/src/CLucene/index/IndexWriter.h	2006-10-10 21:00:50 UTC (rev 2329)
+++ trunk/src/CLucene/index/IndexWriter.h	2006-10-10 21:08:10 UTC (rev 2330)
@@ -21,278 +21,403 @@
 
 CL_NS_DEF(index)
 
-	///	An IndexWriter creates and maintains an index.
-	///
-	///	The third argument to the <a href="#IndexWriter"><b>constructor</b></a>
-	///	determines whether a new index is created, or whether an existing index is
-	///	opened for the addition of new documents.
-	///
-	///	In either case, documents are added with the <a
-	///	href="#addDocument"><b>addDocument</b></a> method.  When finished adding
-	///	documents, <a href="#close"><b>close</b></a> should be called.
-	///
-	///	If an index will not have more documents added for a while and optimal search
-	///	performance is desired, then the <a href="#optimize"><b>optimize</b></a>
-	///	method should be called before the index is closed.
-	class IndexWriter:LUCENE_BASE {
-	private:
-	    // where this index resides
-			CL_NS(store)::Directory* directory;
-			// how to analyze text
-			CL_NS(analysis)::Analyzer* analyzer;
+/**
+An IndexWriter creates and maintains an index.
+
+The third argument to the 
+<a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a>
+determines whether a new index is created, or whether an existing index is
+opened for the addition of new documents.
+
+In either case, documents are added with the <a
+href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a> method.  
+When finished adding documents, <a href="#close()"><b>close</b></a> should be called.
+
+<p>If an index will not have more documents added for a while and optimal search
+performance is desired, then the <a href="#optimize()"><b>optimize</b></a>
+method should be called before the index is closed.
+
+<p>Opening an IndexWriter creates a lock file for the directory in use. Trying to open
+another IndexWriter on the same directory will lead to an IOException. The IOException
+is also thrown if an IndexReader on the same directory is used to delete documents
+from the index.
+
+@see IndexModifier IndexModifier supports the important methods of IndexWriter plus deletion
+*/
+class IndexWriter:LUCENE_BASE {
+	class LockWith2:public CL_NS(store)::LuceneLockWith{ // lock body that writes or reads the writer's segmentInfos
+	public:
+		CL_NS(util)::CLVector<SegmentReader*>* segmentsToDelete; // passed to writer->deleteSegments() after a write; may be NULL
+		IndexWriter* writer; // writer whose segmentInfos is written or read
+		bool create; // true: write segmentInfos; false: read it
+		void* doBody();
+		LockWith2(CL_NS(store)::LuceneLock* lock, int64_t lockWaitTimeout,
+				IndexWriter* wr, 
+				CL_NS(util)::CLVector<SegmentReader*>* std,
+				bool create):
+			CL_NS(store)::LuceneLockWith(lock,lockWaitTimeout)
+		{
+			this->writer = wr;
+			this->segmentsToDelete = std;
+			this->create = create;
+		}
+		~LockWith2(){ // empty: none of the stored pointers is released here
+		}
+	};
+	class LockWithCFS:public CL_NS(store)::LuceneLockWith{ // lock body that renames the compound file into place
+	public:
+		CL_NS(store)::Directory* directory; // directory in which the rename is performed
+		IndexWriter* writer; // writer asked to delete filesToDelete
+		const char* segName; // stored as passed (the string is not copied); doBody() appends ".tmp" and ".cfs" to it
+		CL_NS(util)::AStringArrayWithDeletor* filesToDelete; // dereferenced and passed to writer->deleteFiles()
+		void* doBody();
+		LockWithCFS(CL_NS(store)::LuceneLock* lock, int64_t lockWaitTimeout, 
+				CL_NS(store)::Directory* dir, 
+				IndexWriter* wr, 
+				const char* segName, 
+				CL_NS(util)::AStringArrayWithDeletor* ftd):
+			CL_NS(store)::LuceneLockWith(lock,lockWaitTimeout)
+		{
+			this->segName = segName;
+			this->directory = dir;
+			this->writer = wr;
+			this->filesToDelete = ftd;
+		}
+		~LockWithCFS(){ // empty: none of the stored pointers is released here
+		}
+	};
 
-      bool isOpen; //indicates if the writers is open - this way close can be called multiple times
 
-      CL_NS(search)::Similarity* similarity; // how to normalize
+	bool isOpen; //indicates if the writers is open - this way close can be called multiple times
 
-      /** Use compound file setting. Defaults to true, minimizing the number of
-      * files used.  Setting this to false may improve indexing performance, but
-      * may also cause file handle problems.
-      */
-      bool useCompoundFile;
+	// how to analyze text
+	CL_NS(analysis)::Analyzer* analyzer;
 
-		CL_NS(store)::TransactionalRAMDirectory* ramDirectory; // for temp segs
+	CL_NS(search)::Similarity* similarity; // how to normalize
 
-		CL_NS(store)::LuceneLock* writeLock;
+	/** Use compound file setting. Defaults to true, minimizing the number of
+	* files used.  Setting this to false may improve indexing performance, but
+	* may also cause file handle problems.
+	*/
+	bool useCompoundFile;
+	bool closeDir;
 
-		void _IndexWriter(const bool create);
+	CL_NS(store)::TransactionalRAMDirectory* ramDirec...
 
[truncated message content]