[Clucene-cvs] SF.net SVN: clucene: [2634] branches/lucene2_3_2/src/CLucene/index

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Revision: 2634
          http://clucene.svn.sourceforge.net/clucene/?rev=2634&view=rev
Author:   synhershko
Date:     2008-05-28 13:59:39 -0700 (Wed, 28 May 2008)

Log Message:
-----------
FieldInfo and FieldInfos Payloads support

Modified Paths:
--------------
    branches/lucene2_3_2/src/CLucene/index/FieldInfos.cpp
    branches/lucene2_3_2/src/CLucene/index/FieldInfos.h
    branches/lucene2_3_2/src/CLucene/index/SegmentHeader.h

Modified: branches/lucene2_3_2/src/CLucene/index/FieldInfos.cpp
===================================================================

--- branches/lucene2_3_2/src/CLucene/index/FieldInfos.cpp	2008-05-28 18:53:47 UTC (rev 2633)
+++ branches/lucene2_3_2/src/CLucene/index/FieldInfos.cpp	2008-05-28 20:59:39 UTC (rev 2634)
@@ -26,14 +26,15 @@
 						const bool _storeTermVector,
 						const bool _storeOffsetWithTermVector,
 						const bool _storePositionWithTermVector,
-						const bool _omitNorms):
+						const bool _omitNorms,
+						const bool _storePayloads):
 	name(CLStringIntern::intern(_fieldName CL_FILELINE)),
 	isIndexed(_isIndexed),
 	number(_fieldNumber),
 	storeTermVector(_storeTermVector),
 	storeOffsetWithTermVector(_storeOffsetWithTermVector),
 	storePositionWithTermVector(_storeTermVector),
-	omitNorms(_omitNorms)
+	omitNorms(_omitNorms), storePayloads(_storePayloads)
 {
 }
 
@@ -72,13 +73,13 @@
 	_CLDELETE(fields);
 }
 
-void FieldInfos::add( const TCHAR* name, const bool isIndexed, const bool storeTermVector,
-		bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms) {
+FieldInfo* FieldInfos::add( const TCHAR* name, const bool isIndexed, const bool storeTermVector,
+		const bool storePositionWithTermVector, const bool storeOffsetWithTermVector, const bool omitNorms, const bool storePayloads) {
 	FieldInfo* fi = fieldInfo(name);
 	if (fi == NULL) {
-		addInternal(name, isIndexed, storeTermVector, 
+		return addInternal(name, isIndexed, storeTermVector, 
 			storePositionWithTermVector, 
-			storeOffsetWithTermVector, omitNorms);
+			storeOffsetWithTermVector, omitNorms, storePayloads);
 	} else {
 		if (fi->isIndexed != isIndexed) {
 			fi->isIndexed = true;                      // once indexed, always index
@@ -95,17 +96,22 @@
 	    if (fi->omitNorms != omitNorms) {
 	        fi->omitNorms = false;                // once norms are stored, always store
 	    }
+		if (fi->storePayloads != storePayloads) {
+			fi->storePayloads = true;
+		}
 	}
+	return fi;
 }
 
 void FieldInfos::add(const TCHAR** names,const bool isIndexed, const bool storeTermVectors,
-              bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms) {
-  int32_t i=0;      
-  while ( names[i] != NULL ){
-     add(names[i], isIndexed, storeTermVectors, storePositionWithTermVector, 
-		 storeOffsetWithTermVector, omitNorms);
-	 ++i;
-  }
+					 const bool storePositionWithTermVector, const bool storeOffsetWithTermVector, const bool omitNorms, const bool storePayloads)
+{	// Adds every name in 'names' with one shared set of flags; the array must end with a NULL entry.
+	int32_t i=0;      
+	while ( names[i] != NULL ){ // the NULL entry is the only end-of-array marker, no length is passed
+		add(names[i], isIndexed, storeTermVectors, storePositionWithTermVector, 
+			storeOffsetWithTermVector, omitNorms, storePayloads); // same flags for each name; the returned FieldInfo* is ignored
+		++i;
+	}
 }
 
 int32_t FieldInfos::fieldNumber(const TCHAR* fieldName)const {
@@ -155,6 +161,7 @@
  		if (fi->storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
  		if (fi->storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
  		if (fi->omitNorms) bits |= OMIT_NORMS;
+		if (fi->storePayloads) bits |= STORE_PAYLOADS;
 
 	    output->writeString(fi->name,_tcslen(fi->name));
 	    output->writeByte(bits);
@@ -164,7 +171,7 @@
 void FieldInfos::read(IndexInput* input) {
 	int32_t size = input->readVInt();
     uint8_t bits;
-	bool isIndexed,storeTermVector,storePositionsWithTermVector,storeOffsetWithTermVector,omitNorms;
+	bool isIndexed,storeTermVector,storePositionsWithTermVector,storeOffsetWithTermVector,omitNorms,storePayloads;
 	for (int32_t i = 0; i < size; ++i){
 	    TCHAR* name = input->readString(); //we could read name into a string buffer, but we can't be sure what the maximum field length will be.
 		bits = input->readByte();
@@ -173,17 +180,19 @@
    		storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
    		storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
    		omitNorms = (bits & OMIT_NORMS) != 0;
+		storePayloads = (bits & STORE_PAYLOADS) != 0;
    
-   		addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms);
+   		addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
    		_CLDELETE_CARRAY(name);
 	}
 }
-void FieldInfos::addInternal( const TCHAR* name, const bool isIndexed, const bool storeTermVector,
-		bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms) {
+FieldInfo* FieldInfos::addInternal( const TCHAR* name, const bool isIndexed, const bool storeTermVector,
+		const bool storePositionWithTermVector, const bool storeOffsetWithTermVector, const bool omitNorms, const bool storePayloads) { // creates a FieldInfo numbered byNumber.size(), registers it by number and by name, and returns it
 	FieldInfo* fi = _CLNEW FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, 
-		storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
+		storeOffsetWithTermVector, storePositionWithTermVector, omitNorms, storePayloads); // FieldInfo's constructor takes the offset flag BEFORE the position flag
 	byNumber.push_back(fi);
 	byName.put( fi->name, fi);
+	return fi;
 }
 
 bool FieldInfos::hasVectors() const{
@@ -193,4 +202,17 @@
 	}
 	return false;
 }
+
+FieldInfos* FieldInfos::clone()
+{	// Builds a new FieldInfos whose entries are clone()s of this one's, registered by number and by name.
+	FieldInfos* copy = _CLNEW FieldInfos();
+	const size_t total = byNumber.size();
+	for (size_t idx = 0; idx != total; ++idx) {
+		FieldInfo* duplicate = byNumber[idx]->clone();
+		copy->byNumber.push_back(duplicate);
+		copy->byName.put(duplicate->name, duplicate);
+	}
+	return copy;
+}
+
 CL_NS_END

Modified: branches/lucene2_3_2/src/CLucene/index/FieldInfos.h
===================================================================
--- branches/lucene2_3_2/src/CLucene/index/FieldInfos.h	2008-05-28 18:53:47 UTC (rev 2633)
+++ branches/lucene2_3_2/src/CLucene/index/FieldInfos.h	2008-05-28 20:59:39 UTC (rev 2634)
@@ -37,6 +37,8 @@
 
 	bool omitNorms; // omit norms associated with indexed fields
 
+	bool storePayloads; // whether this field stores payloads together with term positions
+
 	//Func - Constructor
 	//       Initialises FieldInfo.
 	//       na holds the name of the field
@@ -55,20 +57,26 @@
 		const bool storeTermVector,
 		const bool storeOffsetWithTermVector,
 		const bool storePositionWithTermVector,
-		const bool omitNorms);
+		const bool omitNorms,
+		const bool storePayloads);
 
     //Func - Destructor
 	//Pre  - true
 	//Post - The instance has been destroyed
 	~FieldInfo();
+
+	FieldInfo* clone() { // returns a new FieldInfo built from this one's members; the caller receives the _CLNEW'd object
+		return _CLNEW FieldInfo(name, isIndexed, number, storeTermVector, storeOffsetWithTermVector, // offset flag first:
+			storePositionWithTermVector, omitNorms, storePayloads); // the constructor declares offset before position
+	}
 };
 
-/** Access to the Field Info file that describes document fields and whether or
-*  not they are indexed. Each segment has a separate Field Info file. Objects
-*  of this class are thread-safe for multiple readers, but only one thread can
-*  be adding documents at a time, with no other reader or writer threads
-*  accessing this object.
-*/
+/** Access to the Field Info file that describes document fields and whether or
+ *  not they are indexed. Each segment has a separate Field Info file. Objects
+ *  of this class are thread-safe for multiple readers, but only one thread can
+ *  be adding documents at a time, with no other reader or writer threads
+ *  accessing this object.
+ */
 class FieldInfos :LUCENE_BASE{
 private:
 	//we now use internd field names, so we can use the voidCompare
@@ -85,7 +93,8 @@
 		STORE_TERMVECTOR = 0x2,
 		STORE_POSITIONS_WITH_TERMVECTOR = 0x4,
 		STORE_OFFSET_WITH_TERMVECTOR = 0x8,
-		OMIT_NORMS = 0x10
+		OMIT_NORMS = 0x10,
+		STORE_PAYLOADS = 0x20
 	};
 
 	FieldInfos();
@@ -127,26 +136,32 @@
 
   	bool hasVectors() const;
 
+	/**
+	* Returns a deep clone of this FieldInfos instance.
+	*/
+	FieldInfos* clone();
+
 	// Adds field info for a Document. 
 	void add(const CL_NS(document)::Document* doc);
 
 	// Merges in information from another FieldInfos. 
 	void add(FieldInfos* other);
 	
-	
-	/** If the field is not yet known, adds it. If it is known, checks to make
-	*  sure that the isIndexed flag is the same as was given previously for this
-	*  field. If not - marks it as being indexed.  Same goes for the TermVector
-	* parameters.
-	* 
-	* @param name The name of the field
-	* @param isIndexed true if the field is indexed
-	* @param storeTermVector true if the term vector should be stored
-	* @param storePositionWithTermVector true if the term vector with positions should be stored
-	* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
+	/** If the field is not yet known, adds it. If it is known, checks to make
+	*  sure that the isIndexed flag is the same as was given previously for this
+	*  field. If not - marks it as being indexed.  Same goes for the TermVector
+	* parameters.
+	*
+	* @param name The name of the field
+	* @param isIndexed true if the field is indexed
+	* @param storeTermVector true if the term vector should be stored
+	* @param storePositionWithTermVector true if the term vector with positions should be stored
+	* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
+	* @param omitNorms true if the norms for the indexed field should be omitted
+	* @param storePayloads true if payloads should be stored for this field
 	*/
-	void add(const TCHAR* name, const bool isIndexed, const bool storeTermVector=false,
-	          bool storePositionWithTermVector=false, bool storeOffsetWithTermVector=false, bool omitNorms=false);
+	FieldInfo* add(const TCHAR* name, const bool isIndexed, const bool storeTermVector=false,
+	          const bool storePositionWithTermVector=false, const bool storeOffsetWithTermVector=false, const bool omitNorms=false, const bool storePayloads=false);
 	
 	/**
 	* Assumes the fields are not storing term vectors 
@@ -157,15 +172,16 @@
 	* @see #add(String, boolean)
 	*/
 	void add(const TCHAR** names, const bool isIndexed, const bool storeTermVector=false,
-              bool storePositionWithTermVector=false, bool storeOffsetWithTermVector=false, bool omitNorms=false);
+              const bool storePositionWithTermVector=false, const bool storeOffsetWithTermVector=false, const bool omitNorms=false, const bool storePayloads=false);
 
 	void write(CL_NS(store)::Directory* d, const char* name) const;
 	void write(CL_NS(store)::IndexOutput* output) const;
 
 private:
 	void read(CL_NS(store)::IndexInput* input);
-	void addInternal( const TCHAR* name,const bool isIndexed, const bool storeTermVector,
-		const bool storePositionWithTermVector, const bool storeOffsetWithTermVector, const bool omitNorms);
+	// was void
+	FieldInfo* addInternal( const TCHAR* name,const bool isIndexed, const bool storeTermVector,
+		const bool storePositionWithTermVector, const bool storeOffsetWithTermVector, const bool omitNorms, const bool storePayloads);
 
 };
 CL_NS_END

Modified: branches/lucene2_3_2/src/CLucene/index/SegmentHeader.h
===================================================================
--- branches/lucene2_3_2/src/CLucene/index/SegmentHeader.h	2008-05-28 18:53:47 UTC (rev 2633)
+++ branches/lucene2_3_2/src/CLucene/index/SegmentHeader.h	2008-05-28 20:59:39 UTC (rev 2634)
@@ -1,124 +1,124 @@
-/*------------------------------------------------------------------------------
-* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
-* 
-* Distributable under the terms of either the Apache License (Version 2.0) or 
-* the GNU Lesser General Public License, as specified in the COPYING file.
-------------------------------------------------------------------------------*/
-#ifndef _lucene_index_SegmentHeader_
-#define _lucene_index_SegmentHeader_
-
-#if defined(_LUCENE_PRAGMA_ONCE)
-# pragma once
-#endif
-
-#include "SegmentInfos.h"
-#include "CLucene/util/BitSet.h"
-#include "CLucene/util/VoidMap.h"
-#include "Term.h"
-#include "FieldInfos.h"
-#include "FieldsReader.h"
-#include "IndexReader.h"
-#include "TermInfosReader.h"
-#include "CompoundFile.h"
-#include "CLucene/util/ThreadLocal.h"
-
-CL_NS_DEF(index)
-class SegmentReader;
-
-class SegmentTermDocs:public virtual TermDocs {
-	
-	int32_t _doc;
-	
-	int32_t skipInterval;
-	
-	int64_t freqBasePointer;
-	int64_t proxBasePointer;
-	
-	int32_t numSkips;
-	int32_t skipCount;
-	CL_NS(store)::IndexInput* skipStream;
-	int32_t skipDoc;
-	int64_t freqPointer;
-	int64_t proxPointer;
-	int64_t skipPointer;
-	bool haveSkipped;
-	
-protected:
-	// SegmentReader parent
-	const SegmentReader* parent;
-	CL_NS(store)::IndexInput* freqStream;
-	int32_t count;
-	int32_t df;
-	int32_t _freq;
-	CL_NS(util)::BitSet* deletedDocs;
-public:
-    virtual ~SegmentTermDocs();
-
-    virtual void seek(TermEnum* termEnum);
-	virtual void seek(Term* term);
-	virtual void seek(const TermInfo* ti);
-
-	virtual void close();
-	virtual int32_t doc()const;
-	virtual int32_t freq()const;
-
-	virtual bool next();
-
-	/** Optimized implementation. */
-	virtual int32_t read(int32_t* docs, int32_t* freqs, int32_t length);
-
-	/** Optimized implementation. */
-	virtual bool skipTo(const int32_t target);
-	
-	virtual TermPositions* __asTermPositions();
-	
-	///\param Parent must be a segment reader
-	SegmentTermDocs( const SegmentReader* Parent);
-protected:
-	virtual void skippingDoc(){}
-	virtual void skipProx(int64_t proxPointer){}
-};
-
-
-class SegmentTermPositions: public SegmentTermDocs, public TermPositions {
-private:
-	CL_NS(store)::IndexInput* proxStream;
-	int32_t proxCount;
-	int32_t position;
-
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+* 
+* Distributable under the terms of either the Apache License (Version 2.0) or 
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_index_SegmentHeader_
+#define _lucene_index_SegmentHeader_
+
+#if defined(_LUCENE_PRAGMA_ONCE)
+# pragma once
+#endif
+
+#include "SegmentInfos.h"
+#include "CLucene/util/BitSet.h"
+#include "CLucene/util/VoidMap.h"
+#include "Term.h"
+#include "FieldInfos.h"
+#include "FieldsReader.h"
+#include "IndexReader.h"
+#include "TermInfosReader.h"
+#include "CompoundFile.h"
+#include "CLucene/util/ThreadLocal.h"
+
+CL_NS_DEF(index)
+class SegmentReader;
+
+class SegmentTermDocs:public virtual TermDocs {
+	
+	int32_t _doc;
+	
+	int32_t skipInterval;
+	
+	int64_t freqBasePointer;
+	int64_t proxBasePointer;
+	
+	int32_t numSkips;
+	int32_t skipCount;
+	CL_NS(store)::IndexInput* skipStream;
+	int32_t skipDoc;
+	int64_t freqPointer;
+	int64_t proxPointer;
+	int64_t skipPointer;
+	bool haveSkipped;
+	
+protected:
+	// SegmentReader parent
+	const SegmentReader* parent;
+	CL_NS(store)::IndexInput* freqStream;
+	int32_t count;
+	int32_t df;
+	int32_t _freq;
+	CL_NS(util)::BitSet* deletedDocs;
+public:
+    virtual ~SegmentTermDocs();
+
+    virtual void seek(TermEnum* termEnum);
+	virtual void seek(Term* term);
+	virtual void seek(const TermInfo* ti);
+
+	virtual void close();
+	virtual int32_t doc()const;
+	virtual int32_t freq()const;
+
+	virtual bool next();
+
+	/** Optimized implementation. */
+	virtual int32_t read(int32_t* docs, int32_t* freqs, int32_t length);
+
+	/** Optimized implementation. */
+	virtual bool skipTo(const int32_t target);
+	
+	virtual TermPositions* __asTermPositions();
+	
+	///\param Parent must be a segment reader
+	SegmentTermDocs( const SegmentReader* Parent);
+protected:
+	virtual void skippingDoc(){}
+	virtual void skipProx(int64_t proxPointer){}
+};
+
+
+class SegmentTermPositions: public SegmentTermDocs, public TermPositions {
+private:
+	CL_NS(store)::IndexInput* proxStream;
+	int32_t proxCount;
+	int32_t position;
+
 	// the current payload length
 	int32_t payloadLength;
 	// indicates whether the payload of the currend position has
 	// been read from the proxStream yet
-	bool needToLoadPayload;
-	
-	int64_t lazySkipPointer;
-	int64_t lazySkipDocCount;
-	//int32_t lazySkipProxCount;
-
-	void skipPositions( int32_t n );
-	void lazySkip();
-	
-public:
-	///\param Parent must be a segment reader
-	SegmentTermPositions(const SegmentReader* Parent);
-	~SegmentTermPositions();
-
-    void seek(const TermInfo* ti);
-	void close();
-	int32_t nextPosition();
-	bool next();
-	int32_t read(int32_t* docs, int32_t* freqs, int32_t length);
-	virtual TermDocs* __asTermDocs();
-	virtual TermPositions* __asTermPositions();
-
-    //resolve SegmentTermDocs/TermPositions ambiguity
-	void seek(Term* term){ SegmentTermDocs::seek(term); }
-    void seek(TermEnum* termEnum){ SegmentTermDocs::seek(termEnum); }
-    int32_t doc() const{ return SegmentTermDocs::doc(); }
-	int32_t freq() const{ return SegmentTermDocs::freq(); }
-	bool skipTo(const int32_t target){ return SegmentTermDocs::skipTo(target); }
-
+	bool needToLoadPayload;
+	
+	int64_t lazySkipPointer;
+	int64_t lazySkipDocCount;
+	//int32_t lazySkipProxCount;
+
+	void skipPositions( int32_t n );
+	void lazySkip();
+	
+public:
+	///\param Parent must be a segment reader
+	SegmentTermPositions(const SegmentReader* Parent);
+	~SegmentTermPositions();
+
+    void seek(const TermInfo* ti);
+	void close();
+	int32_t nextPosition();
+	bool next();
+	int32_t read(int32_t* docs, int32_t* freqs, int32_t length);
+	virtual TermDocs* __asTermDocs();
+	virtual TermPositions* __asTermPositions();
+
+    //resolve SegmentTermDocs/TermPositions ambiguity
+	void seek(Term* term){ SegmentTermDocs::seek(term); }
+    void seek(TermEnum* termEnum){ SegmentTermDocs::seek(termEnum); }
+    int32_t doc() const{ return SegmentTermDocs::doc(); }
+	int32_t freq() const{ return SegmentTermDocs::freq(); }
+	bool skipTo(const int32_t target){ return SegmentTermDocs::skipTo(target); }
+
 	int32_t getPayloadLength() const {
 		return payloadLength;
 	}
@@ -145,217 +145,217 @@
 		proxStream->readBytes(retArray, retOffset/*, payloadLength*/);
 		needToLoadPayload = false;
 		return retArray;
-	}
-
+	}
+
 	bool isPayloadAvailable() const {
 		return needToLoadPayload && (payloadLength > 0);
-	}
-
-protected:
-	void skippingDoc();
-	/** Called by super.skipTo(). */
-	void skipProx(int64_t proxPointer);
-};
-
-
-
-
-/**
-* An IndexReader responsible for reading 1 segment of an index
-*/
-class SegmentReader: public IndexReader{
-	/**
-	* The class Norm represents the normalizations for a field.
-	* These normalizations are read from an IndexInput in into an array of bytes called bytes
-	*/
-	class Norm :LUCENE_BASE{
-		int32_t number;
-		int64_t normSeek;
-		SegmentReader* reader;
-		const char* segment; ///< pointer to segment name
-	public:
-		CL_NS(store)::IndexInput* in;
-		uint8_t* bytes;
-		bool dirty;
-		//Constructor
-		Norm(CL_NS(store)::IndexInput* instrm, int32_t number, SegmentReader* reader, const char* segment);
-		Norm(CL_NS(store)::IndexInput* instrm, int32_t number, int64_t normSeek, SegmentReader* reader, const char* segment);
-		//Destructor
-		~Norm();
-
-		void reWrite();
-	};
-	friend class SegmentReader::Norm;
-
-	//Holds the name of the segment that is being read
-	const char* segment;
-	
-	//Indicates if there are documents marked as deleted
-	bool deletedDocsDirty;
-	bool normsDirty;
-	bool undeleteAll;
-
-	//Holds all norms for all fields in the segment
-	typedef CL_NS(util)::CLHashtable<const TCHAR*,Norm*,CL_NS(util)::Compare::TChar, CL_NS(util)::Equals::TChar> NormsType;
-    NormsType _norms; 
-    
-	uint8_t* ones;
-	uint8_t* fakeNorms();
-
-	uint8_t hasSingleNorm;
-	CL_NS(store)::IndexInput* singleNormStream;
-	
-	// Compound File Reader when based on a compound file segment
-	CompoundFileReader* cfsReader;
-	///Reads the Field Info file
-	FieldsReader* fieldsReader;
-	TermVectorsReader* termVectorsReaderOrig;
-	CL_NS(util)::ThreadLocal<TermVectorsReader*,
-		CL_NS(util)::Deletor::Object<TermVectorsReader> >termVectorsLocal;
-
-	void initialize(SegmentInfo* si);
-
-	/**
-	* Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
-	* @return TermVectorsReader
-	*/
-	TermVectorsReader* getTermVectorsReader();
-	
-protected:
-	///Marks document docNum as deleted
-	void doDelete(const int32_t docNum);
-	void doUndeleteAll();
-	void doCommit();
-	void doSetNorm(int32_t doc, const TCHAR* field, uint8_t value);
-
-	// can return null if norms aren't stored
-	uint8_t* getNorms(const TCHAR* field);
-  
-public:
-	/**
-	Func - Constructor.
-	Opens all files of a segment
-	.fnm     -> Field Info File
-				Field names are stored in the field info file, with suffix .fnm.
-	.frq     -> Frequency File
-				The .frq file contains the lists of documents which contain 
-				each term, along with the frequency of the term in that document.
-	.prx     -> Prox File
-				The prox file contains the lists of positions that each term occurs
-				at within documents.
-	.tis     -> Term Info File
-				This file is sorted by Term. Terms are ordered first lexicographically 
-				by the term's field name, and within that lexicographically by the term's text.
-	.del     -> Deletion File
-				The .del file is optional, and only exists when a segment contains deletions
-	.f[0-9]* -> Norm File
-				Contains s, for each document, a byte that encodes a value that is 
-				multiplied into the score for hits on that field:
-	*/
-	SegmentReader(SegmentInfo* si);
-
-	SegmentReader(SegmentInfos* sis, SegmentInfo* si);
-	///Destructor.
-	virtual ~SegmentReader();
-
-	///Closes all streams to the files of a single segment
-	void doClose();
-
-	///Checks if a segment managed by SegmentInfo si has deletions
-	static bool hasDeletions(const SegmentInfo* si);
-    bool hasDeletions() const;
-	bool hasNorms(const TCHAR* field) const;
-
-	///Returns all file names managed by this SegmentReader
-	void files(CL_NS(util)::AStringArrayWithDeletor& retarray);
-	///Returns an enumeration of all the Terms and TermInfos in the set.
-	TermEnum* terms() const;
-	///Returns an enumeration of terms starting at or after the named term t
-	TermEnum* terms(const Term* t) const;
-
-	///Gets the document identified by n
-	bool document(int32_t n, CL_NS(document)::Document* doc);
-
-	///Checks if the n-th document has been marked deleted
-	bool isDeleted(const int32_t n);
-
-	///Returns an unpositioned TermDocs enumerator.
-	TermDocs* termDocs() const;
-	///Returns an unpositioned TermPositions enumerator.
-	TermPositions* termPositions() const;
-
-	///Returns the number of documents which contain the term t
-	int32_t docFreq(const Term* t) const;
-
-	///Returns the actual number of documents in the segment
-	int32_t numDocs();
-	///Returns the number of  all the documents in the segment including the ones that have
-	///been marked deleted
-	int32_t maxDoc() const;
-
-    ///Returns the bytes array that holds the norms of a named field.
-	///Returns fake norms if norms aren't available
-    uint8_t* norms(const TCHAR* field);
-	
-    ///Reads the Norms for field from disk
-	void norms(const TCHAR* field, uint8_t* bytes);
-	
-	///concatenating segment with ext and x
-	char* SegmentName(const char* ext, const int32_t x=-1);
-    ///Creates a filename in buffer by concatenating segment with ext and x
-	void SegmentName(char* buffer,int32_t bufferLen,const char* ext, const int32_t x=-1 );
-
-	/**
-	* @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
-	*/
-	void getFieldNames(FieldOption fldOption, CL_NS(util)::StringArrayWithDeletor& retarray);
-    
-    static bool usesCompoundFile(SegmentInfo* si);
-
-	/** Return a term frequency vector for the specified document and field. The
-	*  vector returned contains term numbers and frequencies for all terms in
-	*  the specified field of this document, if the field had storeTermVector
-	*  flag set.  If the flag was not set, the method returns null.
-	* @throws IOException
-	*/
-    TermFreqVector* getTermFreqVector(int32_t docNumber, const TCHAR* field=NULL);
-
-	/** Return an array of term frequency vectors for the specified document.
-	*  The array contains a vector for each vectorized field in the document.
-	*  Each vector vector contains term numbers and frequencies for all terms
-	*  in a given vectorized field.
-	*  If no such fields existed, the method returns null.
-	* @throws IOException
-	*/
-	bool getTermFreqVectors(int32_t docNumber, Array<TermFreqVector*>& result);
-private:
-	//Open all norms files for all fields
-	void openNorms(CL_NS(store)::Directory* cfsDir);
-	//Closes all norms files
-	void closeNorms();
-	
-	///a bitVector that manages which documents have been deleted
-	CL_NS(util)::BitSet* deletedDocs;
-	///an IndexInput to the frequency file
-	CL_NS(store)::IndexInput* freqStream;
-	///For reading the fieldInfos file
-	FieldInfos* fieldInfos;
-    ///For reading the Term Dictionary .tis file
-	TermInfosReader* tis;
-	///an IndexInput to the prox file
-	CL_NS(store)::IndexInput* proxStream;\
-
-    static bool hasSeparateNorms(SegmentInfo* si);
-	static uint8_t* createFakeNorms(int32_t size);
-
-    //allow various classes to access the internals of this. this allows us to have
-    //a more tight idea of the package
-    friend class IndexReader;
-    friend class IndexWriter;
-    friend class SegmentTermDocs;
-    friend class SegmentTermPositions;
-    friend class MultiReader;
-};
-
-CL_NS_END
-#endif
+	}
+
+protected:
+	void skippingDoc();
+	/** Called by super.skipTo(). */
+	void skipProx(int64_t proxPointer);
+};
+
+
+
+
+/**
+* An IndexReader responsible for reading 1 segment of an index
+*/
+class SegmentReader: public IndexReader{
+	/**
+	* The class Norm represents the normalizations for a field.
+	* These normalizations are read from an IndexInput in into an array of bytes called bytes
+	*/
+	class Norm :LUCENE_BASE{
+		int32_t number;
+		int64_t normSeek;
+		SegmentReader* reader;
+		const char* segment; ///< pointer to segment name
+	public:
+		CL_NS(store)::IndexInput* in;
+		uint8_t* bytes;
+		bool dirty;
+		//Constructor
+		Norm(CL_NS(store)::IndexInput* instrm, int32_t number, SegmentReader* reader, const char* segment);
+		Norm(CL_NS(store)::IndexInput* instrm, int32_t number, int64_t normSeek, SegmentReader* reader, const char* segment);
+		//Destructor
+		~Norm();
+
+		void reWrite();
+	};
+	friend class SegmentReader::Norm;
+
+	//Holds the name of the segment that is being read
+	const char* segment;
+	
+	//Indicates if there are documents marked as deleted
+	bool deletedDocsDirty;
+	bool normsDirty;
+	bool undeleteAll;
+
+	//Holds all norms for all fields in the segment
+	typedef CL_NS(util)::CLHashtable<const TCHAR*,Norm*,CL_NS(util)::Compare::TChar, CL_NS(util)::Equals::TChar> NormsType;
+    NormsType _norms; 
+    
+	uint8_t* ones;
+	uint8_t* fakeNorms();
+
+	uint8_t hasSingleNorm;
+	CL_NS(store)::IndexInput* singleNormStream;
+	
+	// Compound File Reader when based on a compound file segment
+	CompoundFileReader* cfsReader;
+	///Reads the Field Info file
+	FieldsReader* fieldsReader;
+	TermVectorsReader* termVectorsReaderOrig;
+	CL_NS(util)::ThreadLocal<TermVectorsReader*,
+		CL_NS(util)::Deletor::Object<TermVectorsReader> >termVectorsLocal;
+
+	void initialize(SegmentInfo* si);
+
+	/**
+	* Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
+	* @return TermVectorsReader
+	*/
+	TermVectorsReader* getTermVectorsReader();
+	
+protected:
+	///Marks document docNum as deleted
+	void doDelete(const int32_t docNum);
+	void doUndeleteAll();
+	void doCommit();
+	void doSetNorm(int32_t doc, const TCHAR* field, uint8_t value);
+
+	// can return null if norms aren't stored
+	uint8_t* getNorms(const TCHAR* field);
+  
+public:
+	/**
+	Func - Constructor.
+	Opens all files of a segment
+	.fnm     -> Field Info File
+				Field names are stored in the field info file, with suffix .fnm.
+	.frq     -> Frequency File
+				The .frq file contains the lists of documents which contain 
+				each term, along with the frequency of the term in that document.
+	.prx     -> Prox File
+				The prox file contains the lists of positions that each term occurs
+				at within documents.
+	.tis     -> Term Info File
+				This file is sorted by Term. Terms are ordered first lexicographically 
+				by the term's field name, and within that lexicographically by the term's text.
+	.del     -> Deletion File
+				The .del file is optional, and only exists when a segment contains deletions
+	.f[0-9]* -> Norm File
+				Contains s, for each document, a byte that encodes a value that is 
+				multiplied into the score for hits on that field:
+	*/
+	SegmentReader(SegmentInfo* si);
+
+	SegmentReader(SegmentInfos* sis, SegmentInfo* si);
+	///Destructor.
+	virtual ~SegmentReader();
+
+	///Closes all streams to the files of a single segment
+	void doClose();
+
+	///Checks if a segment managed by SegmentInfo si has deletions
+	static bool hasDeletions(const SegmentInfo* si);
+    bool hasDeletions() const;
+	bool hasNorms(const TCHAR* field) const;
+
+	///Returns all file names managed by this SegmentReader
+	void files(CL_NS(util)::AStringArrayWithDeletor& retarray);
+	///Returns an enumeration of all the Terms and TermInfos in the set.
+	TermEnum* terms() const;
+	///Returns an enumeration of terms starting at or after the named term t
+	TermEnum* terms(const Term* t) const;
+
+	///Gets the document identified by n
+	bool document(int32_t n, CL_NS(document)::Document* doc);
+
+	///Checks if the n-th document has been marked deleted
+	bool isDeleted(const int32_t n);
+
+	///Returns an unpositioned TermDocs enumerator.
+	TermDocs* termDocs() const;
+	///Returns an unpositioned TermPositions enumerator.
+	TermPositions* termPositions() const;
+
+	///Returns the number of documents which contain the term t
+	int32_t docFreq(const Term* t) const;
+
+	///Returns the actual number of documents in the segment
+	int32_t numDocs();
+	///Returns the number of  all the documents in the segment including the ones that have
+	///been marked deleted
+	int32_t maxDoc() const;
+
+    ///Returns the bytes array that holds the norms of a named field.
+	///Returns fake norms if norms aren't available
+    uint8_t* norms(const TCHAR* field);
+	
+    ///Reads the Norms for field from disk
+	void norms(const TCHAR* field, uint8_t* bytes);
+	
+	///concatenating segment with ext and x
+	char* SegmentName(const char* ext, const int32_t x=-1);
+    ///Creates a filename in buffer by concatenating segment with ext and x
+	void SegmentName(char* buffer,int32_t bufferLen,const char* ext, const int32_t x=-1 );
+
+	/**
+	* @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
+	*/
+	void getFieldNames(FieldOption fldOption, CL_NS(util)::StringArrayWithDeletor& retarray);
+    
+    static bool usesCompoundFile(SegmentInfo* si);
+
+	/** Return a term frequency vector for the specified document and field. The
+	*  vector returned contains term numbers and frequencies for all terms in
+	*  the specified field of this document, if the field had storeTermVector
+	*  flag set.  If the flag was not set, the method returns null.
+	* @throws IOException
+	*/
+    TermFreqVector* getTermFreqVector(int32_t docNumber, const TCHAR* field=NULL);
+
+	/** Return an array of term frequency vectors for the specified document.
+	*  The array contains a vector for each vectorized field in the document.
+	*  Each vector vector contains term numbers and frequencies for all terms
+	*  in a given vectorized field.
+	*  If no such fields existed, the method returns null.
+	* @throws IOException
+	*/
+	bool getTermFreqVectors(int32_t docNumber, Array<TermFreqVector*>& result);
+private:
+	//Open all norms files for all fields
+	void openNorms(CL_NS(store)::Directory* cfsDir);
+	//Closes all norms files
+	void closeNorms();
+	
+	///a bitVector that manages which documents have been deleted
+	CL_NS(util)::BitSet* deletedDocs;
+	///an IndexInput to the frequency file
+	CL_NS(store)::IndexInput* freqStream;
+	///For reading the fieldInfos file
+	FieldInfos* fieldInfos;
+    ///For reading the Term Dictionary .tis file
+	TermInfosReader* tis;
+	///an IndexInput to the prox file
+	CL_NS(store)::IndexInput* proxStream;\
+
+    static bool hasSeparateNorms(SegmentInfo* si);
+	static uint8_t* createFakeNorms(int32_t size);
+
+    //allow various classes to access the internals of this. this allows us to have
+    //a more tight idea of the package
+    friend class IndexReader;
+    friend class IndexWriter;
+    friend class SegmentTermDocs;
+    friend class SegmentTermPositions;
+    friend class MultiReader;
+};
+
+CL_NS_END
+#endif


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.