[Clucene-cvs] SF.net SVN: clucene:[2985] branches/lucene2_3_2/src

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 2985
          http://clucene.svn.sourceforge.net/clucene/?rev=2985&view=rev
Author:   synhershko
Date:     2009-04-12 13:21:07 +0000 (Sun, 12 Apr 2009)

Log Message:
-----------
Brings 2.3.2 support for TermVectorReader and supporting classes.
Also:
* Updates ObjectArray with a better coding interface and memory management
* Makes several dependent classes use ObjectArray/ValueArray instead of Array<>
* Updates tests to comply with new Array.h changes

Modified Paths:
--------------
    branches/lucene2_3_2/src/core/CLucene/files_list.txt
    branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp
    branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp
    branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h
    branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp
    branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h
    branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp
    branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp
    branches/lucene2_3_2/src/core/CLucene/index/SegmentTermDocs.cpp
    branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp
    branches/lucene2_3_2/src/core/CLucene/index/SegmentTermVector.cpp
    branches/lucene2_3_2/src/core/CLucene/index/TermVector.h
    branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp
    branches/lucene2_3_2/src/core/CLucene/index/TermVectorWriter.cpp
    branches/lucene2_3_2/src/core/CLucene/index/Terms.h
    branches/lucene2_3_2/src/core/CLucene/index/_DocumentWriter.h
    branches/lucene2_3_2/src/core/CLucene/index/_MultiReader.h
    branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h
    branches/lucene2_3_2/src/core/CLucene/index/_TermVector.h
    branches/lucene2_3_2/src/core/CLucene/search/PhrasePositions.cpp
    branches/lucene2_3_2/src/core/CLucene/search/_PhrasePositions.h
    branches/lucene2_3_2/src/core/CLucene/util/Array.h
    branches/lucene2_3_2/src/core/CMakeLists.txt
    branches/lucene2_3_2/src/test/search/TestTermVector.cpp

Added Paths:
-----------
    branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp
    branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.h
    branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp
    branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h

Modified: branches/lucene2_3_2/src/core/CLucene/files_list.txt
===================================================================
(Binary files differ)

Added: branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp
===================================================================

--- branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp	                        (rev 0)
+++ branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -0,0 +1,86 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+* 
+* Distributable under the terms of either the Apache License (Version 2.0) or 
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+
+#include "CLucene/_ApiHeader.h"
+#include "DefaultSkipListReader.h"
+
+CL_NS_DEF(index)
+
+DefaultSkipListReader::DefaultSkipListReader(CL_NS(store)::IndexInput* _skipStream, const int32_t maxSkipLevels, const int32_t _skipInterval)
+		: MultiLevelSkipListReader(_skipStream, maxSkipLevels, _skipInterval), currentFieldStoresPayloads(false), lastFreqPointer(0), lastProxPointer(0), lastPayloadLength(0)
+{	// Allocates one slot per skip level; the scalar members start at 0/false so the getters never return indeterminate values.
+	freqPointer = _CL_NEWARRAY(int64_t,maxSkipLevels);	// freq pointer of the current skip entry, per level
+	proxPointer = _CL_NEWARRAY(int64_t,maxSkipLevels);	// prox pointer of the current skip entry, per level
+	payloadLength = _CL_NEWARRAY(int32_t,maxSkipLevels);	// payload length of the current skip entry, per level
+}
+
+DefaultSkipListReader::~DefaultSkipListReader(){	// releases the per-level arrays created in the constructor
+	_CLDELETE_LARRAY(freqPointer);	// allocated with _CL_NEWARRAY, so the array form of delete is required
+	_CLDELETE_LARRAY(proxPointer);
+	_CLDELETE_LARRAY(payloadLength);
+}
+
+void DefaultSkipListReader::init(const int64_t _skipPointer, const int64_t freqBasePointer, const int64_t proxBasePointer, const int32_t df, const bool storesPayloads) {
+	MultiLevelSkipListReader::init(_skipPointer, df);	// reset the base-class state for the new skip list first
+	this->currentFieldStoresPayloads = storesPayloads;	// selects the entry format decoded by readSkipData()
+	lastFreqPointer = freqBasePointer;
+	lastProxPointer = proxBasePointer;
+	// NOTE(review): numberOfSkipLevels is only assigned during skipTo() (loadSkipLevels/loadNextSkip), so here it holds whatever an earlier skip left behind; levels above it are not reset - confirm whether all allocated levels should be filled.
+	for (int32_t j=0; j<numberOfSkipLevels; j++){
+		freqPointer[j] = freqBasePointer;	// readSkipData() adds deltas on top of these base values
+		proxPointer[j] = proxBasePointer;
+		payloadLength[j] = 0;
+	}
+}
+
+int64_t DefaultSkipListReader::getFreqPointer() const {	// freq pointer captured by the last setLastSkipData(), or the base pointer passed to init()
+	return lastFreqPointer;
+}
+int64_t DefaultSkipListReader::getProxPointer() const {	// prox pointer captured by the last setLastSkipData(), or the base pointer passed to init()
+	return lastProxPointer;
+}
+int32_t DefaultSkipListReader::getPayloadLength() const {	// payload length captured by the last setLastSkipData(); init() does not reset it
+	return lastPayloadLength;
+}
+
+void DefaultSkipListReader::seekChild(const int32_t level) {	// descends to `level`, carrying the last skip entry's pointers down with it
+	MultiLevelSkipListReader::seekChild(level);	// the base class seeks the level's stream and resets numSkipped/skipDoc/childPointer; this override replaces it, so it must be chained explicitly
+	freqPointer[level] = lastFreqPointer;
+	proxPointer[level] = lastProxPointer;
+	payloadLength[level] = lastPayloadLength;
+}
+
+void DefaultSkipListReader::setLastSkipData(const int32_t level) {	// remembers the entry currently loaded on `level` as the last one not beyond the target
+	MultiLevelSkipListReader::setLastSkipData(level);	// the base class records lastDoc/lastChildPointer, which skipTo() and getDoc() rely on
+	lastFreqPointer = freqPointer[level];
+	lastProxPointer = proxPointer[level];
+	lastPayloadLength = payloadLength[level];
+}
+
+int32_t DefaultSkipListReader::readSkipData(const int32_t level, CL_NS(store)::IndexInput* _skipStream) {
+	// Every entry starts with one VInt. Without payloads it is the doc delta
+	// itself; with payloads the low bit flags that a new payload length
+	// follows and the remaining bits hold the doc delta.
+	int32_t docDelta = _skipStream->readVInt();
+	if (currentFieldStoresPayloads) {
+		const bool payloadLengthChanged = (docDelta & 1) != 0;
+		if (payloadLengthChanged) {
+			// length differs from the previous payload, so it is stored explicitly
+			payloadLength[level] = _skipStream->readVInt();
+		}
+		// drop the flag bit with an unsigned (logical) shift
+		docDelta = (int32_t)(((uint32_t)docDelta) >> (uint32_t)1);
+	}
+
+	// freq and prox pointers are accumulated from the deltas stored per entry
+	freqPointer[level] += _skipStream->readVInt();
+	proxPointer[level] += _skipStream->readVInt();
+
+	return docDelta;
+}
+
+CL_NS_END
\ No newline at end of file

Added: branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.h	                        (rev 0)
+++ branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.h	2009-04-12 13:21:07 UTC (rev 2985)
@@ -0,0 +1,57 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+* 
+* Distributable under the terms of either the Apache License (Version 2.0) or 
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_index_DefaultSkipListReader_
+#define _lucene_index_DefaultSkipListReader_
+
+#include "MultiLevelSkipListReader.h"
+
+CL_NS_DEF(index)
+
+/**
+ * Implements the skip list reader for the default posting list format
+ * that stores positions and payloads.
+ * Tracks a freq pointer, a prox pointer and a payload length per skip level.
+ */
+class DefaultSkipListReader: public MultiLevelSkipListReader {
+private:
+	bool currentFieldStoresPayloads;	// set by init(); selects the entry format in readSkipData()
+	int64_t* freqPointer;			// per-level arrays, sized maxSkipLevels in the constructor
+	int64_t* proxPointer;
+	int32_t* payloadLength;
+  
+	int64_t lastFreqPointer;		// values copied from a level by setLastSkipData()
+	int64_t lastProxPointer;
+	int32_t lastPayloadLength;
+                           
+public:
+	DefaultSkipListReader(CL_NS(store)::IndexInput* _skipStream, const int32_t maxSkipLevels, const int32_t _skipInterval);
+	virtual ~DefaultSkipListReader();
+
+	void init(const int64_t _skipPointer, const int64_t freqBasePointer, const int64_t proxBasePointer, const int32_t df, const bool storesPayloads);
+
+	/** Returns the freq pointer of the doc to which the last call of 
+	* {@link MultiLevelSkipListReader#skipTo(int)} has skipped.  */
+	int64_t getFreqPointer() const;
+
+	/** Returns the prox pointer of the doc to which the last call of 
+	* {@link MultiLevelSkipListReader#skipTo(int)} has skipped.  */
+	int64_t getProxPointer() const;
+
+	/** Returns the payload length of the payload stored just before 
+	* the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} 
+	* has skipped.  */
+	int32_t getPayloadLength() const;
+
+protected:
+	void seekChild(const int32_t level);	// overrides the virtual in MultiLevelSkipListReader
+  
+	void setLastSkipData(const int32_t level);	// overrides the virtual in MultiLevelSkipListReader
+
+	int32_t readSkipData(const int32_t level, CL_NS(store)::IndexInput* _skipStream);	// implements the pure virtual; returns the doc delta of the entry read
+};
+CL_NS_END
+#endif

Modified: branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -35,7 +35,7 @@
 
 /*Posting*/
 
-DocumentWriter::Posting::Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset)
+DocumentWriter::Posting::Posting(Term* t, const int32_t position, TermVectorOffsetInfo* offset):offsets(NULL)
 {
 //Func - Constructor
 //Pre  - t contains a valid reference to a Term
@@ -43,14 +43,18 @@
 	freq = 1;
 	
 	term = _CL_POINTER(t);
-	positions.values = (int32_t*)malloc(sizeof(int32_t));
-	positions.values[0] = position;
-	positions.length = 1;
+	this->positions = _CLNEW ValueArray<int32_t>(1);
+	this->positions->values[0] = position;
+	//positions.values = (int32_t*)malloc(sizeof(int32_t));
+	//positions.values[0] = position;
+	//positions.length = 1;
 	
 	if ( offset != NULL ){
-		this->offsets.values = (TermVectorOffsetInfo*)malloc(sizeof(TermVectorOffsetInfo));
-		this->offsets.values[0] = *offset;
-		this->offsets.length = 1;
+		this->offsets = _CLNEW ObjectArray<TermVectorOffsetInfo>(1);
+		this->offsets->values[0] = offset;
+		//this->offsets.values = (TermVectorOffsetInfo**)malloc(sizeof(TermVectorOffsetInfo));
+		//this->offsets.values[0] = offset;
+		//this->offsets.length = 1;
 	}
 }
 DocumentWriter::Posting::~Posting(){
@@ -58,9 +62,11 @@
 //Pre  - true
 //Post - The instance has been destroyed
 
-	free(this->positions.values);
-	if ( this->offsets.values != NULL )
-		free(this->offsets.values);
+	_CLDELETE_LARRAY(this->positions->values);
+	//_CLLDELETE(this->positions);
+	//_CLLDELETE(this->offsets);
+	if ( this->offsets != NULL )
+		_CLDELETE_LARRAY(this->offsets->values);
 	_CLDECDELETE(this->term);
 }
 
@@ -376,19 +382,19 @@
 	Posting* ti = postingTable->get(termBuffer);
 	if (ti != NULL) {				  // word seen before
 		int32_t freq = ti->freq;
-		if (ti->positions.length == freq) {
+		if (ti->positions->length == freq) {
 		    // positions array is full, realloc its size
-				ti->positions.length = freq*2;
-		    ti->positions.values = (int32_t*)realloc(ti->positions.values, ti->positions.length * sizeof(int32_t));
+				ti->positions->length = freq*2;
+		    ti->positions->values = (int32_t*)realloc(ti->positions->values, ti->positions->length * sizeof(int32_t));
 		}
-		ti->positions.values[freq] = position;		  // add new position
+		ti->positions->values[freq] = position;		  // add new position
 		
 		if (offset != NULL) {
-			if (ti->offsets.length == freq){
-				ti->offsets.length = freq*2;
-				ti->offsets.values = (TermVectorOffsetInfo*)realloc(ti->offsets.values, ti->offsets.length * sizeof(TermVectorOffsetInfo));
+			if (ti->offsets->length == freq){
+				ti->offsets->length = freq*2;
+				ti->offsets->values = (TermVectorOffsetInfo**)realloc(ti->offsets->values, ti->offsets->length * sizeof(TermVectorOffsetInfo));
 			}
-			ti->offsets[freq] = *offset;
+			ti->offsets->values[freq] = offset;
 		}
 
 		ti->freq = freq + 1;			  // update frequency
@@ -490,8 +496,8 @@
 			
 			int32_t lastPosition = 0;			  // write positions
 			for (int32_t j = 0; j < postingFreq; ++j) {		  // use delta-encoding
-				prox->writeVInt(posting->positions.values[j] - lastPosition);
-				lastPosition = posting->positions.values[j];
+				prox->writeVInt(posting->positions->values[j] - lastPosition);
+				lastPosition = posting->positions->values[j];
 			}
 
 	        // check to see if we switched to a new field
@@ -513,7 +519,7 @@
 	           }
 	        }
 	        if (termVectorWriter != NULL && termVectorWriter->isFieldOpen()) {
-	           termVectorWriter->addTerm(posting->term->text(), postingFreq, &posting->positions, &posting->offsets);
+	           termVectorWriter->addTerm(posting->term->text(), postingFreq, posting->positions, posting->offsets);
 	        }
 		}
 	    if (termVectorWriter != NULL)

Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -367,7 +367,7 @@
       return _termPositions;
   }
 
-  bool IndexReader::getTermFreqVectors(int32_t docNumber, Array<TermFreqVector*>& result){
+  bool IndexReader::getTermFreqVectors(int32_t docNumber, ObjectArray<TermFreqVector>& result){
 	  return this->getTermFreqVectors(docNumber, result);
   }
   

Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h	2009-04-12 13:21:07 UTC (rev 2985)
@@ -260,7 +260,7 @@
 	* @throws IOException if index cannot be accessed
 	* @see org.apache.lucene.document.Field.TermVector
 	*/
-	virtual bool getTermFreqVectors(int32_t docNumber, CL_NS(util)::Array<TermFreqVector*>& result) =0;
+	virtual bool getTermFreqVectors(int32_t docNumber, CL_NS(util)::ObjectArray<TermFreqVector>& result) =0;
 	
 	/**
 	*  Return a term frequency vector for the specified document and field. The

Added: branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp	                        (rev 0)
+++ branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -0,0 +1,227 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+* 
+* Distributable under the terms of either the Apache License (Version 2.0) or 
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/_ApiHeader.h"
+#include "MultiLevelSkipListReader.h"
+
+CL_NS_USE(store)
+CL_NS_DEF(index)
+
+MultiLevelSkipListReader::MultiLevelSkipListReader(IndexInput* _skipStream, const int32_t maxSkipLevels,
+												   const int32_t _skipInterval):
+		numberOfSkipLevels(0),numberOfLevelsToBuffer(1),docCount(0),haveSkipped(false),skipStream(NULL),skipPointer(NULL),skipInterval(NULL),
+		numSkipped(NULL),skipDoc(_CL_NEWARRAY(int32_t,maxSkipLevels)),lastDoc(0),childPointer(NULL),lastChildPointer(0)
+{	// Allocates the per-level arrays; scalar state starts at 0/false so init(), close() and getDoc() never read indeterminate values.
+	this->skipStream = _CL_NEWARRAY(IndexInput*,maxSkipLevels);
+	this->skipPointer = _CL_NEWARRAY(int64_t,maxSkipLevels);
+	this->childPointer = _CL_NEWARRAY(int64_t,maxSkipLevels);
+	this->numSkipped = _CL_NEWARRAY(int32_t,maxSkipLevels);
+	this->maxNumberOfSkipLevels = maxSkipLevels;
+	this->skipInterval = _CL_NEWARRAY(int32_t,maxSkipLevels);
+	this->skipStream[0] = _skipStream;	// level 0 reads from the caller's stream; close() never deletes it
+	this->inputIsBuffered = (strcmp(_skipStream->getObjectName(),"BufferedIndexInput") == 0);
+	this->skipInterval[0] = _skipInterval;
+	for (int32_t i = 1; i < maxSkipLevels; i++) {
+		this->skipStream[i] = NULL;	// close() and init() delete non-NULL slots, so unused levels must start out NULL
+		this->skipInterval[i] = this->skipInterval[i - 1] * _skipInterval;	// cache skip intervals
+	}
+	memset(skipDoc,0,maxSkipLevels*sizeof(int32_t)); // TODO: artificial init
+}
+MultiLevelSkipListReader::~MultiLevelSkipListReader(){	// frees the higher-level streams, then the per-level arrays
+	close();	// deletes skipStream[1..]; skipStream[0] is left alone
+	_CLDELETE_LARRAY(skipStream);
+	_CLDELETE_LARRAY(skipPointer);
+	_CLDELETE_LARRAY(childPointer);
+	_CLDELETE_LARRAY(numSkipped);
+	_CLDELETE_LARRAY(skipInterval);
+	_CLDELETE_LARRAY(skipDoc);
+}
+
+int32_t MultiLevelSkipListReader::getDoc() const {	// doc id recorded by the most recent setLastSkipData()
+	return lastDoc;
+}
+
+int32_t MultiLevelSkipListReader::skipTo(const int32_t target) {	// moves through the skip levels towards `target`; see the return statement for the result
+	if (!haveSkipped) {
+		// first time, load skip levels
+		loadSkipLevels();
+		haveSkipped = true;
+	}
+
+	// walk up the levels until highest level is found that has a skip
+	// for this target
+	int32_t level = 0;
+	while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
+		level++;
+	}    
+
+	while (level >= 0) {	// advance on the current level while its entry is below target, otherwise descend
+		if (target > skipDoc[level]) {
+			if (!loadNextSkip(level)) {
+				continue;	// level exhausted: loadNextSkip() set skipDoc[level] to the max value, so the next pass takes the else branch
+			}
+		} else {
+			// no more skips on this level, go down one level
+			if (level > 0 && lastChildPointer > skipStream[level - 1]->getFilePointer()) {
+				seekChild(level - 1);	// only seek the child stream forward, never backwards
+			} 
+			level--;
+		}
+	}
+
+	return numSkipped[0] - skipInterval[0] - 1;	// derived from the level-0 skip counter, less one interval and one doc
+}
+
+bool MultiLevelSkipListReader::loadNextSkip(const int32_t level) {	// reads the next entry of `level`; returns false once the level is exhausted
+	// we have to skip, the target document is greater than the current
+	// skip list entry        
+	setLastSkipData(level);	// virtual: subclasses capture their own per-level data here as well
+
+	numSkipped[level] += skipInterval[level];
+
+	if (numSkipped[level] > docCount) {
+		// this skip list is exhausted
+		skipDoc[level] = LUCENE_INT32_MAX_SHOULDBE;	// sentinel: no target compares greater than this
+		if (numberOfSkipLevels > level) numberOfSkipLevels = level; 
+		return false;
+	}
+
+	// read next skip entry
+	skipDoc[level] += readSkipData(level, skipStream[level]);	// readSkipData() returns a doc delta
+
+	if (level != 0) {
+		// read the child pointer if we are not on the leaf level
+		childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1];	// stored relative to the start of the level below
+	}
+	return true;
+}
+
+void MultiLevelSkipListReader::seekChild(const int32_t level) {	// positions `level` at the child of the last entry read on level + 1
+	skipStream[level]->seek(lastChildPointer);
+	numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];	// docs skipped before the parent's current entry
+	skipDoc[level] = lastDoc;
+	if (level > 0) {
+		childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1];	// non-leaf levels carry a pointer into the level below
+	}
+}
+
+void MultiLevelSkipListReader::close() {	// releases the streams of levels 1 and up; level 0 is not touched
+	for (int32_t i = 1; i < maxNumberOfSkipLevels; i++) {
+		if (skipStream[i] != NULL) {
+			//skipStream[i]->close();
+			_CLDELETE(skipStream[i]);	// same macro init() uses on these slots; clears the slot so init() or a second close() cannot delete it again
+		}
+	}
+}
+
+void MultiLevelSkipListReader::init(const int64_t _skipPointer, const int32_t df) {	// prepares the reader for a new skip list starting at _skipPointer and covering df docs
+	this->skipPointer[0] = _skipPointer;
+	this->docCount = df;
+	for (int32_t j=0; j<maxNumberOfSkipLevels; j++){	// reset every allocated level: numberOfSkipLevels is only recomputed later, inside skipTo()
+		skipDoc[j] = 0;
+		numSkipped[j] = 0;
+		childPointer[j] = 0;
+	}
+	// force the next skipTo() to call loadSkipLevels() again
+	haveSkipped = false;
+	for (int32_t i = 1; i < numberOfSkipLevels; i++) {	// NOTE(review): loadNextSkip() may have lowered numberOfSkipLevels, so higher-level streams can survive here - confirm
+		_CLDELETE(skipStream[i]);
+	}
+}
+
+void MultiLevelSkipListReader::loadSkipLevels() {	// computes the level count for docCount and opens one stream per level
+	numberOfSkipLevels = (docCount == 0) ? 0 : (int32_t)floor(log((double)docCount) / log((double)skipInterval[0]));	// floor(log base skipInterval[0] of docCount)
+	if (numberOfSkipLevels > maxNumberOfSkipLevels) {
+		numberOfSkipLevels = maxNumberOfSkipLevels;	// never exceed the size of the per-level arrays
+	}
+
+	skipStream[0]->seek(skipPointer[0]);
+
+	int32_t toBuffer = numberOfLevelsToBuffer;
+
+	for (int32_t i = numberOfSkipLevels - 1; i > 0; i--) {	// levels are read from the highest down to level 1
+		// the length of the current level
+		int64_t length = skipStream[0]->readVLong();
+
+		// the start pointer of the current level
+		skipPointer[i] = skipStream[0]->getFilePointer();
+		if (toBuffer > 0) {
+			// buffer this level
+			skipStream[i] = static_cast<IndexInput*>(_CLNEW SkipBuffer(skipStream[0], (int32_t) length));	// the SkipBuffer constructor reads `length` bytes from skipStream[0]
+			toBuffer--;
+		} else {
+			// clone this stream, it is already at the start of the current level
+			skipStream[i] = (IndexInput*) skipStream[0]->clone();
+			if (inputIsBuffered && length < BufferedIndexInput::BUFFER_SIZE) {
+				((BufferedIndexInput*) skipStream[i])->setBufferSize((int32_t) length);	// cast is guarded by inputIsBuffered, set from getObjectName() in the constructor
+			}
+
+			// move base stream beyond the current level
+			skipStream[0]->seek(skipStream[0]->getFilePointer() + length);
+		}
+	}
+
+	// use base stream for the lowest level
+	skipPointer[0] = skipStream[0]->getFilePointer();
+}
+
+void MultiLevelSkipListReader::setLastSkipData(const int32_t level) {	// snapshots the current entry of `level` before loadNextSkip() advances it
+	lastDoc = skipDoc[level];
+	lastChildPointer = childPointer[level];
+}
+
+MultiLevelSkipListReader::SkipBuffer::SkipBuffer(IndexInput* input, const int32_t _length):pos(0)
+{	// copies _length bytes from input's current position into a private buffer
+	data = _CL_NEWARRAY(uint8_t,_length);
+	this->_datalength = _length;
+	pointer = input->getFilePointer();	// file offset of data[0]; must be taken before the read below
+	input->readBytes(data, _length);
+}
+MultiLevelSkipListReader::SkipBuffer::~SkipBuffer()
+{	// data comes from _CL_NEWARRAY, so it must be released with the array form of delete
+	_CLDELETE_LARRAY(data);
+}
+
+void MultiLevelSkipListReader::SkipBuffer::close() {	// drops the buffered bytes; the destructor may still run afterwards
+	_CLDELETE_LARRAY(data); data = NULL;	// array delete to match _CL_NEWARRAY; cleared so the destructor does not free it twice
+	_datalength=0;
+}
+
+int64_t MultiLevelSkipListReader::SkipBuffer::getFilePointer() const {	// position expressed as an offset in the stream the buffer was filled from
+	return pointer + pos;
+}
+
+int64_t MultiLevelSkipListReader::SkipBuffer::length() const {	// number of buffered bytes; 0 after close()
+	return _datalength;
+}
+
+uint8_t MultiLevelSkipListReader::SkipBuffer::readByte() {	// returns the next buffered byte; no bounds check against _datalength
+	return data[pos++];
+}
+
+void MultiLevelSkipListReader::SkipBuffer::readBytes(uint8_t* b, const int32_t len) {	// copies len bytes into b and advances; no bounds check against _datalength
+	memcpy(b,data+pos,len);
+	pos += len;
+}
+
+void MultiLevelSkipListReader::SkipBuffer::seek(const int64_t _pos) {	// _pos is a file offset; it is translated to an index into data
+	this->pos = (int32_t) (_pos - pointer);
+}
+
+const char* MultiLevelSkipListReader::SkipBuffer::getObjectName(){ return "SkipBuffer"; }	// type name string, as compared via strcmp() in the reader's constructor
+const char* MultiLevelSkipListReader::SkipBuffer::getDirectoryType() const{ return "SKIP"; }	// fixed tag for this in-memory input
+MultiLevelSkipListReader::SkipBuffer::SkipBuffer(const SkipBuffer& other): IndexInput(other){	// deep copy: the clone owns its own byte buffer
+	data = _CL_NEWARRAY(uint8_t,other._datalength);
+	memcpy(data,other.data,other._datalength * sizeof(uint8_t));
+	this->_datalength = other._datalength;
+	this->pointer = other.pointer;
+	this->pos = other.pos;	// the copy starts at the same read position
+}
+IndexInput* MultiLevelSkipListReader::SkipBuffer::clone() const{	// returns a new heap-allocated deep copy made with the copy constructor
+	return _CLNEW SkipBuffer(*this);
+}
+
+CL_NS_END
\ No newline at end of file

Added: branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h	                        (rev 0)
+++ branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h	2009-04-12 13:21:07 UTC (rev 2985)
@@ -0,0 +1,132 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+* 
+* Distributable under the terms of either the Apache License (Version 2.0) or 
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_index_MultiLevelSkipListReader_
+#define _lucene_index_MultiLevelSkipListReader_
+
+#include "CLucene/store/IndexInput.h"
+//#include "CLucene/util/Array.h"
+
+CL_NS_DEF(index)
+
+/**
+ * This abstract class reads skip lists with multiple levels.
+ * 
+ * See {@link MultiLevelSkipListWriter} for the information about the encoding 
+ * of the multi level skip lists. 
+ * 
+ * Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)}
+ * which defines the actual format of the skip data.
+ */
+class MultiLevelSkipListReader : LUCENE_BASE {
+private:
+	// the maximum number of skip levels possible for this index
+	int32_t maxNumberOfSkipLevels;
+
+protected:
+	// number of levels in this skip list
+	int32_t numberOfSkipLevels;
+
+private:
+	// Expert: defines the number of top skip levels to buffer in memory.
+	// Reducing this number results in less memory usage, but possibly
+	// slower performance due to more random I/Os.
+	// Please notice that the space each level occupies is limited by
+	// the skipInterval. The top level can not contain more than
+	// skipLevel entries, the second top level can not contain more
+	// than skipLevel^2 entries and so forth.
+	int32_t numberOfLevelsToBuffer;
+
+	int32_t docCount;		// df passed to init()
+	bool haveSkipped;		// false until the first skipTo() after init() has loaded the levels
+
+	CL_NS(store)::IndexInput** skipStream;		// skipStream for each level
+	int64_t* skipPointer;			// the start pointer of each skip level
+	int32_t* skipInterval;         // skipInterval of each level
+	int32_t* numSkipped;				// number of docs skipped per level
+
+	int32_t* skipDoc;					// doc id of current skip entry per level 
+	int32_t lastDoc;                // doc id of last read skip entry with docId <= target
+	int64_t* childPointer;			// child pointer of current skip entry per level
+	int64_t lastChildPointer;		// childPointer of last read skip entry with docId <= target
+
+	bool inputIsBuffered;		// true when the level-0 stream reports the name "BufferedIndexInput"
+
+public:
+	MultiLevelSkipListReader(CL_NS(store)::IndexInput* _skipStream, const int32_t maxSkipLevels, const int32_t _skipInterval);
+	virtual ~MultiLevelSkipListReader();
+
+	/** Returns the id of the doc to which the last call of {@link #skipTo(int)}
+	*  has skipped.  */
+	int32_t getDoc() const;
+
+	/** Skips entries to the first beyond the current whose document number is
+	*  greater than or equal to <i>target</i>. Returns the current doc count. 
+	*/
+	int32_t skipTo(const int32_t target);
+
+private:
+	bool loadNextSkip(const int32_t level);	// returns false when the level has no further entries
+
+protected:
+	/** Seeks the skip entry on the given level */
+	virtual void seekChild(const int32_t level);
+
+	void close();	// deletes the streams of levels 1 and up
+
+	/** initializes the reader */
+	void init(const int64_t _skipPointer, const int32_t df);
+
+private:
+	/** Loads the skip levels  */
+	void loadSkipLevels();
+
+protected:
+	/**
+	* Subclasses must implement the actual skip data encoding in this method.
+	*  
+	* @param level the level skip data shall be read from
+	* @param skipStream the skip stream to read from
+	*/  
+	virtual int32_t readSkipData(const int32_t level, CL_NS(store)::IndexInput* skipStream) = 0;
+
+	/** Copies the values of the last read skip entry on this level */
+	virtual void setLastSkipData(const int32_t level);
+
+protected:
+	/** used to buffer the top skip levels */
+	class SkipBuffer : public CL_NS(store)::IndexInput {
+	private:
+		uint8_t* data;		// buffered bytes of one skip level
+		int64_t pointer;	// file offset that data[0] was read from
+		int32_t pos;		// current read index into data
+		size_t _datalength;	// number of bytes held in data
+
+	public:
+		SkipBuffer(CL_NS(store)::IndexInput* input, const int32_t length);
+		virtual ~SkipBuffer();
+
+	private:
+		void close();
+
+		int64_t getFilePointer() const;
+
+		int64_t length() const;
+
+		uint8_t readByte();
+
+		void readBytes(uint8_t* b, const int32_t len);
+
+		void seek(const int64_t _pos);
+
+		SkipBuffer(const SkipBuffer& other);	// deep-copies the buffer; used by clone()
+		CL_NS(store)::IndexInput* clone() const;
+		const char* getObjectName();
+		const char* getDirectoryType() const;
+	};
+};
+CL_NS_END
+#endif

Modified: branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -103,7 +103,7 @@
 	_CLDELETE(internal);
 }
 
-bool MultiReader::getTermFreqVectors(int32_t n, Array<TermFreqVector*>& result){
+bool MultiReader::getTermFreqVectors(int32_t n, ObjectArray<TermFreqVector>& result){
 	int32_t i = readerIndex(n);        // find segment num
 	return subReaders[i]->getTermFreqVectors(n - starts[i], result); // dispatch to segment
 }

Modified: branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h	2009-04-12 13:21:07 UTC (rev 2985)
@@ -58,7 +58,7 @@
 	*  in a given vectorized field.
 	*  If no such fields existed, the method returns null.
 	*/
-	bool getTermFreqVectors(int32_t n, CL_NS(util)::Array<TermFreqVector*>& result);
+	bool getTermFreqVectors(int32_t n, CL_NS(util)::ObjectArray<TermFreqVector>& result);
 	TermFreqVector* getTermFreqVector(int32_t n, const TCHAR* field);
 
 

Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -307,9 +307,9 @@
 				if (reader->isDeleted(docNum))
 					continue;
 
-				ObjectArray<TermFreqVector*> tmp;
-				if ( reader->getTermFreqVectors(docNum, (Array<TermFreqVector*>&)tmp) )
-					termVectorsWriter->addAllDocVectors((Array<TermFreqVector*>&)tmp);
+				ObjectArray<TermFreqVector> tmp;
+				if ( reader->getTermFreqVectors(docNum, (ObjectArray<TermFreqVector>&)tmp) )
+					termVectorsWriter->addAllDocVectors((ObjectArray<TermFreqVector>&)tmp);
 				tmp.deleteValues();
 			}
 		}

Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -817,7 +817,7 @@
 		return termVectorsReader->get(docNumber, field);
   }
 
-   bool SegmentReader::getTermFreqVectors(int32_t docNumber, Array<TermFreqVector*>& result) {
+   bool SegmentReader::getTermFreqVectors(int32_t docNumber, ObjectArray<TermFreqVector>& result) {
     if (termVectorsReaderOrig == NULL)
       return false;
     
@@ -825,7 +825,8 @@
     if (termVectorsReader == NULL)
       return false;
     
-    return termVectorsReader->get(docNumber, result);
+    result = (*termVectorsReader->get(docNumber));
+	return true;
   }
 
 CL_NS_END

Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentTermDocs.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/SegmentTermDocs.cpp	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/SegmentTermDocs.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -13,40 +13,15 @@
 
 CL_NS_DEF(index)
 
-  SegmentTermDocs::SegmentTermDocs(const SegmentReader* _parent){
-  //Func - Constructor
-  //Pre  - Paren != NULL
-  //Post - The instance has been created
-
-      CND_PRECONDITION(_parent != NULL,"Parent is NULL");
-
-      parent      = _parent;
-      deletedDocs =  parent->deletedDocs;
-
-      _doc         = 0;
-      _freq        = 0;
-	  count =		 0;
-	  df		   = 0;
-
-      skipInterval=0;
-      numSkips=0;
-      skipCount=0;
-      skipStream=NULL;
-      skipDoc=0;
-      freqPointer=0;
-      proxPointer=0;
-      skipPointer=0;
-      haveSkipped=false;
-
-      freqStream  = parent->freqStream->clone();
-      skipInterval = parent->tis->getSkipInterval();
+  SegmentTermDocs::SegmentTermDocs(const SegmentReader* _parent) : parent(_parent),freqStream(_parent->freqStream->clone()),
+		count(0),df(0),deletedDocs(_parent->deletedDocs),_doc(0),_freq(0),skipInterval(_parent->tis->getSkipInterval()),
+		maxSkipLevels(_parent->tis->getMaxSkipLevels()),skipListReader(NULL),freqBasePointer(0),proxBasePointer(0),
+		skipPointer(0),haveSkipped(false)
+	{
+      CND_CONDITION(_parent != NULL,"Parent is NULL");
    }
 
   SegmentTermDocs::~SegmentTermDocs() {
-  //Func - Destructor
-  //Pre  - true
-  //Post - The instance has been destroyed
-
       close();
   }
 
@@ -56,52 +31,47 @@
 
   void SegmentTermDocs::seek(Term* term) {
     TermInfo* ti = parent->tis->get(term);
-    seek(ti);
+    seek(ti, term);
     _CLDELETE(ti);
   }
 
   void SegmentTermDocs::seek(TermEnum* termEnum){
     TermInfo* ti=NULL;
+	Term* term = NULL;
     
     // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
 	if ( termEnum->getObjectName() == SegmentTermEnum::getClassName() && ((SegmentTermEnum*)termEnum)->fieldInfos == parent->fieldInfos ){
-      ti = ((SegmentTermEnum*)termEnum)->getTermInfo();
-    }else{
-      ti = parent->tis->get(termEnum->term(false));
+		SegmentTermEnum* segmentTermEnum = ((SegmentTermEnum*) termEnum);
+		term = segmentTermEnum->term(false);
+		ti = segmentTermEnum->getTermInfo();
+	}else{
+		term = termEnum->term(false);
+		ti = parent->tis->get(term);
     }
     
-    seek(ti);
+    seek(ti,term);
 	_CLDELETE(ti);
   }
-  void SegmentTermDocs::seek(const TermInfo* ti) {
-     count = 0;
-    if (ti == NULL) {
-      df = 0;
-    } else {
-      df = ti->docFreq;
-      _doc = 0;
-      skipDoc = 0;
-      skipCount = 0;
-      numSkips = df / skipInterval;
-      freqPointer = ti->freqPointer;
-      proxPointer = ti->proxPointer;
-      skipPointer = freqPointer + ti->skipOffset;
-      freqStream->seek(freqPointer);
-      haveSkipped = false;
-    }
+  void SegmentTermDocs::seek(const TermInfo* ti,Term* term) {
+	  count = 0;
+	  FieldInfo* fi = parent->fieldInfos->fieldInfo(term->field());
+	  currentFieldStoresPayloads = (fi != NULL) ? fi->storePayloads : false;
+	  if (ti == NULL) {
+		  df = 0;
+	  } else {					// punt case
+		  df = ti->docFreq;
+		  _doc = 0;
+		  freqBasePointer = ti->freqPointer;
+		  proxBasePointer = ti->proxPointer;
+		  skipPointer = freqBasePointer + ti->skipOffset;
+		  freqStream->seek(freqBasePointer);
+		  haveSkipped = false;
+	  }
   }
 
   void SegmentTermDocs::close() {
-
-      //Check if freqStream still exists
-	  if (freqStream != NULL){
-		freqStream->close(); //todo: items like these can probably be delete, because deleting the object also closes it...do everywhere
-		_CLDELETE( freqStream );
-	  }
-     if (skipStream != NULL){
-		skipStream->close();
-		_CLDELETE( skipStream );
-     }
+	  _CLDELETE( freqStream );
+	  _CLDELETE( skipListReader );
   }
 
   int32_t SegmentTermDocs::doc()const { 
@@ -132,76 +102,51 @@
   }
 
   int32_t SegmentTermDocs::read(int32_t* docs, int32_t* freqs, int32_t length) {
-    int32_t i = 0;
-//todo: one optimization would be to get the pointer buffer for ram or mmap dirs 
-//and iterate over them instead of using readByte() intensive functions.
-    while (i<length && count < df) {
-      uint32_t docCode = freqStream->readVInt();
-      _doc += docCode >> 1;
-      if ((docCode & 1) != 0)			  // if low bit is set
-        _freq = 1;				  // _freq is one
-      else
-        _freq = freqStream->readVInt();		  // else read _freq
-      count++;
+	  int32_t i = 0;
+	  //todo: one optimization would be to get the pointer buffer for ram or mmap dirs 
+	  //and iterate over them instead of using readByte() intensive functions.
+	  while (i<length && count < df) {
+		  // manually inlined call to next() for speed
+		  uint32_t docCode = freqStream->readVInt();
+		  _doc += docCode >> 1;
+		  if ((docCode & 1) != 0)			  // if low bit is set
+			  _freq = 1;				  // _freq is one
+		  else
+			  _freq = freqStream->readVInt();		  // else read _freq
+		  count++;
 
-      if (deletedDocs == NULL || (_doc >= 0 && !deletedDocs->get(_doc))) {
-        docs[i] = _doc;
-        freqs[i] = _freq;
-        i++;
-      }
-    }
-    return i;
+		  if (deletedDocs == NULL || (_doc >= 0 && !deletedDocs->get(_doc))) {
+			  docs[i] = _doc;
+			  freqs[i] = _freq;
+			  i++;
+		  }
+	  }
+	  return i;
   }
 
   bool SegmentTermDocs::skipTo(const int32_t target){
     assert(count <= df );
     
     if (df >= skipInterval) {                      // optimized case
-      if (skipStream == NULL)
-         skipStream = freqStream->clone(); // lazily clone
+      if (skipListReader == NULL)
+		  skipListReader = _CLNEW DefaultSkipListReader(freqStream->clone(), maxSkipLevels, skipInterval); // lazily clone
 
-      if (!haveSkipped) {                          // lazily seek skip stream
-        skipStream->seek(skipPointer);
-        haveSkipped = true;
-      }
+	  if (!haveSkipped) {                          // lazily initialize skip stream
+		  skipListReader->init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads);
+		  haveSkipped = true;
+	  }
 
-      // scan skip data
-      int32_t lastSkipDoc = skipDoc;
-      int64_t lastFreqPointer = freqStream->getFilePointer();
-      int64_t lastProxPointer = -1;
-      int32_t numSkipped = -1 - (count % skipInterval);
+      int32_t newCount = skipListReader->skipTo(target); 
+      if (newCount > count) {
+        freqStream->seek(skipListReader->getFreqPointer());
+        skipProx(skipListReader->getProxPointer(), skipListReader->getPayloadLength());
+
+        _doc = skipListReader->getDoc();
+        count = newCount;
+      }      
+	}
 
-      while (target > skipDoc) {
-        lastSkipDoc = skipDoc;
-        lastFreqPointer = freqPointer;
-        lastProxPointer = proxPointer;
-        
-        if (skipDoc != 0 && skipDoc >= _doc)
-          numSkipped += skipInterval;
-        
-        if(skipCount >= numSkips)
-          break;
-
-        skipDoc += skipStream->readVInt();
-        freqPointer += skipStream->readVInt();
-        proxPointer += skipStream->readVInt();
-
-        skipCount++;
-      }
-      
-      // if we found something to skip, then skip it
-      if (lastFreqPointer > freqStream->getFilePointer()) {
-        freqStream->seek(lastFreqPointer);
-        skipProx(lastProxPointer);
-
-        _doc = lastSkipDoc;
-        count += numSkipped;
-      }
-
-    }
-
     // done skipping, now just scan
-
     do {
       if (!next())
         return false;

Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -13,25 +13,13 @@
 CL_NS_DEF(index)
 
 SegmentTermPositions::SegmentTermPositions(const SegmentReader* _parent):
-  SegmentTermDocs(_parent){
-//Func - Constructor
-//Pre  - Parent != NULL
-//Post - The instance has been created
-
-    CND_PRECONDITION(_parent != NULL, "Parent is NULL");
-    
-    proxStream = _parent->proxStream->clone();
-    
-    CND_CONDITION(proxStream != NULL,"proxStream is NULL");
-    
-    position  = 0;
-    proxCount = 0;
+	SegmentTermDocs(_parent), proxStream(NULL)// the proxStream will be cloned lazily when nextPosition() is called for the first time
+	,lazySkipPointer(-1), lazySkipProxCount(0)
+{
+    CND_CONDITION(_parent != NULL, "Parent is NULL");
 }
 
 SegmentTermPositions::~SegmentTermPositions() {
-//Func - Destructor
-//Pre  - true
-//Post - The intance has been closed
     close();
 }
 
@@ -42,43 +30,61 @@
     return (TermPositions*) this;
 }
 
-void SegmentTermPositions::seek(const TermInfo* ti) {
-    SegmentTermDocs::seek(ti);
+void SegmentTermPositions::seek(const TermInfo* ti, Term* term) {
+    SegmentTermDocs::seek(ti, term);
     if (ti != NULL)
-    	//lazySkipPointer = ti->proxPointer;
-        proxStream->seek(ti->proxPointer);
+    	lazySkipPointer = ti->proxPointer;
     
-    //lazySkipDocCount = 0;
+    lazySkipProxCount = 0;
     proxCount = 0;
+    payloadLength = 0;
+    needToLoadPayload = false;
 }
 
 void SegmentTermPositions::close() {
-//Func - Frees the resources
-//Pre  - true
-//Post - The resources  have been freed
-
     SegmentTermDocs::close();
     //Check if proxStream still exists
     if(proxStream){
-        proxStream->close();         
+        proxStream->close();
         _CLDELETE( proxStream );
     }
 }
 
 int32_t SegmentTermPositions::nextPosition() {
-    /* DSR:CL_BUG: Should raise exception if proxCount == 0 at the
+    /* todo: DSR:CL_BUG: Should raise exception if proxCount == 0 at the
     ** beginning of this method, as in
     **   if (--proxCount == 0) throw ...;
     ** The JavaDocs for TermPositions.nextPosition declare this constraint,
     ** but CLucene doesn't enforce it. */
-	//lazySkip();
+	lazySkip();
     proxCount--;
-    return position += proxStream->readVInt();
+    return position += readDeltaPosition();
 }
 
+int32_t SegmentTermPositions::readDeltaPosition() {
+	int32_t delta = proxStream->readVInt();
+	if (currentFieldStoresPayloads) {
+		// if the current field stores payloads then
+		// the position delta is shifted one bit to the left.
+		// if the LSB is set, then we have to read the current
+		// payload length
+		if ((delta & 1) != 0) {
+			payloadLength = proxStream->readVInt();
+		} 
+		delta = (int32_t)((uint32_t)delta >> (uint32_t)1);
+		needToLoadPayload = true;
+	}
+	return delta;
+}
+
+void SegmentTermPositions::skippingDoc() {
+	lazySkipProxCount += _freq;
+}
+
 bool SegmentTermPositions::next() {
-    for (int32_t f = proxCount; f > 0; f--)		  // skip unread positions
-        proxStream->readVInt();
+	// we remember to skip the remaining positions of the current
+    // document lazily
+    lazySkipProxCount += proxCount;
     
     if (SegmentTermDocs::next()) {				  // run super
         proxCount = _freq;				  // note frequency
@@ -89,35 +95,78 @@
 }
 
 int32_t SegmentTermPositions::read(int32_t* docs, int32_t* freqs, int32_t length) {
-    _CLTHROWA(CL_ERR_InvalidState,"TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
+    _CLTHROWA(CL_ERR_UnsupportedOperation,"TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
 }
 
-void SegmentTermPositions::skippingDoc() {
-    for (int32_t f = _freq; f > 0; f--)		  // skip all positions
-        proxStream->readVInt();
-//	lazySkipDocCount += _freq;
+void SegmentTermPositions::skipProx(const int64_t proxPointer, const int32_t _payloadLength){
+    // we save the pointer, we might have to skip there lazily
+    lazySkipPointer = proxPointer;
+    lazySkipProxCount = 0;
+    proxCount = 0;
+    this->payloadLength = _payloadLength;
+    needToLoadPayload = false;
 }
 
-void SegmentTermPositions::skipProx(int64_t proxPointer){
-    proxStream->seek(proxPointer);
-//	lazySkipPointer = proxPointer;
-//	lazySkipDocCount = 0;
-    proxCount = 0;
+void SegmentTermPositions::skipPositions(int32_t n) {
+	for ( int32_t f = n; f > 0; f-- ) {		// skip unread positions
+		readDeltaPosition();
+		skipPayload();
+	}
 }
 
-void SegmentTermPositions::skipPositions(int32_t n) {
-	for ( int32_t f = n; f > 0; f-- )
-		proxStream->readVInt();
+void SegmentTermPositions::skipPayload() {
+	if (needToLoadPayload && payloadLength > 0) {
+		proxStream->seek(proxStream->getFilePointer() + payloadLength);
+	}
+	needToLoadPayload = false;
 }
 
 void SegmentTermPositions::lazySkip() {
-	if ( lazySkipPointer != 0 ) {
-		proxStream->seek( lazySkipPointer );
-		lazySkipPointer = 0;
+    if (proxStream == NULL) {
+      // clone lazily
+      proxStream = parent->proxStream->clone();
+    }
+    
+    // we might have to skip the current payload
+    // if it was not read yet
+    skipPayload();
+      
+    if (lazySkipPointer != -1) {
+      proxStream->seek(lazySkipPointer);
+      lazySkipPointer = -1;
+    }
+     
+    if (lazySkipProxCount != 0) {
+      skipPositions(lazySkipProxCount);
+      lazySkipProxCount = 0;
+    }
+}
+
+int32_t SegmentTermPositions::getPayloadLength() const { return payloadLength; }
+
+uint8_t* SegmentTermPositions::getPayload(uint8_t* data, const int32_t offset) {
+	if (!needToLoadPayload) {
+		_CLTHROWA(CL_ERR_IO, "Payload cannot be loaded more than once for the same term position.");
 	}
-	if ( lazySkipDocCount != 0 ) {
-		skipPositions( lazySkipDocCount );
-		lazySkipDocCount = 0;
+
+	// read payloads lazily
+	uint8_t* retArray;
+	int32_t retOffset;
+	// TODO: Complete length logic ( possibly using ValueArray ? )
+	if (data == NULL /*|| data.length - offset < payloadLength*/) {
+		// the array is too small to store the payload data,
+		// so we allocate a new one
+		_CLDELETE_ARRAY(data);
+		retArray = _CL_NEWARRAY(uint8_t, payloadLength);
+		retOffset = 0;
+	} else {
+		retArray = data;
+		retOffset = offset;
 	}
-}
+	proxStream->readBytes(retArray + retOffset, payloadLength);
+	needToLoadPayload = false;
+	return retArray;
+}
+bool SegmentTermPositions::isPayloadAvailable() const { return needToLoadPayload && (payloadLength > 0); }
+
 CL_NS_END

Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentTermVector.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/SegmentTermVector.cpp	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/SegmentTermVector.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -13,20 +13,18 @@
 CL_NS_USE(util)
 CL_NS_DEF(index)
 
-Array<int32_t> SegmentTermPositionVector::EMPTY_TERM_POS;
+ValueArray<int32_t> SegmentTermPositionVector::EMPTY_TERM_POS;
 
-SegmentTermVector::SegmentTermVector(const TCHAR* field, TCHAR** terms, Array<int32_t>* termFreqs) {
-	this->field = STRDUP_TtoT(field);
-	this->terms = terms;
-	this->termsLen = -1; //lazily get the size of the terms
-	this->termFreqs = termFreqs;
+SegmentTermVector::SegmentTermVector(const TCHAR* _field, TCHAR** _terms, ValueArray<int32_t>* _termFreqs) {
+	this->field = STRDUP_TtoT(_field); // TODO: Try and avoid this dup (using intern'ing perhaps?)
+	this->terms = _terms;
+	this->termsLen = -1; //lazily get the size of the terms array
+	this->termFreqs = _termFreqs;
 }
 
 SegmentTermVector::~SegmentTermVector(){
-  _CLDELETE_CARRAY(field);
-  _CLDELETE_CARRAY_ALL(terms);
-  
-  _CLDELETE_ARRAY(termFreqs->values);
+  _CLDELETE_LCARRAY(field);
+  _CLDELETE_LCARRAY_ALL(terms);
   _CLDELETE(termFreqs);
 }
 TermPositionVector* SegmentTermVector::__asTermPositionVector(){
@@ -34,45 +32,45 @@
 }
 
 const TCHAR* SegmentTermVector::getField() {
-return field;
+	return field;
 }
 
 TCHAR* SegmentTermVector::toString() const{
-StringBuffer sb;
-sb.appendChar('{');
-sb.append(field);
-sb.append(_T(": "));
+	StringBuffer sb;
+	sb.appendChar('{');
+	sb.append(field);
+	sb.append(_T(": "));
 
-int32_t i=0;
-while ( terms && terms[i] != NULL ){
-  if (i>0) 
-	  sb.append(_T(", "));
-  sb.append(terms[i]);
-  sb.appendChar('/');
+	int32_t i=0;
+	while ( terms && terms[i] != NULL ){
+		if (i>0) 
+			sb.append(_T(", "));
+		sb.append(terms[i]);
+		sb.appendChar('/');
 
-  sb.appendInt((*termFreqs)[i]);
+		sb.appendInt((*termFreqs)[i]);
+	}
+	sb.appendChar('}');
+	return sb.toString();
 }
-sb.appendChar('}');
-return sb.toString();
-}
 
 int32_t SegmentTermVector::size() {
-if ( terms == NULL )
-	return 0;
+	if ( terms == NULL )
+		return 0;
 
-if ( termsLen == -1 ){
-	termsLen=0;
-	while ( terms[termsLen] != 0 )
-		termsLen++;
+	if ( termsLen == -1 ){
+		termsLen=0;
+		while ( terms[termsLen] != 0 )
+			termsLen++;
+	}
+	return termsLen;
 }
-return termsLen;
-}
 
 const TCHAR** SegmentTermVector::getTerms() {
 	return (const TCHAR**)terms;
 }
 
-const Array<int32_t>* SegmentTermVector::getTermFrequencies() {
+const ValueArray<int32_t>* SegmentTermVector::getTermFrequencies() {
 	return termFreqs;
 }
 
@@ -103,92 +101,63 @@
 	return res >= 0 ? res : -1;
 }
 
-void SegmentTermVector::indexesOf(const TCHAR** termNumbers, const int32_t start, const int32_t len, Array<int32_t>& ret) {
+ValueArray<int32_t>* SegmentTermVector::indexesOf(const TCHAR** termNumbers, const int32_t start, const int32_t len) {
 	// TODO: there must be a more efficient way of doing this.
 	//       At least, we could advance the lower bound of the terms array
 	//       as we find valid indexes. Also, it might be possible to leverage
 	//       this even more by starting in the middle of the termNumbers array
 	//       and thus dividing the terms array maybe in half with each found index.
-	ret.length = len;
-	ret.values = _CL_NEWARRAY(int32_t,len);
+	ValueArray<int32_t>* ret = _CLNEW ValueArray<int32_t>(len);
 	for (int32_t i=0; i<len; ++i) {
-	  ret.values[i] = indexOf(termNumbers[start+ i]);
+	  ret->values[i] = indexOf(termNumbers[start+ i]);
 	}
+	return ret;
 }
+void SegmentTermVector::indexesOf(const TCHAR** terms, const int32_t start, const int32_t len, ValueArray<int32_t>& ret){
+	ret = *indexesOf(terms,start,len);
+}
 
 
-
     
-SegmentTermPositionVector::SegmentTermPositionVector(const TCHAR* field, TCHAR** terms, Array<int32_t>* termFreqs, Array< Array<int32_t> >* positions, Array< Array<TermVectorOffsetInfo> >* offsets):
-	SegmentTermVector(field,terms,termFreqs)
+SegmentTermPositionVector::SegmentTermPositionVector(const TCHAR* field, TCHAR** terms, ValueArray<int32_t>* termFreqs, ObjectArray< ValueArray<int32_t> >* _positions, ObjectArray< ObjectArray<TermVectorOffsetInfo> >* _offsets)
+			: SegmentTermVector(field,terms,termFreqs),offsets(_offsets),positions(_positions)
 {
-	this->offsets = offsets;
-	this->positions = positions;
 }
-
-void SegmentTermPositionVector::indexesOf(const TCHAR** termNumbers, const int32_t start, const int32_t len, CL_NS(util)::Array<int32_t>& ret)
-	{ SegmentTermVector::indexesOf(termNumbers, start, len, ret); }
-
-
 SegmentTermPositionVector::~SegmentTermPositionVector(){
-	if ( offsets ){
-		for (size_t i=0;i<offsets->length;i++){
-			if ( offsets->values != NULL ){
-				Array<TermVectorOffsetInfo>& offs = offsets->values[i];
-				for ( size_t j=0;j<offs.length;j++ ){
-					_CLDELETE_ARRAY(offs.values);
-				}
-			}
-		}
-		_CLDELETE_ARRAY(offsets->values);
-		_CLDELETE(offsets);
-	}
-	if ( positions ){
-		for (size_t i=0;i<positions->length;i++){
-			if ( positions->values != NULL ){
-				Array<int32_t>& pos = positions->values[i];
-				for ( size_t j=0;j<pos.length;j++ ){
-					_CLDELETE_ARRAY(pos.values);
-				}
-			}
-		}
-		_CLDELETE_ARRAY(positions->values);
-		_CLDELETE(positions);
-	}
+	_CLLDELETE(offsets);
+	_CLLDELETE(positions);
 }
 
+ValueArray<int32_t>* SegmentTermPositionVector::indexesOf(const TCHAR** termNumbers, const int32_t start, const int32_t len)
+	{ return SegmentTermVector::indexesOf(termNumbers, start, len); }
+
 TermPositionVector* SegmentTermPositionVector::__asTermPositionVector(){
 	return this;
 }
-/**
-* Returns an array of TermVectorOffsetInfo in which the term is found.
-*
-* @param index The position in the array to get the offsets from
-* @return An array of TermVectorOffsetInfo objects or the empty list
-* @see org.apache.lucene.analysis.Token
-*/
-Array<TermVectorOffsetInfo>* SegmentTermPositionVector::getOffsets(const size_t index) {
+
+ObjectArray<TermVectorOffsetInfo>* SegmentTermPositionVector::getOffsets(const size_t index) {
 	if(offsets == NULL)
 		return NULL;
 	if (index >=0 && index < offsets->length)
-		return &offsets->values[index];
+		return offsets->values[index];
 	else
 		return &TermVectorOffsetInfo::EMPTY_OFFSET_INFO;
 }
 
-/**
-* Returns an array of positions in which the term is found.
-* Terms are identified by the index at which its number appears in the
-* term String array obtained from the <code>indexOf</code> method.
-*/
-Array<int32_t>* SegmentTermPositionVector::getTermPositions(const size_t index) {
+ValueArray<int32_t>* SegmentTermPositionVector::getTermPositions(const size_t index) {
 	if(positions == NULL)
 		return NULL;
 
 	if (index >=0 && index < positions->length)
-		return &positions->values[index];
+		return positions->values[index];
 	else
 		return &EMPTY_TERM_POS;
 }
+
+void SegmentTermPositionVector::indexesOf(const TCHAR** termNumbers, const int32_t start, const int32_t len, CL_NS(util)::ValueArray<int32_t>& ret)
+{
+	ret = *indexesOf(termNumbers,start,len);
+}
+
 CL_NS_END
 

Modified: branches/lucene2_3_2/src/core/CLucene/index/TermVector.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/TermVector.h	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/TermVector.h	2009-04-12 13:21:07 UTC (rev 2985)
@@ -17,10 +17,10 @@
 struct TermVectorOffsetInfo;
 class TermPositionVector;
 
-/** Provides access to stored term vector of 
- *  a document field.  The vector consists of the name of the field, an array of the terms tha occur in the field of the
- * {@link org.apache.lucene.document.Document} and a parallel array of frequencies.  Thus, getTermFrequencies()[5] corresponds with the
- * frequency of getTerms()[5], assuming there are at least 5 terms in the Document.
+/** Provides access to stored term vector of 
+ *  a document field.  The vector consists of the name of the field, an array of the terms that occur in the field of the
+ * {@link org.apache.lucene.document.Document} and a parallel array of frequencies.  Thus, getTermFrequencies()[5] corresponds with the
+ * frequency of getTerms()[5], assuming there are at least 5 terms in the Document.
  */
 class CLUCENE_EXPORT TermFreqVector:LUCENE_BASE {
 public:
@@ -28,7 +28,7 @@
 	}
 
 	/**
-	* The {@link org.apache.lucene.document.Fieldable} name. 
+	* The Field name. 
 	* @return The name of the field this vector is associated with.
 	* 
 	*/ 
@@ -53,7 +53,7 @@
 	*  The size of the returned array is size()
 	*  @memory Returning a pointer to internal data. Do not delete.
 	*/
-	virtual const CL_NS(util)::Array<int32_t>* getTermFrequencies() = 0;
+	virtual const CL_NS(util)::ValueArray<int32_t>* getTermFrequencies() = 0;
 
 
 	/** Return an index in the term numbers array returned from
@@ -73,7 +73,7 @@
 	*  @param start index in the array where the list of terms starts
 	*  @param len the number of terms in the list
 	*/
-	virtual void indexesOf(const TCHAR** terms, const int32_t start, const int32_t len, CL_NS(util)::Array<int32_t>& ret) = 0;
+	virtual void indexesOf(const TCHAR** terms, const int32_t start, const int32_t len, CL_NS(util)::ValueArray<int32_t>& ret) = 0;
 
 	/** Solve the diamond inheritence problem by providing a reinterpret function.
     *	No dynamic casting is required and no RTTI data is needed to do this
@@ -82,19 +82,45 @@
 };
 
 
-
+/**
+* The TermVectorOffsetInfo class holds information pertaining to a Term in a {@link TermPositionVector}'s
+* offset information.  This offset information is the character offset as set during the Analysis phase (and thus may not be the actual offset in the
+* original content).
+*/
 struct CLUCENE_EXPORT TermVectorOffsetInfo {
+public:
+	/**
+	* Convenience declaration when creating a {@link org.apache.lucene.index.TermPositionVector} that stores only position information.
+	*/
+private:
     int startOffset;
     int endOffset;
-public:
-	static CL_NS(util)::Array<TermVectorOffsetInfo> EMPTY_OFFSET_INFO;
+public: // TODO: Remove after TermVectorWriter has been ported
+	static CL_NS(util)::ObjectArray<TermVectorOffsetInfo> EMPTY_OFFSET_INFO;
     TermVectorOffsetInfo();
     ~TermVectorOffsetInfo();
     TermVectorOffsetInfo(int32_t startOffset, int32_t endOffset);
+
+	/**
+	* The accessor for the ending offset for the term
+	* @return The offset
+	*/
     int32_t getEndOffset() const;
-    void setEndOffset(int32_t endOffset);
+    void setEndOffset(const int32_t _endOffset);
+
+	/**
+	* The accessor for the starting offset of the term.
+	*
+	* @return The offset
+	*/
     int32_t getStartOffset() const;
-    void setStartOffset(int32_t startOffset);
+    void setStartOffset(const int32_t _startOffset);
+
+	/**
+	* Two TermVectorOffsetInfos are equal if both the start and end offsets are the same
+	* @param o The comparison Object
+	* @return true if both {@link #getStartOffset()} and {@link #getEndOffset()} are the same for both objects.
+	*/
     bool equals(TermVectorOffsetInfo* o);
     size_t hashCode() const;
 };
@@ -112,7 +138,7 @@
      *  term String array obtained from the <code>indexOf</code> method.
      *  May return null if positions have not been stored.
      */
-    virtual CL_NS(util)::Array<int32_t>* getTermPositions(const size_t index) = 0;
+    virtual CL_NS(util)::ValueArray<int32_t>* getTermPositions(const size_t index) = 0;
   
     /**
      * Returns an array of TermVectorOffsetInfo in which the term is found.
@@ -123,7 +149,7 @@
      * @param index The position in the array to get the offsets from
      * @return An array of TermVectorOffsetInfo objects or the empty list
      */ 
-     virtual CL_NS(util)::Array<TermVectorOffsetInfo>* getOffsets(const size_t index) = 0;
+     virtual CL_NS(util)::ObjectArray<TermVectorOffsetInfo>* getOffsets(const size_t index) = 0;
      
      virtual ~TermPositionVector(){
 	 }

Modified: branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp	2009-04-12 12:53:18 UTC (rev 2984)
+++ branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp	2009-04-12 13:21:07 UTC (rev 2985)
@@ -40,14 +40,14 @@
 			tvf = d->openInput(fbuf, readBufferSize);
 			tvfFormat = checkValidFormat(tvf);
 			if (-1 == docStoreOffset) {
-				//this->docStoreOffset = 0;
-				this->_size = static_cast<int32_t>(tvx->length() >> 3);
+				this->docStoreOffset = 0;
+				this->_size = static_cast<int64_t>(tvx->length() >> 3);
 			} else {
 				this->docStoreOffset = docStoreOffset;
 				this->_size = size;
 				// Verify the file is long enough to hold all of our
 				// docs
-				CND_CONDITION( ((int32_t) (tvx->length() / 8)) >= size + docStoreOffset , "file is not ling enought to hold all our docs");
+				CND_CONDITION( ((int64_t) (tvx->length() / 8)) >= size + docStoreOffset , "file is not long enough to hold all of our docs");
 			}
 		}
 
@@ -63,34 +63,6 @@
 			close();
 		}
 	});
-/*
-	char fbuf[CL_MAX_NAME];
-	strcpy(fbuf,segment);
-	char* fpbuf=fbuf+strlen(fbuf);
-
-	strcpy(fpbuf, TermVectorsWriter::LUCENE_TVX_EXTENSION);
-	if (d->fileExists(fbuf)) {
-      tvx = d->openInput(fbuf);
-      checkValidFormat(tvx);
-	  
-	  strcpy(fpbuf, TermVectorsWriter::LUCENE_TVD_EXTENSION);
-	  tvd = d->openInput(fbuf);
-      tvdFormat = checkValidFormat(tvd);
-	  
-	  strcpy(fpbuf, TermVectorsWriter::LUCENE_TVF_EXTENSION);
-	  tvf = d->openInput(fbuf);
-      tvfFormat = checkValidFormat(tvf);
-
-      _size = tvx->length() / 8;
-	}else{
-	  tvx = NULL;
-	  tvd = NULL;
-	  tvf = NULL;
-	  _size = 0;
-	}
-
-    this->fieldInfos = fieldInfos;
-*/
 }
 
 TermVectorsReader::TermVectorsReader(const TermVectorsReader& copy)
@@ -115,16 +87,32 @@
 	close();
 }
 
+int32_t TermVectorsReader::checkValidFormat(CL_NS(store)::IndexInput* in){
+	int32_t format = in->readInt();
+	if (format > TermVectorsWriter::FORMAT_VERSION)
+	{
+		CL_NS(util)::StringBuffer err;
+		err.append(_T("Incompatible format version: "));
+		err.appendInt(format);
+		err.append(_T(" expected "));
+		err.appendInt(TermVectorsWriter::FORMAT_VERSION);
+		err.append(_T(" or less"));
+		_CLTHROWT(CL_ERR_CorruptIndex,err.getBuffer());
+	}
+	return format;
+}
+
 void TermVectorsReader::close(){
-	// why don't we trap the exception and at least make sure that
+	// make all effort to close up. Keep the first exception
+  	// and throw it as a new one.
+	// todo: why don't we trap the exception and at least make sure that
     // all streams that we can close are closed?
 	CLuceneError keep;
 	bool thrown = false;
 
 	if (tvx != NULL){
-		try{
-			tvx->close();
-		}catch(CLuceneError& err){
+		try{tvx->close();}
+		catch(CLuceneError& err){
 			if ( err.number() == CL_ERR_IO ){
 				keep = err;
 				thrown = true;
@@ -134,9 +122,8 @@
 		_CLDELETE(tvx);//delete even  if error thrown
 	}
     if (tvd != NULL){
-		try{
-			tvd->close();
-		}catch(CLuceneError& err){
+		try{tvd->close();}
+		catch(CLuceneError& err){
 			if ( err.number() == CL_ERR_IO ){
 				keep = err;
 				thrown = true;
@@ -146,9 +133,8 @@
 		_CLDELETE(tvd);
 	}
     if (tvf != NULL){
-		try{
-			tvf->close();
-		}catch(CLuceneError& err){
+		try{tvf->close();}
+		catch(CLuceneError& err){
 			if ( err.number() == CL_ERR_IO ){
 				keep = err;
 				thrown = true;
@@ -162,16 +148,18 @@
 		throw keep;
 }
 
-TermFreqVector* TermVectorsReader::get(const int32_t docNum, const TCHAR* field){
-	// Check if no term vectors are available for this segment at all
-    int32_t fieldNumber = fieldInfos->fieldNumber(field);
-    TermFreqVector* result = NULL;
-    if (tvx != NULL) {
+int64_t TermVectorsReader::size() const{
+    return _size;
+}
+
+void TermVectorsReader::get(const int32_t docNum, const TCHAR* field, TermVectorMapper* mapper){
+	if (tvx != NULL) {
+		int32_t fieldNumber = fieldInfos->fieldNumber(field);
 		//We need to account for the FORMAT_SIZE at when seeking in the tvx
 		//We don't need to do this in other seeks because we already have the
 		// file pointer
 		//that was written in another file
-        tvx->seek(((docNum + docStoreOffset) * 8L) + TermVectorsWriter::FORMAT_SIZE);
+        tvx->seek(((docNum + docStoreOffset) * 8L) + FORMAT_SIZE);
         int64_t position = tvx->readLong();
 
         tvd->seek(position);
@@ -182,10 +170,11 @@
         int32_t number = 0;
         int32_t found = -1;
         for (int32_t i = 0; i < fieldCount; ++i) {
-			if(tvdFormat == TermVectorsWriter::FORMAT_VERSION)
+			if(tvdFormat == FORMAT_VERSION)
 				number = tvd->readVInt();
 			else
 				number += tvd->readVInt();
+
           if (number == fieldNumber) 
 			  found = i;
         }
@@ -195,20 +184,34 @@...
 
[truncated message content]