[Clucene-cvs] SF.net SVN: clucene:[2936] branches/lucene2_3_2/src/core/CLucene

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 2936
          http://clucene.svn.sourceforge.net/clucene/?rev=2936&view=rev
Author:   synhershko
Date:     2008-10-10 10:47:49 +0000 (Fri, 10 Oct 2008)

Log Message:
-----------
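Misc cleanups and some porting progress: pass a configurable read buffer size through CompoundFileReader and CSIndexInput, make MultiTermQuery::rewrite virtual, add a commented-out draft of FuzzyQuery::rewrite, and tidy doc comments.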

Modified Paths:
--------------
    branches/lucene2_3_2/src/core/CLucene/files_list.txt
    branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp
    branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h
    branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp
    branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h
    branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp
    branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h
    branches/lucene2_3_2/src/core/CLucene/search/Sort.h
    branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp

Modified: branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp
===================================================================

--- branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp	2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp	2008-10-10 10:47:49 UTC (rev 2936)
@@ -57,7 +57,7 @@
 	}
 
 public:
-	CSIndexInput(CL_NS(store)::IndexInput* base, const int64_t fileOffset, const int64_t length);
+	CSIndexInput(CL_NS(store)::IndexInput* base, const int64_t fileOffset, const int64_t length, const int32_t readBufferSize = CL_NS(store)::BufferedIndexInput::BUFFER_SIZE);
 	CSIndexInput(const CSIndexInput& clone);
 	~CSIndexInput();
 
@@ -83,7 +83,7 @@
 };
 
 
-CSIndexInput::CSIndexInput(CL_NS(store)::IndexInput* base, const int64_t fileOffset, const int64_t length){
+CSIndexInput::CSIndexInput(CL_NS(store)::IndexInput* base, const int64_t fileOffset, const int64_t length, const int32_t _readBufferSize):BufferedIndexInput(_readBufferSize){
    this->base = base;
    this->fileOffset = fileOffset;
    this->_length = length;
@@ -97,7 +97,7 @@
    if(start + len > _length)
       _CLTHROWA(CL_ERR_IO,"read past EOF");
    base->seek(fileOffset + start);
-   base->readBytes(b, len);
+   base->readBytes(b, len /*todo: , false*/);
 }
 CSIndexInput::~CSIndexInput(){
 }
@@ -116,16 +116,17 @@
 
 
 
-CompoundFileReader::CompoundFileReader(Directory* dir, char* name):
+CompoundFileReader::CompoundFileReader(Directory* dir, char* name, int32_t _readBufferSize):
 	entries(_CLNEW EntriesType(true,true))
 {
    directory = dir;
    fileName = STRDUP_AtoA(name);
+   readBufferSize = _readBufferSize;
 
    bool success = false;
 
    try {
-      stream = dir->openInput(name);
+      stream = dir->openInput(name, readBufferSize);
 
       // read the directory and init files
       int32_t count = stream->readVInt();
@@ -192,24 +193,26 @@
 }
 
 bool CompoundFileReader::openInput(const char * id, CL_NS(store)::IndexInput *& ret, CLuceneError& error, int32_t bufferSize){
-	SCOPED_LOCK_MUTEX(THIS_LOCK)
+	SCOPED_LOCK_MUTEX(THIS_LOCK);
 
 	if (stream == NULL){
-      error.set(CL_ERR_IO,"Stream closed");
-	  return false;
+		error.set(CL_ERR_IO,"Stream closed");
+		return false;
 	}
-	 
-  const ReaderFileEntry* entry = entries->get(id);
-  if (entry == NULL){
-      char buf[CL_MAX_PATH+30];
-      strcpy(buf,"No sub-file with id ");
-      strncat(buf,id,CL_MAX_PATH);
-      strcat(buf," found");
-      error.set(CL_ERR_IO,buf);
-	  return false;
-  }
-  ret = _CLNEW CSIndexInput(stream, entry->offset, entry->length);
-  return true;
+
+	const ReaderFileEntry* entry = entries->get(id);
+	if (entry == NULL){
+		char buf[CL_MAX_PATH+26];
+		cl_sprintf(buf, CL_MAX_PATH+26, "No sub-file with id %s found", id);
+		error.set(CL_ERR_IO,buf);
+		return false;
+	}
+
+	if (bufferSize < 1)
+		bufferSize = readBufferSize;
+
+	ret = _CLNEW CSIndexInput(stream, entry->offset, entry->length, bufferSize);
+	return true;
 }
 
 void CompoundFileReader::list(vector<string>* names) const{

Modified: branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h	2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h	2008-10-10 10:47:49 UTC (rev 2936)
@@ -11,6 +11,7 @@
 CL_CLASS_DEF(store,Lock)
 #include "CLucene/util/VoidMapSetDefinitions.h"
 #include "CLucene/store/Directory.h"
+#include "CLucene/store/IndexInput.h"
 
 CL_NS_DEF(index)
 
@@ -25,6 +26,8 @@
  */
 class CompoundFileReader: public CL_NS(store)::Directory {
 private:
+    int32_t readBufferSize;
+
 	// Base info
 	CL_NS(store)::Directory* directory;
 	char* fileName;
@@ -42,13 +45,13 @@
 	bool doDeleteFile(const char* name);
 
 public:
-	CompoundFileReader(CL_NS(store)::Directory* dir, char* name);
+	CompoundFileReader(CL_NS(store)::Directory* dir, char* name, int32_t _readBufferSize=CL_NS(store)::BufferedIndexInput::BUFFER_SIZE);
 	~CompoundFileReader();
 	CL_NS(store)::Directory* getDirectory();
 	const char* getName() const;
 
 	void close();
-	bool openInput(const char * name, CL_NS(store)::IndexInput *& ret, CLuceneError& error, int32_t bufferSize=1);
+	bool openInput(const char * name, CL_NS(store)::IndexInput *& ret, CLuceneError& error, int32_t bufferSize=0);
 
 	/** Returns an array of strings, one for each file in the directory-> */
 	void list(std::vector<std::string>* names) const;

Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp	2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp	2008-10-10 10:47:49 UTC (rev 2936)
@@ -362,4 +362,52 @@
 	  return ret;
   }
 
+  /*
+  Query* FuzzyQuery::rewrite(IndexReader* reader) {
+	  FilteredTermEnum* enumerator = getEnum(reader);
+	  int32_t maxClauseCount = BooleanQuery::getMaxClauseCount();
+	  ScoreTermQueue* stQueue = _CLNEW ScoreTermQueue(maxClauseCount);
+	  ScoreTerm* reusableST = NULL;
+
+	  try {
+		  do {
+			  float_t score = 0.0f;
+			  Term* t = enumerator->term();
+			  if (t != null) {
+				  score = enumerator->difference();
+				  if (reusableST == NULL) {
+					  reusableST = _CLNEW ScoreTerm(t, score);
+				  } else if (score >= reusableST->score) {
+					  // reusableST holds the last "rejected" entry, so, if
+					  // this new score is not better than that, there's no
+					  // need to try inserting it
+					  reusableST->score = score;
+					  reusableST->term = t;
+				  } else {
+					  continue;
+				  }
+
+				  reusableST = (ScoreTerm) stQueue->insertWithOverflow(reusableST);
+			  }
+		  } while (enumerator->next());
+	  } _CLFINALLY({
+		  enumerator->close();
+		  _CLDELETE(enumerator);
+	  }
+
+	  BooleanQuery query = _CLNEW BooleanQuery(true);
+	  int size = stQueue->size();
+	  for(int i = 0; i < size; i++){
+		ScoreTerm* st = (ScoreTerm) stQueue->pop();
+		TermQuery* tq = new TermQuery(st.term);      // found a match
+		tq->setBoost(getBoost() * st.score); // set the boost
+		query->add(tq, BooleanClause.Occur.SHOULD);          // add to query
+	  }
+
+	  _CLDELETE(reusableST);
+
+	  return query;
+	  }*/
+
+
 CL_NS_END

Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h	2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h	2008-10-10 10:47:49 UTC (rev 2936)
@@ -71,17 +71,19 @@
 		*/
 		size_t getPrefixLength() const;
 
+		// todo: not ported yet: Query* rewrite(CL_NS(index)::IndexReader* reader);
+
   protected:
 	  FilteredTermEnum* getEnum(CL_NS(index)::IndexReader* reader);
   };
 
-   /** FuzzyTermEnum is a subclass of FilteredTermEnum for enumerating all 
-  *  terms that are similiar to the specified filter term.
-  *
-  *  Term enumerations are always ordered by Term.compareTo().  Each term in
-  *  the enumeration is greater than all that precede it.
-  */
-  class CLUCENE_EXPORT FuzzyTermEnum: public FilteredTermEnum {
+/** Subclass of FilteredTermEnum for enumerating all terms that are similar
+ * to the specified filter term.
+ *
+ * <p>Term enumerations are always ordered by Term.compareTo().  Each term in
+ * the enumeration is greater than all that precede it.
+ */
+class CLUCENE_EXPORT FuzzyTermEnum: public FilteredTermEnum {
   private:
 		float_t distance;
 		bool _endEnum;
@@ -119,10 +121,10 @@
 		int32_t editDistance(const TCHAR* s, const TCHAR* t, const int32_t n, const int32_t m) ;
 
     protected:
-		/**
-		The termCompare method in FuzzyTermEnum uses Levenshtein distance to 
-		calculate the distance between the given term and the comparing term. 
-		*/
+		/**
+		* The termCompare method in FuzzyTermEnum uses Levenshtein distance to 
+		* calculate the distance between the given term and the comparing term. 
+		*/
 		bool termCompare(CL_NS(index)::Term* term) ;
 		
 		///Returns the fact if the current term in the enumeration has reached the end

Modified: branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp	2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp	2008-10-10 10:47:49 UTC (rev 2936)
@@ -93,6 +93,7 @@
             buffer.append( _T(":"));
         }
         buffer.append(term->text());
+		// todo: use ToStringUtils.boost()
         if (getBoost() != 1.0f) {
             buffer.appendChar ( '^' );
             buffer.appendFloat( getBoost(),1);

Modified: branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h	2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h	2008-10-10 10:47:49 UTC (rev 2936)
@@ -56,7 +56,7 @@
         /** Prints a user-readable version of this query. */
         TCHAR* toString(const TCHAR* field) const;
 
-		Query* rewrite(CL_NS(index)::IndexReader* reader);
+		virtual Query* rewrite(CL_NS(index)::IndexReader* reader);
     };
 CL_NS_END
 #endif

Modified: branches/lucene2_3_2/src/core/CLucene/search/Sort.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/search/Sort.h	2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/search/Sort.h	2008-10-10 10:47:49 UTC (rev 2936)
@@ -32,34 +32,41 @@
 //    CL_NS(util)::Comparable** cachedValues;
 //    ScoreDocComparator(CL_NS(util)::Comparable** cachedValues);
 
-	/**
-	 * Compares two ScoreDoc objects and returns a result indicating their
-	 * sort order.
-	 * @param i First ScoreDoc
-	 * @param j Second ScoreDoc
-	 * @return <code>-1</code> if <code>i</code> should come before <code>j</code><br><code>1</code> if <code>i</code> should come after <code>j</code><br><code>0</code> if they are equal
-	 * @see java.util.Comparator
+	 /**
+	 * Compares two ScoreDoc objects and returns a result indicating their
+	 * sort order.
+	 * @param i First ScoreDoc
+	 * @param j Second ScoreDoc
+	 * @return a negative integer if <code>i</code> should come before <code>j</code><br>
+	 *         a positive integer if <code>i</code> should come after <code>j</code><br>
+	 *         <code>0</code> if they are equal
+	 * @see java.util.Comparator
 	 */
     virtual int32_t compare (struct ScoreDoc* i, struct ScoreDoc* j) = 0;
 
-	/**
-	 * Returns the value used to sort the given document.  The
-	 * object returned must implement the java.io.Serializable
-	 * interface.  This is used by multisearchers to determine how to collate results from their searchers.
-	 * @see FieldDoc
-	 * @param i Document
-	 * @return Serializable object
-	 */
+	/**
+	* Returns the value used to sort the given document.  The
+	* object returned must implement the java.io.Serializable
+	* interface.  This is used by multisearchers to determine how
+	* to collate results from their searchers.
+	* @see FieldDoc
+	* @param i Document
+	* @return Serializable object
+	*/
     virtual CL_NS(util)::Comparable* sortValue (struct ScoreDoc* i) = 0;
 
 	
-	/**
-	 * Returns the type of sort.  Should return <code>SortField.SCORE</code>, <code>SortField.DOC</code>, <code>SortField.STRING</code>, <code>SortField.INTEGER</code>, 
-	 * <code>SortField::FLOAT</code> or <code>SortField.CUSTOM</code>.  It is not valid to return <code>SortField.AUTO</code>.
-	 * This is used by multisearchers to determine how to collate results from their searchers.
-	 * @return One of the constants in SortField.
-	 * @see SortField
-	 */
+	/**
+	* Returns the type of sort.  Should return <code>SortField.SCORE</code>,
+	* <code>SortField.DOC</code>, <code>SortField.STRING</code>,
+	* <code>SortField.INTEGER</code>, <code>SortField.FLOAT</code> or
+	* <code>SortField.CUSTOM</code>.  It is not valid to return
+	* <code>SortField.AUTO</code>.
+	* This is used by multisearchers to determine how to collate results
+	* from their searchers.
+	* @return One of the constants in SortField.
+	* @see SortField
+	*/
     virtual int32_t sortType() = 0;
 
 	/** Special comparator for sorting hits according to computed relevance (document score). */

Modified: branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp	2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp	2008-10-10 10:47:49 UTC (rev 2936)
@@ -149,6 +149,7 @@
     }
   }
 
+	//todo: support an extra parameter: bool useBuffer (as per JLucene)
   void BufferedIndexInput::readBytes(uint8_t* b, const int32_t len){
     if (len < bufferSize) {
       for (int32_t i = 0; i < len; ++i)		  // read byte-by-byte


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.