|
From: <syn...@us...> - 2008-10-10 11:32:19
|
Revision: 2936
http://clucene.svn.sourceforge.net/clucene/?rev=2936&view=rev
Author: synhershko
Date: 2008-10-10 10:47:49 +0000 (Fri, 10 Oct 2008)
Log Message:
-----------
Misc cleanups and some porting progress
Modified Paths:
--------------
branches/lucene2_3_2/src/core/CLucene/files_list.txt
branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp
branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h
branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp
branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h
branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp
branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h
branches/lucene2_3_2/src/core/CLucene/search/Sort.h
branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp
Modified: branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp 2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp 2008-10-10 10:47:49 UTC (rev 2936)
@@ -57,7 +57,7 @@
}
public:
- CSIndexInput(CL_NS(store)::IndexInput* base, const int64_t fileOffset, const int64_t length);
+ CSIndexInput(CL_NS(store)::IndexInput* base, const int64_t fileOffset, const int64_t length, const int32_t readBufferSize = CL_NS(store)::BufferedIndexInput::BUFFER_SIZE);
CSIndexInput(const CSIndexInput& clone);
~CSIndexInput();
@@ -83,7 +83,7 @@
};
-CSIndexInput::CSIndexInput(CL_NS(store)::IndexInput* base, const int64_t fileOffset, const int64_t length){
+CSIndexInput::CSIndexInput(CL_NS(store)::IndexInput* base, const int64_t fileOffset, const int64_t length, const int32_t _readBufferSize):BufferedIndexInput(_readBufferSize){
this->base = base;
this->fileOffset = fileOffset;
this->_length = length;
@@ -97,7 +97,7 @@
if(start + len > _length)
_CLTHROWA(CL_ERR_IO,"read past EOF");
base->seek(fileOffset + start);
- base->readBytes(b, len);
+ base->readBytes(b, len /*todo: , false*/);
}
CSIndexInput::~CSIndexInput(){
}
@@ -116,16 +116,17 @@
-CompoundFileReader::CompoundFileReader(Directory* dir, char* name):
+CompoundFileReader::CompoundFileReader(Directory* dir, char* name, int32_t _readBufferSize):
entries(_CLNEW EntriesType(true,true))
{
directory = dir;
fileName = STRDUP_AtoA(name);
+ readBufferSize = _readBufferSize;
bool success = false;
try {
- stream = dir->openInput(name);
+ stream = dir->openInput(name, readBufferSize);
// read the directory and init files
int32_t count = stream->readVInt();
@@ -192,24 +193,26 @@
}
bool CompoundFileReader::openInput(const char * id, CL_NS(store)::IndexInput *& ret, CLuceneError& error, int32_t bufferSize){
- SCOPED_LOCK_MUTEX(THIS_LOCK)
+ SCOPED_LOCK_MUTEX(THIS_LOCK);
if (stream == NULL){
- error.set(CL_ERR_IO,"Stream closed");
- return false;
+ error.set(CL_ERR_IO,"Stream closed");
+ return false;
}
-
- const ReaderFileEntry* entry = entries->get(id);
- if (entry == NULL){
- char buf[CL_MAX_PATH+30];
- strcpy(buf,"No sub-file with id ");
- strncat(buf,id,CL_MAX_PATH);
- strcat(buf," found");
- error.set(CL_ERR_IO,buf);
- return false;
- }
- ret = _CLNEW CSIndexInput(stream, entry->offset, entry->length);
- return true;
+
+ const ReaderFileEntry* entry = entries->get(id);
+ if (entry == NULL){
+ char buf[CL_MAX_PATH+26];
+ cl_sprintf(buf, CL_MAX_PATH+26, "No sub-file with id %s found", id);
+ error.set(CL_ERR_IO,buf);
+ return false;
+ }
+
+ if (bufferSize < 1)
+ bufferSize = readBufferSize;
+
+ ret = _CLNEW CSIndexInput(stream, entry->offset, entry->length, bufferSize);
+ return true;
}
void CompoundFileReader::list(vector<string>* names) const{
Modified: branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h 2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h 2008-10-10 10:47:49 UTC (rev 2936)
@@ -11,6 +11,7 @@
CL_CLASS_DEF(store,Lock)
#include "CLucene/util/VoidMapSetDefinitions.h"
#include "CLucene/store/Directory.h"
+#include "CLucene/store/IndexInput.h"
CL_NS_DEF(index)
@@ -25,6 +26,8 @@
*/
class CompoundFileReader: public CL_NS(store)::Directory {
private:
+ int32_t readBufferSize;
+
// Base info
CL_NS(store)::Directory* directory;
char* fileName;
@@ -42,13 +45,13 @@
bool doDeleteFile(const char* name);
public:
- CompoundFileReader(CL_NS(store)::Directory* dir, char* name);
+ CompoundFileReader(CL_NS(store)::Directory* dir, char* name, int32_t _readBufferSize=CL_NS(store)::BufferedIndexInput::BUFFER_SIZE);
~CompoundFileReader();
CL_NS(store)::Directory* getDirectory();
const char* getName() const;
void close();
- bool openInput(const char * name, CL_NS(store)::IndexInput *& ret, CLuceneError& error, int32_t bufferSize=1);
+ bool openInput(const char * name, CL_NS(store)::IndexInput *& ret, CLuceneError& error, int32_t bufferSize=0);
/** Returns an array of strings, one for each file in the directory. */
void list(std::vector<std::string>* names) const;
Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp 2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp 2008-10-10 10:47:49 UTC (rev 2936)
@@ -362,4 +362,52 @@
return ret;
}
+ /*
+ Query* FuzzyQuery::rewrite(IndexReader* reader) {
+ FilteredTermEnum* enumerator = getEnum(reader);
+ int32_t maxClauseCount = BooleanQuery::getMaxClauseCount();
+ ScoreTermQueue* stQueue = _CLNEW ScoreTermQueue(maxClauseCount);
+ ScoreTerm* reusableST = NULL;
+
+ try {
+ do {
+ float_t score = 0.0f;
+ Term* t = enumerator->term();
+ if (t != null) {
+ score = enumerator->difference();
+ if (reusableST == NULL) {
+ reusableST = _CLNEW ScoreTerm(t, score);
+ } else if (score >= reusableST->score) {
+ // reusableST holds the last "rejected" entry, so, if
+ // this new score is not better than that, there's no
+ // need to try inserting it
+ reusableST->score = score;
+ reusableST->term = t;
+ } else {
+ continue;
+ }
+
+ reusableST = (ScoreTerm) stQueue->insertWithOverflow(reusableST);
+ }
+ } while (enumerator->next());
+ } _CLFINALLY({
+ enumerator->close();
+ _CLDELETE(enumerator);
+ }
+
+ BooleanQuery query = _CLNEW BooleanQuery(true);
+ int size = stQueue->size();
+ for(int i = 0; i < size; i++){
+ ScoreTerm* st = (ScoreTerm) stQueue->pop();
+ TermQuery* tq = new TermQuery(st.term); // found a match
+ tq->setBoost(getBoost() * st.score); // set the boost
+ query->add(tq, BooleanClause.Occur.SHOULD); // add to query
+ }
+
+ _CLDELETE(reusableST);
+
+ return query;
+ }*/
+
+
CL_NS_END
Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h 2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h 2008-10-10 10:47:49 UTC (rev 2936)
@@ -71,17 +71,19 @@
*/
size_t getPrefixLength() const;
+ //Query* FuzzyQuery::rewrite(IndexReader* reader)
+
protected:
FilteredTermEnum* getEnum(CL_NS(index)::IndexReader* reader);
};
- /** FuzzyTermEnum is a subclass of FilteredTermEnum for enumerating all
- * terms that are similiar to the specified filter term.
- *
- * Term enumerations are always ordered by Term.compareTo(). Each term in
- * the enumeration is greater than all that precede it.
- */
- class CLUCENE_EXPORT FuzzyTermEnum: public FilteredTermEnum {
+/** Subclass of FilteredTermEnum for enumerating all terms that are similar
+ * to the specified filter term.
+ *
+ * <p>Term enumerations are always ordered by Term.compareTo(). Each term in
+ * the enumeration is greater than all that precede it.
+ */
+class CLUCENE_EXPORT FuzzyTermEnum: public FilteredTermEnum {
private:
float_t distance;
bool _endEnum;
@@ -119,10 +121,10 @@
int32_t editDistance(const TCHAR* s, const TCHAR* t, const int32_t n, const int32_t m) ;
protected:
- /**
- The termCompare method in FuzzyTermEnum uses Levenshtein distance to
- calculate the distance between the given term and the comparing term.
- */
+ /**
+ * The termCompare method in FuzzyTermEnum uses Levenshtein distance to
+ * calculate the distance between the given term and the comparing term.
+ */
bool termCompare(CL_NS(index)::Term* term) ;
///Returns whether the current term in the enumeration has reached the end
Modified: branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp 2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp 2008-10-10 10:47:49 UTC (rev 2936)
@@ -93,6 +93,7 @@
buffer.append( _T(":"));
}
buffer.append(term->text());
+ // todo: use ToStringUtils.boost()
if (getBoost() != 1.0f) {
buffer.appendChar ( '^' );
buffer.appendFloat( getBoost(),1);
Modified: branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h 2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h 2008-10-10 10:47:49 UTC (rev 2936)
@@ -56,7 +56,7 @@
/** Prints a user-readable version of this query. */
TCHAR* toString(const TCHAR* field) const;
- Query* rewrite(CL_NS(index)::IndexReader* reader);
+ virtual Query* rewrite(CL_NS(index)::IndexReader* reader);
};
CL_NS_END
#endif
Modified: branches/lucene2_3_2/src/core/CLucene/search/Sort.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/search/Sort.h 2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/search/Sort.h 2008-10-10 10:47:49 UTC (rev 2936)
@@ -32,34 +32,41 @@
// CL_NS(util)::Comparable** cachedValues;
// ScoreDocComparator(CL_NS(util)::Comparable** cachedValues);
- /**
- * Compares two ScoreDoc objects and returns a result indicating their
- * sort order.
- * @param i First ScoreDoc
- * @param j Second ScoreDoc
- * @return <code>-1</code> if <code>i</code> should come before <code>j</code><br><code>1</code> if <code>i</code> should come after <code>j</code><br><code>0</code> if they are equal
- * @see java.util.Comparator
+ /**
+ * Compares two ScoreDoc objects and returns a result indicating their
+ * sort order.
+ * @param i First ScoreDoc
+ * @param j Second ScoreDoc
+ * @return a negative integer if <code>i</code> should come before <code>j</code><br>
+ * a positive integer if <code>i</code> should come after <code>j</code><br>
+ * <code>0</code> if they are equal
+ * @see java.util.Comparator
*/
virtual int32_t compare (struct ScoreDoc* i, struct ScoreDoc* j) = 0;
- /**
- * Returns the value used to sort the given document. The
- * object returned must implement the java.io.Serializable
- * interface. This is used by multisearchers to determine how to collate results from their searchers.
- * @see FieldDoc
- * @param i Document
- * @return Serializable object
- */
+ /**
+ * Returns the value used to sort the given document. The
+ * object returned must implement the java.io.Serializable
+ * interface. This is used by multisearchers to determine how
+ * to collate results from their searchers.
+ * @see FieldDoc
+ * @param i Document
+ * @return Serializable object
+ */
virtual CL_NS(util)::Comparable* sortValue (struct ScoreDoc* i) = 0;
- /**
- * Returns the type of sort. Should return <code>SortField.SCORE</code>, <code>SortField.DOC</code>, <code>SortField.STRING</code>, <code>SortField.INTEGER</code>,
- * <code>SortField::FLOAT</code> or <code>SortField.CUSTOM</code>. It is not valid to return <code>SortField.AUTO</code>.
- * This is used by multisearchers to determine how to collate results from their searchers.
- * @return One of the constants in SortField.
- * @see SortField
- */
+ /**
+ * Returns the type of sort. Should return <code>SortField.SCORE</code>,
+ * <code>SortField.DOC</code>, <code>SortField.STRING</code>,
+ * <code>SortField.INTEGER</code>, <code>SortField.FLOAT</code> or
+ * <code>SortField.CUSTOM</code>. It is not valid to return
+ * <code>SortField.AUTO</code>.
+ * This is used by multisearchers to determine how to collate results
+ * from their searchers.
+ * @return One of the constants in SortField.
+ * @see SortField
+ */
virtual int32_t sortType() = 0;
/** Special comparator for sorting hits according to computed relevance (document score). */
Modified: branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp 2008-10-06 19:42:01 UTC (rev 2935)
+++ branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp 2008-10-10 10:47:49 UTC (rev 2936)
@@ -149,6 +149,7 @@
}
}
+ //todo: support an extra parameter: bool useBuffer (as per JLucene)
void BufferedIndexInput::readBytes(uint8_t* b, const int32_t len){
if (len < bufferSize) {
for (int32_t i = 0; i < len; ++i) // read byte-by-byte
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|