|
From: <ust...@us...> - 2009-07-08 10:10:55
|
Revision: 3020
http://clucene.svn.sourceforge.net/clucene/?rev=3020&view=rev
Author: ustramooner
Date: 2009-07-08 10:10:52 +0000 (Wed, 08 Jul 2009)
Log Message:
-----------
Isidor Zeuner's fix & test for norms not being merged
Modified Paths:
--------------
branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h
branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp
branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp
Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h 2009-07-08 10:10:09 UTC (rev 3019)
+++ branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h 2009-07-08 10:10:52 UTC (rev 3020)
@@ -270,6 +270,8 @@
/** Reads the byte-encoded normalization factor for the named field of every
* document. This is used by the search code to score documents.
*
+ * The bytes buffer must be IndexReader->maxDoc() bytes long.
+ *
* @see Field#setBoost(float_t)
*/
virtual void norms(const TCHAR* field, uint8_t* bytes) = 0;
Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2009-07-08 10:10:09 UTC (rev 3019)
+++ branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2009-07-08 10:10:52 UTC (rev 3020)
@@ -769,6 +769,7 @@
normBuffer.resize(maxDoc);
memset(normBuffer.values,0,sizeof(uint8_t) * maxDoc);
}
+ reader->norms(fi->name, normBuffer.values);
if (!reader->hasDeletions()) {
//optimized case for segments without deleted docs
Modified: branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp
===================================================================
--- branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp 2009-07-08 10:10:09 UTC (rev 3019)
+++ branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp 2009-07-08 10:10:52 UTC (rev 3020)
@@ -6,7 +6,66 @@
------------------------------------------------------------------------------*/
#include "test.h"
#include <iostream>
+#include <sys/stat.h>
+#include <sys/types.h>
+//checks that phrase queries match after the index is written in two IndexWriter sessions, each ending with optimize() and close()
+void testIWmergePhraseSegments(CuTest *tc){
+ char fsdir[CL_MAX_PATH];
+ sprintf(fsdir,"%s/%s",cl_tempDir, "test.indexwriter"); //index directory is <cl_tempDir>/test.indexwriter
+ mkdir(fsdir, 0777); //return value is not checked
+ SimpleAnalyzer a;
+
+ IndexWriter ndx2(fsdir,&a,true); //NOTE(review): third argument presumably means "create a new index" - confirm against IndexWriter
+ ndx2.setUseCompoundFile(false);
+ Document doc0;
+ doc0.add(
+ *_CLNEW Field( //NOTE(review): presumably the Document takes ownership of this Field - confirm
+ _T("field0"),
+ _T("value0 value1"), //first document: "value0" followed by "value1"
+ Field::STORE_YES | Field::INDEX_TOKENIZED
+ )
+ );
+ ndx2.addDocument(&doc0);
+ ndx2.optimize();
+ ndx2.close();
+
+ IndexWriter ndx(fsdir,&a,false); //NOTE(review): false presumably opens the index written above instead of recreating it - confirm
+ ndx.setUseCompoundFile(false);
+ Document doc1;
+ doc1.add(
+ *_CLNEW Field(
+ _T("field0"),
+ _T("value1 value0"), //second document: same two words in the reverse order
+ Field::STORE_YES | Field::INDEX_TOKENIZED
+ )
+ );
+ ndx.addDocument(&doc1);
+ ndx.optimize();
+ ndx.close();
+
+ //test the index querying: one quoted phrase query per word order, each must return at least one hit
+ IndexSearcher searcher(fsdir);
+ Query* query0 = QueryParser::parse(
+ _T("\"value0 value1\""), //phrase in the word order of doc0
+ _T("field0"),
+ &a
+ );
+ Hits* hits0 = searcher.search(query0);
+ CLUCENE_ASSERT(hits0->length() > 0);
+ Query* query1 = QueryParser::parse(
+ _T("\"value1 value0\""), //phrase in the word order of doc1
+ _T("field0"),
+ &a
+ );
+ Hits* hits1 = searcher.search(query1);
+ CLUCENE_ASSERT(hits1->length() > 0);
+ _CLDELETE(query0); //release the parsed queries and the hit lists obtained above
+ _CLDELETE(query1);
+ _CLDELETE(hits0);
+ _CLDELETE(hits1);
+}
+
//checks that adding more than the min_merge value goes ok...
//checks for a mem leak that used to occur
void testIWmergeSegments1(CuTest *tc){
@@ -45,6 +104,60 @@
_CLDECDELETE(term);
_CLDELETE(reader2);
}
+
+//checks if appending to an index works correctly: two single-document IndexWriter sessions on the same directory, then one query per document
+void testIWmergeSegments2(CuTest *tc){
+ char fsdir[CL_MAX_PATH];
+ sprintf(fsdir,"%s/%s",cl_tempDir, "test.indexwriter"); //index directory is <cl_tempDir>/test.indexwriter
+ mkdir(fsdir, 0777); //return value is not checked
+ SimpleAnalyzer a;
+
+ IndexWriter ndx2(fsdir,&a,true); //NOTE(review): third argument presumably means "create a new index" - confirm against IndexWriter
+ ndx2.setUseCompoundFile(false);
+ Document doc0;
+ doc0.add(
+ *_CLNEW Field( //NOTE(review): presumably the Document takes ownership of this Field - confirm
+ _T("field0"),
+ _T("value0"), //first document holds only "value0"
+ Field::STORE_YES | Field::INDEX_TOKENIZED
+ )
+ );
+ ndx2.addDocument(&doc0);
+ ndx2.optimize();
+ ndx2.close();
+
+ IndexWriter ndx(fsdir,&a,false); //NOTE(review): false presumably opens the index written above instead of recreating it - confirm
+ ndx.setUseCompoundFile(false);
+ Document doc1;
+ doc1.add(
+ *_CLNEW Field(
+ _T("field0"),
+ _T("value1"), //second document holds only "value1"
+ Field::STORE_YES | Field::INDEX_TOKENIZED
+ )
+ );
+ ndx.addDocument(&doc1);
+ ndx.optimize();
+ ndx.close();
+
+ //test the index querying (the searcher is opened on the fsdir path)
+ IndexSearcher searcher(fsdir);
+ Term* term0 = _CLNEW Term(_T("field0"),_T("value1")); //NOTE(review): term0 is not used by any query below, it is only released at the end
+ Query* query0 = QueryParser::parse(_T("value0"),_T("field0"),&a);
+ Hits* hits0 = searcher.search(query0);
+ CLUCENE_ASSERT(hits0->length() > 0); //the value from doc0 must be found
+ Term* term1 = _CLNEW Term(_T("field0"),_T("value0")); //NOTE(review): term1 is likewise unused apart from its release
+ Query* query1 = QueryParser::parse(_T("value1"),_T("field0"),&a);
+ Hits* hits1 = searcher.search(query1);
+ CLUCENE_ASSERT(hits1->length() > 0); //the value from doc1 must be found
+ _CLDELETE(query0); //release the parsed queries, the hit lists and the two terms
+ _CLDELETE(query1);
+ _CLDELETE(hits0);
+ _CLDELETE(hits1);
+ _CLDECDELETE(term0);
+ _CLDECDELETE(term1);
+}
+
void testAddIndexes(CuTest *tc){
char reuters_origdirectory[1024];
strcpy(reuters_origdirectory, clucene_data_location);
@@ -73,11 +186,14 @@
CLUCENE_ASSERT(w.docCount()==62); //31 docs in reuters...
}
}
+
CuSuite *testindexwriter(void) //builds and returns the suite containing the IndexWriter tests registered below
{
CuSuite *suite = CuSuiteNew(_T("CLucene IndexWriter Test"));
SUITE_ADD_TEST(suite, testAddIndexes);
SUITE_ADD_TEST(suite, testIWmergeSegments1);
+ SUITE_ADD_TEST(suite, testIWmergeSegments2); //added in this revision
+ SUITE_ADD_TEST(suite, testIWmergePhraseSegments); //added in this revision
return suite;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|