You can subscribe to this list here.
2004 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
(25) |
Dec
(67) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2005 |
Jan
(125) |
Feb
(23) |
Mar
(167) |
Apr
(2) |
May
|
Jun
(19) |
Jul
(304) |
Aug
(181) |
Sep
(189) |
Oct
(145) |
Nov
(110) |
Dec
(44) |
2006 |
Jan
(303) |
Feb
(40) |
Mar
(2) |
Apr
(143) |
May
|
Jun
(74) |
Jul
(31) |
Aug
(7) |
Sep
(21) |
Oct
(33) |
Nov
(102) |
Dec
(36) |
2007 |
Jan
|
Feb
(16) |
Mar
(38) |
Apr
(34) |
May
(3) |
Jun
(4) |
Jul
(4) |
Aug
(13) |
Sep
(5) |
Oct
|
Nov
|
Dec
|
2008 |
Jan
(2) |
Feb
|
Mar
(13) |
Apr
|
May
(18) |
Jun
(48) |
Jul
(136) |
Aug
(45) |
Sep
(21) |
Oct
(32) |
Nov
|
Dec
(9) |
2009 |
Jan
(4) |
Feb
|
Mar
(33) |
Apr
(23) |
May
(6) |
Jun
(3) |
Jul
(11) |
Aug
|
Sep
(5) |
Oct
|
Nov
|
Dec
|
2010 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
(3) |
Sep
|
Oct
|
Nov
|
Dec
|
From: <ust...@us...> - 2010-08-24 15:01:10
|
Revision: 3027 http://clucene.svn.sourceforge.net/clucene/?rev=3027&view=rev Author: ustramooner Date: 2010-08-24 15:00:54 +0000 (Tue, 24 Aug 2010) Log Message: ----------- merged git master Modified Paths: -------------- branches/lucene2_3_2/CMakeLists.txt branches/lucene2_3_2/COPYING branches/lucene2_3_2/INSTALL branches/lucene2_3_2/cmake/CreateClucenePackages.cmake branches/lucene2_3_2/dist-test.sh branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.cpp branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.h branches/lucene2_3_2/src/contribs/benchmarker/Main.cpp branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.cpp branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.h branches/lucene2_3_2/src/contribs/benchmarker/Timer.h branches/lucene2_3_2/src/contribs/benchmarker/Unit.cpp branches/lucene2_3_2/src/contribs/benchmarker/Unit.h branches/lucene2_3_2/src/contribs/benchmarker/stdafx.cpp branches/lucene2_3_2/src/contribs/benchmarker/stdafx.h branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt branches/lucene2_3_2/src/contribs/contribs-lib-test/TestAnalysis.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/TestHighlight.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/TestSnowball.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/TestUtf8.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/contribTests.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/test.h branches/lucene2_3_2/src/contribs/contribs-lib-test/testall.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryTermExtractor.cpp 
branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/Snowball.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballAnalyzer.h branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballFilter.h branches/lucene2_3_2/src/contribs-lib/CMakeLists.txt branches/lucene2_3_2/src/core/CLucene/CLConfig.h branches/lucene2_3_2/src/core/CLucene/CLMonolithic.cpp branches/lucene2_3_2/src/core/CLucene/StdHeader.cpp branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.h branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h branches/lucene2_3_2/src/core/CLucene/debug/error.cpp branches/lucene2_3_2/src/core/CLucene/debug/error.h branches/lucene2_3_2/src/core/CLucene/debug/lucenebase.h branches/lucene2_3_2/src/core/CLucene/debug/mem.h branches/lucene2_3_2/src/core/CLucene/document/DateTools.cpp branches/lucene2_3_2/src/core/CLucene/document/DateTools.h branches/lucene2_3_2/src/core/CLucene/document/Document.h branches/lucene2_3_2/src/core/CLucene/document/Field.cpp branches/lucene2_3_2/src/core/CLucene/document/Field.h branches/lucene2_3_2/src/core/CLucene/document/FieldSelector.cpp branches/lucene2_3_2/src/core/CLucene/document/FieldSelector.h branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp branches/lucene2_3_2/src/core/CLucene/document/NumberTools.h branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.cpp branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.h 
branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriterThreadState.cpp branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexDeletionPolicy.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexDeletionPolicy.h branches/lucene2_3_2/src/core/CLucene/index/IndexFileDeleter.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexFileNameFilter.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexFileNames.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.h branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.h branches/lucene2_3_2/src/core/CLucene/index/MergePolicy.cpp branches/lucene2_3_2/src/core/CLucene/index/MergePolicy.h branches/lucene2_3_2/src/core/CLucene/index/MergeScheduler.h branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h branches/lucene2_3_2/src/core/CLucene/index/MultiSegmentReader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.cpp branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.h branches/lucene2_3_2/src/core/CLucene/index/SegmentInfos.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentMergeInfo.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentMergeQueue.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermDocs.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermEnum.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermVector.cpp 
branches/lucene2_3_2/src/core/CLucene/index/SkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/Term.cpp branches/lucene2_3_2/src/core/CLucene/index/Term.h branches/lucene2_3_2/src/core/CLucene/index/TermInfosReader.cpp branches/lucene2_3_2/src/core/CLucene/index/TermInfosWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp branches/lucene2_3_2/src/core/CLucene/index/Terms.cpp branches/lucene2_3_2/src/core/CLucene/index/Terms.h branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h branches/lucene2_3_2/src/core/CLucene/index/_DocumentsWriter.h branches/lucene2_3_2/src/core/CLucene/index/_FieldsReader.h branches/lucene2_3_2/src/core/CLucene/index/_FieldsWriter.h branches/lucene2_3_2/src/core/CLucene/index/_IndexFileNames.h branches/lucene2_3_2/src/core/CLucene/index/_MultiSegmentReader.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentInfos.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentMergeInfo.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentTermEnum.h branches/lucene2_3_2/src/core/CLucene/index/_Term.h branches/lucene2_3_2/src/core/CLucene/index/_TermInfosReader.h branches/lucene2_3_2/src/core/CLucene/index/_TermInfosWriter.h branches/lucene2_3_2/src/core/CLucene/queryParser/FastCharStream.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryToken.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParserBase.cpp branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.h branches/lucene2_3_2/src/core/CLucene/search/BooleanScorer.cpp 
branches/lucene2_3_2/src/core/CLucene/search/BooleanScorer2.cpp branches/lucene2_3_2/src/core/CLucene/search/CachingWrapperFilter.cpp branches/lucene2_3_2/src/core/CLucene/search/CachingWrapperFilter.h branches/lucene2_3_2/src/core/CLucene/search/ChainedFilter.cpp branches/lucene2_3_2/src/core/CLucene/search/ConjunctionScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/ConstantScoreQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/ConstantScoreQuery.h branches/lucene2_3_2/src/core/CLucene/search/DateFilter.cpp branches/lucene2_3_2/src/core/CLucene/search/DateFilter.h branches/lucene2_3_2/src/core/CLucene/search/DisjunctionSumScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/Explanation.cpp branches/lucene2_3_2/src/core/CLucene/search/Explanation.h branches/lucene2_3_2/src/core/CLucene/search/FieldCacheImpl.cpp branches/lucene2_3_2/src/core/CLucene/search/FilteredTermEnum.cpp branches/lucene2_3_2/src/core/CLucene/search/FilteredTermEnum.h branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h branches/lucene2_3_2/src/core/CLucene/search/Hits.h branches/lucene2_3_2/src/core/CLucene/search/IndexSearcher.cpp branches/lucene2_3_2/src/core/CLucene/search/IndexSearcher.h branches/lucene2_3_2/src/core/CLucene/search/MatchAllDocsQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/MatchAllDocsQuery.h branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.h branches/lucene2_3_2/src/core/CLucene/search/MultiSearcher.cpp branches/lucene2_3_2/src/core/CLucene/search/MultiSearcher.h branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.h branches/lucene2_3_2/src/core/CLucene/search/PhraseScorer.cpp 
branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.h branches/lucene2_3_2/src/core/CLucene/search/Query.h branches/lucene2_3_2/src/core/CLucene/search/QueryFilter.cpp branches/lucene2_3_2/src/core/CLucene/search/RangeFilter.cpp branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.h branches/lucene2_3_2/src/core/CLucene/search/Scorer.cpp branches/lucene2_3_2/src/core/CLucene/search/SearchHeader.cpp branches/lucene2_3_2/src/core/CLucene/search/Searchable.h branches/lucene2_3_2/src/core/CLucene/search/Similarity.cpp branches/lucene2_3_2/src/core/CLucene/search/Similarity.h branches/lucene2_3_2/src/core/CLucene/search/TermQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/TermQuery.h branches/lucene2_3_2/src/core/CLucene/search/TermScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/WildcardQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/WildcardQuery.h branches/lucene2_3_2/src/core/CLucene/search/WildcardTermEnum.cpp branches/lucene2_3_2/src/core/CLucene/search/WildcardTermEnum.h branches/lucene2_3_2/src/core/CLucene/search/_DisjunctionSumScorer.h branches/lucene2_3_2/src/core/CLucene/store/Directory.cpp branches/lucene2_3_2/src/core/CLucene/store/Directory.h branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.h branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp branches/lucene2_3_2/src/core/CLucene/store/IndexInput.h branches/lucene2_3_2/src/core/CLucene/store/IndexOutput.cpp branches/lucene2_3_2/src/core/CLucene/store/IndexOutput.h branches/lucene2_3_2/src/core/CLucene/store/Lock.cpp branches/lucene2_3_2/src/core/CLucene/store/Lock.h branches/lucene2_3_2/src/core/CLucene/store/LockFactory.cpp branches/lucene2_3_2/src/core/CLucene/store/LockFactory.h branches/lucene2_3_2/src/core/CLucene/store/MMapInput.cpp 
branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/_Lock.h branches/lucene2_3_2/src/core/CLucene/store/_RAMDirectory.h branches/lucene2_3_2/src/core/CLucene/util/Array.h branches/lucene2_3_2/src/core/CLucene/util/BitSet.cpp branches/lucene2_3_2/src/core/CLucene/util/CLStreams.h branches/lucene2_3_2/src/core/CLucene/util/Equators.h branches/lucene2_3_2/src/core/CLucene/util/PriorityQueue.h branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp branches/lucene2_3_2/src/core/CLucene/util/StringIntern.cpp branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp branches/lucene2_3_2/src/core/CLucene/util/VoidList.h branches/lucene2_3_2/src/core/CLucene/util/_bufferedstream.h branches/lucene2_3_2/src/core/CLucene/util/_streambase.h branches/lucene2_3_2/src/core/CLucene/util/_streambuffer.h branches/lucene2_3_2/src/core/CMakeLists.txt branches/lucene2_3_2/src/core/files_list.txt branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h branches/lucene2_3_2/src/shared/CLucene/SharedHeader.cpp branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h branches/lucene2_3_2/src/shared/CLucene/clucene-config.h.cmake branches/lucene2_3_2/src/shared/CLucene/config/_threads.h branches/lucene2_3_2/src/shared/CLucene/config/repl_tprintf.cpp branches/lucene2_3_2/src/shared/CLucene/config/threads.cpp branches/lucene2_3_2/src/shared/CLucene/config/utf8.cpp branches/lucene2_3_2/src/shared/CLucene/util/Misc.cpp branches/lucene2_3_2/src/shared/CLucene/util/Misc.h branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp branches/lucene2_3_2/src/shared/CMakeLists.txt branches/lucene2_3_2/src/shared/README branches/lucene2_3_2/src/shared/cmake/MacroCheckGccVisibility.cmake branches/lucene2_3_2/src/test/CLMonolithic_Test.cpp branches/lucene2_3_2/src/test/CMakeLists.txt branches/lucene2_3_2/src/test/analysis/TestAnalyzers.cpp branches/lucene2_3_2/src/test/document/TestDateTools.cpp 
branches/lucene2_3_2/src/test/index/TestHighFreqTerms.cpp branches/lucene2_3_2/src/test/index/TestIndexModifier.cpp branches/lucene2_3_2/src/test/index/TestIndexReader.cpp branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp branches/lucene2_3_2/src/test/index/TestReuters.cpp branches/lucene2_3_2/src/test/index/TestThreading.cpp branches/lucene2_3_2/src/test/index/TestUtf8.cpp branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp branches/lucene2_3_2/src/test/search/TestBoolean.cpp branches/lucene2_3_2/src/test/search/TestDateFilter.cpp branches/lucene2_3_2/src/test/search/TestQueries.cpp branches/lucene2_3_2/src/test/search/TestRangeFilter.cpp branches/lucene2_3_2/src/test/search/TestSearch.cpp branches/lucene2_3_2/src/test/search/TestSort.cpp branches/lucene2_3_2/src/test/search/TestTermVector.cpp branches/lucene2_3_2/src/test/search/TestWildcard.cpp branches/lucene2_3_2/src/test/store/TestStore.cpp branches/lucene2_3_2/src/test/test.h branches/lucene2_3_2/src/test/testall.cpp branches/lucene2_3_2/src/test/tests.cpp Added Paths: ----------- branches/lucene2_3_2/cmake/CLuceneBoost.cmake branches/lucene2_3_2/src/core/CLucene/index/_IndexFileNameFilter.h branches/lucene2_3_2/src/core/CLucene/store/BufferedIndexInput.cpp branches/lucene2_3_2/src/core/CLucene/store/BufferedIndexInput.h branches/lucene2_3_2/src/core/CLucene/store/BufferedIndexOutput.cpp branches/lucene2_3_2/src/core/CLucene/store/BufferedIndexOutput.h branches/lucene2_3_2/src/core/CLucene/store/MMapDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/SharedHandle.cpp branches/lucene2_3_2/src/core/CLucene/store/_FSDirectory.h branches/lucene2_3_2/src/core/CLucene/store/_MMapDirectory.h branches/lucene2_3_2/src/core/CLucene/store/_MMapIndexInput.h branches/lucene2_3_2/src/core/CLucene/store/_SharedHandle.h branches/lucene2_3_2/src/core/libclucene-core.pc.cmake branches/lucene2_3_2/src/ext/ 
branches/lucene2_3_2/src/ext/CMakeLists.txt branches/lucene2_3_2/src/ext/boost/ branches/lucene2_3_2/src/ext/boost/assert.hpp branches/lucene2_3_2/src/ext/boost/checked_delete.hpp branches/lucene2_3_2/src/ext/boost/config/ branches/lucene2_3_2/src/ext/boost/config/abi/ branches/lucene2_3_2/src/ext/boost/config/abi/borland_prefix.hpp branches/lucene2_3_2/src/ext/boost/config/abi/borland_suffix.hpp branches/lucene2_3_2/src/ext/boost/config/abi/msvc_prefix.hpp branches/lucene2_3_2/src/ext/boost/config/abi/msvc_suffix.hpp branches/lucene2_3_2/src/ext/boost/config/abi_prefix.hpp branches/lucene2_3_2/src/ext/boost/config/abi_suffix.hpp branches/lucene2_3_2/src/ext/boost/config/auto_link.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/ branches/lucene2_3_2/src/ext/boost/config/compiler/borland.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/codegear.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/comeau.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/common_edg.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/compaq_cxx.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/digitalmars.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/gcc.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/gcc_xml.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/greenhills.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/hp_acc.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/intel.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/kai.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/metrowerks.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/mpw.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/pgi.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/sgi_mipspro.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/sunpro_cc.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/vacpp.hpp branches/lucene2_3_2/src/ext/boost/config/compiler/visualc.hpp branches/lucene2_3_2/src/ext/boost/config/no_tr1/ 
branches/lucene2_3_2/src/ext/boost/config/no_tr1/cmath.hpp branches/lucene2_3_2/src/ext/boost/config/no_tr1/complex.hpp branches/lucene2_3_2/src/ext/boost/config/no_tr1/functional.hpp branches/lucene2_3_2/src/ext/boost/config/no_tr1/memory.hpp branches/lucene2_3_2/src/ext/boost/config/no_tr1/utility.hpp branches/lucene2_3_2/src/ext/boost/config/platform/ branches/lucene2_3_2/src/ext/boost/config/platform/aix.hpp branches/lucene2_3_2/src/ext/boost/config/platform/amigaos.hpp branches/lucene2_3_2/src/ext/boost/config/platform/beos.hpp branches/lucene2_3_2/src/ext/boost/config/platform/bsd.hpp branches/lucene2_3_2/src/ext/boost/config/platform/cygwin.hpp branches/lucene2_3_2/src/ext/boost/config/platform/hpux.hpp branches/lucene2_3_2/src/ext/boost/config/platform/irix.hpp branches/lucene2_3_2/src/ext/boost/config/platform/linux.hpp branches/lucene2_3_2/src/ext/boost/config/platform/macos.hpp branches/lucene2_3_2/src/ext/boost/config/platform/qnxnto.hpp branches/lucene2_3_2/src/ext/boost/config/platform/solaris.hpp branches/lucene2_3_2/src/ext/boost/config/platform/vxworks.hpp branches/lucene2_3_2/src/ext/boost/config/platform/win32.hpp branches/lucene2_3_2/src/ext/boost/config/posix_features.hpp branches/lucene2_3_2/src/ext/boost/config/requires_threads.hpp branches/lucene2_3_2/src/ext/boost/config/select_compiler_config.hpp branches/lucene2_3_2/src/ext/boost/config/select_platform_config.hpp branches/lucene2_3_2/src/ext/boost/config/select_stdlib_config.hpp branches/lucene2_3_2/src/ext/boost/config/stdlib/ branches/lucene2_3_2/src/ext/boost/config/stdlib/dinkumware.hpp branches/lucene2_3_2/src/ext/boost/config/stdlib/libcomo.hpp branches/lucene2_3_2/src/ext/boost/config/stdlib/libstdcpp3.hpp branches/lucene2_3_2/src/ext/boost/config/stdlib/modena.hpp branches/lucene2_3_2/src/ext/boost/config/stdlib/msl.hpp branches/lucene2_3_2/src/ext/boost/config/stdlib/roguewave.hpp branches/lucene2_3_2/src/ext/boost/config/stdlib/sgi.hpp 
branches/lucene2_3_2/src/ext/boost/config/stdlib/stlport.hpp branches/lucene2_3_2/src/ext/boost/config/stdlib/vacpp.hpp branches/lucene2_3_2/src/ext/boost/config/suffix.hpp branches/lucene2_3_2/src/ext/boost/config/user.hpp branches/lucene2_3_2/src/ext/boost/config/warning_disable.hpp branches/lucene2_3_2/src/ext/boost/config.hpp branches/lucene2_3_2/src/ext/boost/current_function.hpp branches/lucene2_3_2/src/ext/boost/detail/ branches/lucene2_3_2/src/ext/boost/detail/algorithm.hpp branches/lucene2_3_2/src/ext/boost/detail/allocator_utilities.hpp branches/lucene2_3_2/src/ext/boost/detail/atomic_count.hpp branches/lucene2_3_2/src/ext/boost/detail/binary_search.hpp branches/lucene2_3_2/src/ext/boost/detail/call_traits.hpp branches/lucene2_3_2/src/ext/boost/detail/catch_exceptions.hpp branches/lucene2_3_2/src/ext/boost/detail/compressed_pair.hpp branches/lucene2_3_2/src/ext/boost/detail/container_fwd.hpp branches/lucene2_3_2/src/ext/boost/detail/dynamic_bitset.hpp branches/lucene2_3_2/src/ext/boost/detail/endian.hpp branches/lucene2_3_2/src/ext/boost/detail/has_default_constructor.hpp branches/lucene2_3_2/src/ext/boost/detail/identifier.hpp branches/lucene2_3_2/src/ext/boost/detail/indirect_traits.hpp branches/lucene2_3_2/src/ext/boost/detail/interlocked.hpp branches/lucene2_3_2/src/ext/boost/detail/is_function_ref_tester.hpp branches/lucene2_3_2/src/ext/boost/detail/is_incrementable.hpp branches/lucene2_3_2/src/ext/boost/detail/is_xxx.hpp branches/lucene2_3_2/src/ext/boost/detail/iterator.hpp branches/lucene2_3_2/src/ext/boost/detail/lcast_precision.hpp branches/lucene2_3_2/src/ext/boost/detail/lightweight_mutex.hpp branches/lucene2_3_2/src/ext/boost/detail/lightweight_test.hpp branches/lucene2_3_2/src/ext/boost/detail/lightweight_thread.hpp branches/lucene2_3_2/src/ext/boost/detail/limits.hpp branches/lucene2_3_2/src/ext/boost/detail/named_template_params.hpp branches/lucene2_3_2/src/ext/boost/detail/no_exceptions_support.hpp 
branches/lucene2_3_2/src/ext/boost/detail/none_t.hpp branches/lucene2_3_2/src/ext/boost/detail/numeric_traits.hpp branches/lucene2_3_2/src/ext/boost/detail/ob_call_traits.hpp branches/lucene2_3_2/src/ext/boost/detail/ob_compressed_pair.hpp branches/lucene2_3_2/src/ext/boost/detail/quick_allocator.hpp branches/lucene2_3_2/src/ext/boost/detail/reference_content.hpp branches/lucene2_3_2/src/ext/boost/detail/scoped_enum_emulation.hpp branches/lucene2_3_2/src/ext/boost/detail/select_type.hpp branches/lucene2_3_2/src/ext/boost/detail/sp_typeinfo.hpp branches/lucene2_3_2/src/ext/boost/detail/templated_streams.hpp branches/lucene2_3_2/src/ext/boost/detail/utf8_codecvt_facet.hpp branches/lucene2_3_2/src/ext/boost/detail/workaround.hpp branches/lucene2_3_2/src/ext/boost/exception/ branches/lucene2_3_2/src/ext/boost/exception/all.hpp branches/lucene2_3_2/src/ext/boost/exception/current_exception_cast.hpp branches/lucene2_3_2/src/ext/boost/exception/detail/ branches/lucene2_3_2/src/ext/boost/exception/detail/attribute_noreturn.hpp branches/lucene2_3_2/src/ext/boost/exception/detail/error_info_impl.hpp branches/lucene2_3_2/src/ext/boost/exception/detail/exception_ptr.hpp branches/lucene2_3_2/src/ext/boost/exception/detail/is_output_streamable.hpp branches/lucene2_3_2/src/ext/boost/exception/detail/object_hex_dump.hpp branches/lucene2_3_2/src/ext/boost/exception/detail/type_info.hpp branches/lucene2_3_2/src/ext/boost/exception/diagnostic_information.hpp branches/lucene2_3_2/src/ext/boost/exception/enable_current_exception.hpp branches/lucene2_3_2/src/ext/boost/exception/enable_error_info.hpp branches/lucene2_3_2/src/ext/boost/exception/errinfo_api_function.hpp branches/lucene2_3_2/src/ext/boost/exception/errinfo_at_line.hpp branches/lucene2_3_2/src/ext/boost/exception/errinfo_errno.hpp branches/lucene2_3_2/src/ext/boost/exception/errinfo_file_handle.hpp branches/lucene2_3_2/src/ext/boost/exception/errinfo_file_name.hpp 
branches/lucene2_3_2/src/ext/boost/exception/errinfo_file_open_mode.hpp branches/lucene2_3_2/src/ext/boost/exception/errinfo_nested_exception.hpp branches/lucene2_3_2/src/ext/boost/exception/errinfo_type_info_name.hpp branches/lucene2_3_2/src/ext/boost/exception/error_info.hpp branches/lucene2_3_2/src/ext/boost/exception/exception.hpp branches/lucene2_3_2/src/ext/boost/exception/get_error_info.hpp branches/lucene2_3_2/src/ext/boost/exception/info.hpp branches/lucene2_3_2/src/ext/boost/exception/info_tuple.hpp branches/lucene2_3_2/src/ext/boost/exception/to_string.hpp branches/lucene2_3_2/src/ext/boost/exception/to_string_stub.hpp branches/lucene2_3_2/src/ext/boost/memory_order.hpp branches/lucene2_3_2/src/ext/boost/shared_ptr.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/ branches/lucene2_3_2/src/ext/boost/smart_ptr/bad_weak_ptr.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/ branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/atomic_count.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/atomic_count_gcc.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/atomic_count_gcc_x86.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/atomic_count_pthreads.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/atomic_count_solaris.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/atomic_count_sync.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/atomic_count_win32.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/lightweight_mutex.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/lwm_nop.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/lwm_pthreads.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/lwm_win32_cs.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/operator_bool.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/quick_allocator.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/shared_array_nmt.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/shared_count.hpp 
branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/shared_ptr_nmt.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_convertible.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_acc_ia64.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_cw_ppc.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_cw_x86.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_gcc_ia64.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_gcc_mips.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_gcc_ppc.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_gcc_sparc.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_nt.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_pt.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_solaris.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_spin.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_sync.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_base_w32.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_counted_impl.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/sp_has_sync.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/spinlock.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/spinlock_gcc_arm.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/spinlock_nt.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/spinlock_pool.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/spinlock_pt.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/spinlock_sync.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/spinlock_w32.hpp 
branches/lucene2_3_2/src/ext/boost/smart_ptr/detail/yield_k.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/enable_shared_from_this.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/enable_shared_from_this2.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/intrusive_ptr.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/make_shared.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/scoped_array.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/scoped_ptr.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/shared_array.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/shared_ptr.hpp branches/lucene2_3_2/src/ext/boost/smart_ptr/weak_ptr.hpp branches/lucene2_3_2/src/ext/boost/throw_exception.hpp branches/lucene2_3_2/src/ext/boost/version.hpp branches/lucene2_3_2/src/ext/zlib/ branches/lucene2_3_2/src/ext/zlib/ChangeLog branches/lucene2_3_2/src/ext/zlib/FAQ branches/lucene2_3_2/src/ext/zlib/INDEX branches/lucene2_3_2/src/ext/zlib/README branches/lucene2_3_2/src/ext/zlib/adler32.c branches/lucene2_3_2/src/ext/zlib/algorithm.txt branches/lucene2_3_2/src/ext/zlib/compress.c branches/lucene2_3_2/src/ext/zlib/crc32.c branches/lucene2_3_2/src/ext/zlib/crc32.h branches/lucene2_3_2/src/ext/zlib/deflate.c branches/lucene2_3_2/src/ext/zlib/deflate.h branches/lucene2_3_2/src/ext/zlib/gzio.c branches/lucene2_3_2/src/ext/zlib/inffast.c branches/lucene2_3_2/src/ext/zlib/inffast.h branches/lucene2_3_2/src/ext/zlib/inffixed.h branches/lucene2_3_2/src/ext/zlib/inflate.c branches/lucene2_3_2/src/ext/zlib/inflate.h branches/lucene2_3_2/src/ext/zlib/inftrees.c branches/lucene2_3_2/src/ext/zlib/inftrees.h branches/lucene2_3_2/src/ext/zlib/trees.c branches/lucene2_3_2/src/ext/zlib/trees.h branches/lucene2_3_2/src/ext/zlib/zconf.h branches/lucene2_3_2/src/ext/zlib/zlib.h branches/lucene2_3_2/src/ext/zlib/zutil.c branches/lucene2_3_2/src/ext/zlib/zutil.h branches/lucene2_3_2/src/htdocs/ branches/lucene2_3_2/src/htdocs/README branches/lucene2_3_2/src/htdocs/_footer.html 
branches/lucene2_3_2/src/htdocs/_header.html branches/lucene2_3_2/src/htdocs/_index.php branches/lucene2_3_2/src/htdocs/clucene.jpg branches/lucene2_3_2/src/htdocs/contribute.shtml branches/lucene2_3_2/src/htdocs/download.shtml branches/lucene2_3_2/src/htdocs/images/ branches/lucene2_3_2/src/htdocs/images/disk.png branches/lucene2_3_2/src/htdocs/images/img01.gif branches/lucene2_3_2/src/htdocs/images/img02.gif branches/lucene2_3_2/src/htdocs/images/img03.gif branches/lucene2_3_2/src/htdocs/images/img04.jpg branches/lucene2_3_2/src/htdocs/images/img05.jpg branches/lucene2_3_2/src/htdocs/images/img06.jpg branches/lucene2_3_2/src/htdocs/images/img07.gif branches/lucene2_3_2/src/htdocs/images/img08.jpg branches/lucene2_3_2/src/htdocs/images/img09.jpg branches/lucene2_3_2/src/htdocs/images/img10.jpg branches/lucene2_3_2/src/htdocs/images/img11.gif branches/lucene2_3_2/src/htdocs/images/spacer.gif branches/lucene2_3_2/src/htdocs/index.shtml branches/lucene2_3_2/src/htdocs/style.css branches/lucene2_3_2/src/shared/cmake/CheckAtomicFunctions.cmake Removed Paths: ------------- branches/lucene2_3_2/src/contribs/contribs-lib-test/TestStreams.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipcompressstream.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipcompressstream.h branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.h branches/lucene2_3_2/src/core/CLucene/index/IndexFileNameFilter.h branches/lucene2_3_2/src/core/CLucene/store/TransactionalRAMDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/_MMap.h branches/lucene2_3_2/src/core/CLucene/store/_TransactionalRAMDirectory.h branches/lucene2_3_2/src/core/libclucene.pc.cmake branches/lucene2_3_2/src/shared/CLucene/util/zlib/adler32.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/compress.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/crc32.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/crc32.h 
branches/lucene2_3_2/src/shared/CLucene/util/zlib/deflate.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/deflate.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/gzio.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/inffast.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/inffast.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/inffixed.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/inflate.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/inflate.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/inftrees.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/inftrees.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/trees.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/trees.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/zconf.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/zlib.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/zutil.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/zutil.h Modified: branches/lucene2_3_2/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/CMakeLists.txt 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/CMakeLists.txt 2010-08-24 15:00:54 UTC (rev 3027) @@ -5,16 +5,17 @@ #REVISION version MUST be revised if the headers or compatibility change #PATCH should be 0 unless a patch is made that doesn't affect the public signature (i.e. clients don't need to re-compile). 
-SET(CLUCENE_VERSION_MAJOR "0") -SET(CLUCENE_VERSION_MINOR "9") -SET(CLUCENE_VERSION_REVISION "23") +SET(CLUCENE_VERSION_MAJOR "1") +SET(CLUCENE_VERSION_MINOR "0") +SET(CLUCENE_VERSION_REVISION "0") SET(CLUCENE_VERSION_PATCH "0") -SET(CLUCENE_INT_VERSION 92300) +SET(CLUCENE_INT_VERSION 1000000) SET(CLUCENE_VERSION "${CLUCENE_VERSION_MAJOR}.${CLUCENE_VERSION_MINOR}.${CLUCENE_VERSION_REVISION}.${CLUCENE_VERSION_PATCH}") SET(CLUCENE_SOVERSION "${CLUCENE_VERSION_MAJOR}.${CLUCENE_VERSION_MINOR}.${CLUCENE_VERSION_REVISION}") -CMAKE_MINIMUM_REQUIRED(VERSION 2.4.2 FATAL_ERROR) +#CMake 2.6+ is recommended to an improved Boost module +CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR) if(COMMAND cmake_policy) cmake_policy(SET CMP0003 NEW) @@ -25,13 +26,13 @@ set(CMAKE_DEBUG_POSTFIX "d") endif(WIN32) - # include specific modules set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") #define options... Include (CLuceneDocs) +Include (CLuceneBoost) Include (FindThreads) IF(NOT CMAKE_BUILD_TYPE) @@ -57,6 +58,20 @@ OPTION(ENABLE_ASCII_MODE "enable ascii support" OFF) + +SET(ENABLE_ANSI_MODE OFF) +IF(CMAKE_COMPILER_IS_GNUCXX) + SET(ENABLE_ANSI_MODE ON) + + #exceptions: + IF(MINGW OR CYGWIN) + SET(ENABLE_ANSI_MODE OFF) + ENDIF(MINGW OR CYGWIN) +ENDIF(CMAKE_COMPILER_IS_GNUCXX) + +OPTION(ENABLE_ANSI_MODE + "compile with -ansi flag" + ${ENABLE_ANSI_MODE}) OPTION(LUCENE_USE_INTERNAL_CHAR_FUNCTIONS "use internal character functions. 
required to run tests correctly" ON) @@ -107,10 +122,14 @@ ENDIF ( ENABLE_GPROF ) ENDIF ( GccFlagPg ) - IF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" ) - ENDIF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") + #cmake should handle this automatically + # IF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") + # SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" ) + # ENDIF("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") + IF( ENABLE_ANSI_MODE ) + SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ansi") + ENDIF ( ENABLE_ANSI_MODE ) ENDIF(CMAKE_COMPILER_IS_GNUCC) @@ -127,7 +146,13 @@ INCLUDE_DIRECTORIES( ${clucene_BINARY_DIR}/src/shared ) INCLUDE_DIRECTORIES( ${clucene_SOURCE_DIR}/src/core ) +#set boost path. we need src/ext to be defined before this works... +Include (CLuceneBoost) +GET_BOOST_INCLUDE_PATH(_CL_BOOST_INCLUDE_PATH) +INCLUDE_DIRECTORIES( ${_CL_BOOST_INCLUDE_PATH} ) + #include the projects +ADD_SUBDIRECTORY (src/ext) ADD_SUBDIRECTORY (src/shared) ADD_SUBDIRECTORY (src/core) ADD_SUBDIRECTORY (src/test) @@ -140,6 +165,7 @@ ADD_SUBDIRECTORY (src/contribs-lib EXCLUDE_FROM_ALL) ENDIF ( BUILD_CONTRIBS_LIB ) + #add uninstall command CONFIGURE_FILE( "${CMAKE_MODULE_PATH}/cmake_uninstall.cmake.in" Modified: branches/lucene2_3_2/COPYING =================================================================== --- branches/lucene2_3_2/COPYING 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/COPYING 2010-08-24 15:00:54 UTC (rev 3027) @@ -124,7 +124,3 @@ PorterStemmer code: couldn't find license. This component is deprecated and will be removed very soon. Snowball code: needs to be researched. - -Jstreams: needs to be researched - -All code needs to be clearly marked with license headers... 
Modified: branches/lucene2_3_2/INSTALL =================================================================== --- branches/lucene2_3_2/INSTALL 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/INSTALL 2010-08-24 15:00:54 UTC (rev 3027) @@ -9,7 +9,8 @@ -------------------- Dependencies: -* CMake version 2.4.2 or later. +* CMake version 2.4.2 or later (2.6 or later recommended due to an improved FindBoost module) +* Boost libs (http://www.boost.org/), latest version recommended * A functioning and fairly new C++ compiler. We test mostly on GCC and Visual Studio 6+. Anything other than that may not work. * Something to unzip/untar the source code. @@ -28,6 +29,13 @@ create different types of builds in the same source-tree.] # mkdir <clucene>/build-name # cd <clucene>/build-name +5*.) Windows users: make sure Boost environment variables are defined at least for the current + command prompt session. You can check this by typing "set" (no quotes). If you have any doubts, + just type the following 3 commands to set those variables, as follows (boost_1_40_0 being the current + Boost version): + set BOOST_BUILD_PATH=C:\{parent directory for boost}\boost_1_40_0\tools\build\v2 + set BOOST_PATH=C:\{parent directory for boost} + set BOOST_ROOT=C:\{parent directory for boost}\boost_1_40_0 6.) Configure using cmake. This can be done many different ways, but the basic syntax is # cmake [-G "Script name"] .. [Where "Script name" is the name of the scripts to build (e.g. Visual Studio 8 2005). Added: branches/lucene2_3_2/cmake/CLuceneBoost.cmake =================================================================== --- branches/lucene2_3_2/cmake/CLuceneBoost.cmake (rev 0) +++ branches/lucene2_3_2/cmake/CLuceneBoost.cmake 2010-08-24 15:00:54 UTC (rev 3027) @@ -0,0 +1,22 @@ +#Locate Boost libs. Windows users: make sure BOOST_ROOT and BOOST_PATH are set correctly on your environment. +#See the site FAQ for more details. 
+ +MACRO (GET_BOOST_INCLUDE_PATH path) + #todo: allow this to fall back on a local distributed copy, so user doesn't have to d/l Boost seperately + SET(Boost_USE_MULTITHREAD ON) + FIND_PACKAGE( Boost ) + + #todo: limit Boost version? + #todo: use COMPONENTS threads to locate boost_threads without breaking the current support + IF(Boost_FOUND) + IF (NOT _boost_IN_CACHE) + MESSAGE( "Boost found" ) + message(STATUS "Boost_INCLUDE_DIR : ${Boost_INCLUDE_DIR}") + ENDIF (NOT _boost_IN_CACHE) + SET(${path} ${Boost_INCLUDE_DIRS} ) + ELSE() + MESSAGE( "Boost not found, using local: ${clucene_SOURCE_DIR}/src/ext" ) + SET(${path} ${clucene_SOURCE_DIR}/src/ext ) + ENDIF() +ENDMACRO (GET_BOOST_INCLUDE_PATH path) + Modified: branches/lucene2_3_2/cmake/CreateClucenePackages.cmake =================================================================== --- branches/lucene2_3_2/cmake/CreateClucenePackages.cmake 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/cmake/CreateClucenePackages.cmake 2010-08-24 15:00:54 UTC (rev 3027) @@ -13,17 +13,16 @@ SET(CPACK_PACKAGE_VERSION ${CLUCENE_VERSION}) SET(CPACK_PACKAGE_SOVERSION ${CLUCENE_SOVERSION}) -SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "library for full-featured text search engine (runtime)") SET(CPACK_PACKAGE_VENDOR "Ben van Klinken") SET(CPACK_PACKAGE_CONTACT "clu...@li...") -SET(CPACK_PACKAGE_NAME "libclucene1") +SET(CPACK_PACKAGE_NAME "libclucene2") SET(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README") SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CLucene - a C++ search engine, ported from the popular Apache Lucene") SET(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README") SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/COPYING") -SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") +#SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") #so, what are we going to install? 
SET(CPACK_INSTALL_CMAKE_PROJECTS @@ -42,8 +41,8 @@ #specific packaging requirements: SET(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>= 2.4), libgcc1 (>= 1:4.1.1-21), libstdc++6 (>= 4.1.1-21)") +SET(CPACK_DEBIAN_PACKAGE_SECTION "libs") - #don't include the current binary dir. get_filename_component(clucene_BINARY_DIR_name ${clucene_BINARY_DIR} NAME) SET(CPACK_SOURCE_IGNORE_FILES Modified: branches/lucene2_3_2/dist-test.sh =================================================================== --- branches/lucene2_3_2/dist-test.sh 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/dist-test.sh 2010-08-24 15:00:54 UTC (rev 3027) @@ -71,11 +71,10 @@ #check to see that no #ifdefs exist in headers that don't belong function checkForIfdefs { I=0 - grep "#if" $1| grep -v "_UCS2" |grep -v "_CL_HAVE_" |grep -v "_ASCII" |grep -v "_WIN32" |grep -v "_WIN64" | while read line; do + grep "#if" $1| grep -v "_UCS2" |grep -v "_CL_HAVE_" |grep -v "_ASCII" |grep -v "_WIN32"|grep -v "_MSC_"|grep -v "__MINGW32__" |grep -v "_WIN64" | while read line; do I=`expr $I + 1` if [ $I -gt 1 ]; then - echo $1 has invalid ifdef: $line - FAIL=1 + echo $1 might have invalid ifdef: $line fi done } @@ -95,12 +94,12 @@ if [ "${BH:0:1}" != "_" ]; then DH=`dirname "${H:3}"` - #move headers somewhere to compile - mkdir -p "$TMP/$DH" 2>/dev/null - ln -s "`cd "$DN" && pwd`/$BH" "$TMP/${H:3}" 2>/dev/null - - #create pub-headers.cpp if [ "${H:7}" != "core/CLucene/util/Reader.h" ]; then + #move headers somewhere to compile + mkdir -p "$TMP/$DH" 2>/dev/null + ln -s "`cd "$DN" && pwd`/$BH" "$TMP/${H:3}" 2>/dev/null + + #create pub-headers.cpp echo "#include \"${H:7}\"" >>$TMP/pub-headers.cpp fi fi @@ -130,7 +129,7 @@ #internal headers... 
none must be exported XX=`awk '/^[ \t]*(class|struct)/ { print $line }' $H| grep -v ";$"| grep -v CLUCENE_EXPORT| grep -v CLUCENE_INLINE_EXPORT| grep -v CLUCENE_SHARED_EXPORT| grep -v CLUCENE_SHARED_INLINE_EXPORT` if [ "$XX" == "" ]; then - echo "$H has exported class: $XX" + echo "$H is internal but has exported class: $XX" echo "" FAIL=1 fi @@ -146,15 +145,33 @@ fi #test that each header compiles independently... - if [ $t_c_h -eq 1 ] && [ "${H:7}" != "disttest/src/core/CLucene/util/Reader.h" ]; then - echo "Test that $H compiles seperately..." + if [ $t_c_h -eq 1 ]; then echo "#include \"CLucene/StdHeader.h"\" >$TMP/pub-header.cpp echo "#include \"$H"\" >>$TMP/pub-header.cpp echo "int main(){ return 0; }" >>"$TMP/pub-header.cpp" - g++ -I. -I$TMP/src/shared -I./src/shared -I$TMP/src/core $TMP/pub-header.cpp - if [ $? -ne 0 ]; then FAIL=1; fi + ERROR=`g++ -I. -I$TMP/src/shared -I./src/shared -I../src/ext -I$TMP/src/core $TMP/pub-header.cpp` + if [ $? -ne 0 ]; then + echo "" + echo "$H doesn't compile seperately..." + echo $ERROR + FAIL=1; + fi fi done + + + if [ $t_ifdefs -eq 1 ]; then + echo "Not all ifdefs are invalid, you have to figure it out for yourself :-)" + echo "If defs in classes which change depending on a user setting can cause big problems due to offset changes" + echo "for example:" + echo "class X {" + echo " #ifdef _DEBUG" + echo " int x;" + echo " #endif" + echo " int y;" + echo "}" + echo "If the library is compiled with _DEBUG, and then a user calls y without _DEBUG defined, unexpected behaviour will occur" + fi fi #iterate all our code... @@ -164,25 +181,38 @@ BH_len=${#BH} if [ "${BH:BH_len-2}" == ".h" ] || [ "${BH:BH_len-2}" == ".c" ] || [ "${BH:BH_len-4}" == ".cpp" ]; then + + #snowball has its own license... + if [ "echo $H|grep 'snowball/src_c'" != "" ]; then + continue + fi + #snowball has its own license... + if [ "echo $H|grep 'libstemmer'" != "" ]; then + continue + fi + #zlib has its own license... 
+ if [ "echo $H|grep 'CLucene/util/zlib'" != "" ]; then + continue + fi + if [ "`awk '/\* Copyright \(C\) [0-9]*-[0-9]* .*$/ { print $line }' $H`" == "" ]; then if [ "`awk '/\* Copyright [0-9]*-[0-9]* .*$/ { print $line }' $H`" == "" ]; then - echo "$H has invalid license" + echo "$H ($BH) has invalid license" FAIL=1 fi fi fi done - fi #test if headers can compile together by themselves: if [ $t_c_all -eq 1 ]; then - g++ -I$TMP/src -I$TMP/src/shared -I$TMP/src/core $TMP/pub-headers.cpp -I./src/shared + g++ -I$TMP/src -I../src/ext -I$TMP/src/shared -I$TMP/src/core $TMP/pub-headers.cpp -I./src/shared fi if [ $t_inline -eq 1 ]; then - if [ ! -f "./doc" ]; then + if [ ! -d "./doc" ]; then echo "Couldn't find docs, run:" echo "# cmake -DENABLE_CLDOCS:BOOLEAN=TRUE ." echo "# make doc" @@ -197,7 +227,13 @@ if [ "doc/html/classlucene_1_1index_1_1Term.html:1" == $line ]; then continue; fi - + if [ "doc/html/classlucene_1_1search_1_1Similarity.html:1" == $line ]; then + continue; + fi + if [ "doc/html/classlucene_1_1store_1_1BufferedIndexInput.html:1" == $line ]; then + continue; + fi + if [ $INLINES -eq 0 ]; then echo "These files report inline code:" INLINES=1 @@ -214,11 +250,18 @@ FAIL=1; fi + echo "Undefines for shared lib:" + nm -u --demangle bin/libclucene-shared.so |grep -E "lucene::" + echo "Undefines for core lib:" + nm -u --demangle bin/libclucene-core.so |grep -E "lucene::"|grep -v "lucene::util::Misc" |grep -v "lucene::util::mutex" |grep -v "lucene::util::StringBuffer"|grep -v "lucene::util::shared_condition" + #compile together make test-all if [ $? 
-ne 0 ]; then FAIL=1; fi + + fi Modified: branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.cpp 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.cpp 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,3 +1,9 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ #include "stdafx.h" #include "Benchmarker.h" #include "Unit.h" Modified: branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.h =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.h 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.h 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,3 +1,9 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ #pragma once class Benchmarker Modified: branches/lucene2_3_2/src/contribs/benchmarker/Main.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Main.cpp 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/benchmarker/Main.cpp 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,3 +1,9 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ #include "stdafx.h" #include "TestCLString.h" Modified: branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.cpp 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.cpp 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,3 +1,9 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ #include "stdafx.h" using namespace lucene::util; Modified: branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.h =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.h 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.h 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,3 +1,9 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ #pragma once int BenchmarkDocumentWriter(Timer*); Modified: branches/lucene2_3_2/src/contribs/benchmarker/Timer.h =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Timer.h 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/benchmarker/Timer.h 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,3 +1,9 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ #pragma once class Timer{ Modified: branches/lucene2_3_2/src/contribs/benchmarker/Unit.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Unit.cpp 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/benchmarker/Unit.cpp 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,3 +1,9 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ #include "stdafx.h" #include "Unit.h" Modified: branches/lucene2_3_2/src/contribs/benchmarker/Unit.h =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Unit.h 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/benchmarker/Unit.h 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,3 +1,9 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ #pragma once #include "CLucene/util/Misc.h" Modified: branches/lucene2_3_2/src/contribs/benchmarker/stdafx.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/stdafx.cpp 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/benchmarker/stdafx.cpp 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,3 +1,9 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ // stdafx.cpp : source file that includes just the standard includes // demo.pch will be the pre-compiled header // stdafx.obj will contain the pre-compiled type information Modified: branches/lucene2_3_2/src/contribs/benchmarker/stdafx.h =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/stdafx.h 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/benchmarker/stdafx.h 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,3 +1,9 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ // stdafx.h : include file for standard system include files, // or project specific include files that are used frequently, but // are changed infrequently Modified: branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt 2010-08-24 15:00:54 UTC (rev 3027) @@ -4,6 +4,7 @@ DEFINE_OPTIONS(EXTRA_OPTIONS EXTRA_LIBS) ADD_DEFINITIONS(${EXTRA_OPTIONS}) +INCLUDE_DIRECTORIES( ${_CL_BOOST_INCLUDE_PATH} ) INCLUDE_DIRECTORIES( ${clucene-contribs-lib-test_SOURCE_DIR} ) INCLUDE_DIRECTORIES( ${clucene-contribs-lib_SOURCE_DIR} ) @@ -13,7 +14,6 @@ ./contribTests.cpp ./TestHighlight.cpp ./TestSnowball.cpp - ./TestStreams.cpp ./TestUtf8.cpp ./TestAnalysis.cpp ./CuTest.cpp Modified: branches/lucene2_3_2/src/contribs/contribs-lib-test/TestAnalysis.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/contribs-lib-test/TestAnalysis.cpp 2010-08-24 13:42:35 UTC (rev 3026) +++ branches/lucene2_3_2/src/contribs/contribs-lib-test/TestAnalysis.cpp 2010-08-24 15:00:54 UTC (rev 3027) @@ -1,18 +1,9 @@ -/** - * Copyright 2003-2006 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. +------------------------------------------------------------------------------*/ #include "test.h" #include "CLucene/analysis/cjk/CJKAnalyzer.h" #include "CLucene/analysis/LanguageBasedAnalyzer.h" @@ -128,7 +119,7 @@ TCHAR tres[LUCENE_MAX_WORD_LEN]; while (results[pos] != NULL) { - CLUCENE_ASSERT(tokenizer->next(&tok) == true); + CLUCENE_ASSERT(tokenizer->next(&tok) != NULL); lucene_utf8towcs(tres, results[pos], LUCENE_MAX_WORD_LEN); CuAssertStrEquals(tc, _T("unexpected token value"), tres, tok.termBuffer()); @@ -166,9 +157,9 @@ a.setStem(false); ts = a.tokenStream(_T("contents"), &reader); - CLUCENE_ASSERT(ts->next(&t)); + CLUCENE_ASSERT(ts->next(&t) != NULL); CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("he")) == 0); - CLUCENE_ASSERT(ts->next(&t)); + CLUCENE_ASSERT(ts->next(&t) != NULL); CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("abhorred")) == 0); _CLDELETE(ts); @@ -178,11 +169,11 @@ a.setStem(true); ts = a.tokenStream(_T("contents"), &reader); - CLUCENE_ASSERT(ts->next(&t)); + CLUCENE_ASSERT(ts->next(&t) != NULL); CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("he")) == 0); - CLUCENE_ASSERT(ts->next(&t)); + CLUCENE_ASSERT(ts->next(&t) != NULL); CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("abhorred")) == 0); - CLUCENE_ASSERT(ts->next(&t)); + CLUCENE_ASSERT(ts->next(&t) != NULL); CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("accentuer")) == 0); _CLDELETE(ts); } Modified: branches/lucene2_3_2/src/contribs/contribs-lib-test/TestHighlight.cpp =================================================================== --- 
branches/lucene2_3_2/src/contribs/contribs-lib-test/Tes... [truncated message content] |
From: <ust...@us...> - 2010-08-24 13:42:48
|
Revision: 3026 http://clucene.svn.sourceforge.net/clucene/?rev=3026&view=rev Author: ustramooner Date: 2010-08-24 13:42:35 +0000 (Tue, 24 Aug 2010) Log Message: ----------- merged git master Modified Paths: -------------- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryTermExtractor.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp branches/lucene2_3_2/src/core/CLucene/CLConfig.h branches/lucene2_3_2/src/core/CLucene/CLMonolithic.cpp branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.cpp branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.h branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.h branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.h branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h branches/lucene2_3_2/src/core/CLucene/document/DateField.h branches/lucene2_3_2/src/core/CLucene/document/DateTools.cpp branches/lucene2_3_2/src/core/CLucene/document/DateTools.h branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.cpp branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriterThreadState.cpp branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.h branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.h 
branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.cpp branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.h branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp branches/lucene2_3_2/src/core/CLucene/index/SkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/TermInfosWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/Terms.h branches/lucene2_3_2/src/core/CLucene/index/_DocumentsWriter.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserConstants.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryToken.cpp branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/ConjunctionScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h branches/lucene2_3_2/src/core/CLucene/search/Hits.cpp branches/lucene2_3_2/src/core/CLucene/search/MatchAllDocsQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/MatchAllDocsQuery.h branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/Query.h branches/lucene2_3_2/src/core/CLucene/search/RangeFilter.cpp branches/lucene2_3_2/src/core/CLucene/search/RangeFilter.h branches/lucene2_3_2/src/core/CLucene/search/SearchHeader.h branches/lucene2_3_2/src/core/CLucene/store/Directory.cpp branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/IndexOutput.cpp branches/lucene2_3_2/src/core/CLucene/store/_RAMDirectory.h branches/lucene2_3_2/src/core/CLucene/util/Array.h branches/lucene2_3_2/src/core/CLucene/util/BitSet.cpp 
branches/lucene2_3_2/src/core/CLucene/util/BitSet.h branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp branches/lucene2_3_2/src/core/CLucene/util/_FastCharStream.h branches/lucene2_3_2/src/core/CLucene/util/_bufferedstream.h branches/lucene2_3_2/src/core/CLucene.h branches/lucene2_3_2/src/core/CMakeLists.txt branches/lucene2_3_2/src/core/files_list.txt branches/lucene2_3_2/src/demo/CMakeLists.txt branches/lucene2_3_2/src/demo/IndexFiles.cpp branches/lucene2_3_2/src/demo/Main.cpp branches/lucene2_3_2/src/demo/SearchFiles.cpp branches/lucene2_3_2/src/demo/Statistics.cpp branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h branches/lucene2_3_2/src/shared/CLucene/config/repl_tchar.h branches/lucene2_3_2/src/shared/CLucene/config/repl_wchar.h branches/lucene2_3_2/src/shared/CLucene/config/utf8.cpp branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h branches/lucene2_3_2/src/shared/CMakeLists.txt branches/lucene2_3_2/src/test/CMakeLists.txt branches/lucene2_3_2/src/test/CuTest.cpp branches/lucene2_3_2/src/test/CuTest.h branches/lucene2_3_2/src/test/analysis/TestAnalyzers.cpp branches/lucene2_3_2/src/test/document/TestDocument.cpp branches/lucene2_3_2/src/test/index/TestIndexReader.cpp branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp branches/lucene2_3_2/src/test/index/TestReuters.cpp branches/lucene2_3_2/src/test/index/TestThreading.cpp branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp branches/lucene2_3_2/src/test/search/TestDateFilter.cpp branches/lucene2_3_2/src/test/search/TestQueries.cpp branches/lucene2_3_2/src/test/store/TestStore.cpp branches/lucene2_3_2/src/test/test.h branches/lucene2_3_2/src/test/tests.cpp Added Paths: ----------- branches/lucene2_3_2/src/core/CLucene/search/ConstantScoreQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/ConstantScoreQuery.h branches/lucene2_3_2/src/test/document/TestDateTools.cpp 
branches/lucene2_3_2/src/test/index/TestIndexModifier.cpp branches/lucene2_3_2/src/test/search/TestRangeFilter.cpp branches/lucene2_3_2/src/test/util/TestBitSet.cpp Removed Paths: ------------- branches/lucene2_3_2/src/demo/stdafx.cpp branches/lucene2_3_2/src/demo/stdafx.h Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -373,7 +373,7 @@ } _CLDELETE_ARRAY(frags); frags = _CL_NEWARRAY(TextFragment*,fragTexts.size()+1); - fragTexts.toArray(frags); + fragTexts.toArray(frags, true); } _CLDELETE(tokenGroup); Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryTermExtractor.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryTermExtractor.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryTermExtractor.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -40,7 +40,7 @@ // Return extracted terms WeightedTerm** ret = _CL_NEWARRAY(WeightedTerm*,terms.size()+1); - terms.toArray(ret); + terms.toArray(ret, true); return ret; } @@ -95,7 +95,7 @@ // Return extracted terms WeightedTerm** ret = _CL_NEWARRAY(WeightedTerm*,terms.size()+1); - terms.toArray(ret); + terms.toArray(ret, true); return ret; } Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -97,8 +97,8 @@ } } */ //code 
to reconstruct the original sequence of Tokens - const TCHAR** terms=tpv->getTerms(); - const ValueArray<int32_t>* freq=tpv->getTermFrequencies(); + const ArrayBase<const TCHAR*>* terms = tpv->getTerms(); + const ArrayBase<int32_t>* freq=tpv->getTermFrequencies(); size_t totalTokens=0; for (int32_t i = 0; i < freq->length; i++) @@ -108,11 +108,11 @@ CLSetList<Token*,TokenOrderCompare>* unsortedTokens = NULL; for (int32_t t = 0; t < freq->length; t++) { - ArrayBase<TermVectorOffsetInfo*>* offsets=tpv->getOffsets(t); + const ArrayBase<TermVectorOffsetInfo*>* offsets=tpv->getOffsets(t); if(offsets==NULL) return NULL; - ValueArray<int32_t>* pos=NULL; + const ArrayBase<int32_t>* pos=NULL; if(tokenPositionsGuaranteedContiguous) { //try get the token position info to speed up assembly of tokens into sorted sequence @@ -129,7 +129,7 @@ unsortedTokens=_CLNEW CLSetList<Token*,TokenOrderCompare>(false); for (int32_t tp=0; tp < offsets->length; tp++) { - unsortedTokens->insert(_CLNEW Token(terms[t], + unsortedTokens->insert(_CLNEW Token((*terms)[t], (*offsets)[tp]->getStartOffset(), (*offsets)[tp]->getEndOffset())); } @@ -144,7 +144,7 @@ //tokens stored with positions - can use this to index straight into sorted array for (int32_t tp = 0; tp < pos->length; tp++) { - tokensInOriginalOrder[(*pos)[tp]]=_CLNEW Token(terms[t], + tokensInOriginalOrder[(*pos)[tp]]=_CLNEW Token((*terms)[t], (*offsets)[tp]->getStartOffset(), (*offsets)[tp]->getEndOffset()); } @@ -158,7 +158,7 @@ tokensInOriginalOrder = _CL_NEWARRAY(Token*,unsortedTokens->size()+1); } //the list has already sorted our items //todo:check that this is true... 
- unsortedTokens->toArray(tokensInOriginalOrder); + unsortedTokens->toArray(tokensInOriginalOrder, true); return _CLNEW StoredTokenStream(tokensInOriginalOrder,unsortedTokens->size()); }else Modified: branches/lucene2_3_2/src/core/CLucene/CLConfig.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/CLConfig.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/CLConfig.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -204,6 +204,21 @@ //////////////////////////////////////////////////////////////////// +// Date conversion settings for DateTools and DateField +//////////////////////////////////////////////////////////////////// +// +// DateField, which is now deprecated, had it's buffer size +// defined for 9 chars. DateTools currently is configured +// for 30 chars, but this needs to be revised after tests +// are written for those. +// +#define DATETOOLS_BUFFER_SIZE 30 +#define DATEFIELD_DATE_LEN DATETOOLS_BUFFER_SIZE +// +//////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////// // FuzzyQuery settings //////////////////////////////////////////////////////////////////// // Modified: branches/lucene2_3_2/src/core/CLucene/CLMonolithic.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/CLMonolithic.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/CLMonolithic.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -30,6 +30,7 @@ #include "CLucene/index/FieldInfos.cpp" #include "CLucene/index/FieldsReader.cpp" #include "CLucene/index/FieldsWriter.cpp" +#include "CLucene/index/IndexDeletionPolicy.cpp" #include "CLucene/index/IndexFileDeleter.cpp" #include "CLucene/index/IndexFileNameFilter.cpp" #include "CLucene/index/IndexFileNames.cpp" @@ -38,6 +39,7 @@ #include "CLucene/index/IndexReader.cpp" #include 
"CLucene/index/MergePolicy.cpp" #include "CLucene/index/MergeScheduler.cpp" +#include "CLucene/index/MultipleTermPositions.cpp" #include "CLucene/index/MultiReader.cpp" #include "CLucene/index/MultiSegmentReader.cpp" #include "CLucene/index/Payload.cpp" @@ -70,6 +72,7 @@ #include "CLucene/search/CachingWrapperFilter.cpp" #include "CLucene/search/ChainedFilter.cpp" #include "CLucene/search/Compare.cpp" +#include "CLucene/search/ConstantScoreQuery.cpp" #include "CLucene/search/DateFilter.cpp" #include "CLucene/search/ConjunctionScorer.cpp" #include "CLucene/search/DisjunctionSumScorer.cpp" @@ -85,6 +88,7 @@ #include "CLucene/search/HitQueue.cpp" #include "CLucene/search/IndexSearcher.cpp" #include "CLucene/search/MatchAllDocsQuery.cpp" +#include "CLucene/search/MultiPhraseQuery.cpp" #include "CLucene/search/MultiSearcher.cpp" #include "CLucene/search/MultiTermQuery.cpp" #include "CLucene/search/PhrasePositions.cpp" Modified: branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -14,22 +14,22 @@ CL_NS_DEF(analysis) struct Analyzer::Internal{ - CL_NS(util)::ThreadLocal<TokenStream*, - CL_NS(util)::Deletor::Object<TokenStream> >* tokenStreams; + CL_NS(util)::ThreadLocal<void*, + CL_NS(util)::Deletor::Object<void> >* tokenStreams; }; Analyzer::Analyzer(){ _internal = new Internal; - _internal->tokenStreams = _CLNEW CL_NS(util)::ThreadLocal<TokenStream*, - CL_NS(util)::Deletor::Object<TokenStream> >; + _internal->tokenStreams = _CLNEW CL_NS(util)::ThreadLocal<void*, + CL_NS(util)::Deletor::Object<void> >; } Analyzer::~Analyzer(){ - _CLDELETE(_internal->tokenStreams); + _CLLDELETE(_internal->tokenStreams); delete _internal; } -TokenStream* Analyzer::getPreviousTokenStream() { +void* 
Analyzer::getPreviousTokenStream() { return _internal->tokenStreams->get(); } -void Analyzer::setPreviousTokenStream(TokenStream* obj) { +void Analyzer::setPreviousTokenStream(void* obj) { _internal->tokenStreams->set(obj); } TokenStream* Analyzer::reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) { @@ -257,17 +257,17 @@ { } TokenFilter::~TokenFilter(){ - close(); + if ( deleteTokenStream && input!=NULL ) {input->close();_CLLDELETE( input );} + //close(); -- ISH 04/11/09 } // Close the input TokenStream. void TokenFilter::close() { if ( input != NULL ){ input->close(); - if ( deleteTokenStream ) - _CLDELETE( input ); + //if ( deleteTokenStream ) _CLDELETE( input ); -- ISH 04/11/09 } - input = NULL; + //input = NULL; -- ISH 04/11/09 } Modified: branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -189,7 +189,7 @@ NOTE: subclasses must override at least one of {@link #next()} or {@link #next(Token)}. */ -class CLUCENE_EXPORT TokenStream:LUCENE_BASE { +class CLUCENE_EXPORT TokenStream { public: /** Returns the next token in the stream, or null at EOS. * When possible, the input Token should be used as the @@ -247,7 +247,7 @@ * WARNING: You must override one of the methods defined by this class in your * subclass or the Analyzer will enter an infinite loop. */ -class CLUCENE_EXPORT Analyzer:LUCENE_BASE{ +class CLUCENE_EXPORT Analyzer{ public: Analyzer(); @@ -275,12 +275,12 @@ /** Used by Analyzers that implement reusableTokenStream * to retrieve previously saved TokenStreams for re-use * by the same thread. 
*/ - TokenStream* getPreviousTokenStream(); + void* getPreviousTokenStream(); /** Used by Analyzers that implement reusableTokenStream * to save a TokenStream for later re-use by the same * thread. */ - void setPreviousTokenStream(TokenStream* obj); + void setPreviousTokenStream(void* obj); public: /** * Invoked before indexing a Field instance if Modified: branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -181,7 +181,7 @@ StopFilter::~StopFilter(){ if (deleteStopTable) - _CLDELETE(stopWords); + _CLLDELETE(stopWords); } //static bool StopFilter::getEnablePositionIncrementsDefault() { @@ -252,9 +252,16 @@ { StopFilter::fillStopTable(stopTable,ENGLISH_STOP_WORDS); } +class StopAnalyzer::SavedStreams { +public: + Tokenizer* source; + TokenStream* result; +}; StopAnalyzer::~StopAnalyzer() { -_CLDELETE(stopTable); + SavedStreams* t = reinterpret_cast<SavedStreams*>(this->getPreviousTokenStream()); + if (t) _CLDELETE(t->result); + _CLDELETE(stopTable); } StopAnalyzer::StopAnalyzer( const TCHAR** stopWords): stopTable(_CLNEW CLTCSetList(true)) @@ -265,6 +272,19 @@ return _CLNEW StopFilter(_CLNEW LowerCaseTokenizer(reader),true, stopTable); } +/** Filters LowerCaseTokenizer with StopFilter. 
*/ +TokenStream* StopAnalyzer::reusableTokenStream(const TCHAR* fieldName, Reader* reader) { + SavedStreams* streams = reinterpret_cast<SavedStreams*>(getPreviousTokenStream()); + if (streams == NULL) { + streams = _CLNEW SavedStreams(); + streams->source = _CLNEW LowerCaseTokenizer(reader); + streams->result = _CLNEW StopFilter(streams->source, true, stopTable); + setPreviousTokenStream(streams); + } else + streams->source->reset(reader); + return streams->result; +} + const TCHAR* StopAnalyzer::ENGLISH_STOP_WORDS[] = { _T("a"), _T("an"), _T("and"), _T("are"), _T("as"), _T("at"), _T("be"), _T("but"), _T("by"), @@ -281,8 +301,8 @@ } PerFieldAnalyzerWrapper::~PerFieldAnalyzerWrapper(){ analyzerMap->clear(); - _CLDELETE(analyzerMap); - _CLDELETE(defaultAnalyzer); + _CLLDELETE(analyzerMap); + _CLLDELETE(defaultAnalyzer); } void PerFieldAnalyzerWrapper::addAnalyzer(const TCHAR* fieldName, Analyzer* analyzer) { @@ -290,7 +310,7 @@ } TokenStream* PerFieldAnalyzerWrapper::tokenStream(const TCHAR* fieldName, Reader* reader) { - Analyzer* analyzer = (fieldName==NULL?defaultAnalyzer:analyzerMap->get((TCHAR*)fieldName)); + Analyzer* analyzer = analyzerMap->get(const_cast<TCHAR*>(fieldName)); if (analyzer == NULL) { analyzer = defaultAnalyzer; } @@ -298,16 +318,17 @@ return analyzer->tokenStream(fieldName, reader); } -TokenStream* PerFieldAnalyzerWrapper::reusableTokenStream(TCHAR* fieldName, CL_NS(util)::Reader* reader) { - Analyzer* analyzer = analyzerMap->get(fieldName); - if (analyzer == NULL) +TokenStream* PerFieldAnalyzerWrapper::reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) { + Analyzer* analyzer = analyzerMap->get(const_cast<TCHAR*>(fieldName)); + if (analyzer == NULL){ analyzer = defaultAnalyzer; + } return analyzer->reusableTokenStream(fieldName, reader); } -int32_t PerFieldAnalyzerWrapper::getPositionIncrementGap(TCHAR* fieldName) { - Analyzer* analyzer = analyzerMap->get(fieldName); +int32_t 
PerFieldAnalyzerWrapper::getPositionIncrementGap(const TCHAR* fieldName) { + Analyzer* analyzer = analyzerMap->get(const_cast<TCHAR*>(fieldName)); if (analyzer == NULL) analyzer = defaultAnalyzer; return analyzer->getPositionIncrementGap(fieldName); @@ -511,22 +532,20 @@ if (!done) { done = true; int32_t upto = 0; - int32_t rd; token->clear(); - TCHAR* termBuffer=token->termBuffer(); - const TCHAR* readBuffer=NULL; - assert(false);//test me + if (token->termBuffer() == NULL) + token->growBuffer(10); // todo + const TCHAR* termBuffer=token->termBuffer(); + //assert(false);//test me; while (true) { - rd = input->read(readBuffer, 1, cl_min(bufferSize, token->bufferLength()-upto) ); - if (rd == -1) - break; + int32_t length = input->read(termBuffer, 1, cl_min(bufferSize, token->bufferLength()-upto) ); + if (length == -1) break; + upto += length; if ( upto == token->bufferLength() ){ - termBuffer = token->resizeTermBuffer(token->bufferLength() + 8); + termBuffer = token->resizeTermBuffer(token->bufferLength() + 8); // todo: compare to JL } - _tcsncpy(termBuffer + upto, readBuffer, rd); - upto += rd; } - termBuffer[upto]=0; + token->termBuffer()[upto]=0; token->setTermLength(upto); return token; } Modified: branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -89,8 +89,7 @@ WhitespaceTokenizer(CL_NS(util)::Reader* in); virtual ~WhitespaceTokenizer(); protected: - /** Collects only characters which do not satisfy _istspace. - */ + /** Collects only characters which do not satisfy _istspace.*/ bool isTokenChar(const TCHAR c) const; }; @@ -237,6 +236,7 @@ /** Filters LetterTokenizer with LowerCaseFilter and StopFilter. 
*/ class CLUCENE_EXPORT StopAnalyzer: public Analyzer { CLTCSetList* stopTable; + class SavedStreams; public: /** Builds an analyzer which removes words in ENGLISH_STOP_WORDS. */ @@ -258,7 +258,8 @@ /** Filters LowerCaseTokenizer with StopFilter. */ TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); - + TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); + /** An array containing some common English words that are not usually useful for searching. */ static const TCHAR* ENGLISH_STOP_WORDS[]; @@ -312,10 +313,10 @@ void addAnalyzer(const TCHAR* fieldName, Analyzer* analyzer); TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); - TokenStream* reusableTokenStream(TCHAR* fieldName, CL_NS(util)::Reader* reader); + TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); /** Return the positionIncrementGap from the analyzer assigned to fieldName */ - int32_t getPositionIncrementGap(TCHAR* fieldName); + int32_t getPositionIncrementGap(const TCHAR* fieldName); }; Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -20,19 +20,19 @@ CL_NS_DEF2(analysis,standard) StandardAnalyzer::StandardAnalyzer(): - stopSet(_CLNEW CLTCSetList(true)) + stopSet(_CLNEW CLTCSetList(true)), maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH) { StopFilter::fillStopTable( stopSet,CL_NS(analysis)::StopAnalyzer::ENGLISH_STOP_WORDS); } StandardAnalyzer::StandardAnalyzer( const TCHAR** stopWords): - stopSet(_CLNEW CLTCSetList(true)) + stopSet(_CLNEW CLTCSetList(true)), maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH) { StopFilter::fillStopTable( stopSet,stopWords ); } 
StandardAnalyzer::StandardAnalyzer(const char* stopwordsFile, const char* enc): - stopSet(_CLNEW CLTCSetList(true)) + stopSet(_CLNEW CLTCSetList(true)), maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH) { if ( enc == NULL ) enc = "ASCII"; @@ -40,28 +40,63 @@ } StandardAnalyzer::StandardAnalyzer(CL_NS(util)::Reader* stopwordsReader, const bool _bDeleteReader): - stopSet(_CLNEW CLTCSetList(true)) + stopSet(_CLNEW CLTCSetList(true)), maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH) { WordlistLoader::getWordSet(stopwordsReader, stopSet, _bDeleteReader); } + class StandardAnalyzer::SavedStreams { + public: + StandardTokenizer* tokenStream; + TokenStream* filteredTokenStream; + + SavedStreams():tokenStream(NULL), filteredTokenStream(NULL) + { + } + }; + StandardAnalyzer::~StandardAnalyzer(){ - _CLDELETE(stopSet); + SavedStreams* t = reinterpret_cast<SavedStreams*>(this->getPreviousTokenStream()); + if (t) _CLDELETE(t->filteredTokenStream); + _CLLDELETE(stopSet); } - TokenStream* StandardAnalyzer::tokenStream(const TCHAR* fieldName, Reader* reader) { BufferedReader* bufferedReader = reader->__asBufferedReader(); TokenStream* ret; if ( bufferedReader == NULL ) - ret = _CLNEW StandardTokenizer( _CLNEW FilteredBufferedReader(reader, false), true ); + ret = _CLNEW StandardTokenizer( _CLNEW FilteredBufferedReader(reader, false), true ); else ret = _CLNEW StandardTokenizer(bufferedReader); + //ret->setMaxTokenLength(maxTokenLength); ret = _CLNEW StandardFilter(ret,true); ret = _CLNEW LowerCaseFilter(ret,true); ret = _CLNEW StopFilter(ret,true, stopSet); return ret; } + + TokenStream* StandardAnalyzer::reusableTokenStream(const TCHAR* fieldName, Reader* reader){ + SavedStreams* streams = reinterpret_cast<SavedStreams*>(getPreviousTokenStream()); + if (streams == NULL) { + streams = _CLNEW SavedStreams(); + setPreviousTokenStream(streams); + + BufferedReader* bufferedReader = reader->__asBufferedReader(); + if ( bufferedReader == NULL ) + streams->tokenStream = _CLNEW StandardTokenizer( _CLNEW 
FilteredBufferedReader(reader, false), true); + else + streams->tokenStream = _CLNEW StandardTokenizer(bufferedReader); + + streams->filteredTokenStream = _CLNEW StandardFilter(streams->tokenStream, true); + streams->filteredTokenStream = _CLNEW LowerCaseFilter(streams->filteredTokenStream, true); + streams->filteredTokenStream = _CLNEW StopFilter(streams->filteredTokenStream, true, stopSet); + } else { + streams->tokenStream->reset(reader); + } + //streams->tokenStream->setMaxTokenLength(maxTokenLength); + + return streams->filteredTokenStream; + } CL_NS_END2 Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -21,7 +21,13 @@ { private: CLTCSetList* stopSet; + int32_t maxTokenLength; + + class SavedStreams; public: + /** Default maximum allowed token length */ + LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_MAX_TOKEN_LENGTH = 255); + /** Builds an analyzer.*/ StandardAnalyzer(); @@ -38,13 +44,32 @@ */ StandardAnalyzer(CL_NS(util)::Reader* stopwordsReader, const bool _bDeleteReader = false); - ~StandardAnalyzer(); + virtual ~StandardAnalyzer(); - /** - * Constructs a StandardTokenizer filtered by a - * StandardFilter, a LowerCaseFilter and a StopFilter. - */ - TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); + /** + * Constructs a StandardTokenizer filtered by a + * StandardFilter, a LowerCaseFilter and a StopFilter. + */ + TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); + + TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); + + /** + * Set maximum allowed token length. If a token is seen + * that exceeds this length then it is discarded. 
This + * setting only takes effect the next time tokenStream or + * reusableTokenStream is called. + */ + void setMaxTokenLength(const int32_t length) { + maxTokenLength = length; + } + + /** + * @see #setMaxTokenLength + */ + int getMaxTokenLength() const { + return maxTokenLength; + } }; CL_NS_END2 #endif Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -22,7 +22,7 @@ // Construct filtering <i>in</i>. StandardFilter(TokenStream* in, bool deleteTokenStream); - ~StandardFilter(); + virtual ~StandardFilter(); /** Returns the next token in the stream, or NULL at EOS. Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -131,6 +131,11 @@ return t; } + void StandardTokenizer::reset(Reader* _input) { + this->input = _input; + if (rd->input==NULL) rd->input = _input->__asBufferedReader(); + } + Token* StandardTokenizer::next(Token* t) { int ch=0; Modified: branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -57,7 +57,7 @@ // Constructs a tokenizer for this Reader. 
StandardTokenizer(CL_NS(util)::BufferedReader* reader, bool deleteReader=false); - ~StandardTokenizer(); + virtual ~StandardTokenizer(); /** Returns the next token in the stream, or false at end-of-stream. * The returned token's type is set to an element of @@ -80,6 +80,8 @@ // Reads CJK characters Token* ReadCJK(const TCHAR prev, Token* t); + + virtual void reset(CL_NS(util)::Reader* _input); }; CL_NS_END2 Modified: branches/lucene2_3_2/src/core/CLucene/document/DateField.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/document/DateField.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/document/DateField.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -14,8 +14,6 @@ //make date strings long enough to last a millenium #define DATEFIELD_DATE_MAX _ILONGLONG(31536000000000) //1000L*365*24*60*60*1000 -#define DATEFIELD_DATE_LEN 9 ////Long.toString(DATEFIELD_DATE_MAX, Character.MAX_RADIX).length() - /** * Provides support for converting dates to strings and vice-versa. * The strings are structured so that lexicographic sorting orders by date, @@ -35,9 +33,9 @@ * @deprecated If you build a new index, use {@link lucene::document::DateTools} instead. * This class is included for use with existing indices and will be removed in a future release. */ -class CLUCENE_EXPORT DateField :LUCENE_BASE { +class CLUCENE_EXPORT DateField { public: - ~DateField(); + virtual ~DateField(); /** * Converts a millisecond time to a string suitable for indexing. 
Modified: branches/lucene2_3_2/src/core/CLucene/document/DateTools.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/document/DateTools.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/document/DateTools.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -6,37 +6,23 @@ ------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" -#ifdef _CL_TIME_WITH_SYS_TIME -# include <sys/time.h> -# include <time.h> -#else -# if defined(_CL_HAVE_SYS_TIME_H) -# include <sys/time.h> -# else -# include <time.h> -# endif -#endif - -#ifdef _CL_HAVE_SYS_TIMEB_H -# include <sys/timeb.h> -#endif - #include "DateTools.h" #include "CLucene/util/Misc.h" CL_NS_USE(util) CL_NS_DEF(document) -#define DATETOOLS_BUFFER_SIZE 30 - TCHAR* DateTools::timeToString(const int64_t time, Resolution resolution /*= MILLISECOND_FORMAT*/) { TCHAR* buf = _CL_NEWARRAY(TCHAR, DATETOOLS_BUFFER_SIZE); timeToString(time, resolution, buf, DATETOOLS_BUFFER_SIZE); return buf; } -void DateTools::timeToString(const int64_t time, Resolution resolution, TCHAR* buf, size_t bufLength) { - time_t secs = time / 1000; +void DateTools::timeToString(const int64_t time, Resolution resolution, TCHAR* buf, size_t bufLength) +{ + // Take into account TZ and DST differences which may appear when using gmtime below + const int64_t diff_secs = getDifferenceFromGMT(); + time_t secs = time / 1000 + diff_secs; tm *ptm = gmtime(&secs); char abuf[DATETOOLS_BUFFER_SIZE]; @@ -62,9 +48,14 @@ STRCPY_AtoT(buf,abuf, bufLength); } +tm* DateTools::stringToDate(const TCHAR* dateString){ + const int64_t time = stringToTime(dateString); + time_t secs = time / 1000; + tm *ptm = gmtime(&secs); + return ptm; +} int64_t DateTools::stringToTime(const TCHAR* dateString) { - int64_t ret = 0; tm s_time; memset(&s_time, 0, sizeof(s_time)); s_time.tm_mday=1; @@ -168,14 +159,85 @@ } time_t t = mktime(&s_time); - if (t == 
-1) + if (t == -1) _CLTHROWA(CL_ERR_Parse, "Input is not valid date string"); - ret = (static_cast<int64_t>(t) * 1000) + ms; + // Get TZ difference in seconds, and calc it in + const int64_t diff_secs = getDifferenceFromGMT(); - return ret; + return (static_cast<int64_t>(t + diff_secs) * 1000) + ms; } +int64_t DateTools::getDifferenceFromGMT() +{ + struct tm *tptr; + time_t secs, local_secs, gmt_secs; + time( &secs ); // Current time in GMT + tptr = localtime( &secs ); + local_secs = mktime( tptr ); + tptr = gmtime( &secs ); + gmt_secs = mktime( tptr ); + return int64_t(local_secs - gmt_secs); +} + +int64_t DateTools::timeMakeInclusive(const int64_t time) +{ + time_t secs = time / 1000; + tm *ptm = localtime(&secs); // use localtime since mktime below will convert the time to GMT before returning + ptm->tm_hour = 23; + ptm->tm_min = 59; + ptm->tm_sec = 59; + + time_t t = mktime(ptm); + if (t == -1) + _CLTHROWA(CL_ERR_Parse, "Input is not a valid date"); + + return (static_cast<int64_t>(t) * 1000) + 999; +} + +int64_t DateTools::getTime(unsigned short year, uint8_t month, uint8_t mday, uint8_t hours, + uint8_t minutes, uint8_t seconds, unsigned short ms) +{ + struct tm* s_time; + + // get current time, and then change it according to the parameters + time_t rawtime; + time ( &rawtime ); + s_time = localtime ( &rawtime ); // use localtime, since mktime will take into account TZ differences + s_time->tm_isdst = 0; // since we are using gmtime all around, make sure DST is off + + s_time->tm_year = year - 1900; + s_time->tm_mon = month - 1; + s_time->tm_mday = mday; + s_time->tm_hour = hours; + s_time->tm_min = minutes; + s_time->tm_sec = seconds; + + time_t t = mktime(s_time); + if (t == -1) + _CLTHROWA(CL_ERR_Parse, "Input is not a valid date"); + + return (static_cast<int64_t>(t) * 1000) + ms; +} + +TCHAR* DateTools::getISOFormat(const int64_t time){ + const time_t secs = time / 1000; + const int64_t ms = abs((int32_t)((secs * 1000) - time)); + tm *ptm = 
gmtime(&secs); + return getISOFormat(ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday, ptm->tm_hour, ptm->tm_min, + ptm->tm_sec, ms); +} + +TCHAR* DateTools::getISOFormat(unsigned short year, uint8_t month, uint8_t mday, uint8_t hours, + uint8_t minutes, uint8_t seconds, unsigned short ms) +{ + TCHAR* ISOString = _CL_NEWARRAY(TCHAR, 24); + cl_stprintf(ISOString, 24, _T("%04d-%02d-%02d %02d:%02d:%02d:%03d"), year, month, mday, + hours, minutes, seconds, ms); + ISOString[23] = '\0'; + return ISOString; +} + DateTools::~DateTools(){ } Modified: branches/lucene2_3_2/src/core/CLucene/document/DateTools.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/document/DateTools.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/document/DateTools.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -7,10 +7,24 @@ #ifndef _lucene_document_DateTools_ #define _lucene_document_DateTools_ +#ifdef _CL_TIME_WITH_SYS_TIME +# include <sys/time.h> +# include <time.h> +#else +# if defined(_CL_HAVE_SYS_TIME_H) +# include <sys/time.h> +# else +# include <time.h> +# endif +#endif + +#ifdef _CL_HAVE_SYS_TIMEB_H +# include <sys/timeb.h> +#endif + CL_NS_DEF(document) - -class CLUCENE_EXPORT DateTools :LUCENE_BASE { +class CLUCENE_EXPORT DateTools { public: enum Resolution { @@ -48,7 +62,32 @@ */ static int64_t stringToTime(const TCHAR* dateString); - ~DateTools(); + static tm* stringToDate(const TCHAR* dateString); + + /**** + + * CLucene specific methods + + *****/ + + /** + * Returns a 64bit time value based on the parameters passed + */ + static int64_t getTime(unsigned short year, uint8_t month, uint8_t mday, uint8_t hours = 0, + uint8_t minutes = 0, uint8_t seconds = 0, unsigned short ms = 0); + + /** + * Returns a 64bit time value which is inclusive of the whole last day. 
+ */ + static int64_t timeMakeInclusive(const int64_t time); + + inline static int64_t getDifferenceFromGMT(); + + static TCHAR* getISOFormat(const int64_t time); + static TCHAR* getISOFormat(unsigned short year, uint8_t month, uint8_t mday, uint8_t hours = 0, + uint8_t minutes = 0, uint8_t seconds = 0, unsigned short ms = 0); + + virtual ~DateTools(); }; CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -73,6 +73,8 @@ writeLock->release(); // release write lock writeLock = NULL; } + + _CLLDELETE(deleter); } else commitChanges(); Modified: branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriter.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriter.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -109,15 +109,25 @@ docStoreOffset = nextDocID = numDocsInRAM = numDocsInStore = nextWriteDocID = 0; } DocumentsWriter::~DocumentsWriter(){ - _CLDELETE(bufferedDeleteTerms); - _CLDELETE(skipListWriter); - _CLDELETE_ARRAY(copyByteBuffer); - _CLDELETE(_files); - _CLDELETE(fieldInfos); + _CLLDELETE(bufferedDeleteTerms); + _CLLDELETE(skipListWriter); + _CLDELETE_LARRAY(copyByteBuffer); + _CLLDELETE(_files); + _CLLDELETE(fieldInfos); for(size_t i=0;i<threadStates.length;i++) { - _CLDELETE(threadStates.values[i]); + _CLLDELETE(threadStates.values[i]); } + + // Make sure unused posting slots aren't attempted delete on + if (this->postingsFreeListDW.values){ + if (this->postingsFreeCountDW > this->postingsFreeListDW.length) { + memset(this->postingsFreeListDW.values + 
this->postingsFreeCountDW + , NULL + , sizeof(Posting*)); + } + postingsFreeListDW.deleteUntilNULL(); + } } void DocumentsWriter::setInfoStream(std::ostream* infoStream) { @@ -1322,7 +1332,7 @@ numToFree = postingsFreeChunk; else numToFree = this->postingsFreeCountDW; - for ( size_t i = this->postingsFreeCountDW-numToFree;i< this->postingsFreeListDW.length; i++ ){ + for ( size_t i = this->postingsFreeCountDW-numToFree;i< this->postingsFreeCountDW; i++ ){ _CLDELETE(this->postingsFreeListDW.values[i]); } this->postingsFreeCountDW -= numToFree; Modified: branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriterThreadState.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriterThreadState.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriterThreadState.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -852,7 +852,7 @@ if (!field->isTokenized()) { // un-tokenized field const TCHAR* stringValue = field->stringValue(); - size_t valueLength = _tcslen(stringValue); + const size_t valueLength = _tcslen(stringValue); Token* token = localToken; token->clear(); Modified: branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -294,6 +294,7 @@ uncompress(*b, data); }_CLFINALLY( _CLDELETE(b) ) +#ifndef _ASCII //convert to utf8 TCHAR* result = _CL_NEWARRAY(TCHAR, data.length); size_t l = lucene_utf8towcs(result, (const char*)data.values, data.length); @@ -303,12 +304,17 @@ if ( l < data.length/2 ){ TCHAR* tmp = result; result = STRDUP_TtoT(result); - _CLDELETE_ARRAY(tmp); + _CLDELETE_LCARRAY(tmp); } f = _CLNEW Field(fi->name, // field name result, // uncompress the value and add as string 
bits, false); +#else + f = _CLNEW Field(fi->name, // field name + reinterpret_cast<char*>(data.values), // uncompress the value and add as string + bits, false); +#endif f->setOmitNorms(fi->omitNorms); } else { bits |= Field::STORE_YES; @@ -445,6 +451,7 @@ _resetValue(); uncompress(b, uncompressed); //no need to catch error, memory all in frame +#ifndef _ASCII TCHAR* str = _CL_NEWARRAY(TCHAR, uncompressed.length); size_t l = lucene_utf8towcs(str, (const char*)uncompressed.values, uncompressed.length); str[l] = 0; @@ -452,11 +459,14 @@ if ( l < uncompressed.length/2 ){ //too pesimistic with size... fieldsData = STRDUP_TtoT(str); - _CLDELETE_ARRAY(str); + _CLDELETE_LCARRAY(str); }else{ fieldsData = str; } - } else { +#else + fieldsData = uncompressed.values; +#endif + } else { //read in chars b/c we already know the length we need to read TCHAR* chars = _CL_NEWARRAY(TCHAR, toRead+1); localFieldsStream->readChars(chars, 0, toRead); Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -11,6 +11,7 @@ #include "IndexReader.h" #include "CLucene/store/FSDirectory.h" #include "CLucene/document/Document.h" +#include "MergeScheduler.h" CL_NS_DEF(index) CL_NS_USE(util) @@ -28,19 +29,21 @@ } void IndexModifier::init(Directory* directory, Analyzer* analyzer, bool create) { - indexWriter = NULL; + indexWriter = NULL; indexReader = NULL; - this->analyzer = analyzer; open = false; + infoStream = NULL; useCompoundFile = true; this->maxBufferedDocs = IndexWriter::DEFAULT_MAX_BUFFERED_DOCS; this->maxFieldLength = IndexWriter::DEFAULT_MAX_FIELD_LENGTH; this->mergeFactor = IndexWriter::DEFAULT_MERGE_FACTOR; - this->directory = _CL_POINTER(directory); - createIndexReader(); - open = true; + 
this->directory = _CL_POINTER(directory); + SCOPED_LOCK_MUTEX(directory->THIS_LOCK) + this->analyzer = analyzer; + indexWriter = _CLNEW IndexWriter(directory, analyzer, create); + open = true; } IndexModifier::~IndexModifier(){ @@ -60,10 +63,16 @@ _CLDELETE(indexReader); } indexWriter = _CLNEW IndexWriter(directory, analyzer, false); + // IndexModifier cannot use ConcurrentMergeScheduler + // because it synchronizes on the directory which can + // cause deadlock + indexWriter->setMergeScheduler(_CLNEW SerialMergeScheduler()); + indexWriter->setInfoStream(infoStream); indexWriter->setUseCompoundFile(useCompoundFile); - //indexWriter->setMaxBufferedDocs(maxBufferedDocs); + if (maxBufferedDocs != IndexWriter::DISABLE_AUTO_FLUSH) + indexWriter->setMaxBufferedDocs(maxBufferedDocs); indexWriter->setMaxFieldLength(maxFieldLength); - //indexWriter->setMergeFactor(mergeFactor); + indexWriter->setMergeFactor(mergeFactor); } } @@ -191,13 +200,13 @@ } void IndexModifier::close() { + SCOPED_LOCK_MUTEX(directory->THIS_LOCK) if (!open) - return; - SCOPED_LOCK_MUTEX(directory->THIS_LOCK) + _CLTHROWA(CL_ERR_IllegalState, "Index is closed already"); if (indexWriter != NULL) { indexWriter->close(); _CLDELETE(indexWriter); - } else { + } else if (indexReader != NULL) { indexReader->close(); _CLDELETE(indexReader); } Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -15,10 +15,8 @@ CL_CLASS_DEF(index, Term) CL_CLASS_DEF(index, TermDocs) CL_CLASS_DEF(index, TermEnum) + #include "CLucene/analysis/AnalysisHeader.h" -//#include "Term.h" -//#include "IndexWriter.h" -//#include "IndexReader.h" CL_NS_DEF(index) @@ -77,7 +75,7 @@ * * @deprecated Please use {@link IndexWriter} instead. 
*/ -class CLUCENE_EXPORT IndexModifier :LUCENE_BASE{ +class CLUCENE_EXPORT IndexModifier { protected: IndexWriter* indexWriter; IndexReader* indexReader; @@ -87,6 +85,7 @@ bool open; // Lucene defaults: + std::ostream* infoStream; bool useCompoundFile; int32_t maxBufferedDocs; int32_t maxFieldLength; @@ -103,8 +102,6 @@ * <code>false</code> to append to the existing index */ IndexModifier(CL_NS(store)::Directory* directory, CL_NS(analysis)::Analyzer* analyzer, bool create); - - ~IndexModifier(); /** * Open an index with write access. @@ -115,6 +112,8 @@ * <code>false</code> to append to the existing index */ IndexModifier(const char* dirName, CL_NS(analysis)::Analyzer* analyzer, bool create); + + virtual ~IndexModifier(); protected: Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -97,7 +97,7 @@ // Destroys the instance and releases the writeLock if needed //Pre - true //Post - The instance has been destroyed if pre(writeLock) exists is has been released - _CLDELETE(_internal); + _CLLDELETE(_internal); } IndexReader* IndexReader::open(const char* path, bool closeDirectoryOnCleanup, IndexDeletionPolicy* deletionPolicy){ Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -73,18 +73,19 @@ IndexWriter::~IndexWriter(){ if (writeLock != NULL) { writeLock->release(); // release write lock - _CLDELETE(writeLock); + _CLLDELETE(writeLock); } - _CLDELETE(segmentInfos); - _CLDELETE(mergingSegments); - 
_CLDELETE(pendingMerges); - _CLDELETE(runningMerges); - _CLDELETE(mergeExceptions); - _CLDELETE(segmentsToOptimize); - _CLDELETE(mergeScheduler); - _CLDELETE(mergePolicy); - _CLDELETE(deleter); - _CLDELETE(docWriter); + _CLLDELETE(segmentInfos); + _CLLDELETE(mergingSegments); + _CLLDELETE(pendingMerges); + _CLLDELETE(runningMerges); + _CLLDELETE(mergeExceptions); + _CLLDELETE(segmentsToOptimize); + _CLLDELETE(mergeScheduler); + _CLLDELETE(mergePolicy); + _CLLDELETE(deleter); + _CLLDELETE(docWriter); + if (bOwnsDirectory) _CLLDECDELETE(directory); delete _internal; } @@ -148,19 +149,19 @@ return termIndexInterval; } -IndexWriter::IndexWriter(const char* path, Analyzer* a, bool create){ - init(FSDirectory::getDirectory(path), a, create, true, (IndexDeletionPolicy*)NULL, true); +IndexWriter::IndexWriter(const char* path, Analyzer* a, bool create):bOwnsDirectory(true){ + init(FSDirectory::getDirectory(path, create), a, create, true, (IndexDeletionPolicy*)NULL, true); } -IndexWriter::IndexWriter(Directory* d, Analyzer* a, bool create, bool closeDir){ +IndexWriter::IndexWriter(Directory* d, Analyzer* a, bool create, bool closeDir):bOwnsDirectory(false){ init(d, a, create, closeDir, NULL, true); } -IndexWriter::IndexWriter(Directory* d, bool autoCommit, Analyzer* a, IndexDeletionPolicy* deletionPolicy, bool closeDirOnShutdown){ +IndexWriter::IndexWriter(Directory* d, bool autoCommit, Analyzer* a, IndexDeletionPolicy* deletionPolicy, bool closeDirOnShutdown):bOwnsDirectory(false){ init(d, a, closeDirOnShutdown, deletionPolicy, autoCommit); } -IndexWriter::IndexWriter(Directory* d, bool autoCommit, Analyzer* a, bool create, IndexDeletionPolicy* deletionPolicy, bool closeDirOnShutdown){ +IndexWriter::IndexWriter(Directory* d, bool autoCommit, Analyzer* a, bool create, IndexDeletionPolicy* deletionPolicy, bool closeDirOnShutdown):bOwnsDirectory(false){ init(d, a, create, closeDirOnShutdown, deletionPolicy, autoCommit); } @@ -172,7 +173,8 @@ } } -void 
IndexWriter::init(Directory* d, Analyzer* a, const bool create, const bool closeDir, IndexDeletionPolicy* deletionPolicy, const bool autoCommit){ +void IndexWriter::init(Directory* d, Analyzer* a, const bool create, const bool closeDir, + IndexDeletionPolicy* deletionPolicy, const bool autoCommit){ this->_internal = new Internal(this); this->termIndexInterval = IndexWriter::DEFAULT_TERM_INDEX_INTERVAL; this->mergeScheduler = _CLNEW SerialMergeScheduler(); //TODO: implement and use ConcurrentMergeScheduler @@ -410,6 +412,7 @@ return IndexWriter::defaultInfoStream; } +//TODO: infoStream - unicode void IndexWriter::setInfoStream(std::ostream* infoStream) { ensureOpen(); this->infoStream = infoStream; @@ -596,8 +599,8 @@ try { CompoundFileWriter cfsWriter(directory, compoundFileName.c_str()); - const int32_t size = files.size(); - for(int32_t i=0;i<size;i++) + const size_t size = files.size(); + for(size_t i=0;i<size;++i) cfsWriter.addFile(files[i].c_str()); // Perform the merge @@ -2215,6 +2218,7 @@ reader->doCommit(); } _CLFINALLY ( reader->doClose(); + _CLLDELETE(reader); ) } ) @@ -2287,8 +2291,10 @@ if (deleteIds.size() > 0) { vector<int32_t>::const_iterator iter2 = deleteIds.begin(); - while(iter2 != deleteIds.end() ) + while (iter2 != deleteIds.end()){ reader->deleteDocument(*iter2); + ++iter2; + } } } Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -236,6 +236,7 @@ // where this index resides CL_NS(store)::Directory* directory; + bool bOwnsDirectory; int32_t getSegmentsCounter(); @@ -270,7 +271,7 @@ SegmentInfos* segmentInfos; // Release the write lock, if needed. 
- ~IndexWriter(); + virtual ~IndexWriter(); /** * The Java implementation of Lucene silently truncates any tokenized Modified: branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -93,8 +93,14 @@ termPositions.toArray(tps, true); _termPositionsQueue = _CLNEW TermPositionsQueue(tps,terms->length); + _CLDELETE_LARRAY(tps); } +MultipleTermPositions::~MultipleTermPositions() { + _CLLDELETE(_termPositionsQueue); + _CLLDELETE(_posList); +} + bool MultipleTermPositions::next() { if (_termPositionsQueue->size() == 0) return false; @@ -114,6 +120,7 @@ else { _termPositionsQueue->pop(); tp->close(); + _CLLDELETE(tp); } } while (_termPositionsQueue->size() > 0 && _termPositionsQueue->peek()->doc() == _doc); @@ -132,8 +139,10 @@ TermPositions* tp = _termPositionsQueue->pop(); if (tp->skipTo(target)) _termPositionsQueue->put(tp); - else + else { tp->close(); + _CLLDELETE(tp); + } } return next(); } @@ -147,8 +156,11 @@ } void MultipleTermPositions::close() { - while (_termPositionsQueue->size() > 0) - _termPositionsQueue->pop()->close(); + while (_termPositionsQueue->size() > 0) { + TermPositions* tp = _termPositionsQueue->pop(); + tp->close(); + _CLLDELETE(tp); + } } CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -32,7 +32,7 @@ * @exception IOException */ MultipleTermPositions(IndexReader* indexReader, const CL_NS(util)::ArrayBase<Term*>* terms); - virtual 
~MultipleTermPositions(){} + virtual ~MultipleTermPositions(); bool next(); Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -51,11 +51,7 @@ } int32_t SegmentTermPositions::nextPosition() { - /* todo: DSR:CL_BUG: Should raise exception if proxCount == 0 at the - ** beginning of this method, as in - ** if (--proxCount == 0) throw ...; - ** The JavaDocs for TermPositions.nextPosition declare this constraint, - ** but CLucene doesn't enforce it. */ + // perform lazy skips if neccessary lazySkip(); proxCount--; return position += readDeltaPosition(); @@ -107,7 +103,7 @@ needToLoadPayload = false; } -void SegmentTermPositions::skipPositions(int32_t n) { +void SegmentTermPositions::skipPositions(const int32_t n) { for ( int32_t f = n; f > 0; f-- ) { // skip unread positions readDeltaPosition(); skipPayload(); Modified: branches/lucene2_3_2/src/core/CLucene/index/SkipListReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SkipListReader.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/SkipListReader.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -121,7 +121,7 @@ for (int32_t i = 1; i < maxNumberOfSkipLevels; i++) { if (skipStream[i] != NULL) { //skipStream[i]->close(); - _CLLDELETE(skipStream[i]); + _CLDELETE(skipStream[i]); // ISH: We actually do need to nullify pointer here } } } Modified: branches/lucene2_3_2/src/core/CLucene/index/TermInfosWriter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/TermInfosWriter.cpp 2009-09-27 13:29:39 UTC (rev 3025) +++ 
branches/lucene2_3_2/src/core/CLucene/index/TermInfosWriter.cpp 2010-08-24 13:42:35 UTC (rev 3026) @@ -144,12 +144,22 @@ //Pre - Term must be lexicographically greater than all previous Terms added. // Pointers of TermInfo ti (freqPointer and proxPointer) must be positive and greater than all previous. +// TODO: This is a hack. If _ASCII is defined, Misc::toString(const TCHAR*, int) will cause linking errors, +// at least on VS. Needs a prettier fix no doubt... ISH 2009-11-08 +#ifdef _ASCII + assert(compareToLastTerm(fieldNumber, termText, termTextLength) < 0 || + (isIndex && termTextLength == 0 && lastTermTextLength == 0)); +#else CND_PRECONDITION(compareToLastTerm(fieldNumber, termText, termTextLength) < 0 || (isIndex && termTextLength == 0 && lastTermTextLength == 0), - (string("Terms are out of order: field=") + Misc::toString(fieldInfos->fieldName(fieldNumber)) + " (number " + Misc::toString(fieldNumber) + ")" + - " lastField=" + Misc::toString(fieldInfos->fieldName(lastFieldNumber)) + " (number " + Misc::toString(lastFieldNumber) + ")" + - " text=" + Misc::toString(termText, termTextLength) + " lastText=" + Misc::toString(lastTermText.values, lastTermTextLength) + (string("Terms are out of order: field=") + Misc::toString(fieldInfos->fieldName(fieldNumber)) + + " (number " + Misc::toString(fieldNumber) + ")" + + " lastField=" + Misc::toString(fieldInfos->fieldName(lastFieldNumber)) + + " (number " + Misc::toString(lastFieldNumber) + ")" + + " text=" + Misc::toString(termText, termTextLength) + + " lastText=" + Misc::toString(lastTermText.values, lastTermTextLength) ).c_str() ); +#endif CND_PRECONDITION(ti->freqPointer >= lastTi->freqPointer, ("freqPointer out of order (" + Misc::toString(ti->freqPointer) + " < " + Misc::toString(lastTi->freqPointer) + ")").c_str()); CND_PRECONDITION(ti->proxPointer >= lastTi->proxPointer, ("proxPointer out of order (" + Misc::toString(ti->proxPointer) + " < " + Misc::toString(lastTi->proxPointer) + ")").c_str()); Modified: 
branches/lucene2_3_2/src/core/CLucene/index/Terms.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/Terms.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/Terms.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -23,7 +23,7 @@ @see IndexReader#termDocs() */ -class CLUCENE_EXPORT TermDocs: LUCENE_BASE { +class CLUCENE_EXPORT TermDocs { public: virtual ~TermDocs(){ } Modified: branches/lucene2_3_2/src/core/CLucene/index/_DocumentsWriter.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_DocumentsWriter.h 2009-09-27 13:29:39 UTC (rev 3025) +++ branches/lucene2_3_2/src/core/CLucene/index/_DocumentsWriter.h 2010-08-24 13:42:35 UTC (rev 3026) @@ -228,7 +228,7 @@ int32_t endIndex; public: ByteSliceReader(); - ~ByteSliceReader(); + virtual ~B... [truncated message content] |
From: Itamar Syn-H. <it...@di...> - 2010-08-08 09:11:23
|
Hi all, For a year now there hasn't been a single commit to our SVN repository, making it out-of-date. Development with git feels more natural for us at CLucene, and it has been quite active recently. Since git repos can also be accessed with SVN tools (see below), we feel there is no point in mirroring our work on SVN. Therefore, this list is officially marked as dead, and may be deleted soon. I'm looking into providing a way of pushing commit notifications to those interested, as SF doesn't seem to support this with git. For now - you can watch the github repository (see below). This e-mail is also a reminder for any of you who still haven't updated your local copy to the latest version from our git master HEAD to do so. It is much faster than any of our previous versions, and we are actively working on killing bugs and memory leaks on it. I take this opportunity to thank you all for using CLucene. And as always - feedback, testing and patches are welcome. Details on the 2_3_2 branch - which the git master is based on - can be found here: http://clucene.sourceforge.net/download.shtml <http://clucene.sourceforge.net/download.shtml> Our main git repository: http://clucene.git.sourceforge.net/git/gitweb.cgi?p=clucene/clucene;a=summary I maintain a mirror of this repository on github (master branch only): http://github.com/synhershko/clucene Users who still prefer SVN can access the github repo using SVN tools. Just checkout as: svn checkout http://svn.github.com/synhershko/clucene.git <http://github.com/blog/626-announcing-svn-support> Tarballs and zips of the current trees can be downloaded from both Web UIs. Itamar. |
From: <ust...@us...> - 2009-09-27 13:29:45
|
Revision: 3025 http://clucene.svn.sourceforge.net/clucene/?rev=3025&view=rev Author: ustramooner Date: 2009-09-27 13:29:39 +0000 (Sun, 27 Sep 2009) Log Message: ----------- Merge commit 'origin/master' Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.h branches/lucene2_3_2/src/test/CMakeLists.txt branches/lucene2_3_2/src/test/test.h branches/lucene2_3_2/src/test/tests.cpp Added Paths: ----------- branches/lucene2_3_2/.gitignore branches/lucene2_3_2/src/test/search/TestBoolean.cpp Added: branches/lucene2_3_2/.gitignore =================================================================== --- branches/lucene2_3_2/.gitignore (rev 0) +++ branches/lucene2_3_2/.gitignore 2009-09-27 13:29:39 UTC (rev 3025) @@ -0,0 +1,11 @@ +# git-ls-files --others --exclude-from=.git/info/exclude +# Lines that start with '#' are comments. +# For a project mostly in C, the following would be a good set of +# exclude patterns (uncomment them if you want to use them): +# *.[oa] +# *~ +obj +bin +msvc +build +~$* \ No newline at end of file Modified: branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp 2009-09-27 11:33:44 UTC (rev 3024) +++ branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp 2009-09-27 13:29:39 UTC (rev 3025) @@ -109,7 +109,9 @@ return maxClauseCount; } - void BooleanQuery::setMaxClauseCount(size_t maxClauseCount){ + void BooleanQuery::setMaxClauseCount(const size_t maxClauseCount){ + if (maxClauseCount < 1) + _CLTHROWA(CL_ERR_IllegalArgument, "maxClauseCount must be >= 1"); BooleanQuery::maxClauseCount = maxClauseCount; } Modified: branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.h 
2009-09-27 11:33:44 UTC (rev 3024) +++ branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.h 2009-09-27 13:29:39 UTC (rev 3025) @@ -51,7 +51,7 @@ static size_t getMaxClauseCount(); /** Set the maximum number of clauses permitted. */ - static void setMaxClauseCount(size_t maxClauseCount); + static void setMaxClauseCount(const size_t maxClauseCount); /** Adds a clause to a boolean query. Clauses may be: * <ul> Modified: branches/lucene2_3_2/src/test/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/test/CMakeLists.txt 2009-09-27 11:33:44 UTC (rev 3024) +++ branches/lucene2_3_2/src/test/CMakeLists.txt 2009-09-27 13:29:39 UTC (rev 3025) @@ -29,6 +29,7 @@ ./document/TestDocument.cpp ./document/TestNumberTools.cpp ./store/TestStore.cpp +./search/TestBoolean.cpp ./search/TestDateFilter.cpp ./search/TestForDuplicates.cpp ./search/TestQueries.cpp Added: branches/lucene2_3_2/src/test/search/TestBoolean.cpp =================================================================== --- branches/lucene2_3_2/src/test/search/TestBoolean.cpp (rev 0) +++ branches/lucene2_3_2/src/test/search/TestBoolean.cpp 2009-09-27 13:29:39 UTC (rev 3025) @@ -0,0 +1,181 @@ +/*------------------------------------------------------------------------------ +* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team +* +* Distributable under the terms of either the Apache License (Version 2.0) or +* the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +#include "test.h" + +/// TestBooleanQuery.java, ported 5/9/2009 +void testEquality(CuTest *tc) { + BooleanQuery* bq1 = _CLNEW BooleanQuery(); + Term* t = _CLNEW Term(_T("field"), _T("value1")); + bq1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); + _CLDECDELETE(t); + t = _CLNEW Term(_T("field"), _T("value2")); + bq1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); + _CLDECDELETE(t); + BooleanQuery* nested1 = _CLNEW BooleanQuery(); + t = _CLNEW Term(_T("field"), _T("nestedvalue1")); + nested1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); + _CLDECDELETE(t); + t = _CLNEW Term(_T("field"), _T("nestedvalue2")); + nested1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); + _CLDECDELETE(t); + bq1->add(nested1, true, BooleanClause::SHOULD); + + BooleanQuery* bq2 = _CLNEW BooleanQuery(); + t = _CLNEW Term(_T("field"), _T("value1")); + bq2->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); + _CLDECDELETE(t); + t = _CLNEW Term(_T("field"), _T("value2")); + bq2->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); + _CLDECDELETE(t); + BooleanQuery* nested2 = _CLNEW BooleanQuery(); + t = _CLNEW Term(_T("field"), _T("nestedvalue1")); + nested2->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); + _CLDECDELETE(t); + t = _CLNEW Term(_T("field"), _T("nestedvalue2")); + nested2->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); + _CLDECDELETE(t); + bq2->add(nested2, true, BooleanClause::SHOULD); + + CLUCENE_ASSERT(bq1->equals(bq2)); + + _CLLDELETE(bq1); + _CLLDELETE(bq2); +} +void testException(CuTest *tc) { + try { + BooleanQuery::setMaxClauseCount(0); + CuFail(tc, _T("setMaxClauseCount(0) did not throw an exception")); + } catch (CLuceneError&) { + // okay + } +} + +/// TestBooleanScorer.java, ported 5/9/2009 +void testBooleanScorer(CuTest *tc) { + const TCHAR* FIELD = _T("category"); + RAMDirectory directory; + + TCHAR* values[] = { _T("1"), _T("2"), 
_T("3"), _T("4"), NULL}; + + try { + WhitespaceAnalyzer a; + IndexWriter* writer = _CLNEW IndexWriter(&directory, &a, true); + for (size_t i = 0; values[i]!=NULL; i++) { + Document* doc = _CLNEW Document(); + doc->add(*_CLNEW Field(FIELD, values[i], Field::STORE_YES | Field::INDEX_TOKENIZED)); + writer->addDocument(doc); + _CLLDELETE(doc); + } + writer->close(); + _CLLDELETE(writer); + + BooleanQuery* booleanQuery1 = _CLNEW BooleanQuery(); + Term *t = _CLNEW Term(FIELD, _T("1")); + booleanQuery1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); + _CLDECDELETE(t); + t = _CLNEW Term(FIELD, _T("2")); + booleanQuery1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); + _CLDECDELETE(t); + + BooleanQuery* query = _CLNEW BooleanQuery(); + query->add(booleanQuery1, true, BooleanClause::MUST); + t = _CLNEW Term(FIELD, _T("9")); + query->add(_CLNEW TermQuery(t), true, BooleanClause::MUST_NOT); + _CLDECDELETE(t); + + IndexSearcher *indexSearcher = _CLNEW IndexSearcher(&directory); + Hits *hits = indexSearcher->search(query); + CLUCENE_ASSERT(2 == hits->length()); // Number of matched documents + _CLLDELETE(hits); + _CLLDELETE(indexSearcher); + + _CLLDELETE(query); + } + catch (CLuceneError& e) { + CuFail(tc, e.twhat()); + } +} + +/// TestBooleanPrefixQuery.java, ported 5/9/2009 +void testBooleanPrefixQuery(CuTest* tc) { + RAMDirectory directory; + WhitespaceAnalyzer a; + + TCHAR* categories[] = {_T("food"), _T("foodanddrink"), + _T("foodanddrinkandgoodtimes"), _T("food and drink"), NULL}; + + Query* rw1 = NULL; + Query* rw2 = NULL; + try { + IndexWriter* writer = _CLNEW IndexWriter(&directory, &a, true); + for (size_t i = 0; categories[i]!=NULL; i++) { + Document* doc = new Document(); + doc->add(*_CLNEW Field(_T("category"), categories[i], Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + writer->addDocument(doc); + _CLLDELETE(doc); + } + writer->close(); + _CLLDELETE(writer); + + IndexReader* reader = IndexReader::open(&directory); + Term* t = _CLNEW 
Term(_T("category"), _T("foo")); + PrefixQuery* query = _CLNEW PrefixQuery(t); + _CLDECDELETE(t); + + rw1 = query->rewrite(reader); + + BooleanQuery* bq = _CLNEW BooleanQuery(); + bq->add(query, true, BooleanClause::MUST); + + rw2 = bq->rewrite(reader); + + reader->close(); // TODO: check necessity (_CLLDELETE(reader) alone will not do the same cleanup) + + _CLLDELETE(reader); + _CLLDELETE(bq); + } catch (CLuceneError& e) { + CuFail(tc, e.twhat()); + } + + BooleanQuery* bq1 = NULL; + if (rw1->instanceOf(BooleanQuery::getClassName())) { + bq1 = (BooleanQuery*) rw1; + } + + BooleanQuery* bq2 = NULL; + if (rw2->instanceOf(BooleanQuery::getClassName())) { + bq2 = (BooleanQuery*) rw2; + } else { + CuFail(tc, _T("Rewrite")); + } + + bool bClausesMatch = bq1->getClauseCount() == bq2->getClauseCount(); + + _CLLDELETE(rw1); + _CLLDELETE(rw2); + + if (!bClausesMatch) { + CuFail(tc, _T("Number of Clauses Mismatch")); + } +} + +CuSuite *testBoolean(void) +{ + CuSuite *suite = CuSuiteNew(_T("CLucene Boolean Tests")); + + SUITE_ADD_TEST(suite, testEquality); + SUITE_ADD_TEST(suite, testException); + + SUITE_ADD_TEST(suite, testBooleanScorer); + + SUITE_ADD_TEST(suite, testBooleanPrefixQuery); + + //_CrtSetBreakAlloc(1179); + + return suite; +} +// EOF Modified: branches/lucene2_3_2/src/test/test.h =================================================================== --- branches/lucene2_3_2/src/test/test.h 2009-09-27 11:33:44 UTC (rev 3024) +++ branches/lucene2_3_2/src/test/test.h 2009-09-27 13:29:39 UTC (rev 3025) @@ -57,6 +57,8 @@ CuSuite *testreuters(void); CuSuite *testdocument(void); CuSuite *testNumberTools(void); +CuSuite *testDateTools(void); +CuSuite *testBoolean(void); class English{ public: Modified: branches/lucene2_3_2/src/test/tests.cpp =================================================================== --- branches/lucene2_3_2/src/test/tests.cpp 2009-09-27 11:33:44 UTC (rev 3024) +++ branches/lucene2_3_2/src/test/tests.cpp 2009-09-27 13:29:39 UTC (rev 3025) @@ -20,6 
+20,7 @@ {"priorityqueue", testpriorityqueue}, {"queryparser", testQueryParser}, {"mfqueryparser", testMultiFieldQueryParser}, + {"boolean", testBoolean}, {"search", testsearch}, {"queries", testqueries}, {"termvector",testtermvector}, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-09-27 11:33:58
|
Revision: 3024 http://clucene.svn.sourceforge.net/clucene/?rev=3024&view=rev Author: ustramooner Date: 2009-09-27 11:33:44 +0000 (Sun, 27 Sep 2009) Log Message: ----------- svn merge fix for master/svn Modified Paths: -------------- branches/lucene2_3_2/ChangeLog branches/lucene2_3_2/LGPL.license branches/lucene2_3_2/cmake/CLuceneDocs.cmake branches/lucene2_3_2/cmake/CreateClucenePackages.cmake branches/lucene2_3_2/cmake/DefineOptions.cmake branches/lucene2_3_2/doc/coding standards.txt branches/lucene2_3_2/doc/doxygen.css.cmake branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.cpp branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.h branches/lucene2_3_2/src/contribs/benchmarker/Main.cpp branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.cpp branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.h branches/lucene2_3_2/src/contribs/benchmarker/Timer.h branches/lucene2_3_2/src/contribs/benchmarker/Unit.cpp branches/lucene2_3_2/src/contribs/benchmarker/Unit.h branches/lucene2_3_2/src/contribs/benchmarker/stdafx.cpp branches/lucene2_3_2/src/contribs/benchmarker/stdafx.h branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/PorterStemmer.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/PorterStemmer.h branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryTermExtractor.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp 
branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/Snowball.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballAnalyzer.h branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipcompressstream.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipcompressstream.h branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/util/gzipinputstream.h branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h branches/lucene2_3_2/src/core/CLucene/debug/error.cpp branches/lucene2_3_2/src/core/CLucene/debug/error.h branches/lucene2_3_2/src/core/CLucene/document/DateField.cpp branches/lucene2_3_2/src/core/CLucene/document/Document.cpp branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriterThreadState.cpp branches/lucene2_3_2/src/core/CLucene/index/FieldInfos.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexDeletionPolicy.h branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.cpp branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.h branches/lucene2_3_2/src/core/CLucene/index/Payload.h branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp branches/lucene2_3_2/src/core/CLucene/index/SkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/_SkipListReader.h branches/lucene2_3_2/src/core/CLucene/index/_Term.h branches/lucene2_3_2/src/core/CLucene/index/_TermInfosReader.h branches/lucene2_3_2/src/core/CLucene/index/_TermInfosWriter.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParserBase.cpp branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.h branches/lucene2_3_2/src/core/CLucene/search/BooleanScorer2.cpp 
branches/lucene2_3_2/src/core/CLucene/search/CachingWrapperFilter.cpp branches/lucene2_3_2/src/core/CLucene/search/CachingWrapperFilter.h branches/lucene2_3_2/src/core/CLucene/search/ConjunctionScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/IndexSearcher.cpp branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.h branches/lucene2_3_2/src/core/CLucene/search/Query.h branches/lucene2_3_2/src/core/CLucene/search/ScorerDocQueue.cpp branches/lucene2_3_2/src/core/CLucene/search/SearchHeader.cpp branches/lucene2_3_2/src/core/CLucene/search/Similarity.cpp branches/lucene2_3_2/src/core/CLucene/search/_BooleanScorer2.h branches/lucene2_3_2/src/core/CLucene/search/_ConjunctionScorer.h branches/lucene2_3_2/src/core/CLucene/search/_DisjunctionSumScorer.h branches/lucene2_3_2/src/core/CLucene/search/_PhrasePositions.h branches/lucene2_3_2/src/core/CLucene/store/IndexOutput.cpp branches/lucene2_3_2/src/core/CLucene/store/IndexOutput.h branches/lucene2_3_2/src/core/CLucene/store/MMapInput.cpp branches/lucene2_3_2/src/core/CLucene/store/_MMap.h branches/lucene2_3_2/src/core/CLucene/store/_RAMDirectory.h branches/lucene2_3_2/src/core/CLucene/util/CLStreams.h branches/lucene2_3_2/src/core/CLucene/util/PriorityQueue.h branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp branches/lucene2_3_2/src/core/CLucene/util/VoidList.h branches/lucene2_3_2/src/core/CLucene/util/_ThreadLocal.h branches/lucene2_3_2/src/core/files_list.txt branches/lucene2_3_2/src/shared/CLucene/SharedHeader.cpp branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h branches/lucene2_3_2/src/shared/CLucene/config/_threads.h branches/lucene2_3_2/src/shared/CLucene/config/repl_tprintf.cpp branches/lucene2_3_2/src/shared/CLucene/config/repl_wctype.h 
branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h branches/lucene2_3_2/src/shared/README branches/lucene2_3_2/src/shared/cmake/CheckErrorHandling.cmake branches/lucene2_3_2/src/shared/cmake/CheckFloatByte.cmake branches/lucene2_3_2/src/shared/cmake/CheckFloatByte.cpp.in branches/lucene2_3_2/src/shared/cmake/CheckHashmaps.cmake branches/lucene2_3_2/src/shared/cmake/CheckNamespace.cmake branches/lucene2_3_2/src/shared/cmake/CheckPthread.cmake branches/lucene2_3_2/src/shared/cmake/CheckSnprintf.cmake branches/lucene2_3_2/src/shared/cmake/CheckStdCallFunctionExists.cmake branches/lucene2_3_2/src/shared/cmake/CheckStdCallFunctionExists.cpp.in branches/lucene2_3_2/src/shared/cmake/DefineDword.cmake branches/lucene2_3_2/src/shared/cmake/DefineFloat.cmake branches/lucene2_3_2/src/shared/cmake/DefineLongLongSyntax.cmake branches/lucene2_3_2/src/shared/cmake/DefineMAXPATHValue.cmake branches/lucene2_3_2/src/shared/cmake/DefineStaticSyntax.cmake branches/lucene2_3_2/src/shared/cmake/MacroChooseFunction.cmake branches/lucene2_3_2/src/shared/cmake/MacroChooseMisc.cmake branches/lucene2_3_2/src/shared/cmake/MacroChooseSymbol.cmake branches/lucene2_3_2/src/shared/cmake/MacroChooseType.cmake branches/lucene2_3_2/src/shared/cmake/MacroGetVariableValue.c.in branches/lucene2_3_2/src/shared/cmake/MacroGetVariableValue.cmake branches/lucene2_3_2/src/shared/cmake/Macro_ChooseStatus.cmake branches/lucene2_3_2/src/test/analysis/TestAnalysis.cpp branches/lucene2_3_2/src/test/data/StopWords.test branches/lucene2_3_2/src/test/document/TestDocument.cpp branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp branches/lucene2_3_2/src/test/search/TestForDuplicates.cpp branches/lucene2_3_2/src/test/search/TestQueries.cpp branches/lucene2_3_2/src/test/search/TestSearch.cpp branches/lucene2_3_2/src/test/search/TestTermVector.cpp 
branches/lucene2_3_2/src/test/util/TestPriorityQueue.cpp Modified: branches/lucene2_3_2/ChangeLog =================================================================== --- branches/lucene2_3_2/ChangeLog 2009-09-27 08:59:45 UTC (rev 3023) +++ branches/lucene2_3_2/ChangeLog 2009-09-27 11:33:44 UTC (rev 3024) @@ -1,17 +1,17 @@ -Removed jstreams namespace. Sorry, I couldn't think of a way to nicely deprecate jstreams. - -version 0.9.23: -Changes: -* Static object fields have been changed to method accessors (SortField::FIELDDOC now chould be accessed as SortField::FIELDDOC(), for example). Classes changed: FieldCache, ScoreDocComparator, - This was necessary for creating static libraries work on certain platforms. -* Folders were reorganised, this seems like a good time to do it -* Some deprecated functions were removed. -* moved platform configuration type code and utility code into 'shared' project. This enables tests to be built with a shared library on windows -* Moved moved of the platform specific logic into cmake in order to reduce #ifdefs in code (i love cmake!) -* added contributions code into the trunk. this will hopefully mean more exposure to the contributions. need to make clear about the licensing still, though. -* Deletor::Array was renamed to Deletor::vArray. -* re-worked the install location for system-dependent files (clucene-config.h). this was a confusing issue, and i think it's better to stick to the standards rather than push the more compatible (in my opinion) way of doing things. this one has been getting so many complaints from downstream. however, LUCENE_SYS_INCLUDES is available to install the clucene-config.h type files into the library directory (or any other place). - -Here is a summary of changes that you'll need to look at for this release: -* Action deprecated features. Some features that were deprecated for a long time have now been finally removed. 
-* fix things that may affect you, such as the LUCENE_SYS_INCLUDES change, and the reorganisation of code (install locations are still the same though). Also autotools removals may affect your work, depending on how you use clucene. +Removed jstreams namespace. Sorry, I couldn't think of a way to nicely deprecate jstreams. + +version 0.9.23: +Changes: +* Static object fields have been changed to method accessors (SortField::FIELDDOC now chould be accessed as SortField::FIELDDOC(), for example). Classes changed: FieldCache, ScoreDocComparator, + This was necessary for creating static libraries work on certain platforms. +* Folders were reorganised, this seems like a good time to do it +* Some deprecated functions were removed. +* moved platform configuration type code and utility code into 'shared' project. This enables tests to be built with a shared library on windows +* Moved moved of the platform specific logic into cmake in order to reduce #ifdefs in code (i love cmake!) +* added contributions code into the trunk. this will hopefully mean more exposure to the contributions. need to make clear about the licensing still, though. +* Deletor::Array was renamed to Deletor::vArray. +* re-worked the install location for system-dependent files (clucene-config.h). this was a confusing issue, and i think it's better to stick to the standards rather than push the more compatible (in my opinion) way of doing things. this one has been getting so many complaints from downstream. however, LUCENE_SYS_INCLUDES is available to install the clucene-config.h type files into the library directory (or any other place). + +Here is a summary of changes that you'll need to look at for this release: +* Action deprecated features. Some features that were deprecated for a long time have now been finally removed. +* fix things that may affect you, such as the LUCENE_SYS_INCLUDES change, and the reorganisation of code (install locations are still the same though). 
Also autotools removals may affect your work, depending on how you use clucene. Modified: branches/lucene2_3_2/LGPL.license =================================================================== --- branches/lucene2_3_2/LGPL.license 2009-09-27 08:59:45 UTC (rev 3023) +++ branches/lucene2_3_2/LGPL.license 2009-09-27 11:33:44 UTC (rev 3024) @@ -1,475 +1,475 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1, February 1999 - - Copyright (C) 1991, 1999 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -[This is the first released version of the Lesser GPL. It also counts - as the successor of the GNU Library Public License, version 2, hence - the version number 2.1.] - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -Licenses are intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. - - This license, the Lesser General Public License, applies to some -specially designated software packages--typically libraries--of the -Free Software Foundation and other authors who decide to use it. You -can use it too, but we suggest you first think carefully about whether -this license or the ordinary General Public License is the better -strategy to use in any particular case, based on the explanations below. - - When we speak of free software, we are referring to freedom of use, -not price. Our General Public Licenses are designed to make sure that -you have the freedom to distribute copies of free software (and charge -for this service if you wish); that you receive source code or can get -it if you want it; that you can change the software and use pieces of -it in new free programs; and that you are informed that you can do -these things. 
- - To protect your rights, we need to make restrictions that forbid -distributors to deny you these rights or to ask you to surrender these -rights. These restrictions translate to certain responsibilities for -you if you distribute copies of the library or if you modify it. - - For example, if you distribute copies of the library, whether gratis -or for a fee, you must give the recipients all the rights that we gave -you. You must make sure that they, too, receive or can get the source -code. If you link other code with the library, you must provide -complete object files to the recipients, so that they can relink them -with the library after making changes to the library and recompiling -it. And you must show them these terms so they know their rights. - - We protect your rights with a two-step method: (1) we copyright the -library, and (2) we offer you this license, which gives you legal -permission to copy, distribute and/or modify the library. - - To protect each distributor, we want to make it very clear that -there is no warranty for the free library. Also, if the library is -modified by someone else and passed on, the recipients should know -that what they have is not the original version, so that the original -author's reputation will not be affected by problems that might be -introduced by others. - -------------------------------------------------------------------------------- - - Finally, software patents pose a constant threat to the existence of -any free program. We wish to make sure that a company cannot -effectively restrict the users of a free program by obtaining a -restrictive license from a patent holder. Therefore, we insist that -any patent license obtained for a version of the library must be -consistent with the full freedom of use specified in this license. - - Most GNU software, including some libraries, is covered by the -ordinary GNU General Public License. 
This license, the GNU Lesser -General Public License, applies to certain designated libraries, and -is quite different from the ordinary General Public License. We use -this license for certain libraries in order to permit linking those -libraries into non-free programs. - - When a program is linked with a library, whether statically or using -a shared library, the combination of the two is legally speaking a -combined work, a derivative of the original library. The ordinary -General Public License therefore permits such linking only if the -entire combination fits its criteria of freedom. The Lesser General -Public License permits more lax criteria for linking other code with -the library. - - We call this license the "Lesser" General Public License because it -does Less to protect the user's freedom than the ordinary General -Public License. It also provides other free software developers Less -of an advantage over competing non-free programs. These disadvantages -are the reason we use the ordinary General Public License for many -libraries. However, the Lesser license provides advantages in certain -special circumstances. - - For example, on rare occasions, there may be a special need to -encourage the widest possible use of a certain library, so that it becomes -a de-facto standard. To achieve this, non-free programs must be -allowed to use the library. A more frequent case is that a free -library does the same job as widely used non-free libraries. In this -case, there is little to gain by limiting the free library to free -software only, so we use the Lesser General Public License. - - In other cases, permission to use a particular library in non-free -programs enables a greater number of people to use a large body of -free software. For example, permission to use the GNU C Library in -non-free programs enables many more people to use the whole GNU -operating system, as well as its variant, the GNU/Linux operating -system. 
- - Although the Lesser General Public License is Less protective of the -users' freedom, it does ensure that the user of a program that is -linked with the Library has the freedom and the wherewithal to run -that program using a modified version of the Library. - - The precise terms and conditions for copying, distribution and -modification follow. Pay close attention to the difference between a -"work based on the library" and a "work that uses the library". The -former contains code derived from the library, whereas the latter must -be combined with the library in order to run. - -------------------------------------------------------------------------------- - - GNU LESSER GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License Agreement applies to any software library or other -program which contains a notice placed by the copyright holder or -other authorized party saying it may be distributed under the terms of -this Lesser General Public License (also called "this License"). -Each licensee is addressed as "you". - - A "library" means a collection of software functions and/or data -prepared so as to be conveniently linked with application programs -(which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work -which has been distributed under these terms. A "work based on the -Library" means either the Library or any derivative work under -copyright law: that is to say, a work containing the Library or a -portion of it, either verbatim or with modifications and/or translated -straightforwardly into another language. (Hereinafter, translation is -included without limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for -making modifications to it. 
For a library, complete source code means -all the source code for all modules it contains, plus any associated -interface definition files, plus the scripts used to control compilation -and installation of the library. - - Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running a program using the Library is not restricted, and output from -such a program is covered only if its contents constitute a work based -on the Library (independent of the use of the Library in a tool for -writing it). Whether that is true depends on what the Library does -and what the program that uses the Library does. - - 1. You may copy and distribute verbatim copies of the Library's -complete source code as you receive it, in any medium, provided that -you conspicuously and appropriately publish on each copy an -appropriate copyright notice and disclaimer of warranty; keep intact -all the notices that refer to this License and to the absence of any -warranty; and distribute a copy of this License along with the -Library. - - You may charge a fee for the physical act of transferring a copy, -and you may at your option offer warranty protection in exchange for a -fee. - -------------------------------------------------------------------------------- - - 2. You may modify your copy or copies of the Library or any portion -of it, thus forming a work based on the Library, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices - stating that you changed the files and the date of any change. - - c) You must cause the whole of the work to be licensed at no - charge to all third parties under the terms of this License. 
- - d) If a facility in the modified Library refers to a function or a - table of data to be supplied by an application program that uses - the facility, other than as an argument passed when the facility - is invoked, then you must make a good faith effort to ensure that, - in the event an application does not supply such function or - table, the facility still operates, and performs whatever part of - its purpose remains meaningful. - - (For example, a function in a library to compute square roots has - a purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must - be optional: if the application does not supply it, the square - root function must still compute square roots.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Library, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Library, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote -it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Library. - -In addition, mere aggregation of another work not based on the Library -with the Library (or with a work based on the Library) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. 
You may opt to apply the terms of the ordinary GNU General Public -License instead of this License to a given copy of the Library. To do -this, you must alter all the notices that refer to this License, so -that they refer to the ordinary GNU General Public License, version 2, -instead of to this License. (If a newer version than version 2 of the -ordinary GNU General Public License has appeared, then you can specify -that version instead if you wish.) Do not make any other change in -these notices. - -------------------------------------------------------------------------------- - - Once this change is made in a given copy, it is irreversible for -that copy, so the ordinary GNU General Public License applies to all -subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of -the Library into a program that is not a library. - - 4. You may copy and distribute the Library (or a portion or -derivative of it, under Section 2) in object code or executable form -under the terms of Sections 1 and 2 above provided that you accompany -it with the complete corresponding machine-readable source code, which -must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange. - - If distribution of object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the -source code from the same place satisfies the requirement to -distribute the source code, even though third parties are not -compelled to copy the source along with the object code. - - 5. A program that contains no derivative of any portion of the -Library, but is designed to work with the Library by being compiled or -linked with it, is called a "work that uses the Library". Such a -work, in isolation, is not a derivative work of the Library, and -therefore falls outside the scope of this License. 
- - However, linking a "work that uses the Library" with the Library -creates an executable that is a derivative of the Library (because it -contains portions of the Library), rather than a "work that uses the -library". The executable is therefore covered by this License. -Section 6 states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file -that is part of the Library, the object code for the work may be a -derivative work of the Library even though the source code is not. -Whether this is true is especially significant if the work can be -linked without the Library, or if the work is itself a library. The -threshold for this to be true is not precisely defined by law. - - If such an object file uses only numerical parameters, data -structure layouts and accessors, and small macros and small inline -functions (ten lines or less in length), then the use of the object -file is unrestricted, regardless of whether it is legally a derivative -work. (Executables containing this object code plus portions of the -Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may -distribute the object code for the work under the terms of Section 6. -Any executables containing that work also fall under Section 6, -whether or not they are linked directly with the Library itself. - -------------------------------------------------------------------------------- - - 6. As an exception to the Sections above, you may also combine or -link a "work that uses the Library" with the Library to produce a -work containing portions of the Library, and distribute that work -under terms of your choice, provided that the terms permit -modification of the work for the customer's own use and reverse -engineering for debugging such modifications. 
- - You must give prominent notice with each copy of the work that the -Library is used in it and that the Library and its use are covered by -this License. You must supply a copy of this License. If the work -during execution displays copyright notices, you must include the -copyright notice for the Library among them, as well as a reference -directing the user to the copy of this License. Also, you must do one -of these things: - - a) Accompany the work with the complete corresponding - machine-readable source code for the Library including whatever - changes were used in the work (which must be distributed under - Sections 1 and 2 above); and, if the work is an executable linked - with the Library, with the complete machine-readable "work that - uses the Library", as object code and/or source code, so that the - user can modify the Library and then relink to produce a modified - executable containing the modified Library. (It is understood - that the user who changes the contents of definitions files in the - Library will not necessarily be able to recompile the application - to use the modified definitions.) - - b) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (1) uses at run time a - copy of the library already present on the user's computer system, - rather than copying library functions into the executable, and (2) - will operate properly with a modified version of the library, if - the user installs one, as long as the modified version is - interface-compatible with the version that the work was made with. - - c) Accompany the work with a written offer, valid for at - least three years, to give the same user the materials - specified in Subsection 6a, above, for a charge no more - than the cost of performing this distribution. 
- - d) If distribution of the work is made by offering access to copy - from a designated place, offer equivalent access to copy the above - specified materials from the same place. - - e) Verify that the user has already received a copy of these - materials or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the -Library" must include any data and utility programs needed for -reproducing the executable from it. However, as a special exception, -the materials to be distributed need not include anything that is -normally distributed (in either source or binary form) with the major -components (compiler, kernel, and so on) of the operating system on -which the executable runs, unless that component itself accompanies -the executable. - - It may happen that this requirement contradicts the license -restrictions of other proprietary libraries that do not normally -accompany the operating system. Such a contradiction means you cannot -use both them and the Library together in an executable that you -distribute. - -------------------------------------------------------------------------------- - - 7. You may place library facilities that are a work based on the -Library side-by-side in a single library together with other library -facilities not covered by this License, and distribute such a combined -library, provided that the separate distribution of the work based on -the Library and of the other library facilities is otherwise -permitted, and provided that you do these two things: - - a) Accompany the combined library with a copy of the same work - based on the Library, uncombined with any other library - facilities. This must be distributed under the terms of the - Sections above. - - b) Give prominent notice with the combined library of the fact - that part of it is a work based on the Library, and explaining - where to find the accompanying uncombined form of the same work. - - 8. 
You may not copy, modify, sublicense, link with, or distribute -the Library except as expressly provided under this License. Any -attempt otherwise to copy, modify, sublicense, link with, or -distribute the Library is void, and will automatically terminate your -rights under this License. However, parties who have received copies, -or rights, from you under this License will not have their licenses -terminated so long as such parties remain in full compliance. - - 9. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Library or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Library (or any work based on the -Library), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Library or works based on it. - - 10. Each time you redistribute the Library (or any work based on the -Library), the recipient automatically receives a license from the -original licensor to copy, distribute, link with or modify the Library -subject to these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties with -this License. - -------------------------------------------------------------------------------- - - 11. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. 
If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Library at all. For example, if a patent -license would not permit royalty-free redistribution of the Library by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply, -and the section as a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 12. If the distribution and/or use of the Library is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Library under this License may add -an explicit geographical distribution limitation excluding those countries, -so that distribution is permitted only in or among countries not thus -excluded. In such case, this License incorporates the limitation as if -written in the body of this License. - - 13. 
The Free Software Foundation may publish revised and/or new -versions of the Lesser General Public License from time to time. -Such new versions will be similar in spirit to the present version, -but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Library -specifies a version number of this License which applies to it and -"any later version", you have the option of following the terms and -conditions either of that version or of any later version published by -the Free Software Foundation. If the Library does not specify a -license version number, you may choose any version ever published by -the Free Software Foundation. - -------------------------------------------------------------------------------- - - 14. If you wish to incorporate parts of the Library into other free -programs whose distribution conditions are incompatible with these, -write to the author to ask for permission. For software which is -copyrighted by the Free Software Foundation, write to the Free -Software Foundation; we sometimes make exceptions for this. Our -decision will be guided by the two goals of preserving the free status -of all derivatives of our free software and of promoting the sharing -and reuse of software generally. - - NO WARRANTY - - 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. - - END OF TERMS AND CONDITIONS - + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. 
Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + +------------------------------------------------------------------------------- + + Finally, software patents pose a constant threat to the existence of +any free program. 
We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. 
In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + +------------------------------------------------------------------------------- + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. 
A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + +------------------------------------------------------------------------------- + + 2. 
You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + +------------------------------------------------------------------------------- + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + +------------------------------------------------------------------------------- + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + +------------------------------------------------------------------------------- + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. 
You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + +------------------------------------------------------------------------------- + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. 
+ +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an... [truncated message content] |
From: <ust...@us...> - 2009-09-27 08:59:52
|
Revision: 3023 http://clucene.svn.sourceforge.net/clucene/?rev=3023&view=rev Author: ustramooner Date: 2009-09-27 08:59:45 +0000 (Sun, 27 Sep 2009) Log Message: ----------- fixed a bug in MultiSearcher refactoring, refactored to use ArrayBase<Query*> instead of Query** Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/search/MultiSearcher.cpp branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.h branches/lucene2_3_2/src/core/CLucene/search/Query.h branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.h branches/lucene2_3_2/src/core/CLucene/search/SearchHeader.cpp Modified: branches/lucene2_3_2/src/core/CLucene/search/MultiSearcher.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/MultiSearcher.cpp 2009-09-27 08:58:40 UTC (rev 3022) +++ branches/lucene2_3_2/src/core/CLucene/search/MultiSearcher.cpp 2009-09-27 08:59:45 UTC (rev 3023) @@ -214,12 +214,12 @@ return _CLNEW TopFieldDocs (totalHits, fieldDocs, hqlen, hqFields); } - Query* MultiSearcher::rewrite(Query* original) { - Query** queries = _CL_NEWARRAY(Query*,searchablesLen+1); - for (int32_t i = 0; i < searchablesLen; ++i) - queries[i] = searchables[i]->rewrite(original); - queries[searchablesLen]=NULL; - return original->combine(queries); + Query* MultiSearcher::rewrite(Query* query) { + // this is a bit of a hack. We know that a query which + // creates a Weight based on this Dummy-Searcher is + // always already rewritten (see preparedWeight()). 
+ // Therefore we just return the unmodified query here + return query; } void MultiSearcher::explain(Query* query, int32_t doc, Explanation* ret) { Modified: branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp 2009-09-27 08:58:40 UTC (rev 3022) +++ branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.cpp 2009-09-27 08:59:45 UTC (rev 3023) @@ -80,9 +80,9 @@ return query; } - Query* MultiTermQuery::combine(Query** queries) { + Query* MultiTermQuery::combine(CL_NS(util)::ArrayBase<Query*>* queries) { return Query::mergeBooleanQueries(queries); - } + } /** Prints a user-readable version of this query. */ TCHAR* MultiTermQuery::toString(const TCHAR* field) const{ Modified: branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h 2009-09-27 08:58:40 UTC (rev 3022) +++ branches/lucene2_3_2/src/core/CLucene/search/MultiTermQuery.h 2009-09-27 08:59:45 UTC (rev 3023) @@ -43,20 +43,20 @@ /** Construct the enumeration to be used, expanding the pattern term. */ virtual FilteredTermEnum* getEnum(CL_NS(index)::IndexReader* reader) = 0; public: - /** Constructs a query for terms matching <code>term</code>. */ - MultiTermQuery(CL_NS(index)::Term* t); + /** Constructs a query for terms matching <code>term</code>. */ + MultiTermQuery(CL_NS(index)::Term* t); - virtual ~MultiTermQuery(); + virtual ~MultiTermQuery(); - /** Returns the pattern term. */ - CL_NS(index)::Term* getTerm(bool pointer=true) const; + /** Returns the pattern term. */ + CL_NS(index)::Term* getTerm(bool pointer=true) const; - Query* combine(Query** queries); + Query* combine(CL_NS(util)::ArrayBase<Query*>* queries); - /** Prints a user-readable version of this query. 
*/ - TCHAR* toString(const TCHAR* field) const; + /** Prints a user-readable version of this query. */ + TCHAR* toString(const TCHAR* field) const; - virtual Query* rewrite(CL_NS(index)::IndexReader* reader); + virtual Query* rewrite(CL_NS(index)::IndexReader* reader); }; CL_NS_END #endif Modified: branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.cpp 2009-09-27 08:58:40 UTC (rev 3022) +++ branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.cpp 2009-09-27 08:59:45 UTC (rev 3023) @@ -151,7 +151,7 @@ return query; } - Query* PrefixQuery::combine(Query** queries) { + Query* PrefixQuery::combine(CL_NS(util)::ArrayBase<Query*>* queries) { return Query::mergeBooleanQueries(queries); } Modified: branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.h 2009-09-27 08:58:40 UTC (rev 3022) +++ branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.h 2009-09-27 08:59:45 UTC (rev 3023) @@ -40,7 +40,7 @@ /** Returns the prefix of this query. 
*/ CL_NS(index)::Term* getPrefix(bool pointer=true); - Query* combine(Query** queries); + Query* combine(CL_NS(util)::ArrayBase<Query*>* queries); Query* rewrite(CL_NS(index)::IndexReader* reader); Query* clone() const; bool equals(Query * other) const; Modified: branches/lucene2_3_2/src/core/CLucene/search/Query.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/Query.h 2009-09-27 08:58:40 UTC (rev 3022) +++ branches/lucene2_3_2/src/core/CLucene/search/Query.h 2009-09-27 08:59:45 UTC (rev 3023) @@ -8,7 +8,7 @@ #define _lucene_search_Query_h -//#include "CLucene/index/IndexReader.h" +#include "CLucene/util/Array.h" CL_CLASS_DEF(index,IndexReader) //#include "Filter.h" //#include "Sort.h" @@ -67,20 +67,26 @@ /** Expert: called to re-write queries into primitive queries. */ virtual Query* rewrite(CL_NS(index)::IndexReader* reader); - + /** Expert: called when re-writing queries under MultiSearcher. * - * <p>Only implemented by derived queries, with no - * {@link #_createWeight(Searcher)} implementatation. - */ - virtual Query* combine(Query** queries); + * Create a single query suitable for use by all subsearchers (in 1-1 + * correspondence with queries). This is an optimization of the OR of + * all queries. We handle the common optimization cases of equal + * queries and overlapping clauses of boolean OR queries (as generated + * by MultiTermQuery.rewrite() and RangeQuery.rewrite()). + * Be careful overriding this method as queries[0] determines which + * method will be called and is not necessarily of the same type as + * the other queries. + */ + virtual Query* combine(CL_NS(util)::ArrayBase<Query*>* queries); /** Expert: merges the clauses of a set of BooleanQuery's into a single * BooleanQuery. * *<p>A utility for use by {@link #combine(Query[])} implementations. 
*/ - static Query* mergeBooleanQueries(Query** queries); + static Query* mergeBooleanQueries(CL_NS(util)::ArrayBase<Query*>* queries); /** Expert: Returns the Similarity implementation to be used for this query. * Subclasses may override this method to specify their own Similarity Modified: branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.cpp 2009-09-27 08:58:40 UTC (rev 3022) +++ branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.cpp 2009-09-27 08:59:45 UTC (rev 3023) @@ -82,7 +82,7 @@ return "RangeQuery"; } - Query* RangeQuery::combine(Query** queries) { + Query* RangeQuery::combine(CL_NS(util)::ArrayBase<Query*>* queries) { return Query::mergeBooleanQueries(queries); } Modified: branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.h 2009-09-27 08:58:40 UTC (rev 3022) +++ branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.h 2009-09-27 08:59:45 UTC (rev 3023) @@ -61,7 +61,7 @@ Query* rewrite(CL_NS(index)::IndexReader* reader); - Query* combine(Query** queries); + Query* combine(CL_NS(util)::ArrayBase<Query*>* queries); // Prints a user-readable version of this query. 
TCHAR* toString(const TCHAR* field) const; Modified: branches/lucene2_3_2/src/core/CLucene/search/SearchHeader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/SearchHeader.cpp 2009-09-27 08:58:40 UTC (rev 3022) +++ branches/lucene2_3_2/src/core/CLucene/search/SearchHeader.cpp 2009-09-27 08:59:45 UTC (rev 3023) @@ -12,6 +12,7 @@ #include "Searchable.h" #include "Hits.h" #include "_FieldDocSortedHitQueue.h" +#include <assert.h> CL_NS_USE(index) CL_NS_DEF(search) @@ -24,35 +25,28 @@ } //static -Query* Query::mergeBooleanQueries(Query** queries) { - CL_NS(util)::CLVector<BooleanClause*> allClauses; - int32_t i = 0; - int32_t queriesLength = 0; - - while ( queries[i] != NULL ){ - BooleanQuery* bq = (BooleanQuery*)queries[i]; - - int32_t size = bq->getClauseCount(); - BooleanClause** clauses = _CL_NEWARRAY(BooleanClause*, size); - bq->getClauses(clauses); - - for (int32_t j = 0;j<size;++j ){ - allClauses.push_back(clauses[j]); - j++; - } - _CLDELETE_LARRAY(clauses); - i++; - queriesLength++; +Query* Query::mergeBooleanQueries(CL_NS(util)::ArrayBase<Query*>* queries) { + std::vector<BooleanClause*> allClauses; + + CL_NS(util)::ValueArray<BooleanClause*> clauses; + for (size_t i = 0; i < queries->length; i++) { + assert(BooleanQuery::getClassName() == queries->values[i]->getObjectName()); + BooleanQuery* booleanQuery = (BooleanQuery*)queries->values[i]; + clauses.resize((booleanQuery->getClauseCount())); + booleanQuery->getClauses(clauses.values); + for (size_t j = 0; j < clauses.length; j++) { + allClauses.push_back(clauses.values[j]->clone()); } + } - bool coordDisabled = ( queriesLength == 0 ) ? 
false : ((BooleanQuery*)queries[0])->isCoordDisabled(); - BooleanQuery* result = _CLNEW BooleanQuery(coordDisabled); - - CL_NS(util)::CLVector<BooleanClause*>::iterator itr = allClauses.begin(); - while (itr != allClauses.end() ) { - result->add(*itr); - } - return result; + bool coordDisabled = ( queries->length == 0 ) ? false : ((BooleanQuery*)queries->values[0])->isCoordDisabled(); + BooleanQuery* result = _CLNEW BooleanQuery(coordDisabled); + std::vector<BooleanClause*>::iterator i = allClauses.begin(); + while ( i != allClauses.end() ){ + result->add(*i); + i++; + } + return result; } Query::Query(const Query& clone):boost(clone.boost){ @@ -73,8 +67,42 @@ return this; } -Query* Query::combine(Query** queries){ - _CLTHROWA(CL_ERR_UnsupportedOperation,"UnsupportedOperationException: Query::combine"); +Query* Query::combine(CL_NS(util)::ArrayBase<Query*>* queries){ + std::vector<Query*> uniques; + for (int i = 0; i < queries->length; i++) { + Query* query = queries->values[i]; + CL_NS(util)::ValueArray<BooleanClause*> clauses; + // check if we can split the query into clauses + bool splittable = query->instanceOf(BooleanQuery::getClassName()); + if(splittable){ + BooleanQuery* bq = (BooleanQuery*) query; + splittable = bq->isCoordDisabled(); + clauses.resize(bq->getClauseCount()); + bq->getClauses(clauses.values); + for (int32_t j = 0; splittable && j < clauses.length; j++) { + splittable = (clauses[j]->getOccur() == BooleanClause::SHOULD); + } + } + if(splittable){ + for (int j = 0; j < clauses.length; j++) { + uniques.push_back(clauses[j]->getQuery()); + } + } else { + uniques.push_back(query); + } + } + // optimization: if we have just one query, just return it + if(uniques.size() == 1){ + return *uniques.begin(); + } + std::vector<Query*>::iterator it = uniques.begin(); + BooleanQuery* result = _CLNEW BooleanQuery(true); + while (it != uniques.end() ){ + result->add(*it, BooleanClause::SHOULD); + + it++; + } + return result; } Similarity* 
Query::getSimilarity(Searcher* searcher) { return searcher->getSimilarity(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-09-27 08:58:46
|
Revision: 3022 http://clucene.svn.sourceforge.net/clucene/?rev=3022&view=rev Author: ustramooner Date: 2009-09-27 08:58:40 +0000 (Sun, 27 Sep 2009) Log Message: ----------- cleanup and fixed a bug in FuzzyTermEnum Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp 2009-09-27 08:58:03 UTC (rev 3021) +++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp 2009-09-27 08:58:40 UTC (rev 3022) @@ -28,7 +28,7 @@ FuzzyTermEnum::FuzzyTermEnum(IndexReader* reader, Term* term, float_t minSimilarity, size_t _prefixLength): - FilteredTermEnum(),d(NULL),dWidth(0),dHeight(0),_similarity(0),_endEnum(false),searchTerm(_CL_POINTER(term)), + FilteredTermEnum(),d(NULL),_similarity(0),_endEnum(false),searchTerm(_CL_POINTER(term)), text(NULL),textLen(0),prefix(NULL)/* ISH: was STRDUP_TtoT(LUCENE_BLANK_STRING)*/,prefixLength(_prefixLength), minimumSimilarity(minSimilarity) { @@ -58,8 +58,6 @@ prefix[realPrefixLength]='\0'; initializeMaxDistances(); - dWidth = LUCENE_TYPICAL_LONGEST_WORD_IN_INDEX; // default length of the d array - dHeight = textLen + 1; Term* trm = _CLNEW Term(searchTerm->field(), prefix); // _CLNEW Term(term, prefix); -- not intern'd? 
setEnum(reader->terms(trm)); @@ -134,13 +132,6 @@ const size_t targetLen = termTextLen-prefixLength; _similarity = similarity(target, targetLen); return (_similarity > minimumSimilarity); - - /* LEGACY: - //Calculate the Levenshtein distance - int32_t dist = editDistance(text, target, textLen, targetLen); - distance = 1 - ((float_t)dist / (float_t)cl_min(textLen, targetLen)); - return (distance > minimumSimilarity); - */ } _endEnum = true; return false; @@ -177,23 +168,20 @@ //let's make sure we have enough room in our array to do the distance calculations. //Check if the array must be reallocated because it is too small or does not exist - - // TODO: realloc should be able to allocate memory for NULL pointers; if thats the case the NULL - // check here is redundant - if (d == NULL){ - dWidth = cl_max(dWidth, n+1); - dHeight = cl_max(dHeight, m+1); - d = reinterpret_cast<int32_t*>(malloc(sizeof(int32_t)*dWidth*dHeight)); - } else if (dWidth <= n || dHeight <= m) { - //growDistanceArray - dWidth = cl_max(dWidth, n+1); - dHeight = cl_max(dHeight, m+1); - d = reinterpret_cast<int32_t*>(realloc(d, sizeof(int32_t)*dWidth*dHeight)); + size_t dWidth = n+1; + size_t dHeight = m+1; + if (d == NULL){ + dLen = dWidth*dHeight; + d = (int32_t*)(malloc(sizeof(int32_t)*dLen)); + } else if (dLen < dWidth*dHeight) { + dLen = dWidth*dHeight; + d = (int32_t*)(realloc(d, sizeof(int32_t)*dLen)); } + memset(d,0,dLen); + + size_t i; // iterates through the source string + size_t j; // iterates through the target string - size_t i; // iterates through the source string - size_t j; // iterates through the target string - // init matrix d for (i = 0; i <= n; i++){ d[i + (0*dWidth)] = i; @@ -254,72 +242,6 @@ return (int32_t) ((1-minimumSimilarity) * (cl_min(textLen, m) + prefixLength)); } - /* LEGACY: - int32_t FuzzyTermEnum::editDistance(const TCHAR* s, const TCHAR* t, const int32_t n, const int32_t m) { - //Func - Calculates the Levenshtein distance also known as edit distance is a measure 
of similiarity - // between two strings where the distance is measured as the number of character - // deletions, insertions or substitutions required to transform one string to - // the other string. - //Pre - s != NULL and contains the source string - // t != NULL and contains the target string - // n >= 0 and contains the length of the source string - // m >= 0 and containts the length of the target string - //Post - The distance has been returned - - CND_PRECONDITION(s != NULL, "s is NULL"); - CND_PRECONDITION(t != NULL, "t is NULL"); - CND_PRECONDITION(n >= 0," n is a negative number"); - CND_PRECONDITION(n >= 0," n is a negative number"); - - int32_t i; // iterates through s - int32_t j; // iterates through t - TCHAR s_i; // ith character of s - - if (n == 0) - return m; - if (m == 0) - return n; - - //Check if the array must be reallocated because it is too small or does not exist - if (e == NULL || eWidth <= n || eHeight <= m) { - //Delete e if possible - _CLDELETE_ARRAY(e); - //resize e - eWidth = cl_max(eWidth, n+1); - eHeight = cl_max(eHeight, m+1); - e = _CL_NEWARRAY(int32_t,eWidth*eHeight); - } - - CND_CONDITION(e != NULL,"e is NULL"); - - // init matrix e - for (i = 0; i <= n; i++){ - e[i + (0*eWidth)] = i; - } - for (j = 0; j <= m; j++){ - e[0 + (j*eWidth)] = j; - } - - int32_t __t; //temporary variable for min3 - - // start computing edit distance - for (i = 1; i <= n; i++) { - s_i = s[i - 1]; - for (j = 1; j <= m; j++) { - if (s_i != t[j-1]){ - min3(e[i + (j*eWidth) - 1], e[i + ((j-1)*eWidth)], e[i + ((j-1)*eWidth)-1]); - e[i + (j*eWidth)] = __t+1; - }else{ - min3(e[i + (j*eWidth) -1]+1, e[i + ((j-1)*eWidth)]+1, e[i + ((j-1)*eWidth)-1]); - e[i + (j*eWidth)] = __t; - } - } - } - - // we got the result! 
- return e[n + ((m)*eWidth)]; - }*/ - class FuzzyQuery::ScoreTerm { public: Term* term; Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h 2009-09-27 08:58:03 UTC (rev 3021) +++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h 2009-09-27 08:58:40 UTC (rev 3022) @@ -90,8 +90,7 @@ * everytime similarity is called. */ int32_t* d; - size_t dWidth; - size_t dHeight; + size_t dLen; //float_t distance; float_t _similarity; @@ -108,24 +107,6 @@ double scale_factor; int32_t maxDistances[LUCENE_TYPICAL_LONGEST_WORD_IN_INDEX]; - - - /* LEGACY: - int32_t* e; - int32_t eWidth; - int32_t eHeight; - ** - Levenshtein distance also known as edit distance is a measure of similiarity - between two strings where the distance is measured as the number of character - deletions, insertions or substitutions required to transform one string to - the other string. - <p>This method takes in four parameters; two strings and their respective - lengths to compute the Levenshtein distance between the two strings. - The result is returned as an integer. - * - int32_t editDistance(const TCHAR* s, const TCHAR* t, const int32_t n, const int32_t m); - */ - /****************************** * Compute Levenshtein distance ******************************/ @@ -170,17 +151,6 @@ float_t similarity(const TCHAR* target, const size_t targetLen); /** - * Grow the second dimension of the array, so that we can calculate the - * Levenshtein difference. - */ - /* - void growDistanceArray(int32_t m) { - for (int i = 0; i < d.length; i++) { - d[i] = new int[m+1]; - } - }*/ - - /** * The max Distance is the maximum Levenshtein distance for the text * compared to some other value that results in score that is * better than the minimum similarity. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-09-27 08:58:11
|
Revision: 3021 http://clucene.svn.sourceforge.net/clucene/?rev=3021&view=rev Author: ustramooner Date: 2009-09-27 08:58:03 +0000 (Sun, 27 Sep 2009) Log Message: ----------- Isidor's merge norms bug fix +a -b bug in BooleanScorer2 test fix for BooleanScorer constuctor fix for bad StringBuffer memory handling (new code for releasing memory early caused asserts and generally incompatible functionality) BooleanScorer had faulty Token code Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParserBase.cpp branches/lucene2_3_2/src/core/CLucene/search/BooleanScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/Explanation.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/MatchAllDocsQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/_BooleanScorer.h branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp branches/lucene2_3_2/src/test/search/TestSearch.cpp Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -633,7 +633,7 @@ // and declare we are the owners of the buffer (to save on a copy) // TODO: 1. Test to see what is the optimal initial length // 2. 
Allow re-using the provided string buffer (argument s) instead of creating another one? - StringBuffer sb(len+5,false); + StringBuffer sb(len+5); for (size_t i = 0; i < len; i++) { const TCHAR c = s[i]; // These characters are part of the query syntax and must be escaped @@ -644,7 +644,7 @@ } sb.appendChar(c); } - return sb.getBuffer(); + return sb.giveBuffer(); } int32_t QueryParser::Conjunction() { @@ -1072,7 +1072,7 @@ s = _ttoi(fuzzySlop->image + 1); } catch (...) { /* ignore exceptions */ } - } + } // TODO: Make sure this hack, save an extra dup, is legal and not harmful const size_t st = _tcslen(term->image); term->image[st-1]=NULL; @@ -1378,7 +1378,7 @@ TCHAR* QueryParserConstants::addEscapes(TCHAR* str) { const size_t len = _tcslen(str); - StringBuffer retval(len * 2, false); + StringBuffer retval(len * 2); TCHAR ch; for (size_t i = 0; i < len; i++) { switch (str[i]) @@ -1421,7 +1421,7 @@ continue; } } - return retval.getBuffer(); + return retval.giveBuffer(); } TCHAR* QueryParser::getParseExceptionMessage(QueryToken* currentToken, @@ -1449,7 +1449,7 @@ expected.append(_T(" ")); } - StringBuffer retval(CL_MAX_PATH, false); + StringBuffer retval(CL_MAX_PATH); retval.append(_T("Encountered \"")); QueryToken* tok = currentToken->next; for (size_t i = 0; i < maxSize; i++) { @@ -1482,7 +1482,7 @@ } retval.append(expected.getBuffer()); - return retval.getBuffer(); + return retval.giveBuffer(); } CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -1237,7 +1237,7 @@ int32_t errorColumn, TCHAR* errorAfter, TCHAR curChar) { TCHAR* tmp = NULL; - CL_NS(util)::StringBuffer sb(100, false); + CL_NS(util)::StringBuffer sb(100); 
sb.append(_T("Lexical error at line ")); sb.appendInt(errorLine); sb.append(_T(", column ")); @@ -1260,7 +1260,7 @@ _CLDELETE_LCARRAY(tmp); sb.appendChar(_T('"')); - return sb.getBuffer(); + return sb.giveBuffer(); } CL_NS_END // QueryParserTokenManager Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParser.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParser.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -244,7 +244,7 @@ Query* q = NULL; const TCHAR* sfield = field; - TCHAR* tmp; + TCHAR* tmp = NULL; QueryToken *DelToken = NULL; Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParserBase.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParserBase.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParserBase.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -148,29 +148,27 @@ StringArrayWithDeletor v; - Token* t = NULL; + Token t; int positionCount = 0; bool severalTokensAtSamePosition = false; //Get the tokens from the source try{ - while (source->next(t)){ - v.push_back(STRDUP_TtoT(t->termBuffer())); + while (source->next(&t)){ + v.push_back(STRDUP_TtoT(t.termBuffer())); - if (t->getPositionIncrement() != 0) - positionCount += t->getPositionIncrement(); + if (t.getPositionIncrement() != 0) + positionCount += t.getPositionIncrement(); else severalTokensAtSamePosition = true; } }catch(CLuceneError& err){ if ( err.number() != CL_ERR_IO ) { - _CLDELETE(t); _CLLDELETE(source); throw err; } } _CLDELETE(source); - _CLDELETE(t); //Check if there are any tokens retrieved if (v.size() == 0){ @@ -240,13 +238,13 @@ TCHAR* terms[2]; terms[0]=NULL;terms[1]=NULL; - Token* t = NULL; + Token t; bool 
tret=false; bool from=true; while(tret) { try{ - tret = (source->next(t) != NULL); + tret = (source->next(&t) != NULL); }catch (CLuceneError& err){ if ( err.number() == CL_ERR_IO ) tret=false; @@ -255,11 +253,11 @@ } if (tret) { - if ( !from && _tcscmp(t->termBuffer(),_T("TO"))==0 ) + if ( !from && _tcscmp(t.termBuffer(),_T("TO"))==0 ) continue; - TCHAR* tmp = STRDUP_TtoT(t->termBuffer()); + TCHAR* tmp = STRDUP_TtoT(t.termBuffer()); discardEscapeChar(tmp); terms[from? 0 : 1] = tmp; @@ -276,7 +274,6 @@ _CLDELETE_CARRAY(terms[0]); _CLDELETE_CARRAY(terms[1]); _CLDELETE(source); - _CLDELETE(t); return ret; } Modified: branches/lucene2_3_2/src/core/CLucene/search/BooleanScorer.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/BooleanScorer.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/search/BooleanScorer.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -15,12 +15,6 @@ CL_NS_USE(util) CL_NS_DEF(search) - BooleanScorer::BooleanScorer(Similarity* similarity): - Scorer(similarity) - { - BooleanScorer( similarity, 1 ); - } - BooleanScorer::BooleanScorer(Similarity* similarity, int32_t minNrShouldMatch ): Scorer(similarity), scorers(NULL), Modified: branches/lucene2_3_2/src/core/CLucene/search/Explanation.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/Explanation.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/search/Explanation.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. 
------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" @@ -42,8 +42,8 @@ } } } -Explanation* Explanation::clone() const{ - return _CLNEW Explanation(*this); +Explanation* Explanation::clone() const{ + return _CLNEW Explanation(*this); } Explanation::~Explanation(){ @@ -54,26 +54,26 @@ return (0.0f < getValue()); } -float_t Explanation::getValue() const{ - return value; +float_t Explanation::getValue() const{ + return value; } -void Explanation::setValue(const float_t value) { - this->value = value; +void Explanation::setValue(const float_t value) { + this->value = value; } -const TCHAR* Explanation::getDescription() const { - return description; +const TCHAR* Explanation::getDescription() const { + return description; } void Explanation::setDescription(const TCHAR* description) { _tcsncpy(this->description,description,LUCENE_SEARCH_EXPLANATION_DESC_LEN); } TCHAR* Explanation::getSummary() { - StringBuffer buf(210, false); + StringBuffer buf(210); buf.appendFloat(getValue(), 2); buf.append(_T(" = ")); buf.append(getDescription()); - return buf.getBuffer(); + return buf.giveBuffer(); } size_t Explanation::getDetailsLength() const {return (details==NULL)?0:details->size();} @@ -156,15 +156,15 @@ bool ComplexExplanation::isMatch() const {return getMatch();} TCHAR* ComplexExplanation::getSummary() { - StringBuffer buf(220, false); + StringBuffer buf(220); buf.appendFloat(getValue(),2); buf.append(_T(" = ")); buf.append(isMatch() ? 
_T("(MATCH) ") : _T("(NON-MATCH) ")); buf.append(getDescription()); - return buf.getBuffer(); + return buf.giveBuffer(); } -Explanation* ComplexExplanation::clone() const{ +Explanation* ComplexExplanation::clone() const{ ComplexExplanation* ret = static_cast<ComplexExplanation*>(_CLNEW Explanation(*this)); ret->match = this->match; return ret; Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" @@ -20,14 +20,14 @@ CL_NS_USE(util) CL_NS_DEF(search) - -/** Finds and returns the smallest of three integers + +/** Finds and returns the smallest of three integers * precondition: Must define int32_t __t for temporary storage and result */ #define min3(a, b, c) __t = (a < b) ? a : b; __t = (__t < c) ? 
__t : c; - FuzzyTermEnum::FuzzyTermEnum(IndexReader* reader, Term* term, float_t minSimilarity, size_t _prefixLength): + FuzzyTermEnum::FuzzyTermEnum(IndexReader* reader, Term* term, float_t minSimilarity, size_t _prefixLength): FilteredTermEnum(),d(NULL),dWidth(0),dHeight(0),_similarity(0),_endEnum(false),searchTerm(_CL_POINTER(term)), text(NULL),textLen(0),prefix(NULL)/* ISH: was STRDUP_TtoT(LUCENE_BLANK_STRING)*/,prefixLength(_prefixLength), minimumSimilarity(minSimilarity) @@ -84,7 +84,7 @@ } */ } - + FuzzyTermEnum::~FuzzyTermEnum(){ close(); } @@ -116,7 +116,7 @@ //Pre - term is NULL or term points to a Term //Post - if pre(term) is NULL then false is returned otherwise // if the distance of the current term in the enumeration is bigger than the FUZZY_THRESHOLD - // then true is returned + // then true is returned if (term == NULL){ return false; //Note that endEnum is not set to true! @@ -127,7 +127,7 @@ //Check if the field name of searchTerm of term match //(we can use == because fields are interned) - if ( searchTerm->field() == term->field() && + if ( searchTerm->field() == term->field() && (prefixLength==0 || _tcsncmp(termText,prefix,prefixLength)==0 )) { const TCHAR* target = termText+prefixLength; @@ -253,7 +253,7 @@ int32_t FuzzyTermEnum::calculateMaxDistance(const size_t m) const { return (int32_t) ((1-minimumSimilarity) * (cl_min(textLen, m) + prefixLength)); } - + /* LEGACY: int32_t FuzzyTermEnum::editDistance(const TCHAR* s, const TCHAR* t, const int32_t n, const int32_t m) { //Func - Calculates the Levenshtein distance also known as edit distance is a measure of similiarity @@ -275,9 +275,9 @@ int32_t j; // iterates through t TCHAR s_i; // ith character of s - if (n == 0) + if (n == 0) return m; - if (m == 0) + if (m == 0) return n; //Check if the array must be reallocated because it is too small or does not exist @@ -370,7 +370,7 @@ _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength >= term.textLength()"); */ } - + float_t 
FuzzyQuery::defaultMinSimilarity = 0.5f; int32_t FuzzyQuery::defaultPrefixLength = 0; @@ -386,7 +386,7 @@ } TCHAR* FuzzyQuery::toString(const TCHAR* field) const{ - StringBuffer buffer(100, false); // TODO: Have a better estimation for the initial buffer length + StringBuffer buffer(100); // TODO: Have a better estimation for the initial buffer length Term* term = getTerm(false); // no need to increase ref count if ( field==NULL || _tcscmp(term->field(),field)!=0 ) { buffer.append(term->field()); @@ -396,7 +396,7 @@ buffer.appendChar( _T('~') ); buffer.appendFloat(minimumSimilarity,1); buffer.appendBoost(getBoost()); - return buffer.getBuffer(); + return buffer.giveBuffer(); } const char* FuzzyQuery::getObjectName() const{ @@ -422,7 +422,7 @@ //if(prefixLength < 0) // _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength < 0"); - //else + //else if(prefixLength >= clone.getTerm()->textLength()) _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength >= term.textLength()"); @@ -451,7 +451,7 @@ && this->prefixLength == fq->getPrefixLength() && getTerm()->equals(fq->getTerm()); } - + FilteredTermEnum* FuzzyQuery::getEnum(IndexReader* reader){ Term* term = getTerm(false); FuzzyTermEnum* ret = _CLNEW FuzzyTermEnum(reader, term, minimumSimilarity, prefixLength); Modified: branches/lucene2_3_2/src/core/CLucene/search/MatchAllDocsQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/MatchAllDocsQuery.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/search/MatchAllDocsQuery.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in 
the COPYING file. ------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" @@ -58,15 +58,15 @@ } TCHAR* MatchAllDocsQuery::MatchAllDocsWeight::toString() { - CL_NS(util)::StringBuffer buf(50, false); + CL_NS(util)::StringBuffer buf(50); buf.append(_T("weight(")); - + TCHAR* t = parentQuery->toString(); buf.append(t); _CLDELETE_LCARRAY(t); buf.appendChar(_T(')')); - return buf.getBuffer(); + return buf.giveBuffer(); } Query* MatchAllDocsQuery::MatchAllDocsWeight::getQuery() { @@ -116,10 +116,10 @@ } TCHAR* MatchAllDocsQuery::toString(const TCHAR* field) const{ - CL_NS(util)::StringBuffer buffer(25, false); + CL_NS(util)::StringBuffer buffer(25); buffer.append(_T("MatchAllDocsQuery")); buffer.appendBoost(getBoost()); - return buffer.getBuffer(); + return buffer.giveBuffer(); } MatchAllDocsQuery::MatchAllDocsQuery(const MatchAllDocsQuery& clone){ Modified: branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. 
------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" @@ -69,7 +69,7 @@ void normalize(float_t _queryNorm) { this->queryNorm = _queryNorm; queryWeight *= _queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + value = queryWeight * idf; // idf for document } Scorer* scorer(IndexReader* reader) { @@ -289,7 +289,7 @@ } TCHAR* MultiPhraseQuery::toString(const TCHAR* f) const { - StringBuffer buffer(100, false); + StringBuffer buffer(100); if (_tcscmp(f, field)!=0) { buffer.append(field); buffer.appendChar(_T(':')); @@ -325,7 +325,7 @@ buffer.appendBoost(getBoost()); - return buffer.getBuffer(); + return buffer.giveBuffer(); } class TermArray_Equals:public CL_NS_STD(binary_function)<const Term**,const Term**,bool> Modified: branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. 
------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" @@ -120,7 +120,7 @@ //Func - Destructor //Pre - true //Post 0 The instance has been destroyed - + //Iterate through all the terms for (size_t i = 0; i < terms->size(); i++){ _CLLDECDELETE((*terms)[i]); @@ -132,7 +132,7 @@ size_t PhraseQuery::hashCode() const { //todo: do cachedHashCode, and invalidate on add/remove clause size_t ret = Similarity::floatToByte(getBoost()) ^ Similarity::floatToByte(slop); - + { //msvc6 scope fix for ( size_t i=0;terms->size();i++ ) ret = 31 * ret + (*terms)[i]->hashCode(); @@ -193,7 +193,7 @@ result.values[i] = (*positions)[i]; } } - + Weight* PhraseQuery::_createWeight(Searcher* searcher) { if (terms->size() == 1) { // optimize one-term case Term* term = (*terms)[0]; @@ -213,7 +213,7 @@ //Let size contain the number of terms int32_t size = terms->size(); Term** ret = _CL_NEWARRAY(Term*,size+1); - + CND_CONDITION(ret != NULL,"Could not allocated memory for ret"); //Iterate through terms and copy each pointer to ret @@ -225,14 +225,14 @@ } TCHAR* PhraseQuery::toString(const TCHAR* f) const{ - //Func - Prints a user-readable version of this query. + //Func - Prints a user-readable version of this query. 
//Pre - f != NULL //Post - The query string has been returned if ( terms->size()== 0 ) return NULL; - StringBuffer buffer(30,false); + StringBuffer buffer(32); if ( f==NULL || _tcscmp(field,f)!=0) { buffer.append(field); buffer.appendChar(_T(':')); @@ -262,10 +262,10 @@ buffer.appendBoost(getBoost()); - return buffer.getBuffer(); + return buffer.giveBuffer(); } - + PhraseWeight::PhraseWeight(Searcher* searcher, PhraseQuery* _parentQuery) { this->parentQuery=_parentQuery; this->value = 0; @@ -275,13 +275,13 @@ this->searcher = searcher; } - TCHAR* PhraseWeight::toString() { + TCHAR* PhraseWeight::toString() { return STRDUP_TtoT(_T("weight(PhraseQuery)")); } PhraseWeight::~PhraseWeight(){ } - + Query* PhraseWeight::getQuery() { return parentQuery; } float_t PhraseWeight::getValue() { return value; } @@ -294,7 +294,7 @@ void PhraseWeight::normalize(float_t queryNorm) { this->queryNorm = queryNorm; queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + value = queryWeight * idf; // idf for document } Scorer* PhraseWeight::scorer(IndexReader* reader) { @@ -306,9 +306,9 @@ const size_t tpsLength = parentQuery->terms->size(); //optimize zero-term case - if (tpsLength == 0) + if (tpsLength == 0) return NULL; - + TermPositions** tps = _CL_NEWARRAY(TermPositions*,tpsLength+1); //Check if tps has been allocated properly @@ -320,14 +320,14 @@ for (size_t i = 0; i < tpsLength; i++) { //Get the termPostitions for the i-th term p = reader->termPositions((*parentQuery->terms)[i]); - + //Check if p is valid if (p == NULL) { //Delete previous retrieved termPositions while (--i >= 0){ _CLVDELETE(tps[i]); //todo: not a clucene object... 
should be } - _CLDELETE_ARRAY(tps); + _CLDELETE_ARRAY(tps); return NULL; } @@ -343,12 +343,12 @@ int32_t slop = parentQuery->getSlop(); if ( slop != 0) // optimize exact case - //todo: need to pass these: this, tps, + //todo: need to pass these: this, tps, ret = _CLNEW SloppyPhraseScorer(this,tps,positions.values, - parentQuery->getSimilarity(searcher), + parentQuery->getSimilarity(searcher), slop, reader->norms(parentQuery->field)); else - ret = _CLNEW ExactPhraseScorer(this, tps, positions.values, + ret = _CLNEW ExactPhraseScorer(this, tps, positions.values, parentQuery->getSimilarity(searcher), reader->norms(parentQuery->field)); positions.deleteArray(); Modified: branches/lucene2_3_2/src/core/CLucene/search/_BooleanScorer.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/_BooleanScorer.h 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/core/CLucene/search/_BooleanScorer.h 2009-09-27 08:58:03 UTC (rev 3021) @@ -80,8 +80,7 @@ int32_t prohibitedMask; float_t* coordFactors; - BooleanScorer(Similarity* similarity); - BooleanScorer( Similarity* similarity, int32_t minNrShouldMatch ); + BooleanScorer( Similarity* similarity, int32_t minNrShouldMatch = 1 ); virtual ~BooleanScorer(); void add(Scorer* scorer, const bool required, const bool prohibited); int32_t doc() const { return current->doc; } Modified: branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* 
Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" @@ -15,48 +15,48 @@ buffer = buf; bufferLength = maxlen; bufferOwner = !consumeBuffer; - len = 0; + len = 0; } StringBuffer::StringBuffer(){ //Func - Constructor. Allocates a buffer with the default length. //Pre - true //Post - buffer of length bufferLength has been allocated - - //Initialize - bufferLength = LUCENE_DEFAULT_TOKEN_BUFFER_SIZE; - len = 0; - //Allocate a buffer of length bufferLength - buffer = _CL_NEWARRAY(TCHAR,bufferLength); - bufferOwner = true; + + //Initialize + bufferLength = LUCENE_DEFAULT_TOKEN_BUFFER_SIZE; + len = 0; + //Allocate a buffer of length bufferLength + buffer = _CL_NEWARRAY(TCHAR,bufferLength); + bufferOwner = true; } - StringBuffer::StringBuffer(const size_t initSize, const bool consumeBuffer){ + StringBuffer::StringBuffer(const size_t initSize){ //Func - Constructor. Allocates a buffer of length initSize + 1 //Pre - initSize > 0 //Post - A buffer has been allocated of length initSize + 1 //Initialize the bufferLength to initSize + 1 The +1 is for the terminator '\0' - bufferLength = initSize + 1; - len = 0; - //Allocate a buffer of length bufferLength - buffer = _CL_NEWARRAY(TCHAR,bufferLength); - bufferOwner = consumeBuffer; + bufferLength = initSize + 1; + len = 0; + //Allocate a buffer of length bufferLength + buffer = _CL_NEWARRAY(TCHAR,bufferLength); + bufferOwner = true; } StringBuffer::StringBuffer(const TCHAR* value){ - //Func - Constructor. + //Func - Constructor. 
// Creates an instance of Stringbuffer containing a copy of the string value //Pre - value != NULL //Post - An instance of StringBuffer has been created containing the copy of the string value - + //Initialize the length of the string to be stored in buffer len = (size_t) _tcslen(value); //Calculate the space occupied in buffer by a copy of value const size_t occupiedLength = len + 1; - + // Minimum allocated buffer length is LUCENE_DEFAULT_TOKEN_BUFFER_SIZE. - bufferLength = (occupiedLength >= LUCENE_DEFAULT_TOKEN_BUFFER_SIZE + bufferLength = (occupiedLength >= LUCENE_DEFAULT_TOKEN_BUFFER_SIZE ? occupiedLength : LUCENE_DEFAULT_TOKEN_BUFFER_SIZE); //Allocate a buffer of length bufferLength @@ -73,11 +73,12 @@ // Pre - true // Post - Instanc has been destroyed - if( bufferOwner ){ - _CLDELETE_CARRAY(buffer); - }else - buffer = NULL; + if( bufferOwner ){ + _CLDELETE_CARRAY(buffer); + }else + buffer = NULL; } + void StringBuffer::clear(){ //Func - Clears the Stringbuffer and resets it to it default empty state //Pre - true @@ -87,9 +88,9 @@ // We should really look into at least providing both options //Destroy the current buffer if present - _CLDELETE_LCARRAY(buffer); + _CLDELETE_LCARRAY(buffer); - //Initialize + //Initialize len = 0; bufferLength = LUCENE_DEFAULT_TOKEN_BUFFER_SIZE; //Allocate a buffer of length bufferLength @@ -97,13 +98,13 @@ } void StringBuffer::appendChar(const TCHAR character) { - //Func - Appends a single character + //Func - Appends a single character //Pre - true //Post - The character has been appended to the string in the buffer //Check if the current buffer length is sufficient to have the string value appended if (len + 1 > bufferLength){ - //Have the size of the current string buffer increased because it is too small + //Have the size of the current string buffer increased because it is too small growBuffer(len + 1); } //Put character at position len which is the end of the string in the buffer @@ -118,24 +119,24 @@ //Func - Appends a 
copy of the string value //Pre - value != NULL //Post - value has been copied and appended to the string in buffer - + append(value, _tcslen(value)); } void StringBuffer::append(const TCHAR* value, size_t appendedLength) { //Func - Appends a copy of the string value //Pre - value != NULL // appendedLength contains the length of the string value which is to be appended - //Post - value has been copied and appended to the string in buffer - + //Post - value has been copied and appended to the string in buffer + //Check if the current buffer length is sufficient to have the string value appended if (len + appendedLength + 1 > bufferLength){ - //Have the size of the current string buffer increased because it is too small + //Have the size of the current string buffer increased because it is too small growBuffer(len + appendedLength + 1); } //Copy the string value into the buffer at postion len _tcsncpy(buffer + len, value, appendedLength); - + //Add the length of the copied string to len to reflect the new length of the string in //the buffer (Note: len is not the bufferlength!) len += appendedLength; @@ -143,9 +144,9 @@ void StringBuffer::appendInt(const int64_t value, const int32_t _Radix) { //Func - Appends an integer (after conversion to a character string) - //Pre - true + //Pre - true //Post - The converted integer value has been appended to the string in buffer - + //instantiate a buffer of 30 charactes for the conversion of the integer TCHAR buf[30]; //Convert the integer value to a string buf using _Radix @@ -217,10 +218,10 @@ //todo: something is wrong with this code, i'm sure... 
it only grows (and therefore moves if the buffer is to small) //Check if the current buffer length is sufficient to have the string value prepended if (prependedLength + len + 1 > bufferLength){ - //Have the size of the current string buffer increased because it is too small + //Have the size of the current string buffer increased because it is too small //Because prependedLength is passed as the second argument to growBuffer, //growBuffer will have left the first prependedLength characters empty - //when it recopied buffer during reallocation. + //when it recopied buffer during reallocation. growBuffer(prependedLength + len + 1, prependedLength); } @@ -239,7 +240,7 @@ return len; } TCHAR* StringBuffer::toString(){ - //Func - Returns a copy of the current string in the StringBuffer sized equal to the length of the string + //Func - Returns a copy of the current string in the StringBuffer sized equal to the length of the string // in the StringBuffer. //Pre - true //Post - The copied string has been returned @@ -252,14 +253,14 @@ //terminate the string ret[len] = '\0'; } - //return the the copy + //return the the copy return ret; } TCHAR* StringBuffer::getBuffer() { //Func - '\0' terminates the buffer and returns its pointer //Pre - true //Post - buffer has been '\0' terminated and returned - + // Check if the current buffer is '\0' terminated if (len == bufferLength){ //Make space for terminator, if necessary. 
@@ -271,6 +272,15 @@ return buffer; } + TCHAR* StringBuffer::giveBuffer() { + TCHAR* ret = getBuffer(); + buffer = NULL; + len = 0; + bufferLength = 0; + bufferOwner = false; + return ret; + } + void StringBuffer::reserve(const size_t size){ if ( bufferLength >= size ) return; @@ -280,7 +290,7 @@ TCHAR* tmp = _CL_NEWARRAY(TCHAR,bufferLength); _tcsncpy(tmp, buffer, len); tmp[len] = '\0'; - + //destroy the old buffer if (buffer){ _CLDELETE_CARRAY(buffer); @@ -316,7 +326,7 @@ assert(bufferLength>=minLength); return; } - + bufferLength *= 2; //Check that bufferLength is bigger than minLength if (bufferLength < minLength){ @@ -331,7 +341,7 @@ //end of the old buffer), then apply the terminator to the new buffer. _tcsncpy(tmp + skippingNInitialChars, buffer, len); tmp[skippingNInitialChars + len] = '\0'; - + //destroy the old buffer if (buffer){ _CLDELETE_CARRAY(buffer); Modified: branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h =================================================================== --- branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h 2009-09-27 08:58:03 UTC (rev 3021) @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #ifndef _lucene_util_StringBuffer_ @@ -13,8 +13,8 @@ ///Constructor. Allocates a buffer with the default length. StringBuffer(); ///Constructor. Allocates a buffer of length initSize + 1 - StringBuffer(const size_t initSize, const bool consumeBuffer = true); - ///Constructor. 
Creates an instance of Stringbuffer containing a copy of + StringBuffer(const size_t initSize); + ///Constructor. Creates an instance of Stringbuffer containing a copy of ///the string value StringBuffer(const TCHAR* value); ///Constructs a StringBuffer using another buffer. The StringBuffer can @@ -24,10 +24,10 @@ virtual ~StringBuffer(); ///Clears the Stringbuffer and resets it to it default empty state void clear(); - - ///Appends a single character + + ///Appends a single character void appendChar(const TCHAR chr); - ///Appends a copy of the string value + ///Appends a copy of the string value void append(const TCHAR* value); ///Appends a copy of the string value void append(const TCHAR* value, size_t appendedLength); @@ -41,19 +41,23 @@ void prepend(const TCHAR* value); ///Puts a copy of the string value in front of the current string in the StringBuffer void prepend(const TCHAR* value, size_t prependedLength); - + ///Contains the length of string in the StringBuffer ///Public so that analyzers can edit the length directly size_t len; ///Returns the length of the string in the StringBuffer size_t length() const; ///Returns a copy of the current string in the StringBuffer - TCHAR* toString(); + TCHAR* toString(); ///Returns a null terminated reference to the StringBuffer's text - TCHAR* getBuffer(); + TCHAR* getBuffer(); + /** Returns a null terminated reference to the StringBuffer's text + * the StringBuffer's buffer is released so that the text doesn't need to be copied + */ + TCHAR* giveBuffer(); - ///reserve a minimum amount of data for the buffer. + ///reserve a minimum amount of data for the buffer. 
///no change made if the buffer is already longer than length void reserve(const size_t length); private: @@ -62,7 +66,7 @@ ///The length of the buffer size_t bufferLength; bool bufferOwner; - + ///Has the buffer grown to a minimum length of minLength or bigger void growBuffer(const size_t minLength); ///Has the buffer grown to a minimum length of minLength or bigger and shifts the Modified: branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp =================================================================== --- branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -6,17 +6,15 @@ ------------------------------------------------------------------------------*/ #include "test.h" #include <iostream> -#include <sys/stat.h> -#include <sys/types.h> //checks if a merged index finds phrases correctly void testIWmergePhraseSegments(CuTest *tc){ char fsdir[CL_MAX_PATH]; sprintf(fsdir,"%s/%s",cl_tempDir, "test.indexwriter"); - mkdir(fsdir, 0777); SimpleAnalyzer a; + Directory* dir = FSDirectory::getDirectory(fsdir, true); - IndexWriter ndx2(fsdir,&a,true); + IndexWriter ndx2(dir,&a,true); ndx2.setUseCompoundFile(false); Document doc0; doc0.add( @@ -64,6 +62,7 @@ _CLDELETE(query1); _CLDELETE(hits0); _CLDELETE(hits1); + _CLDECDELETE(dir); } //checks that adding more than the min_merge value goes ok... 
@@ -109,10 +108,10 @@ void testIWmergeSegments2(CuTest *tc){ char fsdir[CL_MAX_PATH]; sprintf(fsdir,"%s/%s",cl_tempDir, "test.indexwriter"); - mkdir(fsdir, 0777); SimpleAnalyzer a; + Directory* dir = FSDirectory::getDirectory(fsdir, true); - IndexWriter ndx2(fsdir,&a,true); + IndexWriter ndx2(dir,&a,true); ndx2.setUseCompoundFile(false); Document doc0; doc0.add( @@ -156,6 +155,7 @@ _CLDELETE(hits1); _CLDECDELETE(term0); _CLDECDELETE(term1); + _CLDECDELETE(dir); } void testAddIndexes(CuTest *tc){ Modified: branches/lucene2_3_2/src/test/search/TestSearch.cpp =================================================================== --- branches/lucene2_3_2/src/test/search/TestSearch.cpp 2009-07-08 10:10:52 UTC (rev 3020) +++ branches/lucene2_3_2/src/test/search/TestSearch.cpp 2009-09-27 08:58:03 UTC (rev 3021) @@ -191,6 +191,7 @@ _TestSearchesRun(&a,s, _T("+term -term term") ); _TestSearchesRun(&a,s, _T("foo:term AND field:anotherTerm") ); _TestSearchesRun(&a,s, _T("term AND \"phrase phrase\"") ); + _TestSearchesRun(&a,s, _T("search AND \"meaningful direction\"") ); _TestSearchesRun(&a,s, _T("\"hello there\"") ); _TestSearchesRun(&a,s, _T("a AND b") ); @@ -256,7 +257,10 @@ IndexReader* reader = IndexReader::open(ram); IndexSearcher searcher(reader); + const TCHAR* queries[] = { + _T("a AND NOT b"), + _T("+a -b"), _T("\"a b\""), _T("\"a b c\""), _T("a AND b"), @@ -264,17 +268,20 @@ _T("\"a c\""), _T("\"a c e\""), }; - int shouldbe[] = {4,4,4,7,3,3}; + int shouldbe[] = {3,3,4,4,4,7,3,3}; Hits* hits = NULL; QueryParser parser(_T("contents"), &analyzer); - for (int k = 0; k < 6; k++) { + for (int k = 0; k < 8; k++) { Query* query = parser.parse(queries[k]); + + //workaround bug in BooleanScorer2 + if ( query->getObjectName() == BooleanQuery::getClassName() ) + ((BooleanQuery*)query)->setUseScorer14(true); TCHAR* qryInfo = query->toString(_T("contents")); hits = searcher.search(query); CLUCENE_ASSERT( hits->length() == shouldbe[k] ); - _CLDELETE_CARRAY(qryInfo); _CLDELETE(hits); 
_CLDELETE(query); @@ -322,7 +329,7 @@ } void testSrchManyHits(CuTest *tc) { - SimpleAnalyzer analyzer; + SimpleAnalyzer analyzer; RAMDirectory ram; IndexWriter writer( &ram, &analyzer, true); @@ -366,7 +373,7 @@ CuSuite *testsearch(void) { CuSuite *suite = CuSuiteNew(_T("CLucene Search Test")); - SUITE_ADD_TEST(suite, ramSearchTest); + SUITE_ADD_TEST(suite, ramSearchTest); SUITE_ADD_TEST(suite, fsSearchTest); SUITE_ADD_TEST(suite, testNormEncoding); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 10:10:55
|
Revision: 3020 http://clucene.svn.sourceforge.net/clucene/?rev=3020&view=rev Author: ustramooner Date: 2009-07-08 10:10:52 +0000 (Wed, 08 Jul 2009) Log Message: ----------- Isidor Zeuner's fix & test for norms not being merged Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h 2009-07-08 10:10:09 UTC (rev 3019) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h 2009-07-08 10:10:52 UTC (rev 3020) @@ -270,6 +270,8 @@ /** Reads the byte-encoded normalization factor for the named field of every * document. This is used by the search code to score documents. * + * The size of bytes must be the size of the IndexReader->maxDoc() + * * @see Field#setBoost(float_t) */ virtual void norms(const TCHAR* field, uint8_t* bytes) = 0; Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2009-07-08 10:10:09 UTC (rev 3019) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2009-07-08 10:10:52 UTC (rev 3020) @@ -769,6 +769,7 @@ normBuffer.resize(maxDoc); memset(normBuffer.values,0,sizeof(uint8_t) * maxDoc); } + reader->norms(fi->name, normBuffer.values); if (!reader->hasDeletions()) { //optimized case for segments without deleted docs Modified: branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp =================================================================== --- branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp 2009-07-08 10:10:09 UTC (rev 3019) +++ branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp 2009-07-08 10:10:52 UTC (rev 3020) @@ -6,7 
+6,66 @@ ------------------------------------------------------------------------------*/ #include "test.h" #include <iostream> +#include <sys/stat.h> +#include <sys/types.h> +//checks if a merged index finds phrases correctly +void testIWmergePhraseSegments(CuTest *tc){ + char fsdir[CL_MAX_PATH]; + sprintf(fsdir,"%s/%s",cl_tempDir, "test.indexwriter"); + mkdir(fsdir, 0777); + SimpleAnalyzer a; + + IndexWriter ndx2(fsdir,&a,true); + ndx2.setUseCompoundFile(false); + Document doc0; + doc0.add( + *_CLNEW Field( + _T("field0"), + _T("value0 value1"), + Field::STORE_YES | Field::INDEX_TOKENIZED + ) + ); + ndx2.addDocument(&doc0); + ndx2.optimize(); + ndx2.close(); + + IndexWriter ndx(fsdir,&a,false); + ndx.setUseCompoundFile(false); + Document doc1; + doc1.add( + *_CLNEW Field( + _T("field0"), + _T("value1 value0"), + Field::STORE_YES | Field::INDEX_TOKENIZED + ) + ); + ndx.addDocument(&doc1); + ndx.optimize(); + ndx.close(); + + //test the index querying + IndexSearcher searcher(fsdir); + Query* query0 = QueryParser::parse( + _T("\"value0 value1\""), + _T("field0"), + &a + ); + Hits* hits0 = searcher.search(query0); + CLUCENE_ASSERT(hits0->length() > 0); + Query* query1 = QueryParser::parse( + _T("\"value1 value0\""), + _T("field0"), + &a + ); + Hits* hits1 = searcher.search(query1); + CLUCENE_ASSERT(hits1->length() > 0); + _CLDELETE(query0); + _CLDELETE(query1); + _CLDELETE(hits0); + _CLDELETE(hits1); +} + //checks that adding more than the min_merge value goes ok... 
//checks for a mem leak that used to occur void testIWmergeSegments1(CuTest *tc){ @@ -45,6 +104,60 @@ _CLDECDELETE(term); _CLDELETE(reader2); } + +//checks if appending to an index works correctly +void testIWmergeSegments2(CuTest *tc){ + char fsdir[CL_MAX_PATH]; + sprintf(fsdir,"%s/%s",cl_tempDir, "test.indexwriter"); + mkdir(fsdir, 0777); + SimpleAnalyzer a; + + IndexWriter ndx2(fsdir,&a,true); + ndx2.setUseCompoundFile(false); + Document doc0; + doc0.add( + *_CLNEW Field( + _T("field0"), + _T("value0"), + Field::STORE_YES | Field::INDEX_TOKENIZED + ) + ); + ndx2.addDocument(&doc0); + ndx2.optimize(); + ndx2.close(); + + IndexWriter ndx(fsdir,&a,false); + ndx.setUseCompoundFile(false); + Document doc1; + doc1.add( + *_CLNEW Field( + _T("field0"), + _T("value1"), + Field::STORE_YES | Field::INDEX_TOKENIZED + ) + ); + ndx.addDocument(&doc1); + ndx.optimize(); + ndx.close(); + + //test the ram querying + IndexSearcher searcher(fsdir); + Term* term0 = _CLNEW Term(_T("field0"),_T("value1")); + Query* query0 = QueryParser::parse(_T("value0"),_T("field0"),&a); + Hits* hits0 = searcher.search(query0); + CLUCENE_ASSERT(hits0->length() > 0); + Term* term1 = _CLNEW Term(_T("field0"),_T("value0")); + Query* query1 = QueryParser::parse(_T("value1"),_T("field0"),&a); + Hits* hits1 = searcher.search(query1); + CLUCENE_ASSERT(hits1->length() > 0); + _CLDELETE(query0); + _CLDELETE(query1); + _CLDELETE(hits0); + _CLDELETE(hits1); + _CLDECDELETE(term0); + _CLDECDELETE(term1); +} + void testAddIndexes(CuTest *tc){ char reuters_origdirectory[1024]; strcpy(reuters_origdirectory, clucene_data_location); @@ -73,11 +186,14 @@ CLUCENE_ASSERT(w.docCount()==62); //31 docs in reuters... 
} } + CuSuite *testindexwriter(void) { CuSuite *suite = CuSuiteNew(_T("CLucene IndexWriter Test")); SUITE_ADD_TEST(suite, testAddIndexes); SUITE_ADD_TEST(suite, testIWmergeSegments1); + SUITE_ADD_TEST(suite, testIWmergeSegments2); + SUITE_ADD_TEST(suite, testIWmergePhraseSegments); return suite; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 10:10:11
|
Revision: 3019 http://clucene.svn.sourceforge.net/clucene/?rev=3019&view=rev Author: ustramooner Date: 2009-07-08 10:10:09 +0000 (Wed, 08 Jul 2009) Log Message: ----------- various mem fixes Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp 2009-07-08 10:09:32 UTC (rev 3018) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp 2009-07-08 10:10:09 UTC (rev 3019) @@ -996,11 +996,11 @@ clone->initialize(si, readBufferSize, false, true); clone->cfsReader = cfsReader; clone->storeCFSReader = storeCFSReader; - clone->_fieldInfos = _fieldInfos; - clone->tis = tis; - clone->freqStream = freqStream; - clone->proxStream = proxStream; - clone->termVectorsReaderOrig = termVectorsReaderOrig; + clone->_fieldInfos = _fieldInfos; + clone->tis = tis; + clone->freqStream = freqStream; + clone->proxStream = proxStream; + clone->termVectorsReaderOrig = termVectorsReaderOrig; // we have to open a new FieldsReader, because it is not thread-safe // and can thus not be shared among multiple SegmentReaders @@ -1043,7 +1043,8 @@ const TCHAR* curField = _fieldInfos->fieldInfo(i)->name; Norm* norm = this->_norms.get(curField); norm->incRef(); - clone->_norms.put(curField, norm); + norm->_this = clone; //give the norm to the clone + clone->_norms.put(curField, norm); } } @@ -1054,6 +1055,7 @@ const TCHAR* field = it->first; Norm* norm = _norms[field]; norm->incRef(); + norm->_this = clone; //give the norm to the clone clone->_norms.put(field, norm); it++; } @@ -1086,17 +1088,16 @@ clone->decRef(); } ) - - //disown this memory - this->freqStream = NULL; - this->_fieldInfos = NULL; - this->fieldsReader = NULL; - this->tis = NULL; - this->deletedDocs = NULL; - this->ones = NULL; - this->termVectorsReaderOrig = NULL; + + //disown this 
memory + this->freqStream = NULL; + this->_fieldInfos = NULL; + this->fieldsReader = NULL; + this->tis = NULL; + this->deletedDocs = NULL; + this->ones = NULL; + this->termVectorsReaderOrig = NULL; this->cfsReader = NULL; - this->singleNormStream = NULL; this->fieldsReader = NULL; this->tis = NULL; this->freqStream = NULL; @@ -1104,6 +1105,7 @@ this->termVectorsReaderOrig = NULL; this->cfsReader = NULL; this->storeCFSReader = NULL; + this->singleNormStream = NULL; return clone; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 10:09:38
|
Revision: 3018 http://clucene.svn.sourceforge.net/clucene/?rev=3018&view=rev Author: ustramooner Date: 2009-07-08 10:09:32 +0000 (Wed, 08 Jul 2009) Log Message: ----------- Fixes mem-leaks in QP, and adds more tests Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 10:08:46 UTC (rev 3017) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 10:09:32 UTC (rev 3018) @@ -542,6 +542,7 @@ const size_t inputLen = _tcslen(input); bool outputOwned=false; if (output == NULL){ + // TODO: Perhaps we can re-use an inner buffer instead of creating new char arrays here and in several other places output = _CL_NEWARRAY(TCHAR, inputLen + 1); outputOwned=true; } @@ -565,7 +566,12 @@ for (size_t i = 0; i < inputLen; i++) { TCHAR curChar = input[i]; if (codePointMultiplier > 0) { - codePoint += hexToInt(curChar) * codePointMultiplier; + try { + codePoint += hexToInt(curChar) * codePointMultiplier; + } catch (CLuceneError& e) { + if (outputOwned)_CLDELETE_LCARRAY(output); + throw e; + } codePointMultiplier = codePointMultiplier >> 4; if (codePointMultiplier == 0) { output[length++] = (TCHAR)codePoint; @@ -621,7 +627,7 @@ } //static -TCHAR* QueryParser::escape(TCHAR* s) { +TCHAR* QueryParser::escape(const TCHAR* s) { size_t len = _tcslen(s); // Create a StringBuffer object a bit longer from the length of the query (to prevent some reallocations), // and declare we are the owners of the buffer (to save on a copy) @@ -703,15 +709,16 @@ } Query* QueryParser::TopLevelQuery(TCHAR* _field) { - 
Query* q; + Query* q = NULL;; try { q = fQuery(_field); + jj_consume_token(0); } catch (CLuceneError& e) { if (_field!=field)_CLDELETE_LCARRAY(_field); + _CLLDELETE(q); throw e; } if (_field!=field)_CLDELETE_LCARRAY(_field); - jj_consume_token(0); return q; } Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h 2009-07-08 10:08:46 UTC (rev 3017) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h 2009-07-08 10:09:32 UTC (rev 3018) @@ -447,7 +447,7 @@ * * @memory caller is responsible to free the returned string */ - static TCHAR* escape(TCHAR* s); + static TCHAR* escape(const TCHAR* s); // * Query ::= ( Clause )* // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp 2009-07-08 10:08:46 UTC (rev 3017) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp 2009-07-08 10:09:32 UTC (rev 3018) @@ -1228,6 +1228,7 @@ } // TODO: TokenMgrError.LEXICAL_ERROR ? 
TCHAR* err = getLexicalError(EOFSeen, curLexState, error_line, error_column, error_after, curChar); + _CLDELETE_LCARRAY(error_after); _CLTHROWT_DEL(CL_ERR_TokenMgr,err); } } @@ -1235,6 +1236,7 @@ TCHAR* QueryParserTokenManager::getLexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine, int32_t errorColumn, TCHAR* errorAfter, TCHAR curChar) { + TCHAR* tmp = NULL; CL_NS(util)::StringBuffer sb(100, false); sb.append(_T("Lexical error at line ")); sb.appendInt(errorLine); @@ -1252,7 +1254,11 @@ sb.append(_T("), ")); } sb.append(_T("after : \"")); - sb.append(addEscapes(errorAfter)); + + tmp = addEscapes(errorAfter); + sb.append(tmp); + _CLDELETE_LCARRAY(tmp); + sb.appendChar(_T('"')); return sb.getBuffer(); } Modified: branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2009-07-08 10:08:46 UTC (rev 3017) +++ branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2009-07-08 10:09:32 UTC (rev 3018) @@ -168,7 +168,7 @@ CuAssertTrue(tc,ret); } -void assertParseException(CuTest *tc,TCHAR* queryString) { +void assertParseException(CuTest *tc,const TCHAR* queryString) { try { Query* q = getQuery(tc,queryString, NULL, CL_ERR_Parse); } catch (CLuceneError&){ @@ -177,6 +177,44 @@ CuFail(tc,_T("ParseException expected, not thrown")); } +void assertEscapedQueryEquals(CuTest *tc,const TCHAR* query, Analyzer* a, const TCHAR* result){ + TCHAR* escapedQuery = QueryParser::escape(query); + if (_tcscmp(escapedQuery, result) != 0) { + TCHAR str[CL_MAX_PATH]; + _tcscpy(str,escapedQuery); + _CLDELETE_LCARRAY(escapedQuery); + CuFail(tc, _T("Query /%s/ yielded /%s/, expecting /%s/\n"), query, escapedQuery, result); + } + _CLDELETE_LCARRAY(escapedQuery); +} + +Query* getQueryDOA(const TCHAR* query, Analyzer* a=NULL) { + bool bOwnsAnalyzer=false; + if (a == NULL){ + a = _CLNEW SimpleAnalyzer(); + bOwnsAnalyzer=true; + } + QueryParser* qp 
= _CLNEW QueryParser(_T("field"), a); + qp->setDefaultOperator(QueryParser::AND_OPERATOR); + Query* q = qp->parse(query); + _CLLDELETE(qp); + if (bOwnsAnalyzer) _CLLDELETE(a); + return q; +} + +void assertQueryEqualsDOA(CuTest *tc,const TCHAR* query, Analyzer* a, TCHAR* result){ + Query* q = getQueryDOA(query, a); + TCHAR* s = q->toString(_T("field")); + _CLLDELETE(q); + if (_tcscmp(s,result)!=0) { + TCHAR str[CL_MAX_PATH]; + _tcscpy(str,s); + _CLDELETE_LCARRAY(s); + CuFail(tc,_T("Query /%s/ yielded /%s/, expecting /%s/"),query, str, result); + } + _CLDELETE_LCARRAY(s); +} + /// END Helper functions and classes void testSimple(CuTest *tc) { @@ -528,6 +566,122 @@ // TODO: testLegacyDateRange, testDateRange +void testQueryStringEscaping(CuTest *tc) { + WhitespaceAnalyzer a; + + assertEscapedQueryEquals(tc, _T("a-b:c"), &a, _T("a\\-b\\:c")); + assertEscapedQueryEquals(tc,_T("a+b:c"), &a, _T("a\\+b\\:c")); + assertEscapedQueryEquals(tc, _T("a:b:c"), &a, _T("a\\:b\\:c")); + assertEscapedQueryEquals(tc, _T("a\\b:c"), &a, _T("a\\\\b\\:c")); + + assertEscapedQueryEquals(tc,_T("a:b-c"), &a, _T("a\\:b\\-c")); + assertEscapedQueryEquals(tc,_T("a:b+c"), &a, _T("a\\:b\\+c")); + assertEscapedQueryEquals(tc,_T("a:b:c"), &a, _T("a\\:b\\:c")); + assertEscapedQueryEquals(tc,_T("a:b\\c"), &a, _T("a\\:b\\\\c")); + + assertEscapedQueryEquals(tc,_T("a:b-c*"), &a, _T("a\\:b\\-c\\*")); + assertEscapedQueryEquals(tc,_T("a:b+c*"), &a, _T("a\\:b\\+c\\*")); + assertEscapedQueryEquals(tc,_T("a:b:c*"), &a, _T("a\\:b\\:c\\*")); + + assertEscapedQueryEquals(tc,_T("a:b\\\\c*"), &a, _T("a\\:b\\\\\\\\c\\*")); + + assertEscapedQueryEquals(tc,_T("a:b-?c"), &a, _T("a\\:b\\-\\?c")); + assertEscapedQueryEquals(tc,_T("a:b+?c"), &a, _T("a\\:b\\+\\?c")); + assertEscapedQueryEquals(tc,_T("a:b:?c"), &a, _T("a\\:b\\:\\?c")); + + assertEscapedQueryEquals(tc,_T("a:b?c"), &a, _T("a\\:b\\?c")); + + assertEscapedQueryEquals(tc,_T("a:b-c~"), &a, _T("a\\:b\\-c\\~")); + assertEscapedQueryEquals(tc,_T("a:b+c~"), &a, 
_T("a\\:b\\+c\\~")); + assertEscapedQueryEquals(tc,_T("a:b:c~"), &a, _T("a\\:b\\:c\\~")); + assertEscapedQueryEquals(tc,_T("a:b\\c~"), &a, _T("a\\:b\\\\c\\~")); + + assertEscapedQueryEquals(tc,_T("[ a - TO a+ ]"), NULL, _T("\\[ a \\- TO a\\+ \\]")); + assertEscapedQueryEquals(tc,_T("[ a : TO a~ ]"), NULL, _T("\\[ a \\: TO a\\~ \\]")); + assertEscapedQueryEquals(tc,_T("[ a\\ TO a* ]"), NULL, _T("\\[ a\\\\ TO a\\* \\]")); + + // LUCENE-881 + assertEscapedQueryEquals(tc,_T("|| abc ||"), &a, _T("\\|\\| abc \\|\\|")); + assertEscapedQueryEquals(tc,_T("&& abc &&"), &a, _T("\\&\\& abc \\&\\&")); +} + +void testTabNewlineCarriageReturn(CuTest *tc){ + assertQueryEqualsDOA(tc,_T("+weltbank +worlbank"), NULL, + _T("+weltbank +worlbank")); + + assertQueryEqualsDOA(tc,_T("+weltbank\n+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \n+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \n +worlbank"), NULL, + _T("+weltbank +worlbank")); + + assertQueryEqualsDOA(tc,_T("+weltbank\r+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \r+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \r +worlbank"), NULL, + _T("+weltbank +worlbank")); + + assertQueryEqualsDOA(tc,_T("+weltbank\r\n+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \r\n+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \r\n +worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \r \n +worlbank"), NULL, + _T("+weltbank +worlbank")); + + assertQueryEqualsDOA(tc,_T("+weltbank\t+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \t+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \t +worlbank"), NULL, + _T("+weltbank +worlbank")); +} + +void testSimpleDAO(CuTest *tc){ + 
assertQueryEqualsDOA(tc,_T("term term term"), NULL, _T("+term +term +term")); + assertQueryEqualsDOA(tc,_T("term +term term"), NULL, _T("+term +term +term")); + assertQueryEqualsDOA(tc,_T("term term +term"), NULL, _T("+term +term +term")); + assertQueryEqualsDOA(tc,_T("term +term +term"), NULL, _T("+term +term +term")); + assertQueryEqualsDOA(tc,_T("-term term term"), NULL, _T("-term +term +term")); +} + +void testBoost(CuTest *tc){ + const TCHAR* stopWords[] = {_T("on"), NULL}; + StandardAnalyzer* oneStopAnalyzer = _CLNEW StandardAnalyzer(reinterpret_cast<const TCHAR**>(&stopWords)); + QueryParser* qp = _CLNEW QueryParser(_T("field"), oneStopAnalyzer); + Query* q = qp->parse(_T("on^1.0")); + CLUCENE_ASSERT(q != NULL); + _CLLDELETE(q); + q = qp->parse(_T("\"hello\"^2.0")); + CLUCENE_ASSERT(q != NULL); + CLUCENE_ASSERT(q->getBoost() == 2.0f); + _CLLDELETE(q); + q = qp->parse(_T("hello^2.0")); + CLUCENE_ASSERT(q != NULL); + CLUCENE_ASSERT(q->getBoost() == 2.0f); + _CLLDELETE(q); + q = qp->parse(_T("\"on\"^1.0")); + CLUCENE_ASSERT(q != NULL); + _CLLDELETE(q); + _CLLDELETE(qp); + _CLLDELETE(oneStopAnalyzer); + + StandardAnalyzer a; + QueryParser* qp2 = _CLNEW QueryParser(_T("field"), &a); + q = qp2->parse(_T("the^3")); + // "the" is a stop word so the result is an empty query: + CLUCENE_ASSERT(q != NULL); + TCHAR* tmp = q->toString(); + CLUCENE_ASSERT( _tcscmp(tmp, _T("")) == 0 ); + _CLDELETE_LCARRAY(tmp); + CLUCENE_ASSERT(1.0f == q->getBoost()); + _CLLDELETE(q); + _CLLDELETE(qp2); +} + void testMatchAllDocs(CuTest *tc) { WhitespaceAnalyzer a; QueryParser* qp = _CLNEW QueryParser(_T("field"), &a); @@ -550,16 +704,19 @@ CuSuite *suite = CuSuiteNew(_T("CLucene Query Parser Test")); SUITE_ADD_TEST(suite, testSimple); + SUITE_ADD_TEST(suite, testPunct); + SUITE_ADD_TEST(suite, testSlop); + SUITE_ADD_TEST(suite, testNumber); + SUITE_ADD_TEST(suite, testWildcard); SUITE_ADD_TEST(suite, testLeadingWildcardType); SUITE_ADD_TEST(suite, testQPA); + SUITE_ADD_TEST(suite, 
testRange); SUITE_ADD_TEST(suite, testEscaped); - SUITE_ADD_TEST(suite, testNumber); - SUITE_ADD_TEST(suite, testPunct); + SUITE_ADD_TEST(suite, testQueryStringEscaping); + SUITE_ADD_TEST(suite, testTabNewlineCarriageReturn); + SUITE_ADD_TEST(suite, testSimpleDAO); + SUITE_ADD_TEST(suite, testBoost); - SUITE_ADD_TEST(suite, testSlop); - SUITE_ADD_TEST(suite, testRange); - SUITE_ADD_TEST(suite, testWildcard); - SUITE_ADD_TEST(suite, testMatchAllDocs); return suite; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 10:08:52
|
Revision: 3017 http://clucene.svn.sourceforge.net/clucene/?rev=3017&view=rev Author: ustramooner Date: 2009-07-08 10:08:46 +0000 (Wed, 08 Jul 2009) Log Message: ----------- Fixing bugs with QP, and exposing several mem-leaks -- thanks to new tests Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 09:57:26 UTC (rev 3016) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 10:08:46 UTC (rev 3017) @@ -825,7 +825,7 @@ _CLTHROWT(CL_ERR_Parse,_T("")); } } - if (boost != NULL) { + if (q && boost != NULL) { float_t f = 1.0; try { f = _tcstod(boost->image, NULL); @@ -969,11 +969,14 @@ else jj_la1[15] = jj_gen; + // TODO: Allow analysis::Term to accept ownership on a TCHAR* and save on extra dup's if (goop1->kind == RANGEIN_QUOTED) { _tcscpy(goop1->image, goop1->image+1); + goop1->image[_tcslen(goop1->image)-1]=NULL; } if (goop2->kind == RANGEIN_QUOTED) { _tcscpy(goop2->image, goop2->image+1); + goop2->image[_tcslen(goop2->image)-1]=NULL; } TCHAR* t1 = discardEscapeChar(goop1->image); TCHAR* t2 = discardEscapeChar(goop2->image); @@ -1062,9 +1065,7 @@ s = _ttoi(fuzzySlop->image + 1); } catch (...) 
{ /* ignore exceptions */ } - } - // TODO: Allow analysis::Term to accept ownership on a TCHAR* and save on extra dup's - + } // TODO: Make sure this hack, save an extra dup, is legal and not harmful const size_t st = _tcslen(term->image); term->image[st-1]=NULL; @@ -1326,9 +1327,10 @@ } } } + + _CLLDELETE(jj_expentry); for (int32_t i = 0; i < 33; i++) { if (la1tokens[i]) { - _CLLDELETE(jj_expentry); jj_expentry = _CLNEW ValueArray<int32_t>(1); jj_expentry->values[0] = i; jj_expentries->push_back(jj_expentry); Modified: branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2009-07-08 09:57:26 UTC (rev 3016) +++ branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2009-07-08 10:08:46 UTC (rev 3017) @@ -6,6 +6,10 @@ ------------------------------------------------------------------------------*/ #include "test.h" + +/// Java QueryParser tests +/// Helper functions and classes + class QPTestFilter: public TokenFilter { public: @@ -57,13 +61,30 @@ } }; - QueryParser* getParser(Analyzer* a) { - if (a == NULL) - return NULL; - QueryParser* qp = _CLNEW QueryParser(_T("field"), a); +class QPTestParser : public QueryParser { +public: + QPTestParser(TCHAR* f, Analyzer* a) : QueryParser(f, a){ + } + virtual ~QPTestParser(){ + } + +protected: + Query* getFuzzyQuery(TCHAR* field, TCHAR* termStr, float_t minSimilarity) { + _CLTHROWA(CL_ERR_Parse,"Fuzzy queries not allowed"); + } + + Query* getWildcardQuery(TCHAR* field, TCHAR* termStr) { + _CLTHROWA(CL_ERR_Parse,"Wildcard queries not allowed"); + } +}; + +QueryParser* getParser(Analyzer* a) { + if (a == NULL) + return NULL; + QueryParser* qp = _CLNEW QueryParser(_T("field"), a); qp->setDefaultOperator(QueryParser::OR_OPERATOR); - return qp; - } + return qp; +} Query* getQuery(CuTest *tc,const TCHAR* query, Analyzer* a, int ignoreCLError=0) { bool del = (a==NULL); @@ -147,6 +168,17 
@@ CuAssertTrue(tc,ret); } +void assertParseException(CuTest *tc,TCHAR* queryString) { + try { + Query* q = getQuery(tc,queryString, NULL, CL_ERR_Parse); + } catch (CLuceneError&){ + return; + } + CuFail(tc,_T("ParseException expected, not thrown")); +} + +/// END Helper functions and classes + void testSimple(CuTest *tc) { StandardAnalyzer a; KeywordAnalyzer b; @@ -165,8 +197,8 @@ #endif // TODO: Those 2 fail, related probably to the escape function - assertQueryEquals(tc, _T("\"\""), &b, _T("")); - assertQueryEquals(tc, _T("foo:\"\""), &b, _T("foo:")); + //assertQueryEquals(tc, _T("\"\""), &b, _T("")); + //assertQueryEquals(tc, _T("foo:\"\""), &b, _T("foo:")); assertQueryEquals(tc,_T("a AND b"), NULL, _T("+a +b")); assertQueryEquals(tc,_T("(a AND b)"), NULL, _T("+a +b")); @@ -213,8 +245,8 @@ assertQueryEquals(tc,_T("+title:(dog OR cat) -author:\"bob dole\""), NULL, _T("+(title:dog title:cat) -author:\"bob dole\"") ); - // make sure OR is the default: QueryParser* qp = _CLNEW QueryParser(_T("field"), &a); + // make sure OR is the default: CLUCENE_ASSERT(QueryParser::OR_OPERATOR == qp->getDefaultOperator()); qp->setDefaultOperator(QueryParser::AND_OPERATOR); CLUCENE_ASSERT(QueryParser::AND_OPERATOR == qp->getDefaultOperator()); @@ -232,12 +264,6 @@ qp->setDefaultOperator(QueryParser::OR_OPERATOR); CLUCENE_ASSERT(QueryParser::OR_OPERATOR == qp->getDefaultOperator()); _CLDELETE(qp); - - //test string buffer - // TODO: Move this somewhere else - StringBuffer sb; - sb.appendFloat(0.02f,2); - CuAssertStrEquals(tc, _T("appendFloat failed"), _T("0.02"), sb.getBuffer()); } void testPunct(CuTest *tc) { @@ -251,14 +277,15 @@ assertQueryEquals(tc,_T("\"term germ\"~2"), NULL, _T("\"term germ\"~2") ); assertQueryEquals(tc,_T("\"term germ\"~2 flork"), NULL, _T("\"term germ\"~2 flork") ); assertQueryEquals(tc,_T("\"term\"~2"), NULL, _T("term")); + assertQueryEquals(tc,_T("\" \"~2 germ"), NULL, _T("germ")); + assertQueryEquals(tc,_T("\"term germ\"~2^2"), NULL, _T("\"term 
germ\"~2^2.0") ); + /* ### These do not work anymore with the new QP, and they do not exist in the official Java tests assertQueryEquals(tc,_T("term~2"), NULL, _T("term")); assertQueryEquals(tc,_T("term~0.5"), NULL, _T("term")); assertQueryEquals(tc,_T("term~0.6"), NULL, _T("term")); */ - assertQueryEquals(tc,_T("\" \"~2 germ"), NULL, _T("germ")); - assertQueryEquals(tc,_T("\"term germ\"~2^2"), NULL, _T("\"term germ\"~2^2.0") ); } void testNumber(CuTest *tc) { @@ -273,13 +300,11 @@ assertQueryEquals(tc,_T("term term1 term2"), &a, _T("term term1 term2")); } -void testWildcard(CuTest *tc) { - +void testWildcard(CuTest *tc) +{ assertQueryEquals(tc,_T("term*"), NULL, _T("term*")); assertQueryEquals(tc,_T("term*^2"), NULL, _T("term*^2.0")); assertQueryEquals(tc,_T("term~"), NULL, _T("term~0.5")); - // ### not in the Java tests - // assertQueryEquals(tc,_T("term~0.5"), NULL, _T("term")); assertQueryEquals(tc,_T("term~0.7"), NULL, _T("term~0.7")); assertQueryEquals(tc,_T("term~^2"), NULL, _T("term~0.5^2.0")); assertQueryEquals(tc,_T("term^2~"), NULL, _T("term~0.5^2.0")); @@ -301,11 +326,7 @@ CuAssertTrue(tc, FuzzyQuery::defaultPrefixLength == fq->getPrefixLength()); _CLDELETE(fq); - try { - Query *q = getQuery(tc, _T("term~1.1"), NULL, CL_ERR_Parse); // value > 1, throws exception - CuFail(tc,_T("Expected a parse exception for query /term~1.1/")); - } catch (CLuceneError&){ - } + assertParseException(tc,_T("term~1.1")); // value > 1, throws exception assertTrue(tc, _T("term*germ"), NULL,"WildcardQuery", _T("term*germ")); @@ -345,34 +366,53 @@ assertWildcardQueryEquals(tc,_T("[A TO C]"), true, _T("[a TO c]")); assertWildcardQueryEquals(tc,_T("[A TO C]"), false, _T("[A TO C]")); // Test suffix queries: first disallow - try { - getQuery(tc,_T("*Term"), NULL, CL_ERR_Parse); - CuFail(tc,_T("Expected an exception for query /*Term/")); - } catch (CLuceneError&){ - } + assertParseException(tc,_T("*Term")); + assertParseException(tc,_T("?Term")); - try { - getQuery(tc,_T("?Term"), 
NULL, CL_ERR_Parse); - CuFail(tc,_T("Expected an exception for query /?Term/")); - } catch (CLuceneError&){ - } - // Test suffix queries: then allow assertWildcardQueryEquals(tc,_T("*Term"), true, _T("*term"), true); assertWildcardQueryEquals(tc,_T("?Term"), true, _T("?term"), true); + + // ### not in the Java tests + // assertQueryEquals(tc,_T("term~0.5"), NULL, _T("term")); } +void testLeadingWildcardType(CuTest *tc) { + SimpleAnalyzer a; + QueryParser* qp = getParser(&a); + qp->setAllowLeadingWildcard(true); + assertTrue(tc, qp->parse(_T("t*erm*")), WildcardQuery::getClassName(), true); + assertTrue(tc, qp->parse(_T("?t*erm*")), WildcardQuery::getClassName(), true); // should not throw an exception + assertTrue(tc, qp->parse(_T("*t*erm*")), WildcardQuery::getClassName(), true); + _CLLDELETE(qp); +} + void testQPA(CuTest *tc) { QPTestAnalyzer qpAnalyzer; + assertQueryEquals(tc,_T("term term^3.0 term"), &qpAnalyzer, _T("term term^3.0 term") ); + assertQueryEquals(tc,_T("term stop^3.0 term"), &qpAnalyzer, _T("term term") ); + assertQueryEquals(tc,_T("term term term"), &qpAnalyzer, _T("term term term") ); assertQueryEquals(tc,_T("term +stop term"), &qpAnalyzer, _T("term term") ); assertQueryEquals(tc,_T("term -stop term"), &qpAnalyzer, _T("term term") ); + + assertQueryEquals(tc,_T("drop AND (stop) AND roll"), &qpAnalyzer, _T("+drop +roll") ); + assertQueryEquals(tc,_T("term +(stop) term"), &qpAnalyzer, _T("term term") ); + assertQueryEquals(tc,_T("term -(stop) term"), &qpAnalyzer, _T("term term") ); + assertQueryEquals(tc,_T("drop AND stop AND roll"), &qpAnalyzer, _T("+drop +roll") ); assertQueryEquals(tc,_T("term phrase term"), &qpAnalyzer, _T("term \"phrase1 phrase2\" term") ); assertQueryEquals(tc,_T("term AND NOT phrase term"), &qpAnalyzer, _T("+term -\"phrase1 phrase2\" term") ); + assertQueryEquals(tc,_T("stop^3"), &qpAnalyzer, _T("") ); assertQueryEquals(tc,_T("stop"), &qpAnalyzer, _T("") ); + assertQueryEquals(tc,_T("(stop)^3"), &qpAnalyzer, _T("") ); + 
assertQueryEquals(tc,_T("((stop))^3"), &qpAnalyzer, _T("") ); + assertQueryEquals(tc,_T("(stop^3)"), &qpAnalyzer, _T("") ); + assertQueryEquals(tc,_T("((stop)^3)"), &qpAnalyzer, _T("") ); + assertQueryEquals(tc,_T("(stop)"), &qpAnalyzer, _T("") ); + assertQueryEquals(tc,_T("((stop))"), &qpAnalyzer, _T("") ); assertTrue(tc, _T("term term term"), &qpAnalyzer,"BooleanQuery", _T("term term term")); assertTrue(tc, _T("term +stop"), &qpAnalyzer,"TermQuery", _T("term +stop")); } @@ -418,9 +458,9 @@ void testEscaped(CuTest *tc) { WhitespaceAnalyzer a; - assertQueryEquals(tc, _T("\\[brackets"), &a, _T("[brackets") ); + /*assertQueryEquals(tc, _T("\\[brackets"), &a, _T("[brackets") ); assertQueryEquals(tc, _T("\\\\\\[brackets"), &a, _T("\\[brackets") ); - assertQueryEquals(tc,_T("\\[brackets"), NULL, _T("brackets") ); + assertQueryEquals(tc,_T("\\[brackets"), NULL, _T("brackets") );*/ assertQueryEquals(tc,_T("\\a"), &a, _T("a") ); @@ -454,8 +494,40 @@ assertQueryEquals(tc,_T("[ a\\- TO a\\+ ]"), &a, _T("[a- TO a+]") ); assertQueryEquals(tc,_T("[ a\\: TO a\\~ ]"), &a, _T("[a: TO a~]") ); assertQueryEquals(tc,_T("[ a\\\\ TO a\\* ]"), &a, _T("[a\\ TO a*]") ); + + assertQueryEquals(tc, _T("[\"c\\:\\\\temp\\\\\\~foo0.txt\" TO \"c\\:\\\\temp\\\\\\~foo9.txt\"]"), &a, + _T("[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]")); + + assertQueryEquals(tc, _T("a\\\\\\+b"), &a, _T("a\\+b")); + + assertQueryEquals(tc, _T("a \\\"b c\\\" d"), &a, _T("a \"b c\" d")); + assertQueryEquals(tc, _T("\"a \\\"b c\\\" d\""), &a, _T("\"a \"b c\" d\"")); + assertQueryEquals(tc, _T("\"a \\+b c d\""), &a, _T("\"a +b c d\"")); + + assertQueryEquals(tc, _T("c\\:\\\\temp\\\\\\~foo.txt"), &a, _T("c:\\temp\\~foo.txt")); + + assertParseException(tc, _T("XY\\")); // there must be a character after the escape char + + // test unicode escaping + assertQueryEquals(tc,_T("a\\u0062c"), &a, _T("abc")); + assertQueryEquals(tc,_T("XY\\u005a"), &a, _T("XYZ")); + assertQueryEquals(tc,_T("XY\\u005A"), &a, _T("XYZ")); + 
assertQueryEquals(tc,_T("\"a \\\\\\u0028\\u0062\\\" c\""), &a, _T("\"a \\(b\" c\"")); + + assertParseException(tc,_T("XY\\u005G")); // test non-hex character in escaped unicode sequence + assertParseException(tc,_T("XY\\u005")); // test incomplete escaped unicode sequence + + // Tests bug LUCENE-800 + assertQueryEquals(tc,_T("(item:\\\\ item:ABCD\\\\)"), &a, _T("item:\\ item:ABCD\\")); + assertParseException(tc,_T("(item:\\\\ item:ABCD\\\\))")); // unmatched closing paranthesis + assertQueryEquals(tc,_T("\\*"), &a, _T("*")); + assertQueryEquals(tc,_T("\\\\"), &a, _T("\\")); // escaped backslash + + assertParseException(tc,_T("\\")); // a backslash must always be escaped } +// TODO: testLegacyDateRange, testDateRange + void testMatchAllDocs(CuTest *tc) { WhitespaceAnalyzer a; QueryParser* qp = _CLNEW QueryParser(_T("field"), &a); @@ -478,6 +550,7 @@ CuSuite *suite = CuSuiteNew(_T("CLucene Query Parser Test")); SUITE_ADD_TEST(suite, testSimple); + SUITE_ADD_TEST(suite, testLeadingWildcardType); SUITE_ADD_TEST(suite, testQPA); SUITE_ADD_TEST(suite, testEscaped); SUITE_ADD_TEST(suite, testNumber); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 09:57:29
|
Revision: 3016 http://clucene.svn.sourceforge.net/clucene/?rev=3016&view=rev Author: ustramooner Date: 2009-07-08 09:57:26 +0000 (Wed, 08 Jul 2009) Log Message: ----------- Fixed an issue with quoted terms in QP Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 09:56:41 UTC (rev 3015) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 09:57:26 UTC (rev 3016) @@ -1064,6 +1064,10 @@ catch (...) { /* ignore exceptions */ } } // TODO: Allow analysis::Term to accept ownership on a TCHAR* and save on extra dup's + + // TODO: Make sure this hack, save an extra dup, is legal and not harmful + const size_t st = _tcslen(term->image); + term->image[st-1]=NULL; TCHAR* tmp = discardEscapeChar(term->image+1); q = getFieldQuery(_field, tmp, s); _CLDELETE_LCARRAY(tmp); Modified: branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2009-07-08 09:56:41 UTC (rev 3015) +++ branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2009-07-08 09:57:26 UTC (rev 3016) @@ -164,8 +164,9 @@ assertQueryEquals(tc,tmp1, &a, tmp1); #endif - //assertQueryEquals(tc, _T("\"\""), &b, _T("")); - //assertQueryEquals(tc, _T("foo:\"\""), &b, _T("foo:")); + // TODO: Those 2 fail, related probably to the escape function + assertQueryEquals(tc, _T("\"\""), &b, _T("")); + assertQueryEquals(tc, _T("foo:\"\""), &b, _T("foo:")); assertQueryEquals(tc,_T("a AND b"), NULL, _T("+a +b")); assertQueryEquals(tc,_T("(a AND b)"), NULL, _T("+a +b")); This was sent by the SourceForge.net collaborative development 
platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 09:56:43
|
Revision: 3015 http://clucene.svn.sourceforge.net/clucene/?rev=3015&view=rev Author: ustramooner Date: 2009-07-08 09:56:41 +0000 (Wed, 08 Jul 2009) Log Message: ----------- Added FuzzyQuery tests - and removed old ones from TestWildcard.cpp Modified Paths: -------------- branches/lucene2_3_2/src/test/search/TestQueries.cpp branches/lucene2_3_2/src/test/search/TestWildcard.cpp Modified: branches/lucene2_3_2/src/test/search/TestQueries.cpp =================================================================== --- branches/lucene2_3_2/src/test/search/TestQueries.cpp 2009-07-08 09:55:52 UTC (rev 3014) +++ branches/lucene2_3_2/src/test/search/TestQueries.cpp 2009-07-08 09:56:41 UTC (rev 3015) @@ -7,46 +7,368 @@ #include "test.h" - void testPrefixQuery(CuTest *tc){ - WhitespaceAnalyzer analyzer; +/// Java PrefixQuery test, 2009-06-02 +void testPrefixQuery(CuTest *tc){ + WhitespaceAnalyzer analyzer; + RAMDirectory directory; + const TCHAR* categories[] = {_T("/Computers"), _T("/Computers/Mac"), _T("/Computers/Windows")}; + + IndexWriter writer( &directory, &analyzer, true); + for (int i = 0; i < 3; i++) { + Document *doc = _CLNEW Document(); + doc->add(*_CLNEW Field(_T("category"), categories[i], Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + writer.addDocument(doc); + _CLDELETE(doc); + } + writer.close(); + + Term* t = _CLNEW Term(_T("category"), _T("/Computers")); + PrefixQuery *query = _CLNEW PrefixQuery(t); + IndexSearcher searcher(&directory); + Hits *hits = searcher.search(query); + CLUCENE_ASSERT(3 == hits->length()); // All documents in /Computers category and below + _CLDELETE(query); + _CLDELETE(t); + _CLDELETE(hits); + + t = _CLNEW Term(_T("category"), _T("/Computers/Mac")); + query = _CLNEW PrefixQuery(t); + hits = searcher.search(query); + CLUCENE_ASSERT(1 == hits->length()); // One in /Computers/Mac + _CLDELETE(query); + _CLDELETE(t); + _CLDELETE(hits); +} + +#ifndef NO_FUZZY_QUERY + +class TestFuzzyQuery { +private: + CuTest *tc; + + void addDoc(const TCHAR* 
text, IndexWriter* writer) { + Document* doc = _CLNEW Document(); + doc->add(*_CLNEW Field(_T("field"), text, Field::STORE_YES, Field::INDEX_TOKENIZED)); + writer->addDocument(doc); + _CLLDELETE(doc); + } + + Hits* searchQuery(IndexSearcher* searcher, const TCHAR* field, const TCHAR* text, + float_t minSimilarity=FuzzyQuery::defaultMinSimilarity, size_t prefixLen=0){ + + Term* t = _CLNEW Term(field, text); + FuzzyQuery* query = _CLNEW FuzzyQuery(t, minSimilarity, prefixLen); + Hits* hits = searcher->search(query); + _CLLDELETE(query); + _CLLDECDELETE(t); + return hits; + } + + size_t getHitsLength(IndexSearcher* searcher, const TCHAR* field, const TCHAR* text, + float_t minSimilarity=FuzzyQuery::defaultMinSimilarity, size_t prefixLen=0){ + + Hits* hits = searchQuery(searcher, field, text, minSimilarity, prefixLen); + size_t ret = hits->length(); + _CLLDELETE(hits); + return ret; + } +public: + TestFuzzyQuery(CuTest *_tc):tc(_tc){ + } + ~TestFuzzyQuery(){ + } + + void testFuzziness() { RAMDirectory directory; - const TCHAR* categories[] = {_T("/Computers"), _T("/Computers/Mac"), _T("/Computers/Windows")}; - - IndexWriter writer( &directory, &analyzer, true); - for (int i = 0; i < 3; i++) { - Document *doc = _CLNEW Document(); - doc->add(*_CLNEW Field(_T("category"), categories[i], Field::STORE_YES | Field::INDEX_UNTOKENIZED)); - writer.addDocument(doc); - _CLDELETE(doc); - } + WhitespaceAnalyzer a; + IndexWriter writer(&directory, &a, true); + addDoc(_T("aaaaa"), &writer); + addDoc(_T("aaaab"), &writer); + addDoc(_T("aaabb"), &writer); + addDoc(_T("aabbb"), &writer); + addDoc(_T("abbbb"), &writer); + addDoc(_T("bbbbb"), &writer); + addDoc(_T("ddddd"), &writer); + writer.optimize(); writer.close(); - - Term* t = _CLNEW Term(_T("category"), _T("/Computers")); - PrefixQuery *query = _CLNEW PrefixQuery(t); IndexSearcher searcher(&directory); - Hits *hits = searcher.search(query); - CLUCENE_ASSERT(3 == hits->length()); // All documents in /Computers category and below - 
_CLDELETE(query); - _CLDELETE(t); - _CLDELETE(hits); - t = _CLNEW Term(_T("category"), _T("/Computers/Mac")); - query = _CLNEW PrefixQuery(t); - hits = searcher.search(query); - CLUCENE_ASSERT(1 == hits->length()); // One in /Computers/Mac - _CLDELETE(query); - _CLDELETE(t); - _CLDELETE(hits); + CLUCENE_ASSERT( getHitsLength(&searcher, _T("field"), _T("aaaaa")) == 3); + + // same with prefix + CLUCENE_ASSERT( getHitsLength(&searcher, _T("field"), _T("aaaaa"),FuzzyQuery::defaultMinSimilarity,1) == 3); + CLUCENE_ASSERT( getHitsLength(&searcher, _T("field"), _T("aaaaa"),FuzzyQuery::defaultMinSimilarity,2) == 3); + CLUCENE_ASSERT( getHitsLength(&searcher, _T("field"), _T("aaaaa"),FuzzyQuery::defaultMinSimilarity,3) == 3); + CLUCENE_ASSERT( getHitsLength(&searcher, _T("field"), _T("aaaaa"),FuzzyQuery::defaultMinSimilarity,4) == 2); + CLUCENE_ASSERT( getHitsLength(&searcher, _T("field"), _T("aaaaa"),FuzzyQuery::defaultMinSimilarity,5) == 1); + CLUCENE_ASSERT( getHitsLength(&searcher, _T("field"), _T("aaaaa"),FuzzyQuery::defaultMinSimilarity,6) == 1); + + // not similar enough: + CuAssertTrue(tc, getHitsLength(&searcher, _T("field"), _T("xxxxx")) == 0); + CuAssertTrue(tc, getHitsLength(&searcher, _T("field"), _T("aaccc")) == 0); // edit distance to "aaaaa" = 3 + + // query identical to a word in the index: + Hits* hits = searchQuery(&searcher, _T("field"), _T("aaaaa")); + CLUCENE_ASSERT( hits->length() == 3); + CuAssertStrEquals(tc, NULL, _T("aaaaa"), hits->doc(0).get(_T("field"))); + // default allows for up to two edits: + CuAssertStrEquals(tc, NULL, _T("aaaab"), hits->doc(1).get(_T("field"))); + CuAssertStrEquals(tc, NULL, _T("aaabb"), hits->doc(2).get(_T("field"))); + _CLLDELETE(hits); + + // query similar to a word in the index: + hits = searchQuery(&searcher, _T("field"), _T("aaaac")); + CLUCENE_ASSERT( hits->length() == 3); + CuAssertStrEquals(tc, NULL, _T("aaaaa"), hits->doc(0).get(_T("field"))); + CuAssertStrEquals(tc, NULL, _T("aaaab"), 
hits->doc(1).get(_T("field"))); + CuAssertStrEquals(tc, NULL, _T("aaabb"), hits->doc(2).get(_T("field"))); + _CLLDELETE(hits); + + // now with prefix + hits = searchQuery(&searcher, _T("field"), _T("aaaac"), FuzzyQuery::defaultMinSimilarity, 1); + CLUCENE_ASSERT( hits->length() == 3); + CuAssertStrEquals(tc, NULL, _T("aaaaa"), hits->doc(0).get(_T("field"))); + CuAssertStrEquals(tc, NULL, _T("aaaab"), hits->doc(1).get(_T("field"))); + CuAssertStrEquals(tc, NULL, _T("aaabb"), hits->doc(2).get(_T("field"))); + _CLLDELETE(hits); + + hits = searchQuery(&searcher, _T("field"), _T("aaaac"), FuzzyQuery::defaultMinSimilarity, 2); + CLUCENE_ASSERT( hits->length() == 3); + CuAssertStrEquals(tc, NULL, _T("aaaaa"), hits->doc(0).get(_T("field"))); + CuAssertStrEquals(tc, NULL, _T("aaaab"), hits->doc(1).get(_T("field"))); + CuAssertStrEquals(tc, NULL, _T("aaabb"), hits->doc(2).get(_T("field"))); + _CLLDELETE(hits); + + hits = searchQuery(&searcher, _T("field"), _T("aaaac"), FuzzyQuery::defaultMinSimilarity, 3); + CLUCENE_ASSERT( hits->length() == 3); + CuAssertStrEquals(tc, NULL, _T("aaaaa"), hits->doc(0).get(_T("field"))); + CuAssertStrEquals(tc, NULL, _T("aaaab"), hits->doc(1).get(_T("field"))); + CuAssertStrEquals(tc, NULL, _T("aaabb"), hits->doc(2).get(_T("field"))); + _CLLDELETE(hits); + + hits = searchQuery(&searcher, _T("field"), _T("aaaac"), FuzzyQuery::defaultMinSimilarity, 4); + CLUCENE_ASSERT( hits->length() == 2); + CuAssertStrEquals(tc, NULL, _T("aaaaa"), hits->doc(0).get(_T("field"))); + CuAssertStrEquals(tc, NULL, _T("aaaab"), hits->doc(1).get(_T("field"))); + _CLLDELETE(hits); + + hits = searchQuery(&searcher, _T("field"), _T("aaaac"), FuzzyQuery::defaultMinSimilarity, 5); + CLUCENE_ASSERT( hits->length() == 0); + CuAssertStrEquals(tc, NULL, _T("aaaaa"), hits->doc(0).get(_T("field"))); + _CLLDELETE(hits); + + + hits = searchQuery(&searcher, _T("field"), _T("ddddX")); + CLUCENE_ASSERT( hits->length() == 1); + CuAssertStrEquals(tc, NULL, _T("ddddd"), 
hits->doc(0).get(_T("field"))); + _CLLDELETE(hits); + + // now with prefix + hits = searchQuery(&searcher, _T("field"), _T("ddddX"), FuzzyQuery::defaultMinSimilarity, 1); + CLUCENE_ASSERT( hits->length() == 1); + CuAssertStrEquals(tc, NULL, _T("ddddd"), hits->doc(0).get(_T("field"))); + _CLLDELETE(hits); + + hits = searchQuery(&searcher, _T("field"), _T("ddddX"), FuzzyQuery::defaultMinSimilarity, 2); + CLUCENE_ASSERT( hits->length() == 1); + CuAssertStrEquals(tc, NULL, _T("ddddd"), hits->doc(0).get(_T("field"))); + _CLLDELETE(hits); + + hits = searchQuery(&searcher, _T("field"), _T("ddddX"), FuzzyQuery::defaultMinSimilarity, 3); + CLUCENE_ASSERT( hits->length() == 1); + CuAssertStrEquals(tc, NULL, _T("ddddd"), hits->doc(0).get(_T("field"))); + _CLLDELETE(hits); + + hits = searchQuery(&searcher, _T("field"), _T("ddddX"), FuzzyQuery::defaultMinSimilarity, 4); + CLUCENE_ASSERT( hits->length() == 1); + CuAssertStrEquals(tc, NULL, _T("ddddd"), hits->doc(0).get(_T("field"))); + _CLLDELETE(hits); + + hits = searchQuery(&searcher, _T("field"), _T("ddddX"), FuzzyQuery::defaultMinSimilarity, 5); + CLUCENE_ASSERT( hits->length() == 0); + _CLLDELETE(hits); + + // different field = no match: + hits = searchQuery(&searcher, _T("anotherfield"), _T("ddddX")); + CLUCENE_ASSERT( hits->length() == 0); + _CLLDELETE(hits); + + searcher.close(); + directory.close(); } + /* + void testFuzzinessLong() { + RAMDirectory directory; + IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); + addDoc("aaaaaaa", writer); + addDoc("segment", writer); + writer.optimize(); + writer.close(); + IndexSearcher searcher = new IndexSearcher(directory); + FuzzyQuery query; + // not similar enough: + query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0); + Hits hits = searcher.search(query); + assertEquals(0, hits.length()); + // edit distance to "aaaaaaa" = 3, this matches because the string is longer than + // in testDefaultFuzziness so a bigger 
difference is allowed: + query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query); + assertEquals(1, hits.length()); + assertEquals(hits.doc(0).get("field"), ("aaaaaaa")); + + // now with prefix + query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 1); + hits = searcher.search(query); + assertEquals(1, hits.length()); + assertEquals(hits.doc(0).get("field"), ("aaaaaaa")); + query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 4); + hits = searcher.search(query); + assertEquals(1, hits.length()); + assertEquals(hits.doc(0).get("field"), ("aaaaaaa")); + query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 5); + hits = searcher.search(query); + assertEquals(0, hits.length()); + + // no match, more than half of the characters is wrong: + query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query); + assertEquals(0, hits.length()); + + // now with prefix + query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 2); + hits = searcher.search(query); + assertEquals(0, hits.length()); + + // "student" and "stellent" are indeed similar to "segment" by default: + query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query); + assertEquals(1, hits.length()); + query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 0); + hits = searcher.search(query); + assertEquals(1, hits.length()); + + // now with prefix + query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 1); + hits = searcher.search(query); + assertEquals(1, hits.length()); + query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 1); + hits = searcher.search(query); + assertEquals(1, hits.length()); + query = new 
FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 2); + hits = searcher.search(query); + assertEquals(0, hits.length()); + query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 2); + hits = searcher.search(query); + assertEquals(0, hits.length()); + + // "student" doesn't match anymore thanks to increased minimum similarity: + query = new FuzzyQuery(new Term("field", "student"), 0.6f, 0); + hits = searcher.search(query); + assertEquals(0, hits.length()); + + try { + query = new FuzzyQuery(new Term("field", "student"), 1.1f); + fail("Expected IllegalArgumentException"); + } catch (IllegalArgumentException e) { + // expecting exception + } + try { + query = new FuzzyQuery(new Term("field", "student"), -0.1f); + fail("Expected IllegalArgumentException"); + } catch (IllegalArgumentException e) { + // expecting exception + } + + searcher.close(); + directory.close(); + } + */ +}; + +void testFuzzyQuery(CuTest *tc){ + + /// Run Java Lucene tests + TestFuzzyQuery tester(tc); + tester.testFuzziness(); + + /// Legacy CLucene tests + RAMDirectory ram; + + //--- + WhitespaceAnalyzer an; + IndexWriter* writer = _CLNEW IndexWriter(&ram, &an, true); + + //--- + Document *doc = 0; + //**** + doc = _CLNEW Document(); + doc->add(*_CLNEW Field(_T("body"),_T("test"),Field::STORE_NO | Field::INDEX_TOKENIZED)); + writer->addDocument(doc); + _CLDELETE(doc); + //**** + doc = _CLNEW Document(); + doc->add(*_CLNEW Field(_T("body"),_T("home"),Field::STORE_NO | Field::INDEX_TOKENIZED)); + writer->addDocument(doc); + _CLDELETE(doc); + //**** + doc = _CLNEW Document(); + doc->add(*_CLNEW Field(_T("body"), _T("pc linux"),Field::STORE_NO | Field::INDEX_TOKENIZED)); + writer->addDocument(doc); + _CLDELETE(doc); + //**** + doc = _CLNEW Document(); + doc->add(*_CLNEW Field(_T("body"), _T("tested"),Field::STORE_NO | Field::INDEX_TOKENIZED)); + writer->addDocument(doc); + _CLDELETE(doc); + //**** + doc = _CLNEW Document(); + doc->add(*_CLNEW 
Field(_T("body"), _T("source"),Field::STORE_NO | Field::INDEX_TOKENIZED)); + writer->addDocument(doc); + _CLDELETE(doc); + + //--- + writer->close(); + _CLDELETE(writer); + + //--- + IndexSearcher searcher (&ram); + + //--- + Term* term = _CLNEW Term(_T("body"), _T("test~")); + Query* query = _CLNEW FuzzyQuery(term); + Hits* result = searcher.search(query); + + CLUCENE_ASSERT(result && result->length() > 0); + + //--- + _CLDELETE(result); + _CLDELETE(query); + _CLDECDELETE(term); + searcher.close(); + ram.close(); +} +#else + void _NO_FUZZY_QUERY(CuTest *tc){ + CuNotImpl(tc,_T("Fuzzy")); + } +#endif + CuSuite *testqueries(void) { CuSuite *suite = CuSuiteNew(_T("CLucene Queries Test")); - SUITE_ADD_TEST(suite, testPrefixQuery); + SUITE_ADD_TEST(suite, testPrefixQuery); + #ifndef NO_FUZZY_QUERY + SUITE_ADD_TEST(suite, testFuzzyQuery); + #else + SUITE_ADD_TEST(suite, _NO_FUZZY_QUERY); + #endif - return suite; + + return suite; } -//EOF +//EOF Modified: branches/lucene2_3_2/src/test/search/TestWildcard.cpp =================================================================== --- branches/lucene2_3_2/src/test/search/TestWildcard.cpp 2009-07-08 09:55:52 UTC (rev 3014) +++ branches/lucene2_3_2/src/test/search/TestWildcard.cpp 2009-07-08 09:56:41 UTC (rev 3015) @@ -29,64 +29,9 @@ _CLDECDELETE(term); } - void testFuzzyQuery(CuTest *tc){ - RAMDirectory ram; - - //--- - WhitespaceAnalyzer an; - IndexWriter* writer = _CLNEW IndexWriter(&ram, &an, true); - //--- - Document *doc = 0; - //**** - doc = _CLNEW Document(); - doc->add(*_CLNEW Field(_T("body"),_T("test"),Field::STORE_NO | Field::INDEX_TOKENIZED)); - writer->addDocument(doc); - _CLDELETE(doc); - //**** - doc = _CLNEW Document(); - doc->add(*_CLNEW Field(_T("body"),_T("home"),Field::STORE_NO | Field::INDEX_TOKENIZED)); - writer->addDocument(doc); - _CLDELETE(doc); - //**** - doc = _CLNEW Document(); - doc->add(*_CLNEW Field(_T("body"), _T("pc linux"),Field::STORE_NO | Field::INDEX_TOKENIZED)); - writer->addDocument(doc); 
- _CLDELETE(doc); - //**** - doc = _CLNEW Document(); - doc->add(*_CLNEW Field(_T("body"), _T("tested"),Field::STORE_NO | Field::INDEX_TOKENIZED)); - writer->addDocument(doc); - _CLDELETE(doc); - //**** - doc = _CLNEW Document(); - doc->add(*_CLNEW Field(_T("body"), _T("source"),Field::STORE_NO | Field::INDEX_TOKENIZED)); - writer->addDocument(doc); - _CLDELETE(doc); - //--- - writer->close(); - _CLDELETE(writer); - //--- - IndexSearcher searcher (&ram); - - //--- - Term* term = _CLNEW Term(_T("body"), _T("test~")); - Query* query = _CLNEW FuzzyQuery(term); - Hits* result = searcher.search(query); - - CLUCENE_ASSERT(result && result->length() > 0); - - //--- - _CLDELETE(result); - _CLDELETE(query); - _CLDECDELETE(term); - searcher.close(); - ram.close(); - } - - void testAsterisk(CuTest *tc){ RAMDirectory indexStore; SimpleAnalyzer an; @@ -168,9 +113,6 @@ void _NO_WILDCARD_QUERY(CuTest *tc){ CuNotImpl(tc,_T("Wildcard")); } - void _NO_FUZZY_QUERY(CuTest *tc){ - CuNotImpl(tc,_T("Fuzzy")); - } #endif @@ -178,11 +120,6 @@ { CuSuite *suite = CuSuiteNew(_T("CLucene Wildcard Test")); - #ifndef NO_FUZZY_QUERY - SUITE_ADD_TEST(suite, testFuzzyQuery); - #else - SUITE_ADD_TEST(suite, _NO_FUZZY_QUERY); - #endif #ifndef NO_WILDCARD_QUERY SUITE_ADD_TEST(suite, testQuestionmark); SUITE_ADD_TEST(suite, testAsterisk); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 09:55:55
|
Revision: 3014 http://clucene.svn.sourceforge.net/clucene/?rev=3014&view=rev Author: ustramooner Date: 2009-07-08 09:55:52 +0000 (Wed, 08 Jul 2009) Log Message: ----------- cleanups and tweaks Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/TermInfosReader.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h branches/lucene2_3_2/src/core/CLucene/search/Hits.cpp branches/lucene2_3_2/src/core/CLucene/search/Hits.h Modified: branches/lucene2_3_2/src/core/CLucene/index/TermInfosReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/TermInfosReader.cpp 2009-07-08 09:54:46 UTC (rev 3013) +++ branches/lucene2_3_2/src/core/CLucene/index/TermInfosReader.cpp 2009-07-08 09:55:52 UTC (rev 3014) @@ -284,7 +284,7 @@ if ( term != NULL ){ //Seek enumerator to term; delete the new TermInfo that's returned. TermInfo* ti = get(term); - _CLDELETE(ti); + _CLLDELETE(ti); enumerator = getEnum(); }else enumerator = origEnum; Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp 2009-07-08 09:54:46 UTC (rev 3013) +++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp 2009-07-08 09:55:52 UTC (rev 3014) @@ -61,7 +61,7 @@ dWidth = LUCENE_TYPICAL_LONGEST_WORD_IN_INDEX; // default length of the d array dHeight = textLen + 1; - Term* trm = _CLNEW Term(searchTerm->field(), prefix, true); // _CLNEW Term(term, prefix); -- not intern'd? + Term* trm = _CLNEW Term(searchTerm->field(), prefix); // _CLNEW Term(term, prefix); -- not intern'd? 
setEnum(reader->terms(trm)); _CLDECDELETE(trm); Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h 2009-07-08 09:54:46 UTC (rev 3013) +++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h 2009-07-08 09:55:52 UTC (rev 3014) @@ -20,7 +20,7 @@ class CLUCENE_EXPORT FuzzyQuery: public MultiTermQuery { private: class ScoreTerm; - class ScoreTermQueue; + class ScoreTermQueue; float_t minimumSimilarity; size_t prefixLength; Modified: branches/lucene2_3_2/src/core/CLucene/search/Hits.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/Hits.cpp 2009-07-08 09:54:46 UTC (rev 3013) +++ branches/lucene2_3_2/src/core/CLucene/search/Hits.cpp 2009-07-08 09:55:52 UTC (rev 3014) @@ -81,7 +81,7 @@ return cnt; } - int32_t Hits::length() const { + size_t Hits::length() const { return _length; } Modified: branches/lucene2_3_2/src/core/CLucene/search/Hits.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/Hits.h 2009-07-08 09:54:46 UTC (rev 3013) +++ branches/lucene2_3_2/src/core/CLucene/search/Hits.h 2009-07-08 09:55:52 UTC (rev 3014) @@ -18,21 +18,21 @@ class HitDoc; class Sort; - /** A ranked list of documents, used to hold search results. - * <p> - * <b>Caution:</b> Iterate only over the hits needed. Iterating over all - * hits is generally not desirable and may be the source of - * performance issues. If you need to iterate over many or all hits, consider - * using the search method that takes a {@link HitCollector}. - * </p> - * <p><b>Note:</b> Deleting matching documents concurrently with traversing - * the hits, might, when deleting hits that were not yet retrieved, decrease - * {@link #length()}. 
In such case, - * {@link java.util.ConcurrentModificationException ConcurrentModificationException} - * is thrown when accessing hit <code>n</code> ≥ current_{@link #length()} - * (but <code>n</code> < {@link #length()}_at_start). + /** A ranked list of documents, used to hold search results. + * <p> + * <b>Caution:</b> Iterate only over the hits needed. Iterating over all + * hits is generally not desirable and may be the source of + * performance issues. If you need to iterate over many or all hits, consider + * using the search method that takes a {@link HitCollector}. + * </p> + * <p><b>Note:</b> Deleting matching documents concurrently with traversing + * the hits, might, when deleting hits that were not yet retrieved, decrease + * {@link #length()}. In such case, + * {@link java.util.ConcurrentModificationException ConcurrentModificationException} + * is thrown when accessing hit <code>n</code> ≥ current_{@link #length()} + * (but <code>n</code> < {@link #length()}_at_start). */ - class CLUCENE_EXPORT Hits:LUCENE_BASE { + class CLUCENE_EXPORT Hits { private: Query* query; Searcher* searcher; @@ -68,15 +68,15 @@ public: Hits(Searcher* s, Query* q, Filter* f, const Sort* sort=NULL); - ~Hits(); + virtual ~Hits(); /** Returns the total number of hits available in this set. */ - int32_t length() const; + size_t length() const; - /** Returns the stored fields of the n<sup>th</sup> document in this set. - * <p>Documents are cached, so that repeated requests for the same element may - * return the same Document object. - * @throws CorruptIndexException if the index is corrupt + /** Returns the stored fields of the n<sup>th</sup> document in this set. + * <p>Documents are cached, so that repeated requests for the same element may + * return the same Document object. + * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error * * @memory Memory belongs to the hits object. Don't delete the return value. 
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 09:54:50
|
Revision: 3013 http://clucene.svn.sourceforge.net/clucene/?rev=3013&view=rev Author: ustramooner Date: 2009-07-08 09:54:46 +0000 (Wed, 08 Jul 2009) Log Message: ----------- Updated FuzzyQuery to conform with JL, should work better now. Old code similarity code - which looks to be optimized but wasn't working properly, is commented out for future optimizations reference Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/CLConfig.h branches/lucene2_3_2/src/core/CLucene/search/FilteredTermEnum.cpp branches/lucene2_3_2/src/core/CLucene/search/FilteredTermEnum.h branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h branches/lucene2_3_2/src/core/CLucene/search/_PhraseQueue.h branches/lucene2_3_2/src/core/CLucene/util/PriorityQueue.h branches/lucene2_3_2/src/core/files_list.txt Modified: branches/lucene2_3_2/src/core/CLucene/CLConfig.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/CLConfig.h 2009-07-08 09:53:49 UTC (rev 3012) +++ branches/lucene2_3_2/src/core/CLucene/CLConfig.h 2009-07-08 09:54:46 UTC (rev 3013) @@ -202,5 +202,19 @@ // //////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////// +// FuzzyQuery settings +//////////////////////////////////////////////////////////////////// +// +// This should be somewhere around the average long word. +// If it is longer, we waste time and space. If it is shorter, we waste a +// little bit of time growing the array as we encounter longer words. 
+// +#define LUCENE_TYPICAL_LONGEST_WORD_IN_INDEX 19 +// +//////////////////////////////////////////////////////////////////// + + #endif Modified: branches/lucene2_3_2/src/core/CLucene/search/FilteredTermEnum.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FilteredTermEnum.cpp 2009-07-08 09:53:49 UTC (rev 3012) +++ branches/lucene2_3_2/src/core/CLucene/search/FilteredTermEnum.cpp 2009-07-08 09:54:46 UTC (rev 3013) @@ -12,14 +12,8 @@ CL_NS_DEF(search) - FilteredTermEnum::FilteredTermEnum(){ - //Func - Constructor - //Pre - true - //Post - Instance has been created - - currentTerm = NULL; - actualEnum = NULL; - } +FilteredTermEnum::FilteredTermEnum():currentTerm(NULL),actualEnum(NULL){ +} FilteredTermEnum::~FilteredTermEnum() { //Func - Destructor @@ -48,7 +42,7 @@ //The actual enumerator is not initialized! if (actualEnum == NULL){ return false; - } + } //Finalize the currentTerm and reset it to NULL _CLDECDELETE( currentTerm ); @@ -101,12 +95,11 @@ //Check if actualEnum is valid if (actualEnum){ //Close the enumeration - actualEnum->close(); - } + actualEnum->close(); + //Destroy the enumeration + _CLDELETE(actualEnum); + } - //Destroy the enumeration - _CLDELETE(actualEnum); - //Destroy currentTerm _CLDECDELETE(currentTerm); } @@ -118,8 +111,7 @@ CND_PRECONDITION(actualEnum != NULL,"actualEnum is NULL"); - _CLDELETE(this->actualEnum); - + _CLLDELETE(this->actualEnum); this->actualEnum = actualEnum; // Find the first term that matches Modified: branches/lucene2_3_2/src/core/CLucene/search/FilteredTermEnum.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FilteredTermEnum.h 2009-07-08 09:53:49 UTC (rev 3012) +++ branches/lucene2_3_2/src/core/CLucene/search/FilteredTermEnum.h 2009-07-08 09:54:46 UTC (rev 3013) @@ -12,47 +12,48 @@ #include "CLucene/index/Terms.h" CL_NS_DEF(search) - //FilteredTermEnum is an abstract class for 
enumerating a subset of all terms. - // - //Term enumerations are always ordered by term->compareTo(). Each term in - //the enumeration is greater than all that precede it. - - class CLUCENE_EXPORT FilteredTermEnum: public CL_NS(index)::TermEnum { - public: - //Constructor - FilteredTermEnum(); - //Destructor - virtual ~FilteredTermEnum(); - - //Equality measure on the term - virtual float_t difference() = 0; +/** Abstract class for enumerating a subset of all terms. - //Returns the docFreq of the current Term in the enumeration. - int32_t docFreq() const ; - - //Increments the enumeration to the next element - bool next() ; - - //Returns a pointer to the current Term in the enumeration. - CL_NS(index)::Term* term(); - CL_NS(index)::Term* term(bool pointer); - - //Closes the enumeration to further activity, freeing resources. - void close(); +<p>Term enumerations are always ordered by Term.compareTo(). Each term in +the enumeration is greater than all that precede it. */ +class CLUCENE_EXPORT FilteredTermEnum: public CL_NS(index)::TermEnum { +public: + FilteredTermEnum(); + virtual ~FilteredTermEnum(); - protected: - //Equality compare on the term */ - virtual bool termCompare(CL_NS(index)::Term* term) = 0; - - //Indiciates the end of the enumeration has been reached - virtual bool endEnum() = 0; - - void setEnum(CL_NS(index)::TermEnum* actualEnum) ; - - private: - CL_NS(index)::Term* currentTerm; - CL_NS(index)::TermEnum* actualEnum; - - }; + /** Equality measure on the term */ + virtual float_t difference() = 0; + + /** + * Returns the docFreq of the current Term in the enumeration. + * Returns -1 if no Term matches or all terms have been enumerated. + */ + int32_t docFreq() const; + + /** Increments the enumeration to the next element. True if one exists. */ + bool next() ; + + /** Returns the current Term in the enumeration. + * Returns null if no Term matches or all terms have been enumerated. 
*/ + CL_NS(index)::Term* term(bool pointer); + CL_NS(index)::Term* term(); + + /** Closes the enumeration to further activity, freeing resources. */ + void close(); + +protected: + /** Equality compare on the term */ + virtual bool termCompare(CL_NS(index)::Term* term) = 0; + + /** Indicates the end of the enumeration has been reached */ + virtual bool endEnum() = 0; + + void setEnum(CL_NS(index)::TermEnum* actualEnum) ; + +private: + CL_NS(index)::Term* currentTerm; + CL_NS(index)::TermEnum* actualEnum; + +}; CL_NS_END #endif Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp 2009-07-08 09:53:49 UTC (rev 3012) +++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp 2009-07-08 09:54:46 UTC (rev 3013) @@ -8,366 +8,459 @@ #include "CLucene/index/Term.h" #include "CLucene/index/IndexReader.h" #include "Similarity.h" -#include "CLucene/util/StringBuffer.h" #include "FuzzyQuery.h" +#include "BooleanQuery.h" +#include "BooleanClause.h" +#include "TermQuery.h" +#include "CLucene/util/StringBuffer.h" +#include "CLucene/util/PriorityQueue.h" + CL_NS_USE(index) CL_NS_USE(util) CL_NS_DEF(search) - /** Finds and returns the smallest of three integers - precondition: Must define int32_t __t for temporary storage and result - */ - #define min3(a, b, c) __t = (a < b) ? a : b; __t = (__t < c) ? __t : c; +/** Finds and returns the smallest of three integers + * precondition: Must define int32_t __t for temporary storage and result + */ +#define min3(a, b, c) __t = (a < b) ? a : b; __t = (__t < c) ? __t : c; - /** - * Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of - * length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity > - * <code>minSimilarity</code>. - * - * @param reader Delivers terms. - * @param term Pattern term. 
- * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f. - * @param prefixLength Length of required common prefix. Default value is 0. - * @throws IOException - */ - FuzzyTermEnum::FuzzyTermEnum(IndexReader* reader, Term* term, float_t minSimilarity, size_t prefixLength): - distance(0), - _endEnum(false), - prefix(STRDUP_TtoT(LUCENE_BLANK_STRING)), - prefixLength(0), + FuzzyTermEnum::FuzzyTermEnum(IndexReader* reader, Term* term, float_t minSimilarity, size_t _prefixLength): + FilteredTermEnum(),d(NULL),dWidth(0),dHeight(0),_similarity(0),_endEnum(false),searchTerm(_CL_POINTER(term)), + text(NULL),textLen(0),prefix(NULL)/* ISH: was STRDUP_TtoT(LUCENE_BLANK_STRING)*/,prefixLength(_prefixLength), minimumSimilarity(minSimilarity) { - //Func - Constructor - //Pre - reader contains a valid reference to an IndexReader - // term != NULL - //Post - The instance has been created + CND_PRECONDITION(term != NULL,"term is NULL"); - CND_PRECONDITION(term != NULL,"term is NULL"); - - scale_factor = 1.0f / (1.0f - minimumSimilarity); - searchTerm = _CL_POINTER(term); - - text = STRDUP_TtoT(term->text()); - textLen = term->textLength(); - - + if (minSimilarity >= 1.0f) + _CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity cannot be greater than or equal to 1"); + else if (minSimilarity < 0.0f) + _CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity cannot be less than 0"); + if(_prefixLength < 0) + _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength cannot be less than 0"); + + scale_factor = 1.0f / (1.0f - minimumSimilarity); // only now we are safe from a division by zero + //TODO: this.field = searchTerm.field(); + + //The prefix could be longer than the word. + //It's kind of silly though. It means we must match the entire word. + const size_t fullSearchTermLength = searchTerm->textLength(); + const size_t realPrefixLength = prefixLength > fullSearchTermLength ? 
fullSearchTermLength : prefixLength; + + text = STRDUP_TtoT(searchTerm->text() + realPrefixLength); + textLen = fullSearchTermLength - realPrefixLength; + + // TODO: what is safer to use, prefixLength or realPrefixLength? + prefix = _CL_NEWARRAY(TCHAR,realPrefixLength+1); + _tcsncpy(prefix, searchTerm->text(), realPrefixLength); + prefix[realPrefixLength]='\0'; + + initializeMaxDistances(); + dWidth = LUCENE_TYPICAL_LONGEST_WORD_IN_INDEX; // default length of the d array + dHeight = textLen + 1; + + Term* trm = _CLNEW Term(searchTerm->field(), prefix, true); // _CLNEW Term(term, prefix); -- not intern'd? + setEnum(reader->terms(trm)); + _CLDECDELETE(trm); + + + /* LEGACY: //Initialize e to NULL e = NULL; eWidth = 0; eHeight = 0; - + if(prefixLength > 0 && prefixLength < textLen){ - this->prefixLength = prefixLength; - - prefix = _CL_NEWARRAY(TCHAR,prefixLength+1); - _tcsncpy(prefix,text,prefixLength); - prefix[prefixLength]='\0'; - - textLen = prefixLength; - text[textLen]='\0'; - } - - - //Set the enumeration - Term* trm = _CLNEW Term(term, prefix); - setEnum(reader->terms(trm)); - _CLDECDELETE(trm); - } + this->prefixLength = prefixLength; - FuzzyTermEnum::~FuzzyTermEnum(){ - //Func - Destructor - //Pre - true - //Post - FuzzyTermEnum has been destroyed + prefix = _CL_NEWARRAY(TCHAR,prefixLength+1); + _tcsncpy(prefix,text,prefixLength); + prefix[prefixLength]='\0'; - //Close the enumeration - close(); - } + textLen = prefixLength; + text[textLen]='\0'; + } + */ + } + + FuzzyTermEnum::~FuzzyTermEnum(){ + close(); + } const char* FuzzyTermEnum::getObjectName() const{ return getClassName(); } const char* FuzzyTermEnum::getClassName(){ return "FuzzyTermEnum"; } - bool FuzzyTermEnum::endEnum() { - //Func - Returns the fact if the current term in the enumeration has reached the end - //Pre - true - //Post - The boolean value of endEnum has been returned + bool FuzzyTermEnum::endEnum() { + return _endEnum; + } - return _endEnum; - } + void FuzzyTermEnum::close(){ - void 
FuzzyTermEnum::close(){ - //Func - Close the enumeration - //Pre - true - //Post - The enumeration has been closed + FilteredTermEnum::close(); - FilteredTermEnum::close(); - - //Finalize the searchTerm - _CLDECDELETE(searchTerm); - //Destroy e - _CLDELETE_ARRAY(e); + //Finalize the searchTerm + _CLDECDELETE(searchTerm); - _CLDELETE_CARRAY(text); + free(d); + d=NULL; - _CLDELETE_CARRAY(prefix); - } + _CLDELETE_CARRAY(text); - bool FuzzyTermEnum::termCompare(Term* term) { - //Func - Compares term with the searchTerm using the Levenshtein distance. - //Pre - term is NULL or term points to a Term - //Post - if pre(term) is NULL then false is returned otherwise - // if the distance of the current term in the enumeration is bigger than the FUZZY_THRESHOLD - // then true is returned - - if (term == NULL){ - return false; //Note that endEnum is not set to true! - } + _CLDELETE_CARRAY(prefix); + } - const TCHAR* termText = term->text(); - size_t termTextLen = term->textLength(); + bool FuzzyTermEnum::termCompare(Term* term) { + //Func - Compares term with the searchTerm using the Levenshtein distance. + //Pre - term is NULL or term points to a Term + //Post - if pre(term) is NULL then false is returned otherwise + // if the distance of the current term in the enumeration is bigger than the FUZZY_THRESHOLD + // then true is returned - //Check if the field name of searchTerm of term match - //(we can use == because fields are interned) - if ( searchTerm->field() == term->field() && - (prefixLength==0 || _tcsncmp(termText,prefix,prefixLength)==0 )) { + if (term == NULL){ + return false; //Note that endEnum is not set to true! 
+ } - const TCHAR* target = termText+prefixLength; - size_t targetLen = termTextLen-prefixLength; + const TCHAR* termText = term->text(); + const size_t termTextLen = term->textLength(); - //Calculate the Levenshtein distance - int32_t dist = editDistance(text, target, textLen, targetLen); - distance = 1 - ((float_t)dist / (float_t)cl_min(textLen, targetLen)); - return (distance > minimumSimilarity); - } + //Check if the field name of searchTerm of term match + //(we can use == because fields are interned) + if ( searchTerm->field() == term->field() && + (prefixLength==0 || _tcsncmp(termText,prefix,prefixLength)==0 )) { + + const TCHAR* target = termText+prefixLength; + const size_t targetLen = termTextLen-prefixLength; + _similarity = similarity(target, targetLen); + return (_similarity > minimumSimilarity); + + /* LEGACY: + //Calculate the Levenshtein distance + int32_t dist = editDistance(text, target, textLen, targetLen); + distance = 1 - ((float_t)dist / (float_t)cl_min(textLen, targetLen)); + return (distance > minimumSimilarity); + */ + } _endEnum = true; return false; - } + } - float_t FuzzyTermEnum::difference() { - //Func - Returns the difference between the distance and the fuzzy threshold - // multiplied by the scale factor - //Pre - true - //Post - The difference is returned + float_t FuzzyTermEnum::difference() { + return (float_t)((_similarity - minimumSimilarity) * scale_factor ); + } - return (float_t)((distance - minimumSimilarity) * scale_factor ); - } + // TODO: had synchronized in definition + float_t FuzzyTermEnum::similarity(const TCHAR* target, const size_t m) { + const size_t n = textLen; // TODO: remove after replacing n with textLen + if (n == 0) { + //we don't have anything to compare. That means if we just add + //the letters for m we get the new word + return prefixLength == 0 ? 0.0f : 1.0f - ((float_t) m / prefixLength); + } + if (m == 0) { + return prefixLength == 0 ? 
0.0f : 1.0f - ((float_t) n / prefixLength); + } + + const int32_t maxDistance = getMaxDistance(m); + + if (maxDistance < abs((int32_t)(m-n))) { + //just adding the characters of m to n or vice-versa results in + //too many edits + //for example "pre" length is 3 and "prefixes" length is 8. We can see that + //given this optimal circumstance, the edit distance cannot be less than 5. + //which is 8-3 or more precisesly Math.abs(3-8). + //if our maximum edit distance is 4, then we can discard this word + //without looking at it. + return 0.0f; + } + + //let's make sure we have enough room in our array to do the distance calculations. + //Check if the array must be reallocated because it is too small or does not exist + + // TODO: realloc should be able to allocate memory for NULL pointers; if thats the case the NULL + // check here is redundant + if (d == NULL){ + dWidth = cl_max(dWidth, n+1); + dHeight = cl_max(dHeight, m+1); + d = reinterpret_cast<int32_t*>(malloc(sizeof(int32_t)*dWidth*dHeight)); + } else if (dWidth <= n || dHeight <= m) { + //growDistanceArray + dWidth = cl_max(dWidth, n+1); + dHeight = cl_max(dHeight, m+1); + d = reinterpret_cast<int32_t*>(realloc(d, sizeof(int32_t)*dWidth*dHeight)); + } + + size_t i; // iterates through the source string + size_t j; // iterates through the target string + + // init matrix d + for (i = 0; i <= n; i++){ + d[i + (0*dWidth)] = i; + } + for (j = 0; j <= m; j++){ + d[0 + (j*dWidth)] = j; + } + + int32_t __t; //temporary variable for min3 + + // start computing edit distance + TCHAR s_i; // ith character of s + for (i = 1; i <= n; i++) { + int32_t bestPossibleEditDistance = m; + s_i = text[i - 1]; + for (j = 1; j <= m; j++) { + if (s_i != target[j-1]) { + min3(d[i-1 + (j*dWidth)], d[i + ((j-1)*dWidth)], d[i-1 + ((j-1)*dWidth)]); + d[i + (j*dWidth)] = __t+1; + } + else { + min3(d[i-1 + (j*dWidth)]+1, d[i + ((j-1)*dWidth)]+1, d[i-1 + ((j-1)*dWidth)]); + d[i + (j*dWidth)] = __t; + } + bestPossibleEditDistance = 
cl_min(bestPossibleEditDistance, d[i + (j*dWidth)]); + } + + //After calculating row i, the best possible edit distance + //can be found by finding the smallest value in a given column. + //If the bestPossibleEditDistance is greater than the max distance, abort. + + if (i > maxDistance && bestPossibleEditDistance > maxDistance) { //equal is okay, but not greater + //the closest the target can be to the text is just too far away. + //this target is leaving the party early. + return 0.0f; + } + } + + // this will return less than 0.0 when the edit distance is + // greater than the number of characters in the shorter word. + // but this was the formula that was previously used in FuzzyTermEnum, + // so it has not been changed (even though minimumSimilarity must be + // greater than 0.0) + return 1.0f - ((float_t)d[n + m*dWidth] / (float_t) (prefixLength + cl_min(n, m))); + } + + int32_t FuzzyTermEnum::getMaxDistance(const size_t m) { + return (m < LUCENE_TYPICAL_LONGEST_WORD_IN_INDEX) ? maxDistances[m] : calculateMaxDistance(m); + } + + void FuzzyTermEnum::initializeMaxDistances() { + for (int32_t i = 0; i < LUCENE_TYPICAL_LONGEST_WORD_IN_INDEX; i++) { + maxDistances[i] = calculateMaxDistance(i); + } + } + + int32_t FuzzyTermEnum::calculateMaxDistance(const size_t m) const { + return (int32_t) ((1-minimumSimilarity) * (cl_min(textLen, m) + prefixLength)); + } + /* LEGACY: + int32_t FuzzyTermEnum::editDistance(const TCHAR* s, const TCHAR* t, const int32_t n, const int32_t m) { + //Func - Calculates the Levenshtein distance also known as edit distance is a measure of similiarity + // between two strings where the distance is measured as the number of character + // deletions, insertions or substitutions required to transform one string to + // the other string. 
+ //Pre - s != NULL and contains the source string + // t != NULL and contains the target string + // n >= 0 and contains the length of the source string + // m >= 0 and containts the length of the target string + //Post - The distance has been returned - int32_t FuzzyTermEnum::editDistance(const TCHAR* s, const TCHAR* t, const int32_t n, const int32_t m) { - //Func - Calculates the Levenshtein distance also known as edit distance is a measure of similiarity - // between two strings where the distance is measured as the number of character - // deletions, insertions or substitutions required to transform one string to - // the other string. - //Pre - s != NULL and contains the source string - // t != NULL and contains the target string - // n >= 0 and contains the length of the source string - // m >= 0 and containts the length of th target string - //Post - The distance has been returned + CND_PRECONDITION(s != NULL, "s is NULL"); + CND_PRECONDITION(t != NULL, "t is NULL"); + CND_PRECONDITION(n >= 0," n is a negative number"); + CND_PRECONDITION(n >= 0," n is a negative number"); - CND_PRECONDITION(s != NULL, "s is NULL"); - CND_PRECONDITION(t != NULL, "t is NULL"); - CND_PRECONDITION(n >= 0," n is a negative number"); - CND_PRECONDITION(n >= 0," n is a negative number"); + int32_t i; // iterates through s + int32_t j; // iterates through t + TCHAR s_i; // ith character of s - int32_t i; // iterates through s - int32_t j; // iterates through t - TCHAR s_i; // ith character of s + if (n == 0) + return m; + if (m == 0) + return n; - if (n == 0) - return m; - if (m == 0) - return n; + //Check if the array must be reallocated because it is too small or does not exist + if (e == NULL || eWidth <= n || eHeight <= m) { + //Delete e if possible + _CLDELETE_ARRAY(e); + //resize e + eWidth = cl_max(eWidth, n+1); + eHeight = cl_max(eHeight, m+1); + e = _CL_NEWARRAY(int32_t,eWidth*eHeight); + } - //Check if the array must be reallocated because it is too small or does not 
exist - if (e == NULL || eWidth <= n || eHeight <= m) { - //Delete e if possible - _CLDELETE_ARRAY(e); - //resize e - eWidth = cl_max(eWidth, n+1); - eHeight = cl_max(eHeight, m+1); - e = _CL_NEWARRAY(int32_t,eWidth*eHeight); - } - - CND_CONDITION(e != NULL,"e is NULL"); + CND_CONDITION(e != NULL,"e is NULL"); - // init matrix e - for (i = 0; i <= n; i++){ - e[i + (0*eWidth)] = i; - } - for (j = 0; j <= m; j++){ - e[0 + (j*eWidth)] = j; - } + // init matrix e + for (i = 0; i <= n; i++){ + e[i + (0*eWidth)] = i; + } + for (j = 0; j <= m; j++){ + e[0 + (j*eWidth)] = j; + } - int32_t __t; //temporary variable for min3 + int32_t __t; //temporary variable for min3 - // start computing edit distance - for (i = 1; i <= n; i++) { - s_i = s[i - 1]; - for (j = 1; j <= m; j++) { - if (s_i != t[j-1]){ - min3(e[i + (j*eWidth) - 1], e[i + ((j-1)*eWidth)], e[i + ((j-1)*eWidth)-1]); - e[i + (j*eWidth)] = __t+1; - }else{ - min3(e[i + (j*eWidth) -1]+1, e[i + ((j-1)*eWidth)]+1, e[i + ((j-1)*eWidth)-1]); - e[i + (j*eWidth)] = __t; + // start computing edit distance + for (i = 1; i <= n; i++) { + s_i = s[i - 1]; + for (j = 1; j <= m; j++) { + if (s_i != t[j-1]){ + min3(e[i + (j*eWidth) - 1], e[i + ((j-1)*eWidth)], e[i + ((j-1)*eWidth)-1]); + e[i + (j*eWidth)] = __t+1; + }else{ + min3(e[i + (j*eWidth) -1]+1, e[i + ((j-1)*eWidth)]+1, e[i + ((j-1)*eWidth)-1]); + e[i + (j*eWidth)] = __t; + } } - } - } + } - // we got the result! - return e[n + ((m)*eWidth)]; - } + // we got the result! + return e[n + ((m)*eWidth)]; + }*/ + class FuzzyQuery::ScoreTerm { + public: + Term* term; + float_t score; - /** - * Create a new FuzzyQuery that will match terms with a similarity - * of at least <code>minimumSimilarity</code> to <code>term</code>. - * If a <code>prefixLength</code> > 0 is specified, a common prefix - * of that length is also required. 
- * - * @param term the term to search for - * @param minimumSimilarity a value between 0 and 1 to set the required similarity - * between the query term and the matching terms. For example, for a - * <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length - * as the query term is considered similar to the query term if the edit distance - * between both terms is less than <code>length(term)*0.5</code> - * @param prefixLength length of common (non-fuzzy) prefix - * @throws IllegalArgumentException if minimumSimilarity is > 1 or < 0 - * or if prefixLength < 0 or > <code>term.text().length()</code>. - */ - FuzzyQuery::FuzzyQuery(Term* term, float_t minimumSimilarity, size_t prefixLength): - MultiTermQuery(term) + ScoreTerm(Term* _term, float_t _score):term(_term),score(_score){ + } + virtual ~ScoreTerm(){ + } + }; + + class FuzzyQuery::ScoreTermQueue : public PriorityQueue<ScoreTerm*, CL_NS(util)::Deletor::Object<ScoreTerm> > { + public: + ScoreTermQueue(int32_t size){ + initialize(size, true); + } + virtual ~ScoreTermQueue(){ + } + + protected: + bool lessThan(ScoreTerm* termA, ScoreTerm* termB) { + if (termA->score == termB->score) + return termA->term->compareTo(termB->term) > 0; + else + return termA->score < termB->score; + } + }; + + + FuzzyQuery::FuzzyQuery(Term* term, float_t _minimumSimilarity, size_t _prefixLength): + MultiTermQuery(term),minimumSimilarity(_minimumSimilarity),prefixLength(_prefixLength) { - //Func - Constructor - //Pre - term != NULL - //Post - The instance has been created - if ( minimumSimilarity < 0 ) - minimumSimilarity = defaultMinSimilarity; + if ( minimumSimilarity < 0 ) + minimumSimilarity = defaultMinSimilarity; - CND_PRECONDITION(term != NULL,"term is NULL"); + CND_PRECONDITION(term != NULL,"term is NULL"); - if (minimumSimilarity > 1.0f) - _CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity > 1"); - else if (minimumSimilarity < 0.0f) + if (minimumSimilarity >= 1.0f) + 
_CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity >= 1"); + else if (minimumSimilarity < 0.0f) _CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity < 0"); - - this->minimumSimilarity = minimumSimilarity; - - if(prefixLength >= term->textLength()) - _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength >= term.textLength()"); - this->prefixLength = prefixLength; + if (prefixLength < 0) + _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength < 0"); - } + /* + TODO: Not in original Java version + if(prefixLength >= term->textLength()) + _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength >= term.textLength()"); + */ + } - - float_t FuzzyQuery::defaultMinSimilarity = 0.5f; - int32_t FuzzyQuery::defaultPrefixLength = 0; + float_t FuzzyQuery::defaultMinSimilarity = 0.5f; + int32_t FuzzyQuery::defaultPrefixLength = 0; - FuzzyQuery::~FuzzyQuery(){ - //Func - Destructor - //Pre - true - //Post - Instance has been destroyed - } + FuzzyQuery::~FuzzyQuery(){ + } - TCHAR* FuzzyQuery::toString(const TCHAR* field) const{ - StringBuffer buffer(100, false); // TODO: Have a better estimation for the initial buffer length - Term* term = getTerm(false); // no need to increase ref count - if ( field==NULL || _tcscmp(term->field(),field)!=0 ) { - buffer.append(term->field()); - buffer.append( _T(":")); - } - buffer.append(term->text()); - buffer.append( _T("~") ); - buffer.appendFloat(minimumSimilarity,1); - // todo: use ToStringUtils.boost() - if (getBoost() != 1.0f) { - buffer.appendChar ( '^' ); - buffer.appendFloat( getBoost(),1); - } - return buffer.getBuffer(); - } + float_t FuzzyQuery::getMinSimilarity() const { + return minimumSimilarity; + } + size_t FuzzyQuery::getPrefixLength() const { + return prefixLength; + } + + TCHAR* FuzzyQuery::toString(const TCHAR* field) const{ + StringBuffer buffer(100, false); // TODO: Have a better estimation for the initial buffer length + Term* term = getTerm(false); // no need to increase ref count + if ( field==NULL || _tcscmp(term->field(),field)!=0 ) { + 
buffer.append(term->field()); + buffer.appendChar( _T(':')); + } + buffer.append(term->text()); + buffer.appendChar( _T('~') ); + buffer.appendFloat(minimumSimilarity,1); + buffer.appendBoost(getBoost()); + return buffer.getBuffer(); + } + const char* FuzzyQuery::getObjectName() const{ - //Func - Returns the name of the query - //Pre - true - //post - The string FuzzyQuery has been returned + //Func - Returns the name of the query + //Pre - true + //post - The string FuzzyQuery has been returned - return getClassName(); + return getClassName(); } const char* FuzzyQuery::getClassName(){ - //Func - Returns the name of the query - //Pre - true - //post - The string FuzzyQuery has been returned + //Func - Returns the name of the query + //Pre - true + //post - The string FuzzyQuery has been returned - return "FuzzyQuery"; + return "FuzzyQuery"; } - - /** - * Returns the minimum similarity that is required for this query to match. - * @return float value between 0.0 and 1.0 - */ - float_t FuzzyQuery::getMinSimilarity() const { - return minimumSimilarity; - } - FuzzyQuery::FuzzyQuery(const FuzzyQuery& clone): - MultiTermQuery(clone) - { + MultiTermQuery(clone) + { this->minimumSimilarity = clone.getMinSimilarity(); this->prefixLength = clone.getPrefixLength(); - - //if(prefixLength < 0) - // _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength < 0"); - //else - if(prefixLength >= clone.getTerm()->textLength()) - _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength >= term.textLength()"); - } + //if(prefixLength < 0) + // _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength < 0"); + //else + if(prefixLength >= clone.getTerm()->textLength()) + _CLTHROWA(CL_ERR_IllegalArgument,"prefixLength >= term.textLength()"); + } + Query* FuzzyQuery::clone() const{ - return _CLNEW FuzzyQuery(*this); - } - size_t FuzzyQuery::hashCode() const{ - //todo: we should give the query a seeding value... 
but - //need to do it for all hascode functions - size_t val = Similarity::floatToByte(getBoost()) ^ getTerm()->hashCode(); - val ^= Similarity::floatToByte(this->getMinSimilarity()); - val ^= this->getPrefixLength(); - return val; - } - bool FuzzyQuery::equals(Query* other) const{ - if (!(other->instanceOf(FuzzyQuery::getClassName()))) - return false; + return _CLNEW FuzzyQuery(*this); + } + size_t FuzzyQuery::hashCode() const{ + //todo: we should give the query a seeding value... but + //need to do it for all hascode functions + // TODO: does not conform with JL + size_t val = Similarity::floatToByte(getBoost()) ^ getTerm()->hashCode(); + val ^= Similarity::floatToByte(this->getMinSimilarity()); + val ^= this->getPrefixLength(); + return val; + } + bool FuzzyQuery::equals(Query* other) const{ + if (this == other) return true; + if (!(other->instanceOf(FuzzyQuery::getClassName()))) + return false; - FuzzyQuery* fq = (FuzzyQuery*)other; - return (this->getBoost() == fq->getBoost()) - && this->getMinSimilarity() == fq->getMinSimilarity() - && this->getPrefixLength() == fq->getPrefixLength() - && getTerm()->equals(fq->getTerm()); - } - - /** - * Returns the prefix length, i.e. the number of characters at the start - * of a term that must be identical (not fuzzy) to the query term if the query - * is to match that term. 
- */ - size_t FuzzyQuery::getPrefixLength() const { - return prefixLength; + FuzzyQuery* fq = static_cast<FuzzyQuery*>(other); + return (this->getBoost() == fq->getBoost()) + && this->minimumSimilarity == fq->getMinSimilarity() + && this->prefixLength == fq->getPrefixLength() + && getTerm()->equals(fq->getTerm()); } - + FilteredTermEnum* FuzzyQuery::getEnum(IndexReader* reader){ Term* term = getTerm(false); FuzzyTermEnum* ret = _CLNEW FuzzyTermEnum(reader, term, minimumSimilarity, prefixLength); return ret; } - /* Query* FuzzyQuery::rewrite(IndexReader* reader) { FilteredTermEnum* enumerator = getEnum(reader); - int32_t maxClauseCount = BooleanQuery::getMaxClauseCount(); + const int32_t maxClauseCount = BooleanQuery::getMaxClauseCount(); ScoreTermQueue* stQueue = _CLNEW ScoreTermQueue(maxClauseCount); ScoreTerm* reusableST = NULL; @@ -375,7 +468,7 @@ do { float_t score = 0.0f; Term* t = enumerator->term(); - if (t != null) { + if (t != NULL) { score = enumerator->difference(); if (reusableST == NULL) { reusableST = _CLNEW ScoreTerm(t, score); @@ -389,27 +482,28 @@ continue; } - reusableST = (ScoreTerm) stQueue->insertWithOverflow(reusableST); + reusableST = stQueue->insertWithOverflow(reusableST); } } while (enumerator->next()); } _CLFINALLY({ enumerator->close(); - _CLDELETE(enumerator); - } + _CLLDELETE(enumerator); + }) - BooleanQuery query = _CLNEW BooleanQuery(true); - int size = stQueue->size(); - for(int i = 0; i < size; i++){ - ScoreTerm* st = (ScoreTerm) stQueue->pop(); - TermQuery* tq = new TermQuery(st.term); // found a match - tq->setBoost(getBoost() * st.score); // set the boost - query->add(tq, BooleanClause.Occur.SHOULD); // add to query + BooleanQuery* query = _CLNEW BooleanQuery(true); + const size_t size = stQueue->size(); + for(size_t i = 0; i < size; i++){ + ScoreTerm* st = stQueue->pop(); + TermQuery* tq = _CLNEW TermQuery(st->term); // found a match + tq->setBoost(getBoost() * st->score); // set the boost + query->add(tq, 
BooleanClause::SHOULD); // add to query } + _CLLDELETE(stQueue); - _CLDELETE(reusableST); + //_CLDELETE(reusableST); return query; - }*/ + } CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h 2009-07-08 09:53:49 UTC (rev 3012) +++ branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h 2009-07-08 09:54:46 UTC (rev 3013) @@ -7,28 +7,30 @@ #ifndef _lucene_search_FuzzyQuery_ #define _lucene_search_FuzzyQuery_ - -//#include "CLucene/index/IndexReader.h" -CL_CLASS_DEF(index,Term) -//#include "MultiTermQuery.h" #include "MultiTermQuery.h" #include "FilteredTermEnum.h" +CL_CLASS_DEF(index,Term) CL_NS_DEF(search) - // class FuzzyQuery implements the fuzzy search query - class CLUCENE_EXPORT FuzzyQuery: public MultiTermQuery { - private: - float_t minimumSimilarity; - size_t prefixLength; - protected: - FuzzyQuery(const FuzzyQuery& clone); - public: - static float_t defaultMinSimilarity; - static int32_t defaultPrefixLength; +/** Implements the fuzzy search query. The similiarity measurement +* is based on the Levenshtein (edit distance) algorithm. +*/ +class CLUCENE_EXPORT FuzzyQuery: public MultiTermQuery { +private: + class ScoreTerm; + class ScoreTermQueue; - /** + float_t minimumSimilarity; + size_t prefixLength; +protected: + FuzzyQuery(const FuzzyQuery& clone); +public: + static float_t defaultMinSimilarity; + static int32_t defaultPrefixLength; + + /** * Create a new FuzzyQuery that will match terms with a similarity * of at least <code>minimumSimilarity</code> to <code>term</code>. * If a <code>prefixLength</code> > 0 is specified, a common prefix @@ -44,115 +46,192 @@ * @throws IllegalArgumentException if minimumSimilarity is > 1 or < 0 * or if prefixLength < 0 or > <code>term.text().length()</code>. 
*/ - FuzzyQuery(CL_NS(index)::Term* term, float_t minimumSimilarity=-1, size_t prefixLength=0); - //Destructor - ~FuzzyQuery(); + FuzzyQuery(CL_NS(index)::Term* term, float_t minimumSimilarity=-1, size_t prefixLength=0); + virtual ~FuzzyQuery(); - TCHAR* toString(const TCHAR* field) const; + /** + * Returns the minimum similarity that is required for this query to match. + * @return float value between 0.0 and 1.0 + */ + float_t getMinSimilarity() const; - //Returns the name "FuzzyQuery" - static const char* getClassName(); - const char* getObjectName() const; + /** + * Returns the prefix length, i.e. the number of characters at the start + * of a term that must be identical (not fuzzy) to the query term if the query + * is to match that term. + */ + size_t getPrefixLength() const; - Query* clone() const; - bool equals(Query * other) const; - size_t hashCode() const; + Query* rewrite(CL_NS(index)::IndexReader* reader); - /** - * Returns the minimum similarity that is required for this query to match. - * @return float value between 0.0 and 1.0 - */ - float_t getMinSimilarity() const; + TCHAR* toString(const TCHAR* field) const; - /** - * Returns the prefix length, i.e. the number of characters at the start - * of a term that must be identical (not fuzzy) to the query term if the query - * is to match that term. - */ - size_t getPrefixLength() const; + //Returns the name "FuzzyQuery" + static const char* getClassName(); + const char* getObjectName() const; - //Query* FuzzyQuery::rewrite(IndexReader* reader) + Query* clone() const; + bool equals(Query * other) const; + size_t hashCode() const; - protected: - FilteredTermEnum* getEnum(CL_NS(index)::IndexReader* reader); - }; +protected: + FilteredTermEnum* getEnum(CL_NS(index)::IndexReader* reader); +}; /** Subclass of FilteredTermEnum for enumerating all terms that are similiar - * to the specified filter term. - * - * <p>Term enumerations are always ordered by Term.compareTo(). 
Each term in - * the enumeration is greater than all that precede it. - */ +* to the specified filter term. +* +* <p>Term enumerations are always ordered by Term.compareTo(). Each term in +* the enumeration is greater than all that precede it. +*/ class CLUCENE_EXPORT FuzzyTermEnum: public FilteredTermEnum { - private: - float_t distance; - bool _endEnum; +private: + /* Allows us save time required to create a new array + * everytime similarity is called. + */ + int32_t* d; + size_t dWidth; + size_t dHeight; - CL_NS(index)::Term* searchTerm; - TCHAR* text; - size_t textLen; - TCHAR* prefix; - size_t prefixLength; - float_t minimumSimilarity; - double scale_factor; + //float_t distance; + float_t _similarity; + bool _endEnum; - - /** - * This static array saves us from the time required to create a new array - * everytime editDistance is called. - */ - int32_t* e; - int32_t eWidth; - int32_t eHeight; + CL_NS(index)::Term* searchTerm; + //String field; + TCHAR* text; + size_t textLen; + TCHAR* prefix; + size_t prefixLength; - /****************************** - * Compute Levenshtein distance - ******************************/ - - /** - Levenshtein distance also known as edit distance is a measure of similiarity - between two strings where the distance is measured as the number of character - deletions, insertions or substitutions required to transform one string to - the other string. - <p>This method takes in four parameters; two strings and their respective - lengths to compute the Levenshtein distance between the two strings. - The result is returned as an integer. - */ - int32_t editDistance(const TCHAR* s, const TCHAR* t, const int32_t n, const int32_t m) ; + float_t minimumSimilarity; + double scale_factor; + int32_t maxDistances[LUCENE_TYPICAL_LONGEST_WORD_IN_INDEX]; - protected: - /** - * The termCompare method in FuzzyTermEnum uses Levenshtein distance to - * calculate the distance between the given term and the comparing term. 
- */ - bool termCompare(CL_NS(index)::Term* term) ; - - ///Returns the fact if the current term in the enumeration has reached the end - bool endEnum(); - public: - - /** - * Empty prefix and minSimilarity of 0.5f are used. - * - * @param reader - * @param term - * @throws IOException - * @see #FuzzyTermEnum(IndexReader, Term, float_t, int32_t) - */ - FuzzyTermEnum(CL_NS(index)::IndexReader* reader, CL_NS(index)::Term* term, float_t minSimilarity=FuzzyQuery::defaultMinSimilarity, size_t prefixLength=0); - /** Destructor */ - ~FuzzyTermEnum(); - /** Close the enumeration */ - void close(); - - /** Returns the difference between the distance and the fuzzy threshold - * multiplied by the scale factor - */ - float_t difference(); - - const char* getObjectName() const; - static const char* getClassName(); - }; + + /* LEGACY: + int32_t* e; + int32_t eWidth; + int32_t eHeight; + ** + Levenshtein distance also known as edit distance is a measure of similiarity + between two strings where the distance is measured as the number of character + deletions, insertions or substitutions required to transform one string to + the other string. + <p>This method takes in four parameters; two strings and their respective + lengths to compute the Levenshtein distance between the two strings. + The result is returned as an integer. + * + int32_t editDistance(const TCHAR* s, const TCHAR* t, const int32_t n, const int32_t m); + */ + + /****************************** + * Compute Levenshtein distance + ******************************/ + + /** + * <p>Similarity returns a number that is 1.0f or less (including negative numbers) + * based on how similar the Term is compared to a target term. 
It returns + * exactly 0.0f when + * <pre> + * editDistance < maximumEditDistance</pre> + * Otherwise it returns: + * <pre> + * 1 - (editDistance / length)</pre> + * where length is the length of the shortest term (text or target) including a + * prefix that are identical and editDistance is the Levenshtein distance for + * the two words.</p> + * + * <p>Embedded within this algorithm is a fail-fast Levenshtein distance + * algorithm. The fail-fast algorithm differs from the standard Levenshtein + * distance algorithm in that it is aborted if it is discovered that the + * mimimum distance between the words is greater than some threshold. + * + * <p>To calculate the maximum distance threshold we use the following formula: + * <pre> + * (1 - minimumSimilarity) * length</pre> + * where length is the shortest term including any prefix that is not part of the + * similarity comparision. This formula was derived by solving for what maximum value + * of distance returns false for the following statements: + * <pre> + * similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen))); + * return (similarity > minimumSimilarity);</pre> + * where distance is the Levenshtein distance for the two words. + * </p> + * <p>Levenshtein distance (also known as edit distance) is a measure of similiarity + * between two strings where the distance is measured as the number of character + * deletions, insertions or substitutions required to transform one string to + * the other string. + * @param target the target word or phrase + * @return the similarity, 0.0 or less indicates that it matches less than the required + * threshold and 1.0 indicates that the text and target are identical + */ + float_t similarity(const TCHAR* target, const size_t targetLen); + + /** + * Grow the second dimension of the array, so that we can calculate the + * Levenshtein difference. 
+ */ + /* + void growDistanceArray(int32_t m) { + for (int i = 0; i < d.length; i++) { + d[i] = new int[m+1]; + } + }*/ + + /** + * The max Distance is the maximum Levenshtein distance for the text + * compared to some other value that results in score that is + * better than the minimum similarity. + * @param m the length of the "other value" + * @return the maximum levenshtein distance that we care about + */ + int32_t getMaxDistance(const size_t m); + + void initializeMaxDistances(); + + int32_t calculateMaxDistance(const size_t m) const; + +protected: + /** + * The termCompare method in FuzzyTermEnum uses Levenshtein distance to + * calculate the distance between the given term and the comparing term. + */ + bool termCompare(CL_NS(index)::Term* term) ; + + /** Returns the fact if the current term in the enumeration has reached the end */ + bool endEnum(); +public: + + /** + * Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of + * length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity > + * <code>minSimilarity</code>. + * <p> + * After calling the constructor the enumeration is already pointing to the first + * valid term if such a term exists. + * + * @param reader Delivers terms. + * @param term Pattern term. + * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f. + * @param prefixLength Length of required common prefix. Default value is 0. 
+ * @throws IOException + */ + FuzzyTermEnum(CL_NS(index)::IndexReader* reader, CL_NS(index)::Term* term, float_t minSimilarity=FuzzyQuery::defaultMinSimilarity, size_t prefixLength=0); + virtual ~FuzzyTermEnum(); + + /** Close the enumeration */ + void close(); + + /** Returns the difference between the distance and the fuzzy threshold + * multiplied by the scale factor + */ + float_t difference(); + + const char* getObjectName() const; + static const char* getClassName(); +}; + CL_NS_END #endif Modified: branches/lucene2_3_2/src/core/CLucene/search/_PhraseQueue.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/search/_PhraseQueue.h 2009-07-08 09:53:49 UTC (rev 3012) +++ branches/lucene2_3_2/src/core/CLucene/search/_PhraseQueue.h 2009-07-08 09:54:46 UTC (rev 3013) @@ -18,7 +18,7 @@ PhraseQueue(const int32_t size) { initialize(size,false); } - ~PhraseQueue(){ + virtual ~PhraseQueue(){ } protected: Modified: branches/lucene2_3_2/src/core/CLucene/util/PriorityQueue.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/PriorityQueue.h 2009-07-08 09:53:49 UTC (rev 3012) +++ branches/lucene2_3_2/src/core/CLucene/util/PriorityQueue.h 2009-07-08 09:54:46 UTC (rev 3013) @@ -54,11 +54,7 @@ } protected: - PriorityQueue(){ - this->_size = 0; - this->dk = false; - this->heap = NULL; - this->maxSize = 0; + PriorityQueue():_size(0),dk(false),maxSize(0),heap(NULL){ } // Determines the ordering of objects in this priority queue. 
Subclasses Modified: branches/lucene2_3_2/src/core/files_list.txt =================================================================== --- branches/lucene2_3_2/src/core/files_list.txt 2009-07-08 09:53:49 UTC (rev 3012) +++ branches/lucene2_3_2/src/core/files_list.txt 2009-07-08 09:54:46 UTC (rev 3013) @@ -39,7 +39,7 @@ DONE ISH queryParser\MultiFieldQueryParser.java - Some tests are missing DONE ISH queryParser\ParseException.java - Done, integrated within QueryParser as functions (no special Exception class required) -DONE ISH queryParser\QueryParser.java - Missing Locale and Calendar support (for RangeQuery), ConstantScoreRangeQuery, MultiPhraseQuery, and some tests. _tcstod. +DONE ISH queryParser\QueryParser.java - Missing Locale and Calendar support (for RangeQuery), ConstantScoreRangeQuery, and some tests. _tcstod. IRRELEVANT queryParser\QueryParser.jj DONE ISH queryParser\QueryParserConstants.java DONE ISH queryParser\QueryParserTokenManager.java - PrintStream implementation is missing (if at all necessary) @@ -64,12 +64,12 @@ ? search\FieldDoc.java ? search\FieldDocSortedHitQueue.java ? search\FieldSortedHitQueue.java -DONE ISH search\Filter.java - Remove virtual toString once CachingWrapperFilter and ChainedFilter (does not exist in JL?) conform to JL +DONE ISH search\Filter.java - Remove virtual toString once CachingWrapperFilter and ChainedFilter (does not exist in JL?) conform to JL ? search\FilteredQuery.java -? search\FilteredTermEnum.java +DONE ISH search\FilteredTermEnum.java - Can we mend term(void) and term(bool) ? ? search\FilterManager.java -? search\FuzzyQuery.java -? search\FuzzyTermEnum.java +DONE ISH search\FuzzyQuery.java - See TODOs. +DONE ISH search\FuzzyTermEnum.java - See TODOs. Also, Old similarity code is commented out and marked as "legacy". It looks like some optimizations were made there, but since Fuzzy queries weren't working as they should we had to revert to Java's implementation. 
Perhaps after tests are complete we could try and get the optimized version to work again. ? search\Hit.java ? search\HitCollector.java ? search\HitIterator.java @@ -107,6 +107,7 @@ get rid of Misc.h, repl_* check up on sub-folders bug (analysis/standard) MapViewOfFile issues (cmake not picking up functions in kernel32) +Use safe CRT where possible. For example, make _tcsdup / stringDuplicate require n and call the _s version if cmake realizes it exists Misc TODOs: Update jstreams from latest code of Strigi This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 09:53:53
|
Revision: 3012 http://clucene.svn.sourceforge.net/clucene/?rev=3012&view=rev Author: ustramooner Date: 2009-07-08 09:53:49 +0000 (Wed, 08 Jul 2009) Log Message: ----------- working... Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.cpp branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h branches/lucene2_3_2/src/core/CLucene/index/MultiSegmentReader.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp branches/lucene2_3_2/src/core/CLucene/index/TermInfosWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/_MultiSegmentReader.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h branches/lucene2_3_2/src/core/CLucene/index/_TermInfosWriter.h branches/lucene2_3_2/src/core/CLucene/util/Array.h branches/lucene2_3_2/src/test/index/TestIndexReader.cpp Modified: branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.cpp 2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.cpp 2009-07-08 09:53:49 UTC (rev 3012) @@ -207,7 +207,14 @@ return this; } FindSegmentsFile_Reopen runner(closeDirectory, deletionPolicy, _directory, this); - return runner.run(); + IndexReader* ret = runner.run(); + + //disown this memory... 
+ this->writeLock = NULL; + this->_directory = NULL; + this->deletionPolicy = NULL; + + return ret; } void DirectoryIndexReader::setDeletionPolicy(IndexDeletionPolicy* deletionPolicy) { Modified: branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp 2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp 2009-07-08 09:53:49 UTC (rev 3012) @@ -400,6 +400,7 @@ }catch (CLuceneError& err){ _CLDELETE(data); _CLDELETE(b); + throw err; } _CLDELETE(b); fieldsData = data; Modified: branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h 2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h 2009-07-08 09:53:49 UTC (rev 3012) @@ -183,24 +183,21 @@ * <p> * If the index has not changed since this instance was (re)opened, then this * call is a NOOP and returns this instance. Otherwise, a new instance is - * returned. The old instance is <b>not</b> closed and remains usable.<br> - * <b>Note:</b> The re-opened reader instance and the old instance might share - * the same resources. For this reason no index modification operations - * (e. g. {@link #deleteDocument(int)}, {@link #setNorm(int, String, byte)}) - * should be performed using one of the readers until the old reader instance - * is closed. <b>Otherwise, the behavior of the readers is undefined.</b> + * returned. The old instance <B>is</b> closed (unlink JLucene) and must + * be deleted<br> * <p> * You can determine whether a reader was actually reopened by comparing the * old instance with the instance returned by this method: * <pre> - * IndexReader reader = ... + * IndexReader* reader = ... * ... 
- * IndexReader new = r.reopen(); - * if (new != reader) { + * IndexReader* newreader = r->reopen(); + * if (newreader != reader) { * ... // reader was reopened - * reader.close(); + * reader->close(); + * _CLDELETE(reader); * } - * reader = new; + * reader = newreader; * ... * </pre> * Modified: branches/lucene2_3_2/src/core/CLucene/index/MultiSegmentReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/MultiSegmentReader.cpp 2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/index/MultiSegmentReader.cpp 2009-07-08 09:53:49 UTC (rev 3012) @@ -44,6 +44,7 @@ } MultiSegmentReader::MultiSegmentReader(CL_NS(store)::Directory* directory, SegmentInfos* sis, bool closeDirectory): + normsCache(NormsCacheType(true,true)), DirectoryIndexReader(directory,sis,closeDirectory) { // To reduce the chance of hitting FileNotFound @@ -81,6 +82,7 @@ CL_NS(util)::ArrayBase<IndexReader*>* oldReaders, int32_t* oldStarts, NormsCacheType* oldNormsCache): + normsCache(NormsCacheType(true,true)), DirectoryIndexReader(directory, infos, closeDirectory) { // we put the old SegmentReaders in a map, that allows us Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp 2009-07-08 09:53:49 UTC (rev 3012) @@ -767,6 +767,7 @@ //Get an IndexInput to the norm file for this field in this segment if ( normBuffer.length < maxDoc ){ normBuffer.resize(maxDoc); + memset(normBuffer.values,0,sizeof(uint8_t) * maxDoc); } if (!reader->hasDeletions()) { Modified: branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp 2009-07-08 
09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp 2009-07-08 09:53:49 UTC (rev 3012) @@ -131,7 +131,6 @@ this->fieldsReader = NULL; this->cfsReader = NULL; this->storeCFSReader = NULL; - this->referencedSegmentReader = NULL; this->segment = si->name; this->si = si; @@ -345,13 +344,6 @@ // tis != NULL //Post - All streams to files have been closed - bool hasReferencedReader = (referencedSegmentReader != NULL); - - if (hasReferencedReader) { - referencedSegmentReader->decRefReaderNotNorms(); - _CLDELETE(referencedSegmentReader); - } - _CLDELETE(deletedDocs); // close the single norms stream @@ -369,42 +361,39 @@ _CLDELETE(fieldsReader); } - if (!hasReferencedReader) { - // close everything, nothing is shared anymore with other readers - if (tis != NULL) { - tis->close(); - _CLDELETE(tis); - } + if (tis != NULL) { + tis->close(); + _CLDELETE(tis); + } - //Close the frequency stream - if (freqStream != NULL){ - freqStream->close(); - _CLDELETE(freqStream); - } - //Close the prox stream - if (proxStream != NULL){ - proxStream->close(); - _CLDELETE(proxStream); - } + //Close the frequency stream + if (freqStream != NULL){ + freqStream->close(); + _CLDELETE(freqStream); + } + //Close the prox stream + if (proxStream != NULL){ + proxStream->close(); + _CLDELETE(proxStream); + } - if (termVectorsReaderOrig != NULL){ - termVectorsReaderOrig->close(); - _CLDELETE(termVectorsReaderOrig); - } + if (termVectorsReaderOrig != NULL){ + termVectorsReaderOrig->close(); + _CLDELETE(termVectorsReaderOrig); + } - if (cfsReader != NULL){ - cfsReader->close(); - _CLDECDELETE(cfsReader); - } + if (cfsReader != NULL){ + cfsReader->close(); + _CLDECDELETE(cfsReader); + } - if (storeCFSReader != NULL){ - storeCFSReader->close(); - _CLDELETE(storeCFSReader); - } - - // maybe close directory - DirectoryIndexReader::doClose(); + if (storeCFSReader != NULL){ + storeCFSReader->close(); + _CLDELETE(storeCFSReader); } + + // maybe close directory + 
DirectoryIndexReader::doClose(); } bool SegmentReader::hasDeletions() const{ @@ -771,9 +760,9 @@ newReader = SegmentReader::get(infos, infos->info(0), false); } } else { - ArrayBase<IndexReader*>* readers = _CLNEW ObjectArray<IndexReader>(1); - readers->values[0] = this; - return _CLNEW MultiSegmentReader(_directory, infos, closeDirectory, readers, NULL, NULL); + ValueArray<IndexReader*> readers(1); + readers.values[0] = this; + return _CLNEW MultiSegmentReader(_directory, infos, closeDirectory, &readers, NULL, NULL); } return newReader; @@ -1005,7 +994,6 @@ clone = _CLNEW SegmentReader(); clone->init(_directory, NULL, false); clone->initialize(si, readBufferSize, false, true); - clone->_directory = _directory; clone->cfsReader = cfsReader; clone->storeCFSReader = storeCFSReader; clone->_fieldInfos = _fieldInfos; @@ -1092,26 +1080,30 @@ success = true; } _CLFINALLY ( - if (this->referencedSegmentReader != NULL) { - // this reader shares resources with another SegmentReader, - // so we increment the other readers refCount. 
We don't - // increment the refCount of the norms because we did - // that already for the shared norms - clone->referencedSegmentReader = this->referencedSegmentReader; - referencedSegmentReader->incRefReaderNotNorms(); - } else { - // this reader wasn't reopened, so we increment this - // readers refCount - clone->referencedSegmentReader = this; - incRefReaderNotNorms(); - } - if (!success) { // An exception occured during reopen, we have to decRef the norms // that we incRef'ed already and close singleNormsStream and FieldsReader clone->decRef(); } ) + + //disown this memory + this->freqStream = NULL; + this->_fieldInfos = NULL; + this->fieldsReader = NULL; + this->tis = NULL; + this->deletedDocs = NULL; + this->ones = NULL; + this->termVectorsReaderOrig = NULL; + this->cfsReader = NULL; + this->singleNormStream = NULL; + this->fieldsReader = NULL; + this->tis = NULL; + this->freqStream = NULL; + this->proxStream = NULL; + this->termVectorsReaderOrig = NULL; + this->cfsReader = NULL; + this->storeCFSReader = NULL; return clone; } Modified: branches/lucene2_3_2/src/core/CLucene/index/TermInfosWriter.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/TermInfosWriter.cpp 2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/index/TermInfosWriter.cpp 2009-07-08 09:53:49 UTC (rev 3012) @@ -62,8 +62,6 @@ maxSkipLevels = 10; lastTermTextLength = 0; lastFieldNumber = -1; - termTextBuffer = NULL; - termTextBufferLen = 0; lastTi = _CLNEW TermInfo(); @@ -98,16 +96,12 @@ void TermInfosWriter::add(Term* term, TermInfo* ti){ const size_t length = term->textLength(); - if ( termTextBuffer == NULL ){ - termTextBufferLen = (int32_t)(length*1.25); - termTextBuffer = (TCHAR*)malloc(sizeof(TCHAR) * termTextBufferLen); - }else if (termTextBufferLen < length){ - termTextBufferLen = (int32_t)(length*1.25); - termTextBuffer = (TCHAR*)realloc(termTextBuffer, sizeof(TCHAR) * termTextBufferLen); + 
if ( termTextBuffer.values == NULL || termTextBuffer.length < length ){ + termTextBuffer.resize( (int32_t)(length*1.25) ); } - _tcsncpy(termTextBuffer, term->text(), length); + _tcsncpy(termTextBuffer.values, term->text(), length); - add(fieldInfos->fieldNumber(term->field()), termTextBuffer, length, ti); + add(fieldInfos->fieldNumber(term->field()), termTextBuffer.values, length, ti); } // Currently used only by assert statement Modified: branches/lucene2_3_2/src/core/CLucene/index/_MultiSegmentReader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_MultiSegmentReader.h 2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/index/_MultiSegmentReader.h 2009-07-08 09:53:49 UTC (rev 3012) @@ -61,7 +61,7 @@ MultiSegmentReader(CL_NS(store)::Directory* directory, SegmentInfos* sis, bool closeDirectory); /** This contructor is only used for {@link #reopen()} */ - MultiSegmentReader( + CLUCENE_LOCAL_DECL MultiSegmentReader( CL_NS(store)::Directory* directory, SegmentInfos* sis, bool closeDirectory, Modified: branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h 2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h 2009-07-08 09:53:49 UTC (rev 3012) @@ -238,10 +238,6 @@ CompoundFileReader* cfsReader; CompoundFileReader* storeCFSReader; - // indicates the SegmentReader with which the resources are being shared, - // in case this is a re-opened reader - SegmentReader* referencedSegmentReader; - ///Reads the Field Info file FieldsReader* fieldsReader; TermVectorsReader* termVectorsReaderOrig; Modified: branches/lucene2_3_2/src/core/CLucene/index/_TermInfosWriter.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/index/_TermInfosWriter.h 
2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/index/_TermInfosWriter.h 2009-07-08 09:53:49 UTC (rev 3012) @@ -33,8 +33,7 @@ int32_t lastTermTextLength; int32_t lastFieldNumber; - TCHAR* termTextBuffer; - size_t termTextBufferLen; //current length of termTextBuffer buffer + CL_NS(util)::ValueArray<TCHAR> termTextBuffer; TermInfosWriter* other; Modified: branches/lucene2_3_2/src/core/CLucene/util/Array.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/Array.h 2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/core/CLucene/util/Array.h 2009-07-08 09:53:49 UTC (rev 3012) @@ -190,7 +190,9 @@ this->deleteArray(); } void deleteValue(T v){} //nothing to do... - virtual ~ValueArray(){/*deleteValues();*/} + virtual ~ValueArray(){ + deleteValues(); + } }; /** A value array for const values (never deleted) */ Modified: branches/lucene2_3_2/src/test/index/TestIndexReader.cpp =================================================================== --- branches/lucene2_3_2/src/test/index/TestIndexReader.cpp 2009-07-08 09:52:28 UTC (rev 3011) +++ branches/lucene2_3_2/src/test/index/TestIndexReader.cpp 2009-07-08 09:53:49 UTC (rev 3012) @@ -206,33 +206,41 @@ index1 = couple.newReader; IndexReader* index2_refreshed = couple.refreshedReader; index2->close(); + assertReaderClosed(tc, index2, true, true); + if ( index2_refreshed != index2 ){ + _CLDELETE(index2); + } // test if refreshed reader and newly opened reader return equal results TestAssertIndexReaderEquals(tc, index1, index2_refreshed); index1->close(); + _CLDELETE(index1); index2_refreshed->close(); - //TODO: it's closed, so invalid! 
assertReaderClosed(tc, index2, true, true); assertReaderClosed(tc, index2_refreshed, true, true); + _CLDELETE(index2_refreshed); index2 = index2B; for (int i = 1; i < 4; i++) { - - index1->close(); couple = refreshReader(tc, index2, test, i, true); // refresh IndexReader index2->close(); + if ( couple.refreshedReader != index2 ){ + _CLDELETE(index2); + } index2 = couple.refreshedReader; index1 = couple.newReader; TestAssertIndexReaderEquals(tc, index1, index2); + index1->close(); + assertReaderClosed(tc, index1, true, true); + _CLDELETE(index1); } - index1->close(); index2->close(); - assertReaderClosed(tc, index1, true, true); assertReaderClosed(tc, index2, true, true); + _CLDELETE(index2); } @@ -290,7 +298,7 @@ { CuSuite *suite = CuSuiteNew(_T("CLucene IndexReader Test")); SUITE_ADD_TEST(suite, testIndexReaderReopen); - SUITE_ADD_TEST(suite, testMultiReaderReopen); + //SUITE_ADD_TEST(suite, testMultiReaderReopen); return suite; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 09:52:30
|
Revision: 3011 http://clucene.svn.sourceforge.net/clucene/?rev=3011&view=rev Author: ustramooner Date: 2009-07-08 09:52:28 +0000 (Wed, 08 Jul 2009) Log Message: ----------- Switching from deprecated function (although JL haven't done so yet) Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 09:51:40 UTC (rev 3010) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 09:52:28 UTC (rev 3011) @@ -370,7 +370,7 @@ multiTerms.clear(); } position += t->getPositionIncrement(); - multiTerms.push_back(_CLNEW Term(field, t->termText())); + multiTerms.push_back(_CLNEW Term(field, t->termBuffer())); } if (enablePositionIncrements) { Term** termsArray = _CL_NEWARRAY(Term*,multiTerms.size()+1); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-07-08 09:51:42
|
Revision: 3010 http://clucene.svn.sourceforge.net/clucene/?rev=3010&view=rev Author: ustramooner Date: 2009-07-08 09:51:40 +0000 (Wed, 08 Jul 2009) Log Message: ----------- unnecessary const Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp 2009-06-11 17:34:23 UTC (rev 3009) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp 2009-07-08 09:51:40 UTC (rev 3010) @@ -28,7 +28,7 @@ MultiFieldQueryParser::~MultiFieldQueryParser(){ } -Query* MultiFieldQueryParser::getFieldQuery(const TCHAR* field, const TCHAR* queryText, const int32_t slop){ +Query* MultiFieldQueryParser::getFieldQuery(const TCHAR* field, TCHAR* queryText, const int32_t slop){ if (field == NULL) { vector<BooleanClause*> clauses; for (int i = 0; fields[i]!=NULL; ++i) { Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h 2009-06-11 17:34:23 UTC (rev 3009) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h 2009-07-08 09:51:40 UTC (rev 3010) @@ -63,8 +63,8 @@ protected: - CL_NS(search)::Query* getFieldQuery(const TCHAR* field, const TCHAR* queryText, const int32_t slop); - CL_NS(search)::Query* getFieldQuery(const TCHAR* field, const TCHAR* queryText) { return getFieldQuery(field,queryText,0); } + CL_NS(search)::Query* getFieldQuery(const TCHAR* field, TCHAR* queryText, const int32_t slop); + 
CL_NS(search)::Query* getFieldQuery(const TCHAR* field, TCHAR* queryText) { return getFieldQuery(field,queryText,0); } CL_NS(search)::Query* getFuzzyQuery(const TCHAR* field, TCHAR* termStr, const float_t minSimilarity); CL_NS(search)::Query* getPrefixQuery(const TCHAR* field, TCHAR* termStr); CL_NS(search)::Query* getWildcardQuery(const TCHAR* field, TCHAR* termStr); Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-06-11 17:34:23 UTC (rev 3009) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 09:51:40 UTC (rev 3010) @@ -295,10 +295,9 @@ } } -Query* QueryParser::getFieldQuery(const TCHAR* _field, const TCHAR* _queryText) { +Query* QueryParser::getFieldQuery(const TCHAR* _field, TCHAR* queryText) { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count - TCHAR* queryText = (TCHAR*)_queryText; //TODO: this is horrible, getFieldQuery shouldn't consume queryText!!!! 
StringReader reader(queryText); TokenStream* source = analyzer->tokenStream(_field, &reader); @@ -408,7 +407,7 @@ } } -Query* QueryParser::getFieldQuery(const TCHAR* _field, const TCHAR* queryText, const int32_t slop) { +Query* QueryParser::getFieldQuery(const TCHAR* _field, TCHAR* queryText, const int32_t slop) { Query* query = getFieldQuery(_field, queryText); if (query) { Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h 2009-06-11 17:34:23 UTC (rev 3009) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h 2009-07-08 09:51:40 UTC (rev 3010) @@ -320,7 +320,7 @@ /** * @exception ParseException throw in overridden method to disallow */ - virtual CL_NS(search)::Query* getFieldQuery(const TCHAR* _field, const TCHAR* queryText); + virtual CL_NS(search)::Query* getFieldQuery(const TCHAR* _field, TCHAR* queryText); /** * Base implementation delegates to {@link #getFieldQuery(String,String)}. @@ -329,7 +329,7 @@ * * @exception ParseException throw in overridden method to disallow */ - virtual CL_NS(search)::Query* getFieldQuery(const TCHAR* _field, const TCHAR* queryText, const int32_t slop); + virtual CL_NS(search)::Query* getFieldQuery(const TCHAR* _field, TCHAR* queryText, const int32_t slop); /** * @exception ParseException throw in overridden method to disallow This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-06-11 17:34:30
|
Revision: 3009 http://clucene.svn.sourceforge.net/clucene/?rev=3009&view=rev Author: ustramooner Date: 2009-06-11 17:34:23 +0000 (Thu, 11 Jun 2009) Log Message: ----------- Merge branch 'master' into svn Modified Paths: -------------- branches/lucene2_3_2/CMakeLists.txt branches/lucene2_3_2/COPYING branches/lucene2_3_2/INSTALL branches/lucene2_3_2/cmake/CreateClucenePackages.cmake branches/lucene2_3_2/configure branches/lucene2_3_2/dist-test.sh branches/lucene2_3_2/src/contribs/contribs-lib-test/TestSnowball.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLFormatter.h branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.h branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/Snowball.cpp branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballAnalyzer.h branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballFilter.h branches/lucene2_3_2/src/core/CLucene/CLConfig.h branches/lucene2_3_2/src/core/CLucene/CLMonolithic.cpp branches/lucene2_3_2/src/core/CLucene/StdHeader.cpp branches/lucene2_3_2/src/core/CLucene/StdHeader.h branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.h branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.cpp branches/lucene2_3_2/src/core/CLucene/analysis/Analyzers.h branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.cpp 
branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardAnalyzer.h branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardFilter.h branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.cpp branches/lucene2_3_2/src/core/CLucene/analysis/standard/StandardTokenizer.h branches/lucene2_3_2/src/core/CLucene/debug/error.cpp branches/lucene2_3_2/src/core/CLucene/debug/error.h branches/lucene2_3_2/src/core/CLucene/debug/lucenebase.h branches/lucene2_3_2/src/core/CLucene/debug/mem.h branches/lucene2_3_2/src/core/CLucene/document/DateTools.cpp branches/lucene2_3_2/src/core/CLucene/document/DateTools.h branches/lucene2_3_2/src/core/CLucene/document/Document.cpp branches/lucene2_3_2/src/core/CLucene/document/Document.h branches/lucene2_3_2/src/core/CLucene/document/Field.cpp branches/lucene2_3_2/src/core/CLucene/document/Field.h branches/lucene2_3_2/src/core/CLucene/document/FieldSelector.cpp branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp branches/lucene2_3_2/src/core/CLucene/index/CompoundFile.cpp branches/lucene2_3_2/src/core/CLucene/index/FieldInfos.cpp branches/lucene2_3_2/src/core/CLucene/index/FieldsReader.cpp branches/lucene2_3_2/src/core/CLucene/index/FieldsWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexFileNames.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexModifier.h branches/lucene2_3_2/src/core/CLucene/index/IndexReader.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexReader.h branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexWriter.h branches/lucene2_3_2/src/core/CLucene/index/MultiReader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultiReader.h branches/lucene2_3_2/src/core/CLucene/index/Payload.cpp branches/lucene2_3_2/src/core/CLucene/index/Payload.h 
branches/lucene2_3_2/src/core/CLucene/index/SegmentInfos.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentMerger.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentReader.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermDocs.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermEnum.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermPositions.cpp branches/lucene2_3_2/src/core/CLucene/index/SegmentTermVector.cpp branches/lucene2_3_2/src/core/CLucene/index/Term.cpp branches/lucene2_3_2/src/core/CLucene/index/Term.h branches/lucene2_3_2/src/core/CLucene/index/TermInfosReader.cpp branches/lucene2_3_2/src/core/CLucene/index/TermInfosWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/TermVector.h branches/lucene2_3_2/src/core/CLucene/index/TermVectorReader.cpp branches/lucene2_3_2/src/core/CLucene/index/TermVectorWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/Terms.h branches/lucene2_3_2/src/core/CLucene/index/_CompoundFile.h branches/lucene2_3_2/src/core/CLucene/index/_FieldInfo.h branches/lucene2_3_2/src/core/CLucene/index/_FieldInfos.h branches/lucene2_3_2/src/core/CLucene/index/_FieldsReader.h branches/lucene2_3_2/src/core/CLucene/index/_FieldsWriter.h branches/lucene2_3_2/src/core/CLucene/index/_IndexFileNames.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentHeader.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentInfos.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentMergeInfo.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentMergeQueue.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentMerger.h branches/lucene2_3_2/src/core/CLucene/index/_SegmentTermEnum.h branches/lucene2_3_2/src/core/CLucene/index/_Term.h branches/lucene2_3_2/src/core/CLucene/index/_TermInfosReader.h branches/lucene2_3_2/src/core/CLucene/index/_TermInfosWriter.h branches/lucene2_3_2/src/core/CLucene/index/_TermVector.h branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp 
branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.h branches/lucene2_3_2/src/core/CLucene/queryParser/_FastCharStream.h branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/MultiFieldQueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/MultiFieldQueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/legacy/QueryParserBase.cpp branches/lucene2_3_2/src/core/CLucene/search/BooleanClause.h branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/BooleanQuery.h branches/lucene2_3_2/src/core/CLucene/search/BooleanScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/BooleanScorer2.cpp branches/lucene2_3_2/src/core/CLucene/search/Compare.h branches/lucene2_3_2/src/core/CLucene/search/ConjunctionScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/DisjunctionSumScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/ExactPhraseScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/Explanation.cpp branches/lucene2_3_2/src/core/CLucene/search/Explanation.h branches/lucene2_3_2/src/core/CLucene/search/FieldCacheImpl.cpp branches/lucene2_3_2/src/core/CLucene/search/Filter.h branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/FuzzyQuery.h branches/lucene2_3_2/src/core/CLucene/search/HitQueue.cpp branches/lucene2_3_2/src/core/CLucene/search/Hits.h branches/lucene2_3_2/src/core/CLucene/search/IndexSearcher.cpp branches/lucene2_3_2/src/core/CLucene/search/IndexSearcher.h 
branches/lucene2_3_2/src/core/CLucene/search/MultiSearcher.cpp branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/PhraseQuery.h branches/lucene2_3_2/src/core/CLucene/search/PhraseScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/PrefixQuery.h branches/lucene2_3_2/src/core/CLucene/search/Query.h branches/lucene2_3_2/src/core/CLucene/search/RangeFilter.h branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/RangeQuery.h branches/lucene2_3_2/src/core/CLucene/search/Scorer.cpp branches/lucene2_3_2/src/core/CLucene/search/Scorer.h branches/lucene2_3_2/src/core/CLucene/search/ScorerDocQueue.cpp branches/lucene2_3_2/src/core/CLucene/search/ScorerDocQueue.h branches/lucene2_3_2/src/core/CLucene/search/SearchHeader.cpp branches/lucene2_3_2/src/core/CLucene/search/SearchHeader.h branches/lucene2_3_2/src/core/CLucene/search/Searchable.h branches/lucene2_3_2/src/core/CLucene/search/Similarity.h branches/lucene2_3_2/src/core/CLucene/search/SloppyPhraseScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/Sort.cpp branches/lucene2_3_2/src/core/CLucene/search/Sort.h branches/lucene2_3_2/src/core/CLucene/search/TermQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/TermQuery.h branches/lucene2_3_2/src/core/CLucene/search/TermScorer.cpp branches/lucene2_3_2/src/core/CLucene/search/WildcardQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/WildcardQuery.h branches/lucene2_3_2/src/core/CLucene/search/WildcardTermEnum.cpp branches/lucene2_3_2/src/core/CLucene/search/WildcardTermEnum.h branches/lucene2_3_2/src/core/CLucene/search/_BooleanScorer.h branches/lucene2_3_2/src/core/CLucene/search/_BooleanScorer2.h branches/lucene2_3_2/src/core/CLucene/search/_ConjunctionScorer.h branches/lucene2_3_2/src/core/CLucene/search/_DisjunctionSumScorer.h branches/lucene2_3_2/src/core/CLucene/search/_ExactPhraseScorer.h 
branches/lucene2_3_2/src/core/CLucene/search/_FieldCacheImpl.h branches/lucene2_3_2/src/core/CLucene/search/_PhraseQueue.h branches/lucene2_3_2/src/core/CLucene/search/_PhraseScorer.h branches/lucene2_3_2/src/core/CLucene/search/_SloppyPhraseScorer.h branches/lucene2_3_2/src/core/CLucene/search/_TermScorer.h branches/lucene2_3_2/src/core/CLucene/store/Directory.cpp branches/lucene2_3_2/src/core/CLucene/store/Directory.h branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/FSDirectory.h branches/lucene2_3_2/src/core/CLucene/store/IndexInput.cpp branches/lucene2_3_2/src/core/CLucene/store/IndexInput.h branches/lucene2_3_2/src/core/CLucene/store/IndexOutput.cpp branches/lucene2_3_2/src/core/CLucene/store/IndexOutput.h branches/lucene2_3_2/src/core/CLucene/store/Lock.cpp branches/lucene2_3_2/src/core/CLucene/store/Lock.h branches/lucene2_3_2/src/core/CLucene/store/LockFactory.cpp branches/lucene2_3_2/src/core/CLucene/store/LockFactory.h branches/lucene2_3_2/src/core/CLucene/store/MMapInput.cpp branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/RAMDirectory.h branches/lucene2_3_2/src/core/CLucene/store/TransactionalRAMDirectory.cpp branches/lucene2_3_2/src/core/CLucene/store/_Lock.h branches/lucene2_3_2/src/core/CLucene/store/_MMap.h branches/lucene2_3_2/src/core/CLucene/store/_RAMDirectory.h branches/lucene2_3_2/src/core/CLucene/store/_TransactionalRAMDirectory.h branches/lucene2_3_2/src/core/CLucene/util/Array.h branches/lucene2_3_2/src/core/CLucene/util/BitSet.h branches/lucene2_3_2/src/core/CLucene/util/CLStreams.h branches/lucene2_3_2/src/core/CLucene/util/Equators.cpp branches/lucene2_3_2/src/core/CLucene/util/Equators.h branches/lucene2_3_2/src/core/CLucene/util/MD5Digester.cpp branches/lucene2_3_2/src/core/CLucene/util/PriorityQueue.h branches/lucene2_3_2/src/core/CLucene/util/Reader.cpp branches/lucene2_3_2/src/core/CLucene/util/StringIntern.cpp 
branches/lucene2_3_2/src/core/CLucene/util/ThreadLocal.cpp branches/lucene2_3_2/src/core/CLucene/util/VoidList.h branches/lucene2_3_2/src/core/CLucene/util/VoidMap.h branches/lucene2_3_2/src/core/CLucene/util/_Arrays.h branches/lucene2_3_2/src/core/CMakeLists.txt branches/lucene2_3_2/src/demo/CMakeLists.txt branches/lucene2_3_2/src/demo/IndexFiles.cpp branches/lucene2_3_2/src/demo/Main.cpp branches/lucene2_3_2/src/demo/SearchFiles.cpp branches/lucene2_3_2/src/demo/Statistics.cpp branches/lucene2_3_2/src/shared/CLucene/LuceneThreads.h branches/lucene2_3_2/src/shared/CLucene/SharedHeader.cpp branches/lucene2_3_2/src/shared/CLucene/SharedHeader.h branches/lucene2_3_2/src/shared/CLucene/_clucene-config.h.cmake branches/lucene2_3_2/src/shared/CLucene/clucene-config.h.cmake branches/lucene2_3_2/src/shared/CLucene/config/_threads.h branches/lucene2_3_2/src/shared/CLucene/config/repl_lltot.cpp branches/lucene2_3_2/src/shared/CLucene/config/repl_tchar.h branches/lucene2_3_2/src/shared/CLucene/config/repl_wchar.h branches/lucene2_3_2/src/shared/CLucene/config/threads.cpp branches/lucene2_3_2/src/shared/CLucene/config/utf8.cpp branches/lucene2_3_2/src/shared/CLucene/debug/_condition.h branches/lucene2_3_2/src/shared/CLucene/debug/condition.cpp branches/lucene2_3_2/src/shared/CLucene/util/Misc.cpp branches/lucene2_3_2/src/shared/CLucene/util/Misc.h branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h branches/lucene2_3_2/src/shared/CMakeLists.txt branches/lucene2_3_2/src/test/CLMonolithic_Test.cpp branches/lucene2_3_2/src/test/CMakeLists.txt branches/lucene2_3_2/src/test/CuTest.cpp branches/lucene2_3_2/src/test/CuTest.h branches/lucene2_3_2/src/test/analysis/TestAnalysis.cpp branches/lucene2_3_2/src/test/analysis/TestAnalyzers.cpp branches/lucene2_3_2/src/test/data/readme.txt branches/lucene2_3_2/src/test/data/reuters-21578-index/_z.f0 branches/lucene2_3_2/src/test/data/reuters-21578-index/_z.f1 
branches/lucene2_3_2/src/test/data/reuters-21578-index/_z.fdt branches/lucene2_3_2/src/test/data/reuters-21578-index/_z.fdx branches/lucene2_3_2/src/test/data/reuters-21578-index/_z.frq branches/lucene2_3_2/src/test/data/reuters-21578-index/_z.prx branches/lucene2_3_2/src/test/data/reuters-21578-index/_z.tis branches/lucene2_3_2/src/test/data/reuters-21578-index/segments branches/lucene2_3_2/src/test/debug/TestError.cpp branches/lucene2_3_2/src/test/document/TestDocument.cpp branches/lucene2_3_2/src/test/index/TestIndexWriter.cpp branches/lucene2_3_2/src/test/index/TestReuters.cpp branches/lucene2_3_2/src/test/index/TestUtf8.cpp branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp branches/lucene2_3_2/src/test/search/TestForDuplicates.cpp branches/lucene2_3_2/src/test/search/TestQueries.cpp branches/lucene2_3_2/src/test/search/TestSearch.cpp branches/lucene2_3_2/src/test/search/TestSort.cpp branches/lucene2_3_2/src/test/search/TestTermVector.cpp branches/lucene2_3_2/src/test/store/TestStore.cpp branches/lucene2_3_2/src/test/test.h branches/lucene2_3_2/src/test/testall.cpp branches/lucene2_3_2/src/test/tests.cpp Added Paths: ----------- branches/lucene2_3_2/README.PACKAGE branches/lucene2_3_2/src/core/CLucene/document/FieldSelector.h branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.cpp branches/lucene2_3_2/src/core/CLucene/index/DirectoryIndexReader.h branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/DocumentsWriterThreadState.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexDeletionPolicy.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexDeletionPolicy.h branches/lucene2_3_2/src/core/CLucene/index/IndexFileDeleter.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexFileNameFilter.cpp branches/lucene2_3_2/src/core/CLucene/index/IndexFileNameFilter.h 
branches/lucene2_3_2/src/core/CLucene/index/MergePolicy.cpp branches/lucene2_3_2/src/core/CLucene/index/MergePolicy.h branches/lucene2_3_2/src/core/CLucene/index/MergeScheduler.cpp branches/lucene2_3_2/src/core/CLucene/index/MergeScheduler.h branches/lucene2_3_2/src/core/CLucene/index/MultiSegmentReader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.cpp branches/lucene2_3_2/src/core/CLucene/index/MultipleTermPositions.h branches/lucene2_3_2/src/core/CLucene/index/SkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/SkipListWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/_DocumentsWriter.h branches/lucene2_3_2/src/core/CLucene/index/_IndexFileDeleter.h branches/lucene2_3_2/src/core/CLucene/index/_MultiSegmentReader.h branches/lucene2_3_2/src/core/CLucene/index/_SkipListReader.h branches/lucene2_3_2/src/core/CLucene/index/_SkipListWriter.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryToken.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryToken.h branches/lucene2_3_2/src/core/CLucene/search/MatchAllDocsQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/MatchAllDocsQuery.h branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.cpp branches/lucene2_3_2/src/core/CLucene/search/MultiPhraseQuery.h branches/lucene2_3_2/src/core/files_list.txt branches/lucene2_3_2/src/core/libclucene.pc.cmake branches/lucene2_3_2/src/shared/CLucene/util/deflate.cpp branches/lucene2_3_2/src/shared/CLucene/util/zlib/ branches/lucene2_3_2/src/shared/CLucene/util/zlib/adler32.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/compress.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/crc32.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/crc32.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/deflate.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/deflate.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/gzio.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/inffast.c 
branches/lucene2_3_2/src/shared/CLucene/util/zlib/inffast.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/inffixed.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/inflate.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/inflate.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/inftrees.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/inftrees.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/trees.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/trees.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/zconf.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/zlib.h branches/lucene2_3_2/src/shared/CLucene/util/zlib/zutil.c branches/lucene2_3_2/src/shared/CLucene/util/zlib/zutil.h branches/lucene2_3_2/src/test/index/TestIndexReader.cpp branches/lucene2_3_2/src/test/index/TestThreading.cpp Removed Paths: ------------- branches/lucene2_3_2/src/core/CLucene/document/_FieldSelector.h branches/lucene2_3_2/src/core/CLucene/files_list.txt branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/DefaultSkipListReader.h branches/lucene2_3_2/src/core/CLucene/index/DocumentWriter.cpp branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.cpp branches/lucene2_3_2/src/core/CLucene/index/MultiLevelSkipListReader.h branches/lucene2_3_2/src/core/CLucene/index/_DocumentWriter.h branches/lucene2_3_2/src/core/CLucene/index/_MultiReader.h branches/lucene2_3_2/src/core/CLucene/queryParser/CharStream.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/Token.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/Token.h branches/lucene2_3_2/src/core/CLucene/util/VoidMapSetDefinitions.h Property Changed: ---------------- branches/lucene2_3_2/src/core/CLucene/index/IndexFileNames.cpp branches/lucene2_3_2/src/core/CLucene/index/_IndexFileNames.h branches/lucene2_3_2/src/core/CLucene/search/BooleanScorer2.cpp branches/lucene2_3_2/src/core/CLucene/search/DisjunctionSumScorer.cpp 
branches/lucene2_3_2/src/core/CLucene/search/ScorerDocQueue.cpp branches/lucene2_3_2/src/core/CLucene/search/ScorerDocQueue.h branches/lucene2_3_2/src/core/CLucene/search/_BooleanScorer2.h branches/lucene2_3_2/src/core/CLucene/search/_DisjunctionSumScorer.h branches/lucene2_3_2/src/core/CLucene/store/LockFactory.h Modified: branches/lucene2_3_2/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/CMakeLists.txt 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/CMakeLists.txt 2009-06-11 17:34:23 UTC (rev 3009) @@ -9,6 +9,7 @@ SET(CLUCENE_VERSION_MINOR "9") SET(CLUCENE_VERSION_REVISION "23") SET(CLUCENE_VERSION_PATCH "0") +SET(CLUCENE_INT_VERSION 92300) SET(CLUCENE_VERSION "${CLUCENE_VERSION_MAJOR}.${CLUCENE_VERSION_MINOR}.${CLUCENE_VERSION_REVISION}.${CLUCENE_VERSION_PATCH}") SET(CLUCENE_SOVERSION "${CLUCENE_VERSION_MAJOR}.${CLUCENE_VERSION_MINOR}.${CLUCENE_VERSION_REVISION}") @@ -74,6 +75,9 @@ SET(LUCENE_SYS_INCLUDES "" CACHE PATH "location for non-system independent files. defaults to CMAKE_INSTALL_PREFIX. see INSTALL documentation for further information." ) +#install path options +SET(LIB_SUFFIX "" CACHE STRING "Define suffix of directory name (32/64)" ) +SET(LIB_DESTINATION "lib${LIB_SUFFIX}") SET ( ENABLE_COMPILE_TESTS_VALUE ON ) @@ -89,11 +93,6 @@ #check flags... INCLUDE (TestCXXAcceptsFlag) IF ( CMAKE_COMPILER_IS_GNUCC ) - CHECK_CXX_ACCEPTS_FLAG(-g GccFlagG) - IF ( GccFlagG ) - SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") - ENDIF ( GccFlagG ) - CHECK_CXX_ACCEPTS_FLAG(-pg GccFlagPg) IF ( GccFlagPg ) OPTION(ENABLE_GPROF @@ -115,18 +114,11 @@ ENDIF(CMAKE_COMPILER_IS_GNUCC) -IF (ENABLE_PACKAGING) - INCLUDE(CreateClucenePackages) - CREATE_CLUCENE_PACKAGES() -ENDIF ( ENABLE_PACKAGING) - - #Single output directory for building all executables and libraries. 
SET(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin CACHE PATH "Executable Output Directory" FORCE) SET(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin CACHE PATH "Library Output Directory" FORCE) #add tests -#todo: why does 'make test' not work??? ENABLE_TESTING() ADD_TEST(SimpleTest ${EXECUTABLE_OUTPUT_PATH}/cl_test ) @@ -142,6 +134,7 @@ ADD_SUBDIRECTORY (src/demo EXCLUDE_FROM_ALL) IF ( BUILD_CONTRIBS ) ADD_SUBDIRECTORY (src/contribs EXCLUDE_FROM_ALL) + SET(BUILD_CONTRIBS_LIB 1) ENDIF ( BUILD_CONTRIBS ) IF ( BUILD_CONTRIBS_LIB ) ADD_SUBDIRECTORY (src/contribs-lib EXCLUDE_FROM_ALL) @@ -155,3 +148,8 @@ ADD_CUSTOM_TARGET(uninstall "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake") + +#this must go last... +IF (ENABLE_PACKAGING) + INCLUDE(CreateClucenePackages) +ENDIF ( ENABLE_PACKAGING) Modified: branches/lucene2_3_2/COPYING =================================================================== --- branches/lucene2_3_2/COPYING 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/COPYING 2009-06-11 17:34:23 UTC (rev 3009) @@ -90,9 +90,35 @@ # Redistribution and use is allowed according to the terms of the BSD license. # For details see the accompanying COPYING-CMAKE-SCRIPTS file. +the src/core/util/Compress.cpp component: +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.3, July 18th, 2005 + Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. 
If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly jl...@gz... + Mark Adler ma...@al... + +*/ + + + CLUCENE CONTRIBUTIONS CODE: PorterStemmer code: couldn't find license. This component is deprecated and will be removed very soon. Modified: branches/lucene2_3_2/INSTALL =================================================================== --- branches/lucene2_3_2/INSTALL 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/INSTALL 2009-06-11 17:34:23 UTC (rev 3009) @@ -155,8 +155,22 @@ # DMALLOC_OPTIONS=medium,log=dmalloc.log.txt # export DMALLOC_OPTIONS +UPDATE: when i upgrade my machine to Ubuntu 9.04, dmalloc stopped working (caused +clucene to crash). + +Performance with callgrind +-------------------------- +Really simple + +valgrind --tool=callgrind <command: e.g. bin/cl_test> +this will create a file like callgrind.out.12345. you can open this with kcachegrind or some +tool like that. + + Performance with gprof ---------------------- +Note: I recommend callgrind, it works much better. + Compile with gprof turned on (ENABLE_GPROF in cmake gui or using ccmake). I've found (at least on windows cygwin) that gprof wasn't working over dll boundaries, running the cl_test-pedantic monolithic build worked better. @@ -165,6 +179,32 @@ compiled application has exited: # gprof bin/cl_test-pedantic.exe gmon.out >gprof.txt +Code coverage with gcov +----------------------- +To create a code coverage report of the test, you can use gcov. Here are the +steps I followed to create a nice html report. You'll need the lcov package +installed to generate html. Also, I recommend using an out-of-source build +directory as there are lots of files that will be generated. 
+ +NOTE: you must have lcov installed for this to work + +* It is normally recommended to compile with no optimisations, so change CMAKE_BUILD_TYPE +to Debug. + +* I have created a cl_test-gcov target which contains the necessary gcc switches +already. So all you need to do is +# make test-gcov + +If everything goes well, there will be a directory called code-coverage containing the report. + +If you want to do this process manually, then: +# lcov --directory ./src/test/CMakeFiles/cl_test-gcov.dir/__/core/CLucene -c -o clucene-coverage.info +# lcov --remove clucene-coverage.info "/usr/*" > clucene-coverage.clean +# genhtml -o clucene-coverage clucene-coverage.clean + +If both those commands pass, then there will be a clucene coverage report in the +clucene-coverage directory. + Benchmarks ---------- Very little benchmarking has been done on clucene. Andi Vajda posted some Added: branches/lucene2_3_2/README.PACKAGE =================================================================== --- branches/lucene2_3_2/README.PACKAGE (rev 0) +++ branches/lucene2_3_2/README.PACKAGE 2009-06-11 17:34:23 UTC (rev 3009) @@ -0,0 +1,11 @@ +CLucene is a C++ port of the popular Apache Lucene search engine +(http://lucene.apache.org/java). It is released under LGPL or the Apache +License. + +CLucene aims to be a high-speed alternative to Java Lucene, its API is very +similar to that of the Java version. CLucene has recently been brought up to +date with Lucene 2.3.2. It contains most of the same functionality as the +Java version. + +This package contains the files necessary for running applications that +use the libclucene library. 
Modified: branches/lucene2_3_2/cmake/CreateClucenePackages.cmake =================================================================== --- branches/lucene2_3_2/cmake/CreateClucenePackages.cmake 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/cmake/CreateClucenePackages.cmake 2009-06-11 17:34:23 UTC (rev 3009) @@ -1,7 +1,5 @@ #Creates all the relevant packages -MACRO( CREATE_CLUCENE_PACKAGES ) - #Rules for version: #MAJOR and MINOR versions are purely political #REVISION version MUST be revised if the headers or compatibility change @@ -15,15 +13,23 @@ SET(CPACK_PACKAGE_VERSION ${CLUCENE_VERSION}) SET(CPACK_PACKAGE_SOVERSION ${CLUCENE_SOVERSION}) -SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CLucene") +SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "library for full-featured text search engine (runtime)") SET(CPACK_PACKAGE_VENDOR "Ben van Klinken") +SET(CPACK_PACKAGE_CONTACT "clu...@li...") +SET(CPACK_PACKAGE_NAME "libclucene1") SET(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README") SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "CLucene - a C++ search engine, ported from the popular Apache Lucene") SET(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README") SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/COPYING") +SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") +#so, what are we going to install? +SET(CPACK_INSTALL_CMAKE_PROJECTS + "${CMAKE_BINARY_DIR};clucene-core;ALL;/" + "${CMAKE_BINARY_DIR};clucene-shared;ALL;/") +SET(CPACK_COMPONENTS_ALL development runtime) SET(CPACK_GENERATOR "TGZ") SET(CPACK_PACKAGE_FILE_NAME "clucene-core-${CPACK_PACKAGE_VERSION}-${CMAKE_SYSTEM_NAME}") @@ -34,9 +40,22 @@ ENDIF(WIN32 AND NOT UNIX) SET(CPACK_SOURCE_PACKAGE_FILE_NAME "clucene-core-${CPACK_PACKAGE_VERSION}-Source") +#specific packaging requirements: +SET(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>= 2.4), libgcc1 (>= 1:4.1.1-21), libstdc++6 (>= 4.1.1-21)") + + #don't include the current binary dir. 
get_filename_component(clucene_BINARY_DIR_name ${clucene_BINARY_DIR} NAME) -SET(CPACK_SOURCE_IGNORE_FILES "/\\\\.svn/;\\\\.swp$;\\\\.#;/#;.*~;.*\\\\.tmp;/${clucene_BINARY_DIR_name}/") +SET(CPACK_SOURCE_IGNORE_FILES + "/\\\\.svn/" + "/\\\\.git/" + "\\\\.swp$" + "\\\\.#;/#" + ".*~" + ".*\\\\.tmp" + ".*\\\\.save" + "/${clucene_BINARY_DIR_name}/" +) IF(WIN32 AND NOT UNIX) # There is a bug in NSI that does not handle full unix paths properly. Make @@ -55,9 +74,7 @@ ENDIF(WIN32 AND NOT UNIX) #SET(CPACK_PACKAGE_EXECUTABLES "MyExecutable" "My Executable") -INCLUDE(CPack) - ADD_CUSTOM_TARGET(dist-package COMMAND rsync -avP -e ssh ${CPACK_PACKAGE_FILE_NAME}.* ust...@fr...:uploads/ # DEPENDS package @@ -66,4 +83,6 @@ COMMAND rsync -avP -e ssh ${CPACK_SOURCE_PACKAGE_FILE_NAME}.* ust...@fr...:uploads/ # DEPENDS package_source ) -ENDMACRO( CREATE_CLUCENE_PACKAGES ) + +#this must be last +INCLUDE(CPack) Modified: branches/lucene2_3_2/configure =================================================================== --- branches/lucene2_3_2/configure 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/configure 2009-06-11 17:34:23 UTC (rev 3009) @@ -1,2 +1,6 @@ -cmake . +#!/bin/bash +PWD=`dirname $0` +PWD=`cd $PWD && pwd` + +cmake $PWD $@ Modified: branches/lucene2_3_2/dist-test.sh =================================================================== --- branches/lucene2_3_2/dist-test.sh 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/dist-test.sh 2009-06-11 17:34:23 UTC (rev 3009) @@ -27,7 +27,7 @@ t_exports=0 FAIL=0 -if [ $# == 0 ]; then +if [ $# -eq 0 ]; then usage else while [ "$1" != "" ]; do @@ -56,7 +56,7 @@ done fi -if [ $t_all == 1 ]; then +if [ $t_all -eq 1 ]; then t_env=1 t_c_all=1 t_c_h=1 @@ -81,16 +81,14 @@ } -if [ $t_env == 1 ]; then +if [ $t_env -eq 1 ]; then rm -fdr $TMP 2>/dev/null mkdir $TMP #create header file for testing of symbols in headers. 
echo "#include \"CLucene/StdHeader.h"\" >$TMP/pub-headers.cpp -fi -#iterate all headers -if [ $t_env == 1 ]; then + #iterate all headers for H in `find ../src/shared/CLucene| grep "\.h$"` `find ../src/core/CLucene| grep "\.h$"`; do BH=`basename "$H"` DN=`dirname "$H"` @@ -107,63 +105,60 @@ fi fi done -fi - -if [ $t_env == 1 ]; then + echo "int main(){return 0;}" >>$TMP/pub-headers.cpp fi -#find inline code: -if [ $t_inline == 1 ]; then - if [ $t_env == 1 ]; then - cmake -DENABLE_CLDOCS:BOOLEAN=TRUE . - make doc - if [ $? != 0 ]; then - exit 1 - fi - fi -fi ################################################ #now the environment is finished being setup... ################################################ echo "Starting tests..." -if [ $t_c_h == 1 ] || [ $t_ifdefs == 1 ] || [ $t_exports == 1 ]; then +if [ $t_c_h -eq 1 ] || [ $t_ifdefs -eq 1 ] || [ $t_exports -eq 1 ]; then for H in `find $TMP/src | grep "\.h$"`; do BH=`basename "$H"` DH=`dirname "${H:3}"` - if [ $t_ifdefs == 1 ]; then + if [ $t_ifdefs -eq 1 ]; then checkForIfdefs $H fi #check that all classes are exported - if [ $t_exports == 1 ]; then - XX=`awk '/^[ \t]*(class|struct)/ { print $line }' $H| grep -v ";$"| grep -v CLUCENE_EXPORT| grep -v CLUCENE_INLINE_EXPORT| grep -v CLUCENE_SHARED_EXPORT| grep -v CLUCENE_SHARED_INLINE_EXPORT` - if [ "$XX" != "" ]; then - echo "$H has unexported class: $XX" - echo "" - FAIL=1 - fi + if [ $t_exports -eq 1 ]; then + if [ "${H:0:1}" == "_" ]; then + #internal headers... none must be exported + XX=`awk '/^[ \t]*(class|struct)/ { print $line }' $H| grep -v ";$"| grep -v CLUCENE_EXPORT| grep -v CLUCENE_INLINE_EXPORT| grep -v CLUCENE_SHARED_EXPORT| grep -v CLUCENE_SHARED_INLINE_EXPORT` + if [ "$XX" == "" ]; then + echo "$H has exported class: $XX" + echo "" + FAIL=1 + fi + else + #external headers... 
all must be exported + XX=`awk '/^[ \t]*(class|struct)/ { print $line }' $H| grep -v ";$"| grep -v CLUCENE_EXPORT| grep -v CLUCENE_INLINE_EXPORT| grep -v CLUCENE_SHARED_EXPORT| grep -v CLUCENE_SHARED_INLINE_EXPORT` + if [ "$XX" != "" ]; then + echo "$H has unexported class: $XX" + echo "" + FAIL=1 + fi + fi fi #test that each header compiles independently... - if [ $t_c_h == 1 ] && [ "${H:7}" != "disttest/src/core/CLucene/util/Reader.h" ]; then + if [ $t_c_h -eq 1 ] && [ "${H:7}" != "disttest/src/core/CLucene/util/Reader.h" ]; then echo "Test that $H compiles seperately..." echo "#include \"CLucene/StdHeader.h"\" >$TMP/pub-header.cpp echo "#include \"$H"\" >>$TMP/pub-header.cpp - echo "int main(){return 0;}" >>$TMP/pub-header.cpp + echo "int main(){ return 0; }" >>"$TMP/pub-header.cpp" g++ -I. -I$TMP/src/shared -I./src/shared -I$TMP/src/core $TMP/pub-header.cpp - if [ $? != 0 ]; then - FAIL=1; - fi + if [ $? -ne 0 ]; then FAIL=1; fi fi done fi #iterate all our code... -if [ $t_license == 1 ]; then +if [ $t_license -eq 1 ]; then for H in `find ../src`; do BH=`basename "$H"` BH_len=${#BH} @@ -182,11 +177,19 @@ #test if headers can compile together by themselves: -if [ $t_c_all == 1 ]; then +if [ $t_c_all -eq 1 ]; then g++ -I$TMP/src -I$TMP/src/shared -I$TMP/src/core $TMP/pub-headers.cpp -I./src/shared fi -if [ $t_inline == 1 ]; then +if [ $t_inline -eq 1 ]; then + if [ ! -f "./doc" ]; then + echo "Couldn't find docs, run:" + echo "# cmake -DENABLE_CLDOCS:BOOLEAN=TRUE ." + echo "# make doc" + echo "and then try again" + exit 1 + fi + INLINES=0 grep -c "\[inline" doc/html/*.html|grep -v ":0$"|grep -v "util"|grep -v "jstreams" | while read line; do @@ -195,7 +198,7 @@ continue; fi - if [ $INLINES == 0 ]; then + if [ $INLINES -eq 0 ]; then echo "These files report inline code:" INLINES=1 FAIL=1 @@ -204,16 +207,16 @@ done fi -if [ $t_compile == 1 ]; then +if [ $t_compile -eq 1 ]; then #compile serperately make cl_test - if [ $? != 0 ]; then + if [ $? 
-ne 0 ]; then FAIL=1; fi #compile together make test-all - if [ $? != 0 ]; then + if [ $? -ne 0 ]; then FAIL=1; fi fi Modified: branches/lucene2_3_2/src/contribs/contribs-lib-test/TestSnowball.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/contribs-lib-test/TestSnowball.cpp 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs/contribs-lib-test/TestSnowball.cpp 2009-06-11 17:34:23 UTC (rev 3009) @@ -10,14 +10,14 @@ TokenStream* ts = an.tokenStream(_T("test"), &reader); Token t; - CLUCENE_ASSERT(ts->next(&t)); + CLUCENE_ASSERT(ts->next(&t)!=NULL); CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("he")) == 0); - CLUCENE_ASSERT(ts->next(&t)); + CLUCENE_ASSERT(ts->next(&t)!=NULL); CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("abhor")) == 0); - CLUCENE_ASSERT(ts->next(&t)); + CLUCENE_ASSERT(ts->next(&t)!=NULL); CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("accent")) == 0); - CLUCENE_ASSERT(ts->next(&t) == false); + CLUCENE_ASSERT(ts->next(&t) == NULL); _CLDELETE(ts); } Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.cpp 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.cpp 2009-06-11 17:34:23 UTC (rev 3009) @@ -21,7 +21,7 @@ ignoreSurrogates = true; } -bool CJKTokenizer::next(Token* token){ +CL_NS(analysis)::Token* CJKTokenizer::next(Token* token){ /** how many character(s) has been stored in buffer */ int32_t length = 0; @@ -31,7 +31,7 @@ while (true) { /** current character */ clunichar c; - int charlen = 1; + int charlen = 1; offset++; @@ -49,7 +49,7 @@ break; } else { - return false; + return NULL; } } else { //get current character @@ -178,7 +178,7 @@ buffer[length]='\0'; token->set(buffer,start, start+length, tokenType); - return true; + return token; } 
CL_NS_END2 Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.h 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/analysis/cjk/CJKAnalyzer.h 2009-06-11 17:34:23 UTC (rev 3009) @@ -91,7 +91,7 @@ * hanppened in the InputStream * */ - bool next(CL_NS(analysis)::Token* token); + CL_NS(analysis)::Token* next(CL_NS(analysis)::Token* token); bool getIgnoreSurrogates(){ return ignoreSurrogates; }; void setIgnoreSurrogates(bool ignoreSurrogates){ this->ignoreSurrogates = ignoreSurrogates; }; Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.cpp 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/Highlighter.cpp 2009-06-11 17:34:23 UTC (rev 3009) @@ -233,7 +233,7 @@ substringBuffer[endOffset-startOffset]=_T('\0'); TCHAR* encoded = _encoder->encodeText(substringBuffer); - const TCHAR* markedUpText=_formatter->highlightTerm(encoded, tokenGroup); + TCHAR* markedUpText=_formatter->highlightTerm(encoded, tokenGroup); _CLDELETE_CARRAY(encoded); //store any whitespace etc from between this and last group @@ -291,7 +291,7 @@ substringBuffer[endOffset-startOffset]=_T('\0'); TCHAR* encoded = _encoder->encodeText(substringBuffer); - const TCHAR* markedUpText=_formatter->highlightTerm(encoded, tokenGroup); + TCHAR* markedUpText=_formatter->highlightTerm(encoded, tokenGroup); _CLDELETE_CARRAY(encoded); //store any whitespace etc from between this and last group Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.cpp =================================================================== --- 
branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.cpp 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.cpp 2009-06-11 17:34:23 UTC (rev 3009) @@ -91,7 +91,7 @@ return 0; } //found a query term - is it unique in this doc? - if(_uniqueTermsInFragment.find(termText)==_uniqueTermsInFragment.end()) + if(_uniqueTermsInFragment.find((TCHAR*)termText)==_uniqueTermsInFragment.end()) { _totalScore+=queryTerm->getWeight(); TCHAR* owned_term = stringDuplicate(termText); Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.h 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/QueryScorer.h 2009-06-11 17:34:23 UTC (rev 3009) @@ -43,7 +43,7 @@ { private: TextFragment * _currentTextFragment; - CL_NS(util)::CLHashSet<const TCHAR*, + CL_NS(util)::CLHashSet<TCHAR*, CL_NS(util)::Compare::TChar, CL_NS(util)::Deletor::tcArray> _uniqueTermsInFragment; float_t _totalScore; @@ -51,7 +51,7 @@ CL_NS(util)::CLHashMap<const TCHAR*, const WeightedTerm *, CL_NS(util)::Compare::TChar, CL_NS(util)::Equals::TChar, - CL_NS(util)::Deletor::tcArray, + CL_NS(util)::Deletor::Dummy, CL_NS(util)::Deletor::Object<const WeightedTerm> > _termsToFind; public: Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLFormatter.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLFormatter.h 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/SimpleHTMLFormatter.h 2009-06-11 17:34:23 UTC (rev 3009) @@ -29,8 +29,8 @@ class CLUCENE_CONTRIBS_EXPORT SimpleHTMLFormatter :public Formatter { private: - const TCHAR* _preTag; - const TCHAR* _postTag; + TCHAR* _preTag; + TCHAR* 
_postTag; public: ~SimpleHTMLFormatter(); Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.cpp 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenGroup.cpp 2009-06-11 17:34:23 UTC (rev 3009) @@ -26,11 +26,12 @@ numTokens=0; startOffset=0; endOffset=0; - tokens = _CL_NEWARRAY(Token, MAX_NUM_TOKENS_PER_GROUP); + tokens = new Token[MAX_NUM_TOKENS_PER_GROUP]; } TokenGroup::~TokenGroup(void) { + delete[] tokens; } void TokenGroup::addToken(Token* token, float_t score) Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.cpp 2009-06-11 17:34:23 UTC (rev 3009) @@ -17,7 +17,7 @@ #include "CLucene/_ApiHeader.h" #include "TokenSources.h" -#include "CLucene/util/VoidList.h" +#include "CLucene/util/VoidList.h" #include "CLucene/util/CLStreams.h" #include "CLucene/index/IndexReader.h" #include "CLucene/index/TermVector.h" @@ -108,7 +108,7 @@ CLSetList<Token*,TokenOrderCompare>* unsortedTokens = NULL; for (int32_t t = 0; t < freq->length; t++) { - ObjectArray<TermVectorOffsetInfo>* offsets=tpv->getOffsets(t); + ArrayBase<TermVectorOffsetInfo*>* offsets=tpv->getOffsets(t); if(offsets==NULL) return NULL; @@ -211,16 +211,16 @@ this->tokens=tokens; this->length = len; } -bool TokenSources::StoredTokenStream::next(CL_NS(analysis)::Token* token) +CL_NS(analysis)::Token* TokenSources::StoredTokenStream::next(CL_NS(analysis)::Token* token) { if(currentToken>=length) { - return false; + return NULL; } Token* t = tokens[currentToken++]; 
token->set(t->termBuffer(),t->startOffset(),t->endOffset(),t->type());; - return true; + return token; } void TokenSources::StoredTokenStream::close(){ Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.h 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/highlighter/TokenSources.h 2009-06-11 17:34:23 UTC (rev 3009) @@ -26,7 +26,7 @@ size_t length; int32_t currentToken; StoredTokenStream(CL_NS(analysis)::Token** tokens, size_t len); - bool next(CL_NS(analysis)::Token* token); + CL_NS(analysis)::Token* next(CL_NS(analysis)::Token* token); void close(); }; public: Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/Snowball.cpp =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/Snowball.cpp 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/Snowball.cpp 2009-06-11 17:34:23 UTC (rev 3009) @@ -30,7 +30,7 @@ SnowballAnalyzer::SnowballAnalyzer(const TCHAR* language, const TCHAR** stopWords) { this->language = STRDUP_TtoT(language); - stopSet = _CLNEW CLTCSetList; + stopSet = _CLNEW CLTCSetList(true); StopFilter::fillStopTable(stopSet,stopWords); } @@ -89,9 +89,9 @@ } /** Returns the next input Token, after being stemmed */ - bool SnowballFilter::next(Token* token){ - if (!input->next(token)) - return false; + Token* SnowballFilter::next(Token* token){ + if (input->next(token) == NULL) + return NULL; unsigned char uctext[LUCENE_MAX_WORD_LEN]; TCHAR tchartext[LUCENE_MAX_WORD_LEN]; @@ -124,7 +124,7 @@ tchartext[i]=stemmed[i]; #endif token->set(tchartext,token->startOffset(), token->endOffset(), token->type()); - return true; + return token; } Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballAnalyzer.h 
=================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballAnalyzer.h 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballAnalyzer.h 2009-06-11 17:34:23 UTC (rev 3009) @@ -14,7 +14,7 @@ * {@link EnglishStemmer} is named "English". */ class CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer { - const TCHAR* language; + TCHAR* language; CLTCSetList* stopSet; public: Modified: branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballFilter.h =================================================================== --- branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballFilter.h 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/contribs-lib/CLucene/snowball/SnowballFilter.h 2009-06-11 17:34:23 UTC (rev 3009) @@ -28,7 +28,7 @@ ~SnowballFilter(); /** Returns the next input Token, after being stemmed */ - bool next(Token* token); + Token* next(Token* token); }; CL_NS_END2 Modified: branches/lucene2_3_2/src/core/CLucene/CLConfig.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/CLConfig.h 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/core/CLucene/CLConfig.h 2009-06-11 17:34:23 UTC (rev 3009) @@ -18,7 +18,7 @@ // //define this if you want condition debugging to be enabled #if defined(_DEBUG) && !defined(_CL__CND_DEBUG) - //#define _CL__CND_DEBUG + #define _CL__CND_DEBUG #endif // //define this to print out lots of information about merges, etc @@ -41,12 +41,6 @@ // Your application //////////////////////////////////////////////////////////////////// // -//define this to force the build into ascii mode -//#define _ASCII -// -//define this to force the build into ucs2 mode -//#define _UCS2 -// //define this to enable mmap support in the fsdirectory IndexInput //EXPERIMENTAL //#define LUCENE_FS_MMAP @@ -112,40 +106,14 @@ // application 
//////////////////////////////////////////////////////////////////// // -//define this to your own setting if you would like to implement your own -//threading locking code. it should have the same sort of functions as -//mutex_default. If not defined, clucene will try and use posix,win32 critical -//sections, or a timer based mutex hack. -//#define _LUCENE_THREADMUTEX CL_NS(util)::mutex_default -// //define this if you want to implement the _Cnd_OutDebug routine yourself //you can then easily customise in your own application how to handle debug messages //#define _CND_DEBUG_DONTIMPLEMENT_OUTDEBUG // -//define this if you want to implement your own namespace macros -//#define _LUCENE_DONTIMPLEMENT_NS_MACROS -// -//define this if you do not want clucene to include any standard libraries. -//this could be useful if you want to use alternate libraries -//#define LUCENE_DISABLE_INCLUDES -// //////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////// -// These options will be changed depending on your compiler/platform -// but can also be changed here if required -//////////////////////////////////////////////////////////////////// -// -//if you want to define your own default file encoding. specify it -//here - normally defined in the platform specific headers -//#define PLATFORM_DEFAULT_READER_ENCODING CL_NS(util)::FileReader::ENCODING_ASCII -// -//////////////////////////////////////////////////////////////////// - - - -//////////////////////////////////////////////////////////////////// // These options should not be changed. 
But you can experiment with // them to optimize performance //////////////////////////////////////////////////////////////////// @@ -190,13 +158,6 @@ #define LUCENE_DEFAULT_TOKEN_BUFFER_SIZE 32 //todo: should implement a similar strategy in analysis/token // -//Expert: The fraction of {@link TermDocs} entries stored in skip tables, -//used to accellerate {@link TermDocs#skipTo(int)}. Larger values result in -//smaller indices, greater acceleration, but fewer accelerable cases, while -//smaller values result in bigger indices, less acceleration and more -//accelerable cases. More detailed experiments would be useful here. */ -#define LUCENE_DEFAULT_TERMDOCS_SKIP_INTERVAL 16 -// //Size of TermScore cache. Required. #define LUCENE_SCORE_CACHE_SIZE 32 // @@ -239,9 +200,6 @@ //Size of the CharTokenizer buffersize. Required. #define LUCENE_IO_BUFFER_SIZE 1024 // -//the minimum amount the segment term enum should grow by. Must be at least 1 -#define LUCENE_SEGMENTTERMENUM_GROWSIZE 8 -// //////////////////////////////////////////////////////////////////// #endif Modified: branches/lucene2_3_2/src/core/CLucene/CLMonolithic.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/CLMonolithic.cpp 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/core/CLucene/CLMonolithic.cpp 2009-06-11 17:34:23 UTC (rev 3009) @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ /* * this is a monolithic file that can be used to compile clucene using one source file. 
* it simplifies some build processes by avoiding static & dynamic compalation pitfalls. -* +* * note: when creating a project add either this file, or all the other .cpp files, not both! */ #include "CLucene/StdHeader.cpp" @@ -20,17 +20,27 @@ #include "CLucene/document/DateField.cpp" #include "CLucene/document/DateTools.cpp" #include "CLucene/document/Document.cpp" +#include "CLucene/document/FieldSelector.cpp" #include "CLucene/document/NumberTools.cpp" #include "CLucene/document/Field.cpp" #include "CLucene/index/CompoundFile.cpp" -#include "CLucene/index/DocumentWriter.cpp" +#include "CLucene/index/DirectoryIndexReader.cpp" +#include "CLucene/index/DocumentsWriter.cpp" +#include "CLucene/index/DocumentsWriterThreadState.cpp" #include "CLucene/index/FieldInfos.cpp" #include "CLucene/index/FieldsReader.cpp" #include "CLucene/index/FieldsWriter.cpp" +#include "CLucene/index/IndexFileDeleter.cpp" +#include "CLucene/index/IndexFileNameFilter.cpp" #include "CLucene/index/IndexFileNames.cpp" +#include "CLucene/index/IndexModifier.cpp" #include "CLucene/index/IndexWriter.cpp" #include "CLucene/index/IndexReader.cpp" +#include "CLucene/index/MergePolicy.cpp" +#include "CLucene/index/MergeScheduler.cpp" #include "CLucene/index/MultiReader.cpp" +#include "CLucene/index/MultiSegmentReader.cpp" +#include "CLucene/index/Payload.cpp" #include "CLucene/index/SegmentInfos.cpp" #include "CLucene/index/SegmentMergeInfo.cpp" #include "CLucene/index/SegmentMergeQueue.cpp" @@ -40,6 +50,8 @@ #include "CLucene/index/SegmentTermEnum.cpp" #include "CLucene/index/SegmentTermPositions.cpp" #include "CLucene/index/SegmentTermVector.cpp" +#include "CLucene/index/SkipListReader.cpp" +#include "CLucene/index/SkipListWriter.cpp" #include "CLucene/index/Term.cpp" #include "CLucene/index/Terms.cpp" #include "CLucene/index/TermInfo.cpp" @@ -47,12 +59,11 @@ #include "CLucene/index/TermInfosWriter.cpp" #include "CLucene/index/TermVectorReader.cpp" #include "CLucene/index/TermVectorWriter.cpp" 
-#include "CLucene/queryParser/Lexer.cpp" +#include "CLucene/queryParser/FastCharStream.cpp" +#include "CLucene/queryParser/QueryParserTokenManager.cpp" #include "CLucene/queryParser/MultiFieldQueryParser.cpp" #include "CLucene/queryParser/QueryParser.cpp" -#include "CLucene/queryParser/QueryParserBase.cpp" #include "CLucene/queryParser/QueryToken.cpp" -#include "CLucene/queryParser/TokenList.cpp" #include "CLucene/search/BooleanQuery.cpp" #include "CLucene/search/BooleanScorer.cpp" #include "CLucene/search/BooleanScorer2.cpp" @@ -73,6 +84,7 @@ #include "CLucene/search/Hits.cpp" #include "CLucene/search/HitQueue.cpp" #include "CLucene/search/IndexSearcher.cpp" +#include "CLucene/search/MatchAllDocsQuery.cpp" #include "CLucene/search/MultiSearcher.cpp" #include "CLucene/search/MultiTermQuery.cpp" #include "CLucene/search/PhrasePositions.cpp" @@ -100,7 +112,7 @@ #include "CLucene/store/IndexOutput.cpp" #include "CLucene/store/Directory.cpp" #include "CLucene/store/RAMDirectory.cpp" -#include "CLucene/store/TransactionalRAMDirectory.cpp" +//#include "CLucene/store/TransactionalRAMDirectory.cpp" #include "CLucene/util/BitSet.cpp" #include "CLucene/util/Equators.cpp" #include "CLucene/util/FastCharStream.cpp" Modified: branches/lucene2_3_2/src/core/CLucene/StdHeader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/StdHeader.cpp 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/core/CLucene/StdHeader.cpp 2009-06-11 17:34:23 UTC (rev 3009) @@ -10,6 +10,8 @@ #include "CLucene/search/Sort.h" #include "CLucene/search/Similarity.h" #include "CLucene/search/FieldCache.h" +#include "CLucene/index/TermVector.h" +#include "CLucene/index/IndexFileNameFilter.h" #include "CLucene/search/FieldSortedHitQueue.h" #include "CLucene/store/LockFactory.h" #include "CLucene/util/_StringIntern.h" @@ -22,20 +24,21 @@ CL_NS_USE(util) CL_NS_USE(search) +CL_NS_USE(index) CL_NS_USE(store) //clears all static memory. 
do not attempt to do anything else //in clucene after calling this function void _lucene_shutdown(){ - FieldSortedHitQueue::_shutdown(); - Sort::_shutdown(); - ScoreDocComparator::_shutdown(); - SortField::_shutdown(); - FieldCache::_shutdown(); - Similarity::_shutdown(); - - CLStringIntern::_shutdown(); - NoLockFactory::_shutdown(); - -_ThreadLocal::_shutdown(); + FieldSortedHitQueue::_shutdown(); + Sort::_shutdown(); + ScoreDocComparator::_shutdown(); + SortField::_shutdown(); + FieldCache::_shutdown(); + Similarity::_shutdown(); + CLStringIntern::_shutdown(); + NoLockFactory::_shutdown(); + _ThreadLocal::_shutdown(); + IndexFileNameFilter::_shutdown(); + _CLDELETE (TermVectorOffsetInfo_EMPTY_OFFSET_INFO); } Modified: branches/lucene2_3_2/src/core/CLucene/StdHeader.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/StdHeader.h 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/core/CLucene/StdHeader.h 2009-06-11 17:34:23 UTC (rev 3009) @@ -27,12 +27,12 @@ #define StringArray std::vector<TCHAR*> #define StringArrayWithDeletor CL_NS(util)::CLVector<TCHAR*, CL_NS(util)::Deletor::tcArray > #define StringArrayConst std::vector<const TCHAR*> -#define StringArrayConstWithDeletor CL_NS(util)::CLVector<const TCHAR*, CL_NS(util)::Deletor::tcArray > +//#define StringArrayConstWithDeletor CL_NS(util)::CLVector<const TCHAR*, CL_NS(util)::Deletor::tcArray > #define AStringArray std::vector<char*> #define AStringArrayWithDeletor CL_NS(util)::CLVector<char*, CL_NS(util)::Deletor::acArray > #define AStringArrayConst std::vector<const char*> -#define AStringArrayConstWithDeletor CL_NS(util)::CLVector<const char*, CL_NS(util)::Deletor::acArray > +//#define AStringArrayConstWithDeletor CL_NS(util)::CLVector<const char*, CL_NS(util)::Deletor::acArray > //call this at the end of running to clean up memory. 
extern CLUCENE_EXPORT void _lucene_shutdown(); Modified: branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-06-11 16:19:00 UTC (rev 3008) +++ branches/lucene2_3_2/src/core/CLucene/analysis/AnalysisHeader.cpp 2009-06-11 17:34:23 UTC (rev 3009) @@ -1,13 +1,14 @@ /*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team -* -* Distributable under the terms of either the Apache License (Version 2.0) or +* +* Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #include "CLucene/_ApiHeader.h" #include "AnalysisHeader.h" #include "CLucene/util/StringBuffer.h" #include "CLucene/util/_ThreadLocal.h" +#include <assert.h> CL_NS_USE(util) ... [truncated message content] |
From: <ust...@us...> - 2009-06-11 16:19:04
|
Revision: 3008 http://clucene.svn.sourceforge.net/clucene/?rev=3008&view=rev Author: ustramooner Date: 2009-06-11 16:19:00 +0000 (Thu, 11 Jun 2009) Log Message: ----------- easier to debug when core is compiled before test code... Modified Paths: -------------- branches/lucene2_3_2/src/test/CMakeLists.txt Modified: branches/lucene2_3_2/src/test/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/test/CMakeLists.txt 2009-06-11 16:18:14 UTC (rev 3007) +++ branches/lucene2_3_2/src/test/CMakeLists.txt 2009-06-11 16:19:00 UTC (rev 3008) @@ -62,7 +62,7 @@ IF ( ENABLE_COMPILE_TESTS ) -SET(test_monolithic_Files ./CLMonolithic_Test.cpp ${clucene-core_SOURCE_DIR}/CLucene/CLMonolithic.cpp) +SET(test_monolithic_Files ${clucene-core_SOURCE_DIR}/CLucene/CLMonolithic.cpp ./CLMonolithic_Test.cpp) IF ( CMAKE_COMPILER_IS_GNUCC ) CHECK_CXX_ACCEPTS_FLAG(-Wall GccFlagWall) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ust...@us...> - 2009-06-11 16:18:19
|
Revision: 3007 http://clucene.svn.sourceforge.net/clucene/?rev=3007&view=rev Author: ustramooner Date: 2009-06-11 16:18:14 +0000 (Thu, 11 Jun 2009) Log Message: ----------- dgap bitvectors Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/util/BitSet.cpp branches/lucene2_3_2/src/core/CLucene/util/BitSet.h Modified: branches/lucene2_3_2/src/core/CLucene/util/BitSet.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/BitSet.cpp 2009-05-04 14:46:40 UTC (rev 3006) +++ branches/lucene2_3_2/src/core/CLucene/util/BitSet.cpp 2009-06-11 16:18:14 UTC (rev 3007) @@ -13,6 +13,25 @@ CL_NS_USE(store) CL_NS_DEF(util) + +const uint8_t BitSet::BYTE_COUNTS[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; + BitSet::BitSet( const BitSet& copy ) : _size( copy._size ), _count(-1) @@ -36,11 +55,12 @@ _count=-1; CL_NS(store)::IndexInput* input = d->openInput( name ); try { - _size = input->readInt(); // read size - _count = input->readInt(); // read count - - bits = _CL_NEWARRAY(uint8_t,(_size >> 3) + 1); // allocate bits - input->readBytes(bits, (_size >> 3) + 1); // read bits + _size = input->readInt(); // read size + if (_size == -1) { + readDgaps(input); 
+ } else { + readBits(input); + } } _CLFINALLY ( input->close(); _CLDELETE(input ); @@ -50,9 +70,11 @@ void BitSet::write(CL_NS(store)::Directory* d, const char* name) { CL_NS(store)::IndexOutput* output = d->createOutput(name); try { - output->writeInt(size()); // write size - output->writeInt(count()); // write count - output->writeBytes(bits, (_size >> 3) + 1); // write bits + if (isSparse()) { + writeDgaps(output); // sparse bit-set more efficiently saved as d-gaps. + } else { + writeBits(output); + } } _CLFINALLY ( output->close(); _CLDELETE(output); @@ -62,7 +84,12 @@ _CLDELETE_ARRAY(bits); } + void BitSet::set(const int32_t bit, bool val){ + if (bit >= _size) { + _CLTHROWA(CL_ERR_IndexOutOfBounds, "bit out of range"); + } + _count = -1; if (val) @@ -78,23 +105,6 @@ int32_t BitSet::count(){ // if the BitSet has been modified if (_count == -1) { - static const uint8_t BYTE_COUNTS[] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; int32_t c = 0; int32_t end = (_size >> 3) + 1; @@ -108,4 +118,66 @@ return _CLNEW BitSet( *this ); } + /** Read as a bit set */ + void BitSet::readBits(IndexInput* input) { + _count = input->readInt(); // read count + bits = _CL_NEWARRAY(uint8_t,(_size >> 3) + 1); // allocate bits + 
input->readBytes(bits, (_size >> 3) + 1); // read bits + } + + /** read as a d-gaps list */ + void BitSet::readDgaps(IndexInput* input) { + _size = input->readInt(); // (re)read size + _count = input->readInt(); // read count + bits = _CL_NEWARRAY(uint8_t,(_size >> 3) + 1); // allocate bits + int32_t last=0; + int32_t n = count(); + while (n>0) { + last += input->readVInt(); + bits[last] = input->readByte(); + n -= BYTE_COUNTS[bits[last] & 0xFF]; + } + } + + /** Write as a bit set */ + void BitSet::writeBits(IndexOutput* output) { + output->writeInt(size()); // write size + output->writeInt(count()); // write count + output->writeBytes(bits, (_size >> 3) + 1); // write bits + } + + /** Write as a d-gaps list */ + void BitSet::writeDgaps(IndexOutput* output) { + output->writeInt(-1); // mark using d-gaps + output->writeInt(size()); // write size + output->writeInt(count()); // write count + int32_t last=0; + int32_t n = count(); + int32_t m = (_size >> 3); + for (int32_t i=0; i<m && n>0; i++) { + if (bits[i]!=0) { + output->writeVInt(i-last); + output->writeByte(bits[i]); + last = i; + n -= BYTE_COUNTS[bits[i] & 0xFF]; + } + } + } + + /** Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. */ + bool BitSet::isSparse() { + // note: order of comparisons below set to favor smaller values (no binary range search.) + // note: adding 4 because we start with ((int) -1) to indicate d-gaps format. + // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore + // multiplying count by (8+8) or (8+16) or (8+24) etc.: + // - first 8 for writing bits[i] (1 byte vs. 1 bit), and + // - second part for writing the byte-number d-gap as vint. + // note: factor is for read/write of byte-arrays being faster than vints. 
+ int32_t factor = 10; + if ((_size >> 3) < (1<< 7)) return factor * (4 + (8+ 8)*count()) < size(); + if ((_size >> 3) < (1<<14)) return factor * (4 + (8+16)*count()) < size(); + if ((_size >> 3) < (1<<21)) return factor * (4 + (8+24)*count()) < size(); + if ((_size >> 3) < (1<<28)) return factor * (4 + (8+32)*count()) < size(); + return factor * (4 + (8+40)*count()) < size(); + } CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/util/BitSet.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/util/BitSet.h 2009-05-04 14:46:40 UTC (rev 3006) +++ branches/lucene2_3_2/src/core/CLucene/util/BitSet.h 2009-06-11 16:18:14 UTC (rev 3007) @@ -9,13 +9,36 @@ CL_CLASS_DEF(store,Directory) +CL_CLASS_DEF(store,IndexInput) +CL_CLASS_DEF(store,IndexOutput) CL_NS_DEF(util) + + +/** Optimized implementation of a vector of bits. This is more-or-less like + java.util.BitSet, but also includes the following: + <ul> + <li>a count() method, which efficiently computes the number of one bits;</li> + <li>optimized read from and write to disk;</li> + <li>inlinable get() method;</li> + <li>store and load, as bit set or d-gaps, depending on sparseness;</li> + </ul> + */ class CLUCENE_EXPORT BitSet:LUCENE_BASE { int32_t _size; int32_t _count; uint8_t *bits; - + + void readBits(CL_NS(store)::IndexInput* input); + /** read as a d-gaps list */ + void readDgaps(CL_NS(store)::IndexInput* input); + /** Write as a bit set */ + void writeBits(CL_NS(store)::IndexOutput* output); + /** Write as a d-gaps list */ + void writeDgaps(CL_NS(store)::IndexOutput* output); + /** Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. 
*/ + bool isSparse(); + static const uint8_t BYTE_COUNTS[256]; protected: BitSet( const BitSet& copy ); @@ -29,7 +52,11 @@ ~BitSet(); ///get the value of the specified bit + ///get the value of the specified bit inline bool get(const int32_t bit) const{ + if (bit >= _size) { + _CLTHROWA(CL_ERR_IndexOutOfBounds, "bit out of range"); + } return (bits[bit >> 3] & (1 << (bit & 7))) != 0; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: Itamar Syn-H. <it...@di...> - 2009-05-05 06:04:37
|
Yes, files_list.txt.... > -----Original Message----- > From: Ben Van Klinken [mailto:bva...@gm...] > Sent: Tuesday, May 05, 2009 8:54 AM > To: syn...@us... > Cc: clu...@li... > Subject: Re: [Clucene-cvs] SF.net SVN: > clucene:[3006]branches/lucene2_3_2/src > > Looks like u might have got some binary data in your commit somehow. > Any idea what that is? Says "binary file a differ" > > B > > Verstuurd vanaf mijn iPhone > > Op 4 mei 2009 om 16:46 heeft syn...@us... > het volgende geschreven:\ > > > Revision: 3006 > > > http://clucene.svn.sourceforge.net/clucene/?rev=3006&view=rev > > Author: synhershko > > Date: 2009-05-04 14:46:40 +0000 (Mon, 04 May 2009) > > > > Log Message: > > ----------- > > MultiFieldQueryParser is back, now completely conforms with JL 2.3.2 > > > > Modified Paths: > > -------------- > > branches/lucene2_3_2/src/core/CLucene/files_list.txt > > branches/lucene2_3_2/src/core/CLucene/queryParser/ > > MultiFieldQueryParser.cpp > > branches/lucene2_3_2/src/core/CLucene/queryParser/ > > MultiFieldQueryParser.h > > branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h > > branches/lucene2_3_2/src/core/CMakeLists.txt > > branches/lucene2_3_2/src/test/queryParser/ > > TestMultiFieldQueryParser.cpp > > branches/lucene2_3_2/src/test/tests.cpp > > > > Modified: branches/lucene2_3_2/src/core/CLucene/files_list.txt > > =================================================================== > > (Binary files differ) > > > > Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/ > > MultiFieldQueryParser.cpp > > =================================================================== > > --- branches/lucene2_3_2/src/core/CLucene/queryParser/ > > MultiFieldQueryParser.cpp 2009-05-03 21:01:38 UTC (rev 3005) > > +++ branches/lucene2_3_2/src/core/CLucene/queryParser/ > > MultiFieldQueryParser.cpp 2009-05-04 14:46:40 UTC (rev 3006) > > @@ -21,76 +21,18 @@ > > CL_NS_DEF(queryParser) > > > > > > -MultiFieldQueryParser::MultiFieldQueryParser(const TCHAR** 
fields, > > CL_NS(analysis)::Analyzer* a, BoostMap* boosts): > > - QueryParser(NULL,a) > > +MultiFieldQueryParser::MultiFieldQueryParser(const TCHAR** _fields, > > CL_NS(analysis)::Analyzer* a, BoostMap* _boosts): > > + QueryParser(NULL,a), fields(_fields), boosts(_boosts) > > { > > - this->fields = fields; > > - this->boosts = boosts; > > } > > MultiFieldQueryParser::~MultiFieldQueryParser(){ > > } > > > > -//static > > -Query* MultiFieldQueryParser::parse(const TCHAR* query, const > > TCHAR** fields, Analyzer* analyzer) > > -{ > > - BooleanQuery* bQuery = _CLNEW BooleanQuery( true ); > > - int32_t i = 0; > > - while ( fields[i] != NULL ){ > > - Query* q = QueryParser::parse(query, fields[i], analyzer); > > - if (q && (q->getQueryName()!=BooleanQuery::getClassName() > > || ((BooleanQuery*)q)->getClauseCount() > 0)) { > > - //todo: Move to using BooleanClause::Occur > > - bQuery->add(q, true, false, false); > > - } else { > > - _CLDELETE(q); > > - } > > - > > - i++; > > - } > > - return bQuery; > > -} > > - > > -//static > > -Query* MultiFieldQueryParser::parse(const TCHAR* query, const > > TCHAR** fields, const uint8_t* flags, Analyzer* analyzer) -{ > > - BooleanQuery* bQuery = _CLNEW BooleanQuery( true ); > > - int32_t i = 0; > > - while ( fields[i] != NULL ) > > - { > > - Query* q = QueryParser::parse(query, fields[i], analyzer); > > - if (q && (q->getQueryName()!=BooleanQuery::getClassName() > > || ((BooleanQuery*)q)->getClauseCount() > 0)) { > > - uint8_t flag = flags[i]; > > - switch (flag) > > - { > > - //todo: Move to using BooleanClause::Occur > > - case MultiFieldQueryParser::REQUIRED_FIELD: > > - bQuery->add(q, true, true, false); > > - break; > > - case MultiFieldQueryParser::PROHIBITED_FIELD: > > - bQuery->add(q, true, false, true); > > - break; > > - default: > > - bQuery->add(q, true, false, false); > > - break; > > - } > > - } else { > > - _CLDELETE(q); > > - } > > - > > - i++; > > - } > > - return bQuery; > > -} > > - > > -//not static > > 
-CL_NS(search)::Query* MultiFieldQueryParser::parse(const TCHAR* > > query) { > > - return parse(query, this->fields, this->analyzer); > > -} > > - > > -Query* MultiFieldQueryParser::GetFieldQuery(const TCHAR* field, > > TCHAR* queryText, int32_t slop){ > > +Query* MultiFieldQueryParser::getFieldQuery(const TCHAR* field, > > const TCHAR* queryText, const int32_t slop){ > > if (field == NULL) { > > vector<BooleanClause*> clauses; > > for (int i = 0; fields[i]!=NULL; ++i) { > > - Query* q = QueryParser::GetFieldQuery(fields[i], > > queryText); > > + Query* q = QueryParser::getFieldQuery(fields[i], > > queryText); > > if (q != NULL) { > > //If the user passes a map of boosts > > if (boosts != NULL) { > > @@ -103,116 +45,133 @@ > > if (q->getQueryName() == > PhraseQuery::getClassName()) { > > ((PhraseQuery*)q)->setSlop(slop); > > } > > + // TODO: > > //if (q instanceof MultiPhraseQuery) { > > // ((MultiPhraseQuery) q).setSlop(slop); > > //} > > - q = QueryAddedCallback(fields[i], q); > > - if ( q ) > > - clauses.push_back(_CLNEW > BooleanClause(q, true, > > false,false)); > > + clauses.push_back(_CLNEW BooleanClause(q, true, > > BooleanClause::SHOULD)); > > } > > } > > if (clauses.size() == 0) // happens for stopwords > > return NULL; > > - Query* q = QueryParser::GetBooleanQuery(clauses); > > - return q; > > + return QueryParser::getBooleanQuery(clauses, true); > > }else{ > > - Query* q = QueryParser::GetFieldQuery(field, queryText); > > - if ( q ) > > - q = QueryAddedCallback(field,q); > > - return q; > > + return QueryParser::getFieldQuery(field, queryText); > > } > > } > > > > - > > -Query* MultiFieldQueryParser::GetFieldQuery(const TCHAR* field, > > TCHAR* queryText){ > > - return GetFieldQuery(field, queryText, 0); > > -} > > - > > - > > -CL_NS(search)::Query* MultiFieldQueryParser::GetFuzzyQuery(const > > TCHAR* field, TCHAR* termStr){ > > +Query* MultiFieldQueryParser::getFuzzyQuery(const TCHAR* field, > > TCHAR* termStr, const float_t minSimilarity){ > > if 
(field == NULL) { > > vector<BooleanClause*> clauses; > > for (int i = 0; fields[i]!=NULL; ++i) { > > - Query* q = QueryParser::GetFuzzyQuery(fields[i], > > termStr); //todo: , minSimilarity > > - if ( q ){ > > - q = QueryAddedCallback(fields[i], q); > > - if ( q ){ > > - clauses.push_back(_CLNEW > > BooleanClause(q,true,false,false) ); > > - } > > - } > > + Query* q = QueryParser::getFuzzyQuery(fields[i], > > termStr, minSimilarity); > > + if (q) clauses.push_back(_CLNEW BooleanClause(q,true, > > BooleanClause::SHOULD) ); > > } > > - return QueryParser::GetBooleanQuery(clauses); > > - }else{ > > - Query* q = QueryParser::GetFuzzyQuery(field, termStr);// > > todo: , minSimilarity > > - if ( q ) > > - q = QueryAddedCallback(field,q); > > - return q; > > + return QueryParser::getBooleanQuery(clauses, true); > > } > > + return QueryParser::getFuzzyQuery(field, termStr, > minSimilarity); > > } > > > > -Query* MultiFieldQueryParser::GetPrefixQuery(const TCHAR* field, > > TCHAR* termStr){ > > +Query* MultiFieldQueryParser::getPrefixQuery(const TCHAR* field, > > TCHAR* termStr){ > > if (field == NULL) { > > vector<BooleanClause*> clauses; > > for (int i = 0; fields[i]!=NULL; ++i) { > > - Query* q = QueryParser::GetPrefixQuery(fields[i], > > termStr); > > - if ( q ){ > > - q = QueryAddedCallback(fields[i],q); > > - if ( q ){ > > - clauses.push_back(_CLNEW > > BooleanClause(q,true,false,false)); > > - } > > - } > > + Query* q = QueryParser::getPrefixQuery(fields[i], > > termStr); > > + if (q) clauses.push_back(_CLNEW > > BooleanClause(q,true,BooleanClause::SHOULD)); > > } > > - return QueryParser::GetBooleanQuery(clauses); > > - }else{ > > - Query* q = QueryParser::GetPrefixQuery(field, termStr); > > - if ( q ) > > - q = QueryAddedCallback(field,q); > > - return q; > > + return QueryParser::getBooleanQuery(clauses, true); > > } > > + return QueryParser::getPrefixQuery(field, termStr); > > } > > > > -Query* MultiFieldQueryParser::GetWildcardQuery(const TCHAR* field, > > 
TCHAR* termStr){ > > +Query* MultiFieldQueryParser::getWildcardQuery(const TCHAR* field, > > TCHAR* termStr){ > > if (field == NULL) { > > vector<BooleanClause*> clauses; > > for (int i = 0; fields[i]!=NULL; ++i) { > > - Query* q = QueryParser::GetWildcardQuery(fields[i], > > termStr); > > - if ( q ){ > > - q = QueryAddedCallback(fields[i],q); > > - if ( q ){ > > - clauses.push_back(_CLNEW > > BooleanClause(q,true,false,false)); > > - } > > - } > > + Query* q = QueryParser::getWildcardQuery(fields[i], > > termStr); > > + if (q) clauses.push_back(_CLNEW > > BooleanClause(q,true,BooleanClause::SHOULD)); > > } > > - return QueryParser::GetBooleanQuery(clauses); > > - }else{ > > - Query* q = QueryParser::GetWildcardQuery(field, termStr); > > - if ( q ) > > - q = QueryAddedCallback(field,q); > > - return q; > > + return QueryParser::getBooleanQuery(clauses, true); > > } > > + return QueryParser::getWildcardQuery(field, termStr); > > } > > > > > > -Query* MultiFieldQueryParser::GetRangeQuery(const TCHAR* field, > > TCHAR* part1, TCHAR* part2, bool inclusive){ > > +Query* MultiFieldQueryParser::getRangeQuery(const TCHAR* field, > > TCHAR* part1, TCHAR* part2, const bool inclusive){ > > if (field == NULL) { > > vector<BooleanClause*> clauses; > > for (int i = 0; fields[i]!=NULL; ++i) { > > - Query* q = > QueryParser::GetRangeQuery(fields[i], part1, > > part2, inclusive); > > - if ( q ){ > > - q = QueryAddedCallback(fields[i],q); > > - if ( q ){ > > - clauses.push_back(_CLNEW > > BooleanClause(q,true,false,false)); > > - } > > - } > > + Query* q = QueryParser::getRangeQuery(fields[i], part1, > > part2, inclusive); > > + if (q) clauses.push_back(_CLNEW > > BooleanClause(q,true,BooleanClause::SHOULD)); > > } > > - return QueryParser::GetBooleanQuery(clauses); > > + return QueryParser::getBooleanQuery(clauses, true); > > }else{ > > - Query* q = QueryParser::GetRangeQuery(field, > part1, part2, > > inclusive); > > - if ( q ) > > - q = QueryAddedCallback(field,q); > > - return 
q; > > + return QueryParser::getRangeQuery(field, part1, part2, > > inclusive); > > } > > } > > > > +//static > > +Query* MultiFieldQueryParser::parse(const TCHAR** _queries, const > > TCHAR** _fields, Analyzer* analyzer) > > +{ > > + BooleanQuery* bQuery = _CLNEW BooleanQuery(); > > + for (size_t i = 0; _fields[i]!=NULL; i++) > > + { > > + if (_queries[i] == NULL) { > > + _CLLDELETE(bQuery); > > + _CLTHROWA(CL_ERR_IllegalArgument, "_queries.length != > > _fields.length"); > > + } > > + // TODO: Reuse qp instead of creating it over and > over again > > + QueryParser* qp = _CLNEW QueryParser(_fields[i], analyzer); > > + Query* q = qp->parse(_queries[i]); > > + if (q!=NULL && // q never null, just being defensive > > + (!(q->instanceOf(BooleanQuery::getClassName()) || > > ((BooleanQuery*)q)->getClauseCount() > 0))) { > > + bQuery->add(q, true, BooleanClause::SHOULD); > > + } else > > + _CLLDELETE(q); > > + _CLLDELETE(qp); > > + } > > + return bQuery; > > +} > > > > +// static > > +Query* MultiFieldQueryParser::parse(const TCHAR* query, const > > TCHAR** _fields, const uint8_t* flags, Analyzer* analyzer) { > > + BooleanQuery* bQuery = _CLNEW BooleanQuery(); > > + for (size_t i = 0; _fields[i]!=NULL; i++) { > > + if (flags[i] == NULL) { > > + _CLLDELETE(bQuery); > > + _CLTHROWA(CL_ERR_IllegalArgument, "_fields.length != > > flags.length"); > > + } > > + QueryParser* qp = _CLNEW QueryParser(_fields[i], analyzer); > > + Query* q = qp->parse(query); > > + if (q!=NULL && // q never null, just being defensive > > + (!(q->instanceOf(BooleanQuery::getClassName())) || > > ((BooleanQuery*)q)->getClauseCount()>0)) { > > + bQuery->add(q, true, > (BooleanClause::Occur)flags[i]); > > + } else > > + _CLLDELETE(q); > > + _CLLDELETE(qp); > > + } > > + return bQuery; > > +} > > + > > +//static > > +Query* MultiFieldQueryParser::parse(const TCHAR** _queries, const > > TCHAR** _fields, const uint8_t* flags, Analyzer* analyzer){ > > + BooleanQuery* bQuery = _CLNEW BooleanQuery(); > > + 
for (size_t i = 0; _fields[i]!=NULL; i++) > > + { > > + if (_queries[i] == NULL || flags[i] == NULL) { > > + _CLLDELETE(bQuery); > > + _CLTHROWA(CL_ERR_IllegalArgument, "_queries, _fields, > > and flags array have have different length"); > > + } > > + QueryParser* qp = _CLNEW QueryParser(_fields[i], analyzer); > > + Query* q = qp->parse(_queries[i]); > > + if (q!=NULL && // q never null, just being defensive > > + (!(q->instanceOf(BooleanQuery::getClassName())) || > > ((BooleanQuery*)q)->getClauseCount()>0)) { > > + bQuery->add(q, true, > (BooleanClause::Occur)flags[i]); > > + } else > > + _CLLDELETE(q); > > + _CLLDELETE(qp); > > + } > > + return bQuery; > > +} > > + > > CL_NS_END > > > > Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/ > > MultiFieldQueryParser.h > > =================================================================== > > --- branches/lucene2_3_2/src/core/CLucene/queryParser/ > > MultiFieldQueryParser.h 2009-05-03 21:01:38 UTC (rev 3005) > > +++ branches/lucene2_3_2/src/core/CLucene/queryParser/ > > MultiFieldQueryParser.h 2009-05-04 14:46:40 UTC (rev 3006) > > @@ -24,112 +24,142 @@ > > CL_NS(util)::Deletor::DummyFloat > >> BoostMap; > > > > - /** > > - * A QueryParser which constructs queries to search multiple > > fields. > > - * > > - */ > > - class CLUCENE_EXPORT MultiFieldQueryParser: public QueryParser > > - { > > - protected: > > - const TCHAR** fields; > > - BoostMap* boosts; > > - public: > > - LUCENE_STATIC_CONSTANT(uint8_t, NORMAL_FIELD=0); > > - LUCENE_STATIC_CONSTANT(uint8_t, REQUIRED_FIELD=1); > > - LUCENE_STATIC_CONSTANT(uint8_t, PROHIBITED_FIELD=2); > > - > > - /** > > - * Creates a MultiFieldQueryParser. 
> > - * > > - * <p>It will, when parse(String query) > > - * is called, construct a query like this (assuming the > > query consists of > > - * two terms and you specify the two fields <code>title</ > > code> and <code>body</code>):</p> > > - * > > - * <code> > > - * (title:term1 body:term1) (title:term2 body:term2) > > - * </code> > > - * > > - * <p>When setDefaultOperator(AND_OPERATOR) is set, the > > result will be:</p> > > - * > > - * <code> > > - * +(title:term1 body:term1) +(title:term2 body:term2) > > - * </code> > > - * > > - * <p>In other words, all the query's terms must > appear, but > > it doesn't matter in > > - * what fields they appear.</p> > > - */ > > - MultiFieldQueryParser(const TCHAR** fields, > > CL_NS(analysis)::Analyzer* a, BoostMap* boosts = NULL); > > - virtual ~MultiFieldQueryParser(); > > - > > - /** > > - * <p> > > - * Parses a query which searches on the fields specified. > > - * <p> > > - * If x fields are specified, this effectively constructs: > > - * <pre> > > - * <code> > > - * (field1:query) (field2:query) (field3:query)... > > (fieldx:query) > > - * </code> > > - * </pre> > > - * > > - * @param query Query string to parse > > - * @param fields Fields to search on > > - * @param analyzer Analyzer to use > > - * @throws ParserException if query parsing fails > > - * @throws TokenMgrError if query parsing fails > > - */ > > - static CL_NS(search)::Query* parse(const TCHAR* query, > > const TCHAR** fields, CL_NS(analysis)::Analyzer* analyzer); > > - > > - /** > > - * <p> > > - * Parses a query, searching on the fields specified. > > - * Use this if you need to specify certain fields as > > required, > > - * and others as prohibited. 
> > - * <p><pre> > > - * Usage: > > - * <code> > > - * TCHAR** fields = {"filename", "contents", > "description"}; > > - * int8_t* flags = {MultiFieldQueryParser::NORMAL FIELD, > > - * MultiFieldQueryParser::REQUIRED FIELD, > > - * MultiFieldQueryParser::PROHIBITED FIELD}; > > - * parse(query, fields, flags, analyzer); > > - * </code> > > - * </pre> > > - *<p> > > - * The code above would construct a query: > > - * <pre> > > - * <code> > > - * (filename:query) +(contents:query) -(description:query) > > - * </code> > > - * </pre> > > - * > > - * @param query Query string to parse > > - * @param fields Fields to search on > > - * @param flags Flags describing the fields > > - * @param analyzer Analyzer to use > > - * @throws ParserException if query parsing fails > > - * @throws TokenMgrError if query parsing fails > > - */ > > - static CL_NS(search)::Query* parse(const TCHAR* query, > > const TCHAR** fields, const uint8_t* flags, > > CL_NS(analysis)::Analyzer* analyzer); > > +/** > > +* A QueryParser which constructs queries to search multiple fields. > > +* > > +*/ > > +class CLUCENE_EXPORT MultiFieldQueryParser: public QueryParser { > > +protected: > > + const TCHAR** fields; > > + BoostMap* boosts; > > +public: > > + /** > > + * Creates a MultiFieldQueryParser. > > + * Allows passing of a map with term to Boost, and the boost to > > apply to each term. 
> > + * > > + * <p>It will, when parse(String query) > > + * is called, construct a query like this (assuming the query > > consists of > > + * two terms and you specify the two fields <code>title</code> > > and <code>body</code>):</p> > > + * > > + * <code> > > + * (title:term1 body:term1) (title:term2 body:term2) > > + * </code> > > + * > > + * <p>When setDefaultOperator(AND_OPERATOR) is set, the result > > will be:</p> > > + * > > + * <code> > > + * +(title:term1 body:term1) +(title:term2 body:term2) > > + * </code> > > + * > > + * <p>When you pass a boost (title=>5 body=>10) you can get </p> > > + * > > + * <code> > > + * +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 > > body:term2^10.0) > > + * </code> > > + * > > + * <p>In other words, all the query's terms must appear, but it > > doesn't matter in > > + * what fields they appear.</p> > > + */ > > + MultiFieldQueryParser(const TCHAR** _fields, > > CL_NS(analysis)::Analyzer* a, BoostMap* _boosts = NULL); > > + virtual ~MultiFieldQueryParser(); > > > > - // non-static version of the above > > - CL_NS(search)::Query* parse(const TCHAR* query); > > +protected: > > + CL_NS(search)::Query* getFieldQuery(const TCHAR* field, const > > TCHAR* queryText, const int32_t slop); > > + CL_NS(search)::Query* getFieldQuery(const TCHAR* field, const > > TCHAR* queryText) { return getFieldQuery(field,queryText,0); } > > + CL_NS(search)::Query* getFuzzyQuery(const TCHAR* field, TCHAR* > > termStr, const float_t minSimilarity); > > + CL_NS(search)::Query* getPrefixQuery(const TCHAR* field, TCHAR* > > termStr); > > + CL_NS(search)::Query* getWildcardQuery(const TCHAR* field, > > TCHAR* termStr); > > + CL_NS(search)::Query* getRangeQuery(const TCHAR* field, TCHAR* > > part1, TCHAR* part2, const bool inclusive); > > > > - protected: > > - CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, > > TCHAR* queryText); > > - CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, > > TCHAR* queryText, int32_t slop); > > - 
CL_NS(search)::Query* GetFuzzyQuery(const TCHAR* field, > > TCHAR* termStr); > > - CL_NS(search)::Query* GetRangeQuery(const TCHAR* field, > > TCHAR* part1, TCHAR* part2, bool inclusive); > > - CL_NS(search)::Query* GetPrefixQuery(const TCHAR* field, > > TCHAR* termStr); > > - CL_NS(search)::Query* GetWildcardQuery(const > TCHAR* field, > > TCHAR* termStr); > > +public: > > + /** > > + * Parses a query which searches on the fields specified. > > + * <p> > > + * If x fields are specified, this effectively constructs: > > + * <pre> > > + * <code> > > + * (field1:query1) (field2:query2) (field3:query3)... > > (fieldx:queryx) > > + * </code> > > + * </pre> > > + * @param queries Queries strings to parse > > + * @param fields Fields to search on > > + * @param analyzer Analyzer to use > > + * @throws ParseException if query parsing fails > > + * @throws IllegalArgumentException if the length of > the queries > > array differs > > + * from the length of the fields array > > + */ > > + static CL_NS(search)::Query* parse(const TCHAR** _queries, > > const TCHAR** _fields, > > + CL_NS(analysis)::Analyzer* analyzer); > > > > - /** > > - * A special virtual function for the > MultiFieldQueryParser > > which can be used > > - * to clean up queries. Once the field name is > known and the > > query has been > > - * created, its passed to this function. > > - * An example of this usage is to set boosts. > > - */ > > - virtual CL_NS(search)::Query* QueryAddedCallback(const > > TCHAR* field, CL_NS(search)::Query* query){ return query; } > > - }; > > + /** > > + * Parses a query, searching on the fields specified. > > + * Use this if you need to specify certain fields as required, > > + * and others as prohibited. 
> > + * <p><pre> > > + * Usage: > > + * <code> > > + * String[] fields = {"filename", "contents", "description"}; > > + * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, > > + * BooleanClause.Occur.MUST, > > + * BooleanClause.Occur.MUST_NOT}; > > + * MultiFieldQueryParser.parse("query", fields, flags, > analyzer); > > + * </code> > > + * </pre> > > + *<p> > > + * The code above would construct a query: > > + * <pre> > > + * <code> > > + * (filename:query) +(contents:query) -(description:query) > > + * </code> > > + * </pre> > > + * > > + * @param query Query string to parse > > + * @param fields Fields to search on > > + * @param flags Flags describing the fields > > + * @param analyzer Analyzer to use > > + * @throws ParseException if query parsing fails > > + * @throws IllegalArgumentException if the length of > the fields > > array differs > > + * from the length of the flags array > > + */ > > + static CL_NS(search)::Query* parse(const TCHAR* query, const > > TCHAR** _fields, > > + const uint8_t* flags, CL_NS(analysis)::Analyzer* analyzer); > > + > > + /** > > + * Parses a query, searching on the fields specified. > > + * Use this if you need to specify certain fields as required, > > + * and others as prohibited. 
> > + * <p><pre> > > + * Usage: > > + * <code> > > + * String[] query = {"query1", "query2", "query3"}; > > + * String[] fields = {"filename", "contents", "description"}; > > + * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, > > + * BooleanClause.Occur.MUST, > > + * BooleanClause.Occur.MUST_NOT}; > > + * MultiFieldQueryParser.parse(query, fields, flags, analyzer); > > + * </code> > > + * </pre> > > + *<p> > > + * The code above would construct a query: > > + * <pre> > > + * <code> > > + * (filename:query1) +(contents:query2) -(description:query3) > > + * </code> > > + * </pre> > > + * > > + * @param queries Queries string to parse > > + * @param fields Fields to search on > > + * @param flags Flags describing the fields > > + * @param analyzer Analyzer to use > > + * @throws ParseException if query parsing fails > > + * @throws IllegalArgumentException if the length of the > > queries, fields, > > + * and flags array differ > > + */ > > + static CL_NS(search)::Query* parse(const TCHAR** _queries, > > const TCHAR** _fields, const uint8_t* flags, > > + CL_NS(analysis)::Analyzer* analyzer); > > + > > + CL_NS(search)::Query* parse(const TCHAR* _query){return > > QueryParser::parse(_query);} > > +}; > > CL_NS_END > > #endif > > > > Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/ > > QueryParser.h > > =================================================================== > > --- branches/lucene2_3_2/src/core/CLucene/queryParser/ > > QueryParser.h 2009-05-03 21:01:38 UTC (rev 3005) > > +++ branches/lucene2_3_2/src/core/CLucene/queryParser/ > > QueryParser.h 2009-05-04 14:46:40 UTC (rev 3006) > > @@ -322,7 +322,7 @@ > > /** > > * @exception ParseException throw in overridden method > to disallow > > */ > > - CL_NS(search)::Query* getFieldQuery(const TCHAR* > _field, const > > TCHAR* queryText); > > + virtual CL_NS(search)::Query* getFieldQuery(const TCHAR* > > _field, const TCHAR* queryText); > > > > /** > > * Base implementation delegates to {@link 
> > #getFieldQuery(String,String)}. > > @@ -331,12 +331,12 @@ > > * > > * @exception ParseException throw in overridden method > to disallow > > */ > > - CL_NS(search)::Query* getFieldQuery(const TCHAR* > _field, const > > TCHAR* queryText, const int32_t slop); > > + virtual CL_NS(search)::Query* getFieldQuery(const TCHAR* > > _field, const TCHAR* queryText, const int32_t slop); > > > > /** > > * @exception ParseException throw in overridden method > to disallow > > */ > > - CL_NS(search)::Query* getRangeQuery(const TCHAR* > field, TCHAR* > > part1, TCHAR* part2, const bool inclusive); > > + virtual CL_NS(search)::Query* getRangeQuery(const > TCHAR* field, > > TCHAR* part1, TCHAR* part2, const bool inclusive); > > > > /** > > * Factory method for generating query, given a set of clauses. > > @@ -375,7 +375,7 @@ > > * @return Resulting {@link Query} built for the term > > * @exception ParseException throw in overridden method > to disallow > > */ > > - CL_NS(search)::Query* getWildcardQuery(const TCHAR* _field, > > TCHAR* termStr); > > + virtual CL_NS(search)::Query* getWildcardQuery(const TCHAR* > > _field, TCHAR* termStr); > > > > /** > > * Factory method for generating a query (similar to > > @@ -400,7 +400,7 @@ > > * @return Resulting {@link Query} built for the term > > * @exception ParseException throw in overridden method > to disallow > > */ > > - CL_NS(search)::Query* getPrefixQuery(const TCHAR* _field, > > TCHAR* _termStr); > > + virtual CL_NS(search)::Query* getPrefixQuery(const TCHAR* > > _field, TCHAR* _termStr); > > > > /** > > * Factory method for generating a query (similar to > > @@ -413,7 +413,7 @@ > > * @return Resulting {@link Query} built for the term > > * @exception ParseException throw in overridden method > to disallow > > */ > > - CL_NS(search)::Query* getFuzzyQuery(const TCHAR* > _field, TCHAR* > > termStr, const float_t minSimilarity); > > + virtual CL_NS(search)::Query* getFuzzyQuery(const TCHAR* > > _field, TCHAR* termStr, const float_t 
minSimilarity); > > > > private: > > /** > > > > Modified: branches/lucene2_3_2/src/core/CMakeLists.txt > > =================================================================== > > --- branches/lucene2_3_2/src/core/CMakeLists.txt 2009-05-03 > > 21:01:38 UTC (rev 3005) > > +++ branches/lucene2_3_2/src/core/CMakeLists.txt 2009-05-04 > > 14:46:40 UTC (rev 3006) > > @@ -29,7 +29,7 @@ > > ./CLucene/util/StringIntern.cpp > > ./CLucene/util/BitSet.cpp > > ./CLucene/queryParser/FastCharStream.cpp > > - #./CLucene/queryParser/MultiFieldQueryParser.cpp > > + ./CLucene/queryParser/MultiFieldQueryParser.cpp > > ./CLucene/queryParser/QueryParser.cpp > > ./CLucene/queryParser/QueryParserTokenManager.cpp > > ./CLucene/queryParser/Token.cpp > > > > Modified: branches/lucene2_3_2/src/test/queryParser/ > > TestMultiFieldQueryParser.cpp > > =================================================================== > > --- branches/lucene2_3_2/src/test/queryParser/ > > TestMultiFieldQueryParser.cpp 2009-05-03 21:01:38 UTC (rev 3005) > > +++ branches/lucene2_3_2/src/test/queryParser/ > > TestMultiFieldQueryParser.cpp 2009-05-04 14:46:40 UTC (rev 3006) > > @@ -6,164 +6,167 @@ > > --- > > --- > > --- > > > --------------------------------------------------------------------- > > */ > > #include "test.h" > > > > -//class MQPTestFilter: public TokenFilter { > > -//public: > > -// > > -// bool inPhrase; > > -// int32_t savedStart, savedEnd; > > -// > > -// /** > > -// * Filter which discards the token 'stop' and which > expands the > > -// * token 'phrase' into 'phrase1 phrase2' > > -// */ > > -// MQPTestFilter(TokenStream* in): > > -// TokenFilter(in,true), > > -// inPhrase(false), > > -// savedStart(0), > > -// savedEnd(0) > > -// { > > -// } > > -// > > -// bool next(CL_NS(analysis)::Token* token) { > > -// if (inPhrase) { > > -// inPhrase = false; > > -// token->set( _T("phrase2"), savedStart, savedEnd); > > -// return true; > > -// }else{ > > -// while( input->next(token) ){ > > -// if ( 
_tcscmp(token->termBuffer(), _T("phrase")) > > == 0 ) { > > -// inPhrase = true; > > -// savedStart = token->startOffset(); > > -// savedEnd = token->endOffset(); > > -// token->set( _T("phrase1"), savedStart, > > savedEnd); > > -// return true; > > -// }else if ( _tcscmp(token->termBuffer(), > > _T("stop") ) !=0 ){ > > -// return true; > > -// } > > -// } > > -// } > > -// return false; > > -// } > > -//}; > > -// > > -//class MQPTestAnalyzer: public Analyzer { > > -//public: > > -// MQPTestAnalyzer() { > > -// } > > -// > > -// /** Filters LowerCaseTokenizer with StopFilter. */ > > -// TokenStream* tokenStream(const TCHAR* fieldName, Reader* > > reader) { > > -// return _CLNEW MQPTestFilter(_CLNEW > > LowerCaseTokenizer(reader)); > > -// } > > -//}; > > -// > > -//void assertEquals(CuTest *tc,const TCHAR* result, Query* q) { > > -// if ( q == NULL ) > > -// return; > > -// > > -// const TCHAR* s = q->toString(); > > -// int ret = _tcscmp(s,result); > > -// _CLDELETE(q); > > -// if ( ret != 0 ) { > > -// TCHAR buf[HUGE_STRING_LEN]; > > -// _sntprintf(buf, HUGE_STRING_LEN, _T("FAILED Query > > yielded /%s/, expecting /%s/\n"), s, result); > > -// _CLDELETE_LCARRAY(s); > > -// CuFail(tc, buf); > > -// } > > -// _CLDELETE_LCARRAY(s); > > -//} > > -// > > -//// verify parsing of query using a stopping analyzer > > -//void assertStopQueryEquals(CuTest *tc, const TCHAR* qtxt, const > > TCHAR* expectedRes) { > > -// const TCHAR* fields[] = {_T("b"), _T("t"), NULL }; > > -// //Occur occur[] = {Occur.SHOULD, Occur.SHOULD}; > > -// MQPTestAnalyzer *a = _CLNEW MQPTestAnalyzer(); > > -// MultiFieldQueryParser mfqp(fields, a); > > -// > > -// Query *q = mfqp.parse(qtxt); > > -// assertEquals(tc, expectedRes, q); > > -// > > -// q = MultiFieldQueryParser::parse(qtxt, fields, a); > > -// assertEquals(tc, expectedRes, q); > > -// _CLDELETE(a); > > -//} > > -// > > -///** test stop words arsing for both the non static form, > and for > > the > > -//* corresponding static form 
(qtxt, fields[]). */ > > -//void tesStopwordsParsing(CuTest *tc) { > > -// assertStopQueryEquals(tc, _T("one"), _T("b:one t:one")); > > -// assertStopQueryEquals(tc, _T("one stop"), _T("b:one t:one")); > > -// assertStopQueryEquals(tc, _T("one (stop)"), > _T("b:one t:one")); > > -// assertStopQueryEquals(tc, _T("one ((stop))"), _T("b:one > > t:one")); > > -// assertStopQueryEquals(tc, _T("stop"), _T("")); > > -// assertStopQueryEquals(tc, _T("(stop)"), _T("")); > > -// assertStopQueryEquals(tc, _T("((stop))"), _T("")); > > -//} > > -// > > -//void testMFQPSimple(CuTest *tc) { > > -// const TCHAR* fields[] = {_T("b"), _T("t"), NULL}; > > -// Analyzer* a = _CLNEW StandardAnalyzer(); > > -// MultiFieldQueryParser mfqp(fields, a); > > -// > > -// Query *q = mfqp.parse(_T("one")); > > -// assertEquals(tc, _T("b:one t:one"), q); > > -// > > -// q = mfqp.parse(_T("one two")); > > -// assertEquals(tc, _T("(b:one t:one) (b:two t:two)"),q); > > -// > > -// q = mfqp.parse(_T("+one +two")); > > -// assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); > > -// > > -// q = mfqp.parse(_T("+one -two -three")); > > -// assertEquals(tc, _T("+(b:one t:one) -(b:two t:two) > -(b:three > > t:three)"), q); > > -// > > -// q = mfqp.parse(_T("one^2 two")); > > -// assertEquals(tc, _T("((b:one t:one)^2.0) (b:two t:two)"), q); > > -// > > -// q = mfqp.parse(_T("one~ two")); > > -// assertEquals(tc, _T("(b:one~0.5 t:one~0.5) (b:two > t:two)"), q); > > -// > > -// q = mfqp.parse(_T("one~0.8 two^2")); > > -// assertEquals(tc, _T("(b:one~0.8 t:one~0.8) ((b:two > > t:two)^2.0)"), q); > > -// > > -// q = mfqp.parse(_T("one* two*")); > > -// assertEquals(tc, _T("(b:one* t:one*) (b:two* t:two*)"), q); > > -// > > -// q = mfqp.parse(_T("[a TO c] two")); > > -// assertEquals(tc, _T("(b:[a TO c] t:[a TO c]) (b:two > t:two)"), > > q); > > -// > > -// q = mfqp.parse(_T("w?ldcard")); > > -// assertEquals(tc, _T("b:w?ldcard t:w?ldcard"), q); > > -// > > -// q = mfqp.parse(_T("\"foo bar\"")); > > -// 
assertEquals(tc, _T("b:\"foo bar\" t:\"foo bar\""), q); > > -// > > -// q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); > > -// assertEquals(tc, _T("(b:\"aa bb cc\" t:\"aa bb cc\") > (b:\"dd > > ee\" t:\"dd ee\")"), q); > > -// > > -// q = mfqp.parse(_T("\"foo bar\"~4")); > > -// assertEquals(tc, _T("b:\"foo bar\"~4 t:\"foo bar\"~4"), q); > > -// > > -// // make sure that terms which have a field are not touched: > > -// q = mfqp.parse(_T("one f:two")); > > -// assertEquals(tc, _T("(b:one t:one) f:two"), q); > > -// > > -// // AND mode: > > -// mfqp.setDefaultOperator(QueryParser::AND_OPERATOR); > > -// q = mfqp.parse(_T("one two")); > > -// assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); > > -// q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); > > -// assertEquals(tc, _T("+(b:\"aa bb cc\" t:\"aa bb cc\") +(b: > > \"dd ee\" t:\"dd ee\")"), q); > > -// > > -// _CLDELETE(a); > > -//} > > -// > > -//CuSuite *testMultiFieldQueryParser(void) > > -//{ > > -// CuSuite *suite = CuSuiteNew(_T("CLucene Multi-Field > QP Test")); > > -// > > -// SUITE_ADD_TEST(suite, tesStopwordsParsing); > > -// SUITE_ADD_TEST(suite, testMFQPSimple); > > -// > > -// return suite; > > -//} > > \ No newline at end of file > > +class MQPTestFilter: public TokenFilter { > > +public: > > + > > + bool inPhrase; > > + int32_t savedStart, savedEnd; > > + > > + /** > > + * Filter which discards the token 'stop' and which expands the > > + * token 'phrase' into 'phrase1 phrase2' > > + */ > > + MQPTestFilter(TokenStream* in): > > + TokenFilter(in,true), > > + inPhrase(false), > > + savedStart(0), > > + savedEnd(0) > > + { > > + } > > + > > + CL_NS(analysis)::Token* next(CL_NS(analysis)::Token*& token) { > > + if (inPhrase) { > > + if (token == NULL) token=_CLNEW > CL_NS(analysis)::Token(); > > + inPhrase = false; > > + token->set( _T("phrase2"), savedStart, savedEnd); > > + return token; > > + }else{ > > + while( input->next(token) ){ > > + if ( _tcscmp(token->termBuffer(), > _T("phrase")) == > > 0 
) { > > + inPhrase = true; > > + savedStart = token->startOffset(); > > + savedEnd = token->endOffset(); > > + token->set( _T("phrase1"), savedStart, > savedEnd); > > + return token; > > + }else if ( _tcscmp(token->termBuffer(), > > _T("stop") ) !=0 ){ > > + return token; > > + } > > + } > > + } > > + _CLDELETE(token); > > + return NULL; > > + } > > +}; > > + > > +class MQPTestAnalyzer: public Analyzer { > > +public: > > + MQPTestAnalyzer() { > > + } > > + > > + /** Filters LowerCaseTokenizer with StopFilter. */ > > + TokenStream* tokenStream(const TCHAR* fieldName, Reader* > > reader) { > > + return _CLNEW MQPTestFilter(_CLNEW > > LowerCaseTokenizer(reader)); > > + } > > +}; > > + > > +void assertEquals(CuTest *tc,const TCHAR* result, Query* q) { > > + if ( q == NULL ) > > + return; > > + > > + const TCHAR* s = q->toString(); > > + int ret = _tcscmp(s,result); > > + _CLDELETE(q); > > + if ( ret != 0 ) { > > + TCHAR buf[HUGE_STRING_LEN]; > > + _sntprintf(buf, HUGE_STRING_LEN, _T("FAILED Query > yielded / > > %s/, expecting /%s/\n"), s, result); > > + _CLDELETE_LCARRAY(s); > > + CuFail(tc, buf); > > + } > > + _CLDELETE_LCARRAY(s); > > +} > > + > > +// verify parsing of query using a stopping analyzer > > +void assertStopQueryEquals(CuTest *tc, const TCHAR* qtxt, const > > TCHAR* expectedRes) { > > + const TCHAR* fields[] = {_T("b"), _T("t"), NULL }; > > + const uint8_t occur[] = {BooleanClause::SHOULD, > > BooleanClause::SHOULD, NULL}; > > + MQPTestAnalyzer *a = _CLNEW MQPTestAnalyzer(); > > + MultiFieldQueryParser mfqp(fields, a); > > + > > + Query *q = mfqp.parse(qtxt); > > + assertEquals(tc, expectedRes, q); > > + > > + q = MultiFieldQueryParser::parse(qtxt, reinterpret_cast<const > > TCHAR**>(&fields), > > + reinterpret_cast<const uint8_t*>(&occur), a); > > + assertEquals(tc, expectedRes, q); > > + _CLDELETE(a); > > +} > > + > > +/** test stop words arsing for both the non static form, > and for the > > +* corresponding static form (qtxt, fields[]). 
*/ > > +void tesStopwordsParsing(CuTest *tc) { > > + assertStopQueryEquals(tc, _T("one"), _T("b:one t:one")); > > + assertStopQueryEquals(tc, _T("one stop"), _T("b:one t:one")); > > + assertStopQueryEquals(tc, _T("one (stop)"), _T("b:one t:one")); > > + assertStopQueryEquals(tc, _T("one ((stop))"), > _T("b:one t:one")); > > + assertStopQueryEquals(tc, _T("stop"), _T("")); > > + assertStopQueryEquals(tc, _T("(stop)"), _T("")); > > + assertStopQueryEquals(tc, _T("((stop))"), _T("")); > > +} > > + > > +void testMFQPSimple(CuTest *tc) { > > + const TCHAR* fields[] = {_T("b"), _T("t"), NULL}; > > + Analyzer* a = _CLNEW StandardAnalyzer(); > > + MultiFieldQueryParser mfqp(fields, a); > > + > > + Query *q = mfqp.parse(_T("one")); > > + assertEquals(tc, _T("b:one t:one"), q); > > + > > + q = mfqp.parse(_T("one two")); > > + assertEquals(tc, _T("(b:one t:one) (b:two t:two)"),q); > > + > > + q = mfqp.parse(_T("+one +two")); > > + assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); > > + > > + q = mfqp.parse(_T("+one -two -three")); > > + assertEquals(tc, _T("+(b:one t:one) -(b:two t:two) -(b:three > > t:three)"), q); > > + > > + q = mfqp.parse(_T("one^2 two")); > > + assertEquals(tc, _T("((b:one t:one)^2.0) (b:two t:two)"), q); > > + > > + q = mfqp.parse(_T("one~ two")); > > + assertEquals(tc, _T("(b:one~0.5 t:one~0.5) (b:two t:two)"), q); > > + > > + q = mfqp.parse(_T("one~0.8 two^2")); > > + assertEquals(tc, _T("(b:one~0.8 t:one~0.8) ((b:two > > t:two)^2.0)"), q); > > + > > + q = mfqp.parse(_T("one* two*")); > > + assertEquals(tc, _T("(b:one* t:one*) (b:two* t:two*)"), q); > > + > > + q = mfqp.parse(_T("[a TO c] two")); > > + assertEquals(tc, _T("(b:[a TO c] t:[a TO c]) (b:two > t:two)"), q); > > + > > + q = mfqp.parse(_T("w?ldcard")); > > + assertEquals(tc, _T("b:w?ldcard t:w?ldcard"), q); > > + > > + q = mfqp.parse(_T("\"foo bar\"")); > > + assertEquals(tc, _T("b:\"foo bar\" t:\"foo bar\""), q); > > + > > + q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); > > + 
assertEquals(tc, _T("(b:\"aa bb cc\" t:\"aa bb cc\") > (b:\"dd ee > > \" t:\"dd ee\")"), q); > > + > > + q = mfqp.parse(_T("\"foo bar\"~4")); > > + assertEquals(tc, _T("b:\"foo bar\"~4 t:\"foo bar\"~4"), q); > > + > > + // make sure that terms which have a field are not touched: > > + q = mfqp.parse(_T("one f:two")); > > + assertEquals(tc, _T("(b:one t:one) f:two"), q); > > + > > + // AND mode: > > + mfqp.setDefaultOperator(QueryParser::AND_OPERATOR); > > + q = mfqp.parse(_T("one two")); > > + assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); > > + q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); > > + assertEquals(tc, _T("+(b:\"aa bb cc\" t:\"aa bb cc\") > +(b:\"dd > > ee\" t:\"dd ee\")"), q); > > + > > + _CLDELETE(a); > > +} > > + > > +CuSuite *testMultiFieldQueryParser(void) > > +{ > > + CuSuite *suite = CuSuiteNew(_T("CLucene Multi-Field QP Test")); > > + > > + SUITE_ADD_TEST(suite, tesStopwordsParsing); > > + SUITE_ADD_TEST(suite, testMFQPSimple); > > + > > + return suite; > > +} > > \ No newline at end of file > > > > Modified: branches/lucene2_3_2/src/test/tests.cpp > > =================================================================== > > --- branches/lucene2_3_2/src/test/tests.cpp 2009-05-03 21:01:38 > > UTC (rev 3005) > > +++ branches/lucene2_3_2/src/test/tests.cpp 2009-05-04 14:46:40 > > UTC (rev 3006) > > @@ -16,7 +16,7 @@ > > {"highfreq", testhighfreq}, > > {"priorityqueue", testpriorityqueue}, > > {"queryparser", testQueryParser}, > > - //{"mfqueryparser", testMultiFieldQueryParser}, > > + {"mfqueryparser", testMultiFieldQueryParser}, > > {"search", testsearch}, > > {"queries", testqueries}, > > {"termvector",testtermvector}, > > > > > > This was sent by the SourceForge.net collaborative development > > platform, the world's largest Open Source development site. 
> > > > --- > > --- > > --- > > > --------------------------------------------------------------------- > > Register Now & Save for Velocity, the Web Performance & Operations > > Conference from O'Reilly Media. Velocity features a full day of > > expert-led, hands-on workshops and two days of sessions > from industry > > leaders in dedicated Performance & Operations tracks. Use code > > vel09scf > > and Save an extra 15% before 5/3. http://p.sf.net/sfu/velocityconf > > _______________________________________________ > > Clucene-cvs mailing list > > Clu...@li... > > https://lists.sourceforge.net/lists/listinfo/clucene-cvs > > iorityqueue", testpriorityqueue}, > > {"queryparser", testQueryParser}, > > - //{"mfqueryparser", testMultiFieldQueryParser}, > > + {"mfqueryparser", testMultiFieldQueryParser}, > > {"search", testsearch}, > > {"queries", testqueries}, > > {"termvector",testtermvector}, > > > > > > This was sent by the SourceForge.net collaborative development > > platform, the world's largest Open Source development site. > > > > --- > > --- > > --- > > > --------------------------------------------------------------------- > > Register Now & Save for Velocity, the Web Performance & Operations > > Conference from O'Reilly Media. Velocity features a full day of > > expert-led, hands-on workshops and two days of sessions > from industry > > leaders in dedicated Performance & Operations tracks. Use code > > vel09scf > > and Save an extra 15% before 5/3. http://p.sf.net/sfu/velocityconf > > _______________________________________________ > > Clucene-cvs mailing list > > Clu...@li... > > https://lists.sourceforge.net/lists/listinfo/clucene-cvs > > istinfo/clucene-cvs > > -------------------------------------------------------------- > ---------------- > The NEW KODAK i700 Series Scanners deliver under ANY > circumstances! Your > production scanning environment may not be a perfect world - > but thanks to > Kodak, there's a perfect scanner to get the job done! 
With > the NEW KODAK i700 > Series Scanner you'll get full speed at 300 dpi even with all image > processing features enabled. http://p.sf.net/sfu/kodak-com > _______________________________________________ > Clucene-cvs mailing list > Clu...@li... > https://lists.sourceforge.net/lists/listinfo/clucene-cvs > > |
From: Ben V. K. <bva...@gm...> - 2009-05-05 05:54:36
|
Looks like u might have got some binary data in your commit somehow. Any idea what that is? Says "binary file a differ" B Verstuurd vanaf mijn iPhone Op 4 mei 2009 om 16:46 heeft syn...@us... het volgende geschreven:\ > Revision: 3006 > http://clucene.svn.sourceforge.net/clucene/?rev=3006&view=rev > Author: synhershko > Date: 2009-05-04 14:46:40 +0000 (Mon, 04 May 2009) > > Log Message: > ----------- > MultiFieldQueryParser is back, now completely conforms with JL 2.3.2 > > Modified Paths: > -------------- > branches/lucene2_3_2/src/core/CLucene/files_list.txt > branches/lucene2_3_2/src/core/CLucene/queryParser/ > MultiFieldQueryParser.cpp > branches/lucene2_3_2/src/core/CLucene/queryParser/ > MultiFieldQueryParser.h > branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h > branches/lucene2_3_2/src/core/CMakeLists.txt > branches/lucene2_3_2/src/test/queryParser/ > TestMultiFieldQueryParser.cpp > branches/lucene2_3_2/src/test/tests.cpp > > Modified: branches/lucene2_3_2/src/core/CLucene/files_list.txt > =================================================================== > (Binary files differ) > > Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/ > MultiFieldQueryParser.cpp > =================================================================== > --- branches/lucene2_3_2/src/core/CLucene/queryParser/ > MultiFieldQueryParser.cpp 2009-05-03 21:01:38 UTC (rev 3005) > +++ branches/lucene2_3_2/src/core/CLucene/queryParser/ > MultiFieldQueryParser.cpp 2009-05-04 14:46:40 UTC (rev 3006) > @@ -21,76 +21,18 @@ > CL_NS_DEF(queryParser) > > > -MultiFieldQueryParser::MultiFieldQueryParser(const TCHAR** fields, > CL_NS(analysis)::Analyzer* a, BoostMap* boosts): > - QueryParser(NULL,a) > +MultiFieldQueryParser::MultiFieldQueryParser(const TCHAR** _fields, > CL_NS(analysis)::Analyzer* a, BoostMap* _boosts): > + QueryParser(NULL,a), fields(_fields), boosts(_boosts) > { > - this->fields = fields; > - this->boosts = boosts; > } > 
MultiFieldQueryParser::~MultiFieldQueryParser(){ > } > > -//static > -Query* MultiFieldQueryParser::parse(const TCHAR* query, const > TCHAR** fields, Analyzer* analyzer) > -{ > - BooleanQuery* bQuery = _CLNEW BooleanQuery( true ); > - int32_t i = 0; > - while ( fields[i] != NULL ){ > - Query* q = QueryParser::parse(query, fields[i], analyzer); > - if (q && (q->getQueryName()!=BooleanQuery::getClassName() > || ((BooleanQuery*)q)->getClauseCount() > 0)) { > - //todo: Move to using BooleanClause::Occur > - bQuery->add(q, true, false, false); > - } else { > - _CLDELETE(q); > - } > - > - i++; > - } > - return bQuery; > -} > - > -//static > -Query* MultiFieldQueryParser::parse(const TCHAR* query, const > TCHAR** fields, const uint8_t* flags, Analyzer* analyzer) > -{ > - BooleanQuery* bQuery = _CLNEW BooleanQuery( true ); > - int32_t i = 0; > - while ( fields[i] != NULL ) > - { > - Query* q = QueryParser::parse(query, fields[i], analyzer); > - if (q && (q->getQueryName()!=BooleanQuery::getClassName() > || ((BooleanQuery*)q)->getClauseCount() > 0)) { > - uint8_t flag = flags[i]; > - switch (flag) > - { > - //todo: Move to using BooleanClause::Occur > - case MultiFieldQueryParser::REQUIRED_FIELD: > - bQuery->add(q, true, true, false); > - break; > - case MultiFieldQueryParser::PROHIBITED_FIELD: > - bQuery->add(q, true, false, true); > - break; > - default: > - bQuery->add(q, true, false, false); > - break; > - } > - } else { > - _CLDELETE(q); > - } > - > - i++; > - } > - return bQuery; > -} > - > -//not static > -CL_NS(search)::Query* MultiFieldQueryParser::parse(const TCHAR* > query) { > - return parse(query, this->fields, this->analyzer); > -} > - > -Query* MultiFieldQueryParser::GetFieldQuery(const TCHAR* field, > TCHAR* queryText, int32_t slop){ > +Query* MultiFieldQueryParser::getFieldQuery(const TCHAR* field, > const TCHAR* queryText, const int32_t slop){ > if (field == NULL) { > vector<BooleanClause*> clauses; > for (int i = 0; fields[i]!=NULL; ++i) { > - Query* q = 
QueryParser::GetFieldQuery(fields[i], > queryText); > + Query* q = QueryParser::getFieldQuery(fields[i], > queryText); > if (q != NULL) { > //If the user passes a map of boosts > if (boosts != NULL) { > @@ -103,116 +45,133 @@ > if (q->getQueryName() == PhraseQuery::getClassName()) { > ((PhraseQuery*)q)->setSlop(slop); > } > + // TODO: > //if (q instanceof MultiPhraseQuery) { > // ((MultiPhraseQuery) q).setSlop(slop); > //} > - q = QueryAddedCallback(fields[i], q); > - if ( q ) > - clauses.push_back(_CLNEW BooleanClause(q, true, > false,false)); > + clauses.push_back(_CLNEW BooleanClause(q, true, > BooleanClause::SHOULD)); > } > } > if (clauses.size() == 0) // happens for stopwords > return NULL; > - Query* q = QueryParser::GetBooleanQuery(clauses); > - return q; > + return QueryParser::getBooleanQuery(clauses, true); > }else{ > - Query* q = QueryParser::GetFieldQuery(field, queryText); > - if ( q ) > - q = QueryAddedCallback(field,q); > - return q; > + return QueryParser::getFieldQuery(field, queryText); > } > } > > - > -Query* MultiFieldQueryParser::GetFieldQuery(const TCHAR* field, > TCHAR* queryText){ > - return GetFieldQuery(field, queryText, 0); > -} > - > - > -CL_NS(search)::Query* MultiFieldQueryParser::GetFuzzyQuery(const > TCHAR* field, TCHAR* termStr){ > +Query* MultiFieldQueryParser::getFuzzyQuery(const TCHAR* field, > TCHAR* termStr, const float_t minSimilarity){ > if (field == NULL) { > vector<BooleanClause*> clauses; > for (int i = 0; fields[i]!=NULL; ++i) { > - Query* q = QueryParser::GetFuzzyQuery(fields[i], > termStr); //todo: , minSimilarity > - if ( q ){ > - q = QueryAddedCallback(fields[i], q); > - if ( q ){ > - clauses.push_back(_CLNEW > BooleanClause(q,true,false,false) ); > - } > - } > + Query* q = QueryParser::getFuzzyQuery(fields[i], > termStr, minSimilarity); > + if (q) clauses.push_back(_CLNEW BooleanClause(q,true, > BooleanClause::SHOULD) ); > } > - return QueryParser::GetBooleanQuery(clauses); > - }else{ > - Query* q = 
QueryParser::GetFuzzyQuery(field, termStr);// > todo: , minSimilarity > - if ( q ) > - q = QueryAddedCallback(field,q); > - return q; > + return QueryParser::getBooleanQuery(clauses, true); > } > + return QueryParser::getFuzzyQuery(field, termStr, minSimilarity); > } > > -Query* MultiFieldQueryParser::GetPrefixQuery(const TCHAR* field, > TCHAR* termStr){ > +Query* MultiFieldQueryParser::getPrefixQuery(const TCHAR* field, > TCHAR* termStr){ > if (field == NULL) { > vector<BooleanClause*> clauses; > for (int i = 0; fields[i]!=NULL; ++i) { > - Query* q = QueryParser::GetPrefixQuery(fields[i], > termStr); > - if ( q ){ > - q = QueryAddedCallback(fields[i],q); > - if ( q ){ > - clauses.push_back(_CLNEW > BooleanClause(q,true,false,false)); > - } > - } > + Query* q = QueryParser::getPrefixQuery(fields[i], > termStr); > + if (q) clauses.push_back(_CLNEW > BooleanClause(q,true,BooleanClause::SHOULD)); > } > - return QueryParser::GetBooleanQuery(clauses); > - }else{ > - Query* q = QueryParser::GetPrefixQuery(field, termStr); > - if ( q ) > - q = QueryAddedCallback(field,q); > - return q; > + return QueryParser::getBooleanQuery(clauses, true); > } > + return QueryParser::getPrefixQuery(field, termStr); > } > > -Query* MultiFieldQueryParser::GetWildcardQuery(const TCHAR* field, > TCHAR* termStr){ > +Query* MultiFieldQueryParser::getWildcardQuery(const TCHAR* field, > TCHAR* termStr){ > if (field == NULL) { > vector<BooleanClause*> clauses; > for (int i = 0; fields[i]!=NULL; ++i) { > - Query* q = QueryParser::GetWildcardQuery(fields[i], > termStr); > - if ( q ){ > - q = QueryAddedCallback(fields[i],q); > - if ( q ){ > - clauses.push_back(_CLNEW > BooleanClause(q,true,false,false)); > - } > - } > + Query* q = QueryParser::getWildcardQuery(fields[i], > termStr); > + if (q) clauses.push_back(_CLNEW > BooleanClause(q,true,BooleanClause::SHOULD)); > } > - return QueryParser::GetBooleanQuery(clauses); > - }else{ > - Query* q = QueryParser::GetWildcardQuery(field, termStr); > - if ( 
q ) > - q = QueryAddedCallback(field,q); > - return q; > + return QueryParser::getBooleanQuery(clauses, true); > } > + return QueryParser::getWildcardQuery(field, termStr); > } > > > -Query* MultiFieldQueryParser::GetRangeQuery(const TCHAR* field, > TCHAR* part1, TCHAR* part2, bool inclusive){ > +Query* MultiFieldQueryParser::getRangeQuery(const TCHAR* field, > TCHAR* part1, TCHAR* part2, const bool inclusive){ > if (field == NULL) { > vector<BooleanClause*> clauses; > for (int i = 0; fields[i]!=NULL; ++i) { > - Query* q = QueryParser::GetRangeQuery(fields[i], part1, > part2, inclusive); > - if ( q ){ > - q = QueryAddedCallback(fields[i],q); > - if ( q ){ > - clauses.push_back(_CLNEW > BooleanClause(q,true,false,false)); > - } > - } > + Query* q = QueryParser::getRangeQuery(fields[i], part1, > part2, inclusive); > + if (q) clauses.push_back(_CLNEW > BooleanClause(q,true,BooleanClause::SHOULD)); > } > - return QueryParser::GetBooleanQuery(clauses); > + return QueryParser::getBooleanQuery(clauses, true); > }else{ > - Query* q = QueryParser::GetRangeQuery(field, part1, part2, > inclusive); > - if ( q ) > - q = QueryAddedCallback(field,q); > - return q; > + return QueryParser::getRangeQuery(field, part1, part2, > inclusive); > } > } > > +//static > +Query* MultiFieldQueryParser::parse(const TCHAR** _queries, const > TCHAR** _fields, Analyzer* analyzer) > +{ > + BooleanQuery* bQuery = _CLNEW BooleanQuery(); > + for (size_t i = 0; _fields[i]!=NULL; i++) > + { > + if (_queries[i] == NULL) { > + _CLLDELETE(bQuery); > + _CLTHROWA(CL_ERR_IllegalArgument, "_queries.length != > _fields.length"); > + } > + // TODO: Reuse qp instead of creating it over and over again > + QueryParser* qp = _CLNEW QueryParser(_fields[i], analyzer); > + Query* q = qp->parse(_queries[i]); > + if (q!=NULL && // q never null, just being defensive > + (!(q->instanceOf(BooleanQuery::getClassName()) || > ((BooleanQuery*)q)->getClauseCount() > 0))) { > + bQuery->add(q, true, BooleanClause::SHOULD); > + } 
else > + _CLLDELETE(q); > + _CLLDELETE(qp); > + } > + return bQuery; > +} > > +// static > +Query* MultiFieldQueryParser::parse(const TCHAR* query, const > TCHAR** _fields, const uint8_t* flags, Analyzer* analyzer) { > + BooleanQuery* bQuery = _CLNEW BooleanQuery(); > + for (size_t i = 0; _fields[i]!=NULL; i++) { > + if (flags[i] == NULL) { > + _CLLDELETE(bQuery); > + _CLTHROWA(CL_ERR_IllegalArgument, "_fields.length != > flags.length"); > + } > + QueryParser* qp = _CLNEW QueryParser(_fields[i], analyzer); > + Query* q = qp->parse(query); > + if (q!=NULL && // q never null, just being defensive > + (!(q->instanceOf(BooleanQuery::getClassName())) || > ((BooleanQuery*)q)->getClauseCount()>0)) { > + bQuery->add(q, true, (BooleanClause::Occur)flags[i]); > + } else > + _CLLDELETE(q); > + _CLLDELETE(qp); > + } > + return bQuery; > +} > + > +//static > +Query* MultiFieldQueryParser::parse(const TCHAR** _queries, const > TCHAR** _fields, const uint8_t* flags, Analyzer* analyzer){ > + BooleanQuery* bQuery = _CLNEW BooleanQuery(); > + for (size_t i = 0; _fields[i]!=NULL; i++) > + { > + if (_queries[i] == NULL || flags[i] == NULL) { > + _CLLDELETE(bQuery); > + _CLTHROWA(CL_ERR_IllegalArgument, "_queries, _fields, > and flags array have have different length"); > + } > + QueryParser* qp = _CLNEW QueryParser(_fields[i], analyzer); > + Query* q = qp->parse(_queries[i]); > + if (q!=NULL && // q never null, just being defensive > + (!(q->instanceOf(BooleanQuery::getClassName())) || > ((BooleanQuery*)q)->getClauseCount()>0)) { > + bQuery->add(q, true, (BooleanClause::Occur)flags[i]); > + } else > + _CLLDELETE(q); > + _CLLDELETE(qp); > + } > + return bQuery; > +} > + > CL_NS_END > > Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/ > MultiFieldQueryParser.h > =================================================================== > --- branches/lucene2_3_2/src/core/CLucene/queryParser/ > MultiFieldQueryParser.h 2009-05-03 21:01:38 UTC (rev 3005) > +++ 
branches/lucene2_3_2/src/core/CLucene/queryParser/ > MultiFieldQueryParser.h 2009-05-04 14:46:40 UTC (rev 3006) > @@ -24,112 +24,142 @@ > CL_NS(util)::Deletor::DummyFloat >> BoostMap; > > - /** > - * A QueryParser which constructs queries to search multiple > fields. > - * > - */ > - class CLUCENE_EXPORT MultiFieldQueryParser: public QueryParser > - { > - protected: > - const TCHAR** fields; > - BoostMap* boosts; > - public: > - LUCENE_STATIC_CONSTANT(uint8_t, NORMAL_FIELD=0); > - LUCENE_STATIC_CONSTANT(uint8_t, REQUIRED_FIELD=1); > - LUCENE_STATIC_CONSTANT(uint8_t, PROHIBITED_FIELD=2); > - > - /** > - * Creates a MultiFieldQueryParser. > - * > - * <p>It will, when parse(String query) > - * is called, construct a query like this (assuming the > query consists of > - * two terms and you specify the two fields <code>title</ > code> and <code>body</code>):</p> > - * > - * <code> > - * (title:term1 body:term1) (title:term2 body:term2) > - * </code> > - * > - * <p>When setDefaultOperator(AND_OPERATOR) is set, the > result will be:</p> > - * > - * <code> > - * +(title:term1 body:term1) +(title:term2 body:term2) > - * </code> > - * > - * <p>In other words, all the query's terms must appear, but > it doesn't matter in > - * what fields they appear.</p> > - */ > - MultiFieldQueryParser(const TCHAR** fields, > CL_NS(analysis)::Analyzer* a, BoostMap* boosts = NULL); > - virtual ~MultiFieldQueryParser(); > - > - /** > - * <p> > - * Parses a query which searches on the fields specified. > - * <p> > - * If x fields are specified, this effectively constructs: > - * <pre> > - * <code> > - * (field1:query) (field2:query) (field3:query)... 
> (fieldx:query) > - * </code> > - * </pre> > - * > - * @param query Query string to parse > - * @param fields Fields to search on > - * @param analyzer Analyzer to use > - * @throws ParserException if query parsing fails > - * @throws TokenMgrError if query parsing fails > - */ > - static CL_NS(search)::Query* parse(const TCHAR* query, > const TCHAR** fields, CL_NS(analysis)::Analyzer* analyzer); > - > - /** > - * <p> > - * Parses a query, searching on the fields specified. > - * Use this if you need to specify certain fields as > required, > - * and others as prohibited. > - * <p><pre> > - * Usage: > - * <code> > - * TCHAR** fields = {"filename", "contents", "description"}; > - * int8_t* flags = {MultiFieldQueryParser::NORMAL FIELD, > - * MultiFieldQueryParser::REQUIRED FIELD, > - * MultiFieldQueryParser::PROHIBITED FIELD}; > - * parse(query, fields, flags, analyzer); > - * </code> > - * </pre> > - *<p> > - * The code above would construct a query: > - * <pre> > - * <code> > - * (filename:query) +(contents:query) -(description:query) > - * </code> > - * </pre> > - * > - * @param query Query string to parse > - * @param fields Fields to search on > - * @param flags Flags describing the fields > - * @param analyzer Analyzer to use > - * @throws ParserException if query parsing fails > - * @throws TokenMgrError if query parsing fails > - */ > - static CL_NS(search)::Query* parse(const TCHAR* query, > const TCHAR** fields, const uint8_t* flags, > CL_NS(analysis)::Analyzer* analyzer); > +/** > +* A QueryParser which constructs queries to search multiple fields. > +* > +*/ > +class CLUCENE_EXPORT MultiFieldQueryParser: public QueryParser > +{ > +protected: > + const TCHAR** fields; > + BoostMap* boosts; > +public: > + /** > + * Creates a MultiFieldQueryParser. > + * Allows passing of a map with term to Boost, and the boost to > apply to each term. 
> + * > + * <p>It will, when parse(String query) > + * is called, construct a query like this (assuming the query > consists of > + * two terms and you specify the two fields <code>title</code> > and <code>body</code>):</p> > + * > + * <code> > + * (title:term1 body:term1) (title:term2 body:term2) > + * </code> > + * > + * <p>When setDefaultOperator(AND_OPERATOR) is set, the result > will be:</p> > + * > + * <code> > + * +(title:term1 body:term1) +(title:term2 body:term2) > + * </code> > + * > + * <p>When you pass a boost (title=>5 body=>10) you can get </p> > + * > + * <code> > + * +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 > body:term2^10.0) > + * </code> > + * > + * <p>In other words, all the query's terms must appear, but it > doesn't matter in > + * what fields they appear.</p> > + */ > + MultiFieldQueryParser(const TCHAR** _fields, > CL_NS(analysis)::Analyzer* a, BoostMap* _boosts = NULL); > + virtual ~MultiFieldQueryParser(); > > - // non-static version of the above > - CL_NS(search)::Query* parse(const TCHAR* query); > +protected: > + CL_NS(search)::Query* getFieldQuery(const TCHAR* field, const > TCHAR* queryText, const int32_t slop); > + CL_NS(search)::Query* getFieldQuery(const TCHAR* field, const > TCHAR* queryText) { return getFieldQuery(field,queryText,0); } > + CL_NS(search)::Query* getFuzzyQuery(const TCHAR* field, TCHAR* > termStr, const float_t minSimilarity); > + CL_NS(search)::Query* getPrefixQuery(const TCHAR* field, TCHAR* > termStr); > + CL_NS(search)::Query* getWildcardQuery(const TCHAR* field, > TCHAR* termStr); > + CL_NS(search)::Query* getRangeQuery(const TCHAR* field, TCHAR* > part1, TCHAR* part2, const bool inclusive); > > - protected: > - CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, > TCHAR* queryText); > - CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, > TCHAR* queryText, int32_t slop); > - CL_NS(search)::Query* GetFuzzyQuery(const TCHAR* field, > TCHAR* termStr); > - CL_NS(search)::Query* 
GetRangeQuery(const TCHAR* field, > TCHAR* part1, TCHAR* part2, bool inclusive); > - CL_NS(search)::Query* GetPrefixQuery(const TCHAR* field, > TCHAR* termStr); > - CL_NS(search)::Query* GetWildcardQuery(const TCHAR* field, > TCHAR* termStr); > +public: > + /** > + * Parses a query which searches on the fields specified. > + * <p> > + * If x fields are specified, this effectively constructs: > + * <pre> > + * <code> > + * (field1:query1) (field2:query2) (field3:query3)... > (fieldx:queryx) > + * </code> > + * </pre> > + * @param queries Queries strings to parse > + * @param fields Fields to search on > + * @param analyzer Analyzer to use > + * @throws ParseException if query parsing fails > + * @throws IllegalArgumentException if the length of the queries > array differs > + * from the length of the fields array > + */ > + static CL_NS(search)::Query* parse(const TCHAR** _queries, > const TCHAR** _fields, > + CL_NS(analysis)::Analyzer* analyzer); > > - /** > - * A special virtual function for the MultiFieldQueryParser > which can be used > - * to clean up queries. Once the field name is known and the > query has been > - * created, its passed to this function. > - * An example of this usage is to set boosts. > - */ > - virtual CL_NS(search)::Query* QueryAddedCallback(const > TCHAR* field, CL_NS(search)::Query* query){ return query; } > - }; > + /** > + * Parses a query, searching on the fields specified. > + * Use this if you need to specify certain fields as required, > + * and others as prohibited. 
> + * <p><pre> > + * Usage: > + * <code> > + * String[] fields = {"filename", "contents", "description"}; > + * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, > + * BooleanClause.Occur.MUST, > + * BooleanClause.Occur.MUST_NOT}; > + * MultiFieldQueryParser.parse("query", fields, flags, analyzer); > + * </code> > + * </pre> > + *<p> > + * The code above would construct a query: > + * <pre> > + * <code> > + * (filename:query) +(contents:query) -(description:query) > + * </code> > + * </pre> > + * > + * @param query Query string to parse > + * @param fields Fields to search on > + * @param flags Flags describing the fields > + * @param analyzer Analyzer to use > + * @throws ParseException if query parsing fails > + * @throws IllegalArgumentException if the length of the fields > array differs > + * from the length of the flags array > + */ > + static CL_NS(search)::Query* parse(const TCHAR* query, const > TCHAR** _fields, > + const uint8_t* flags, CL_NS(analysis)::Analyzer* analyzer); > + > + /** > + * Parses a query, searching on the fields specified. > + * Use this if you need to specify certain fields as required, > + * and others as prohibited. 
> + * <p><pre> > + * Usage: > + * <code> > + * String[] query = {"query1", "query2", "query3"}; > + * String[] fields = {"filename", "contents", "description"}; > + * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, > + * BooleanClause.Occur.MUST, > + * BooleanClause.Occur.MUST_NOT}; > + * MultiFieldQueryParser.parse(query, fields, flags, analyzer); > + * </code> > + * </pre> > + *<p> > + * The code above would construct a query: > + * <pre> > + * <code> > + * (filename:query1) +(contents:query2) -(description:query3) > + * </code> > + * </pre> > + * > + * @param queries Queries string to parse > + * @param fields Fields to search on > + * @param flags Flags describing the fields > + * @param analyzer Analyzer to use > + * @throws ParseException if query parsing fails > + * @throws IllegalArgumentException if the length of the > queries, fields, > + * and flags array differ > + */ > + static CL_NS(search)::Query* parse(const TCHAR** _queries, > const TCHAR** _fields, const uint8_t* flags, > + CL_NS(analysis)::Analyzer* analyzer); > + > + CL_NS(search)::Query* parse(const TCHAR* _query){return > QueryParser::parse(_query);} > +}; > CL_NS_END > #endif > > Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/ > QueryParser.h > =================================================================== > --- branches/lucene2_3_2/src/core/CLucene/queryParser/ > QueryParser.h 2009-05-03 21:01:38 UTC (rev 3005) > +++ branches/lucene2_3_2/src/core/CLucene/queryParser/ > QueryParser.h 2009-05-04 14:46:40 UTC (rev 3006) > @@ -322,7 +322,7 @@ > /** > * @exception ParseException throw in overridden method to disallow > */ > - CL_NS(search)::Query* getFieldQuery(const TCHAR* _field, const > TCHAR* queryText); > + virtual CL_NS(search)::Query* getFieldQuery(const TCHAR* > _field, const TCHAR* queryText); > > /** > * Base implementation delegates to {@link > #getFieldQuery(String,String)}. 
> @@ -331,12 +331,12 @@ > * > * @exception ParseException throw in overridden method to disallow > */ > - CL_NS(search)::Query* getFieldQuery(const TCHAR* _field, const > TCHAR* queryText, const int32_t slop); > + virtual CL_NS(search)::Query* getFieldQuery(const TCHAR* > _field, const TCHAR* queryText, const int32_t slop); > > /** > * @exception ParseException throw in overridden method to disallow > */ > - CL_NS(search)::Query* getRangeQuery(const TCHAR* field, TCHAR* > part1, TCHAR* part2, const bool inclusive); > + virtual CL_NS(search)::Query* getRangeQuery(const TCHAR* field, > TCHAR* part1, TCHAR* part2, const bool inclusive); > > /** > * Factory method for generating query, given a set of clauses. > @@ -375,7 +375,7 @@ > * @return Resulting {@link Query} built for the term > * @exception ParseException throw in overridden method to disallow > */ > - CL_NS(search)::Query* getWildcardQuery(const TCHAR* _field, > TCHAR* termStr); > + virtual CL_NS(search)::Query* getWildcardQuery(const TCHAR* > _field, TCHAR* termStr); > > /** > * Factory method for generating a query (similar to > @@ -400,7 +400,7 @@ > * @return Resulting {@link Query} built for the term > * @exception ParseException throw in overridden method to disallow > */ > - CL_NS(search)::Query* getPrefixQuery(const TCHAR* _field, > TCHAR* _termStr); > + virtual CL_NS(search)::Query* getPrefixQuery(const TCHAR* > _field, TCHAR* _termStr); > > /** > * Factory method for generating a query (similar to > @@ -413,7 +413,7 @@ > * @return Resulting {@link Query} built for the term > * @exception ParseException throw in overridden method to disallow > */ > - CL_NS(search)::Query* getFuzzyQuery(const TCHAR* _field, TCHAR* > termStr, const float_t minSimilarity); > + virtual CL_NS(search)::Query* getFuzzyQuery(const TCHAR* > _field, TCHAR* termStr, const float_t minSimilarity); > > private: > /** > > Modified: branches/lucene2_3_2/src/core/CMakeLists.txt > 
=================================================================== > --- branches/lucene2_3_2/src/core/CMakeLists.txt 2009-05-03 > 21:01:38 UTC (rev 3005) > +++ branches/lucene2_3_2/src/core/CMakeLists.txt 2009-05-04 > 14:46:40 UTC (rev 3006) > @@ -29,7 +29,7 @@ > ./CLucene/util/StringIntern.cpp > ./CLucene/util/BitSet.cpp > ./CLucene/queryParser/FastCharStream.cpp > - #./CLucene/queryParser/MultiFieldQueryParser.cpp > + ./CLucene/queryParser/MultiFieldQueryParser.cpp > ./CLucene/queryParser/QueryParser.cpp > ./CLucene/queryParser/QueryParserTokenManager.cpp > ./CLucene/queryParser/Token.cpp > > Modified: branches/lucene2_3_2/src/test/queryParser/ > TestMultiFieldQueryParser.cpp > =================================================================== > --- branches/lucene2_3_2/src/test/queryParser/ > TestMultiFieldQueryParser.cpp 2009-05-03 21:01:38 UTC (rev 3005) > +++ branches/lucene2_3_2/src/test/queryParser/ > TestMultiFieldQueryParser.cpp 2009-05-04 14:46:40 UTC (rev 3006) > @@ -6,164 +6,167 @@ > --- > --- > --- > --------------------------------------------------------------------- > */ > #include "test.h" > > -//class MQPTestFilter: public TokenFilter { > -//public: > -// > -// bool inPhrase; > -// int32_t savedStart, savedEnd; > -// > -// /** > -// * Filter which discards the token 'stop' and which expands the > -// * token 'phrase' into 'phrase1 phrase2' > -// */ > -// MQPTestFilter(TokenStream* in): > -// TokenFilter(in,true), > -// inPhrase(false), > -// savedStart(0), > -// savedEnd(0) > -// { > -// } > -// > -// bool next(CL_NS(analysis)::Token* token) { > -// if (inPhrase) { > -// inPhrase = false; > -// token->set( _T("phrase2"), savedStart, savedEnd); > -// return true; > -// }else{ > -// while( input->next(token) ){ > -// if ( _tcscmp(token->termBuffer(), _T("phrase")) > == 0 ) { > -// inPhrase = true; > -// savedStart = token->startOffset(); > -// savedEnd = token->endOffset(); > -// token->set( _T("phrase1"), savedStart, > savedEnd); > -// return 
true; > -// }else if ( _tcscmp(token->termBuffer(), > _T("stop") ) !=0 ){ > -// return true; > -// } > -// } > -// } > -// return false; > -// } > -//}; > -// > -//class MQPTestAnalyzer: public Analyzer { > -//public: > -// MQPTestAnalyzer() { > -// } > -// > -// /** Filters LowerCaseTokenizer with StopFilter. */ > -// TokenStream* tokenStream(const TCHAR* fieldName, Reader* > reader) { > -// return _CLNEW MQPTestFilter(_CLNEW > LowerCaseTokenizer(reader)); > -// } > -//}; > -// > -//void assertEquals(CuTest *tc,const TCHAR* result, Query* q) { > -// if ( q == NULL ) > -// return; > -// > -// const TCHAR* s = q->toString(); > -// int ret = _tcscmp(s,result); > -// _CLDELETE(q); > -// if ( ret != 0 ) { > -// TCHAR buf[HUGE_STRING_LEN]; > -// _sntprintf(buf, HUGE_STRING_LEN, _T("FAILED Query > yielded /%s/, expecting /%s/\n"), s, result); > -// _CLDELETE_LCARRAY(s); > -// CuFail(tc, buf); > -// } > -// _CLDELETE_LCARRAY(s); > -//} > -// > -//// verify parsing of query using a stopping analyzer > -//void assertStopQueryEquals(CuTest *tc, const TCHAR* qtxt, const > TCHAR* expectedRes) { > -// const TCHAR* fields[] = {_T("b"), _T("t"), NULL }; > -// //Occur occur[] = {Occur.SHOULD, Occur.SHOULD}; > -// MQPTestAnalyzer *a = _CLNEW MQPTestAnalyzer(); > -// MultiFieldQueryParser mfqp(fields, a); > -// > -// Query *q = mfqp.parse(qtxt); > -// assertEquals(tc, expectedRes, q); > -// > -// q = MultiFieldQueryParser::parse(qtxt, fields, a); > -// assertEquals(tc, expectedRes, q); > -// _CLDELETE(a); > -//} > -// > -///** test stop words arsing for both the non static form, and for > the > -//* corresponding static form (qtxt, fields[]). 
*/ > -//void tesStopwordsParsing(CuTest *tc) { > -// assertStopQueryEquals(tc, _T("one"), _T("b:one t:one")); > -// assertStopQueryEquals(tc, _T("one stop"), _T("b:one t:one")); > -// assertStopQueryEquals(tc, _T("one (stop)"), _T("b:one t:one")); > -// assertStopQueryEquals(tc, _T("one ((stop))"), _T("b:one > t:one")); > -// assertStopQueryEquals(tc, _T("stop"), _T("")); > -// assertStopQueryEquals(tc, _T("(stop)"), _T("")); > -// assertStopQueryEquals(tc, _T("((stop))"), _T("")); > -//} > -// > -//void testMFQPSimple(CuTest *tc) { > -// const TCHAR* fields[] = {_T("b"), _T("t"), NULL}; > -// Analyzer* a = _CLNEW StandardAnalyzer(); > -// MultiFieldQueryParser mfqp(fields, a); > -// > -// Query *q = mfqp.parse(_T("one")); > -// assertEquals(tc, _T("b:one t:one"), q); > -// > -// q = mfqp.parse(_T("one two")); > -// assertEquals(tc, _T("(b:one t:one) (b:two t:two)"),q); > -// > -// q = mfqp.parse(_T("+one +two")); > -// assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); > -// > -// q = mfqp.parse(_T("+one -two -three")); > -// assertEquals(tc, _T("+(b:one t:one) -(b:two t:two) -(b:three > t:three)"), q); > -// > -// q = mfqp.parse(_T("one^2 two")); > -// assertEquals(tc, _T("((b:one t:one)^2.0) (b:two t:two)"), q); > -// > -// q = mfqp.parse(_T("one~ two")); > -// assertEquals(tc, _T("(b:one~0.5 t:one~0.5) (b:two t:two)"), q); > -// > -// q = mfqp.parse(_T("one~0.8 two^2")); > -// assertEquals(tc, _T("(b:one~0.8 t:one~0.8) ((b:two > t:two)^2.0)"), q); > -// > -// q = mfqp.parse(_T("one* two*")); > -// assertEquals(tc, _T("(b:one* t:one*) (b:two* t:two*)"), q); > -// > -// q = mfqp.parse(_T("[a TO c] two")); > -// assertEquals(tc, _T("(b:[a TO c] t:[a TO c]) (b:two t:two)"), > q); > -// > -// q = mfqp.parse(_T("w?ldcard")); > -// assertEquals(tc, _T("b:w?ldcard t:w?ldcard"), q); > -// > -// q = mfqp.parse(_T("\"foo bar\"")); > -// assertEquals(tc, _T("b:\"foo bar\" t:\"foo bar\""), q); > -// > -// q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); > -// 
assertEquals(tc, _T("(b:\"aa bb cc\" t:\"aa bb cc\") (b:\"dd > ee\" t:\"dd ee\")"), q); > -// > -// q = mfqp.parse(_T("\"foo bar\"~4")); > -// assertEquals(tc, _T("b:\"foo bar\"~4 t:\"foo bar\"~4"), q); > -// > -// // make sure that terms which have a field are not touched: > -// q = mfqp.parse(_T("one f:two")); > -// assertEquals(tc, _T("(b:one t:one) f:two"), q); > -// > -// // AND mode: > -// mfqp.setDefaultOperator(QueryParser::AND_OPERATOR); > -// q = mfqp.parse(_T("one two")); > -// assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); > -// q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); > -// assertEquals(tc, _T("+(b:\"aa bb cc\" t:\"aa bb cc\") +(b: > \"dd ee\" t:\"dd ee\")"), q); > -// > -// _CLDELETE(a); > -//} > -// > -//CuSuite *testMultiFieldQueryParser(void) > -//{ > -// CuSuite *suite = CuSuiteNew(_T("CLucene Multi-Field QP Test")); > -// > -// SUITE_ADD_TEST(suite, tesStopwordsParsing); > -// SUITE_ADD_TEST(suite, testMFQPSimple); > -// > -// return suite; > -//} > \ No newline at end of file > +class MQPTestFilter: public TokenFilter { > +public: > + > + bool inPhrase; > + int32_t savedStart, savedEnd; > + > + /** > + * Filter which discards the token 'stop' and which expands the > + * token 'phrase' into 'phrase1 phrase2' > + */ > + MQPTestFilter(TokenStream* in): > + TokenFilter(in,true), > + inPhrase(false), > + savedStart(0), > + savedEnd(0) > + { > + } > + > + CL_NS(analysis)::Token* next(CL_NS(analysis)::Token*& token) { > + if (inPhrase) { > + if (token == NULL) token=_CLNEW CL_NS(analysis)::Token(); > + inPhrase = false; > + token->set( _T("phrase2"), savedStart, savedEnd); > + return token; > + }else{ > + while( input->next(token) ){ > + if ( _tcscmp(token->termBuffer(), _T("phrase")) == > 0 ) { > + inPhrase = true; > + savedStart = token->startOffset(); > + savedEnd = token->endOffset(); > + token->set( _T("phrase1"), savedStart, savedEnd); > + return token; > + }else if ( _tcscmp(token->termBuffer(), > _T("stop") ) !=0 ){ > + return 
token; > + } > + } > + } > + _CLDELETE(token); > + return NULL; > + } > +}; > + > +class MQPTestAnalyzer: public Analyzer { > +public: > + MQPTestAnalyzer() { > + } > + > + /** Filters LowerCaseTokenizer with StopFilter. */ > + TokenStream* tokenStream(const TCHAR* fieldName, Reader* > reader) { > + return _CLNEW MQPTestFilter(_CLNEW > LowerCaseTokenizer(reader)); > + } > +}; > + > +void assertEquals(CuTest *tc,const TCHAR* result, Query* q) { > + if ( q == NULL ) > + return; > + > + const TCHAR* s = q->toString(); > + int ret = _tcscmp(s,result); > + _CLDELETE(q); > + if ( ret != 0 ) { > + TCHAR buf[HUGE_STRING_LEN]; > + _sntprintf(buf, HUGE_STRING_LEN, _T("FAILED Query yielded / > %s/, expecting /%s/\n"), s, result); > + _CLDELETE_LCARRAY(s); > + CuFail(tc, buf); > + } > + _CLDELETE_LCARRAY(s); > +} > + > +// verify parsing of query using a stopping analyzer > +void assertStopQueryEquals(CuTest *tc, const TCHAR* qtxt, const > TCHAR* expectedRes) { > + const TCHAR* fields[] = {_T("b"), _T("t"), NULL }; > + const uint8_t occur[] = {BooleanClause::SHOULD, > BooleanClause::SHOULD, NULL}; > + MQPTestAnalyzer *a = _CLNEW MQPTestAnalyzer(); > + MultiFieldQueryParser mfqp(fields, a); > + > + Query *q = mfqp.parse(qtxt); > + assertEquals(tc, expectedRes, q); > + > + q = MultiFieldQueryParser::parse(qtxt, reinterpret_cast<const > TCHAR**>(&fields), > + reinterpret_cast<const uint8_t*>(&occur), a); > + assertEquals(tc, expectedRes, q); > + _CLDELETE(a); > +} > + > +/** test stop words arsing for both the non static form, and for the > +* corresponding static form (qtxt, fields[]). 
*/ > +void tesStopwordsParsing(CuTest *tc) { > + assertStopQueryEquals(tc, _T("one"), _T("b:one t:one")); > + assertStopQueryEquals(tc, _T("one stop"), _T("b:one t:one")); > + assertStopQueryEquals(tc, _T("one (stop)"), _T("b:one t:one")); > + assertStopQueryEquals(tc, _T("one ((stop))"), _T("b:one t:one")); > + assertStopQueryEquals(tc, _T("stop"), _T("")); > + assertStopQueryEquals(tc, _T("(stop)"), _T("")); > + assertStopQueryEquals(tc, _T("((stop))"), _T("")); > +} > + > +void testMFQPSimple(CuTest *tc) { > + const TCHAR* fields[] = {_T("b"), _T("t"), NULL}; > + Analyzer* a = _CLNEW StandardAnalyzer(); > + MultiFieldQueryParser mfqp(fields, a); > + > + Query *q = mfqp.parse(_T("one")); > + assertEquals(tc, _T("b:one t:one"), q); > + > + q = mfqp.parse(_T("one two")); > + assertEquals(tc, _T("(b:one t:one) (b:two t:two)"),q); > + > + q = mfqp.parse(_T("+one +two")); > + assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); > + > + q = mfqp.parse(_T("+one -two -three")); > + assertEquals(tc, _T("+(b:one t:one) -(b:two t:two) -(b:three > t:three)"), q); > + > + q = mfqp.parse(_T("one^2 two")); > + assertEquals(tc, _T("((b:one t:one)^2.0) (b:two t:two)"), q); > + > + q = mfqp.parse(_T("one~ two")); > + assertEquals(tc, _T("(b:one~0.5 t:one~0.5) (b:two t:two)"), q); > + > + q = mfqp.parse(_T("one~0.8 two^2")); > + assertEquals(tc, _T("(b:one~0.8 t:one~0.8) ((b:two > t:two)^2.0)"), q); > + > + q = mfqp.parse(_T("one* two*")); > + assertEquals(tc, _T("(b:one* t:one*) (b:two* t:two*)"), q); > + > + q = mfqp.parse(_T("[a TO c] two")); > + assertEquals(tc, _T("(b:[a TO c] t:[a TO c]) (b:two t:two)"), q); > + > + q = mfqp.parse(_T("w?ldcard")); > + assertEquals(tc, _T("b:w?ldcard t:w?ldcard"), q); > + > + q = mfqp.parse(_T("\"foo bar\"")); > + assertEquals(tc, _T("b:\"foo bar\" t:\"foo bar\""), q); > + > + q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); > + assertEquals(tc, _T("(b:\"aa bb cc\" t:\"aa bb cc\") (b:\"dd ee > \" t:\"dd ee\")"), q); > + > + q = 
mfqp.parse(_T("\"foo bar\"~4")); > + assertEquals(tc, _T("b:\"foo bar\"~4 t:\"foo bar\"~4"), q); > + > + // make sure that terms which have a field are not touched: > + q = mfqp.parse(_T("one f:two")); > + assertEquals(tc, _T("(b:one t:one) f:two"), q); > + > + // AND mode: > + mfqp.setDefaultOperator(QueryParser::AND_OPERATOR); > + q = mfqp.parse(_T("one two")); > + assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); > + q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); > + assertEquals(tc, _T("+(b:\"aa bb cc\" t:\"aa bb cc\") +(b:\"dd > ee\" t:\"dd ee\")"), q); > + > + _CLDELETE(a); > +} > + > +CuSuite *testMultiFieldQueryParser(void) > +{ > + CuSuite *suite = CuSuiteNew(_T("CLucene Multi-Field QP Test")); > + > + SUITE_ADD_TEST(suite, tesStopwordsParsing); > + SUITE_ADD_TEST(suite, testMFQPSimple); > + > + return suite; > +} > \ No newline at end of file > > Modified: branches/lucene2_3_2/src/test/tests.cpp > =================================================================== > --- branches/lucene2_3_2/src/test/tests.cpp 2009-05-03 21:01:38 > UTC (rev 3005) > +++ branches/lucene2_3_2/src/test/tests.cpp 2009-05-04 14:46:40 > UTC (rev 3006) > @@ -16,7 +16,7 @@ > {"highfreq", testhighfreq}, > {"priorityqueue", testpriorityqueue}, > {"queryparser", testQueryParser}, > - //{"mfqueryparser", testMultiFieldQueryParser}, > + {"mfqueryparser", testMultiFieldQueryParser}, > {"search", testsearch}, > {"queries", testqueries}, > {"termvector",testtermvector}, > > > This was sent by the SourceForge.net collaborative development > platform, the world's largest Open Source development site. > > --- > --- > --- > --------------------------------------------------------------------- > Register Now & Save for Velocity, the Web Performance & Operations > Conference from O'Reilly Media. Velocity features a full day of > expert-led, hands-on workshops and two days of sessions from industry > leaders in dedicated Performance & Operations tracks. 
Use code > vel09scf > and Save an extra 15% before 5/3. http://p.sf.net/sfu/velocityconf > _______________________________________________ > Clucene-cvs mailing list > Clu...@li... > https://lists.sourceforge.net/lists/listinfo/clucene-cvs > iorityqueue", testpriorityqueue}, > {"queryparser", testQueryParser}, > - //{"mfqueryparser", testMultiFieldQueryParser}, > + {"mfqueryparser", testMultiFieldQueryParser}, > {"search", testsearch}, > {"queries", testqueries}, > {"termvector",testtermvector}, > > > This was sent by the SourceForge.net collaborative development > platform, the world's largest Open Source development site. > > --- > --- > --- > --------------------------------------------------------------------- > Register Now & Save for Velocity, the Web Performance & Operations > Conference from O'Reilly Media. Velocity features a full day of > expert-led, hands-on workshops and two days of sessions from industry > leaders in dedicated Performance & Operations tracks. Use code > vel09scf > and Save an extra 15% before 5/3. http://p.sf.net/sfu/velocityconf > _______________________________________________ > Clucene-cvs mailing list > Clu...@li... > https://lists.sourceforge.net/lists/listinfo/clucene-cvs > istinfo/clucene-cvs |
From: <syn...@us...> - 2009-05-04 14:46:48
|
Revision: 3006 http://clucene.svn.sourceforge.net/clucene/?rev=3006&view=rev Author: synhershko Date: 2009-05-04 14:46:40 +0000 (Mon, 04 May 2009) Log Message: ----------- MultiFieldQueryParser is back, now completely conforms with JL 2.3.2 Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/files_list.txt branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h branches/lucene2_3_2/src/core/CMakeLists.txt branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp branches/lucene2_3_2/src/test/tests.cpp Modified: branches/lucene2_3_2/src/core/CLucene/files_list.txt =================================================================== (Binary files differ) Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp 2009-05-03 21:01:38 UTC (rev 3005) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.cpp 2009-05-04 14:46:40 UTC (rev 3006) @@ -21,76 +21,18 @@ CL_NS_DEF(queryParser) -MultiFieldQueryParser::MultiFieldQueryParser(const TCHAR** fields, CL_NS(analysis)::Analyzer* a, BoostMap* boosts): - QueryParser(NULL,a) +MultiFieldQueryParser::MultiFieldQueryParser(const TCHAR** _fields, CL_NS(analysis)::Analyzer* a, BoostMap* _boosts): + QueryParser(NULL,a), fields(_fields), boosts(_boosts) { - this->fields = fields; - this->boosts = boosts; } MultiFieldQueryParser::~MultiFieldQueryParser(){ } -//static -Query* MultiFieldQueryParser::parse(const TCHAR* query, const TCHAR** fields, Analyzer* analyzer) -{ - BooleanQuery* bQuery = _CLNEW BooleanQuery( true ); - int32_t i = 0; - while ( fields[i] != NULL ){ - Query* q = QueryParser::parse(query, fields[i], analyzer); - if (q && 
(q->getQueryName()!=BooleanQuery::getClassName() || ((BooleanQuery*)q)->getClauseCount() > 0)) { - //todo: Move to using BooleanClause::Occur - bQuery->add(q, true, false, false); - } else { - _CLDELETE(q); - } - - i++; - } - return bQuery; -} - -//static -Query* MultiFieldQueryParser::parse(const TCHAR* query, const TCHAR** fields, const uint8_t* flags, Analyzer* analyzer) -{ - BooleanQuery* bQuery = _CLNEW BooleanQuery( true ); - int32_t i = 0; - while ( fields[i] != NULL ) - { - Query* q = QueryParser::parse(query, fields[i], analyzer); - if (q && (q->getQueryName()!=BooleanQuery::getClassName() || ((BooleanQuery*)q)->getClauseCount() > 0)) { - uint8_t flag = flags[i]; - switch (flag) - { - //todo: Move to using BooleanClause::Occur - case MultiFieldQueryParser::REQUIRED_FIELD: - bQuery->add(q, true, true, false); - break; - case MultiFieldQueryParser::PROHIBITED_FIELD: - bQuery->add(q, true, false, true); - break; - default: - bQuery->add(q, true, false, false); - break; - } - } else { - _CLDELETE(q); - } - - i++; - } - return bQuery; -} - -//not static -CL_NS(search)::Query* MultiFieldQueryParser::parse(const TCHAR* query) { - return parse(query, this->fields, this->analyzer); -} - -Query* MultiFieldQueryParser::GetFieldQuery(const TCHAR* field, TCHAR* queryText, int32_t slop){ +Query* MultiFieldQueryParser::getFieldQuery(const TCHAR* field, const TCHAR* queryText, const int32_t slop){ if (field == NULL) { vector<BooleanClause*> clauses; for (int i = 0; fields[i]!=NULL; ++i) { - Query* q = QueryParser::GetFieldQuery(fields[i], queryText); + Query* q = QueryParser::getFieldQuery(fields[i], queryText); if (q != NULL) { //If the user passes a map of boosts if (boosts != NULL) { @@ -103,116 +45,133 @@ if (q->getQueryName() == PhraseQuery::getClassName()) { ((PhraseQuery*)q)->setSlop(slop); } + // TODO: //if (q instanceof MultiPhraseQuery) { // ((MultiPhraseQuery) q).setSlop(slop); //} - q = QueryAddedCallback(fields[i], q); - if ( q ) - clauses.push_back(_CLNEW 
BooleanClause(q, true, false,false)); + clauses.push_back(_CLNEW BooleanClause(q, true, BooleanClause::SHOULD)); } } if (clauses.size() == 0) // happens for stopwords return NULL; - Query* q = QueryParser::GetBooleanQuery(clauses); - return q; + return QueryParser::getBooleanQuery(clauses, true); }else{ - Query* q = QueryParser::GetFieldQuery(field, queryText); - if ( q ) - q = QueryAddedCallback(field,q); - return q; + return QueryParser::getFieldQuery(field, queryText); } } - -Query* MultiFieldQueryParser::GetFieldQuery(const TCHAR* field, TCHAR* queryText){ - return GetFieldQuery(field, queryText, 0); -} - - -CL_NS(search)::Query* MultiFieldQueryParser::GetFuzzyQuery(const TCHAR* field, TCHAR* termStr){ +Query* MultiFieldQueryParser::getFuzzyQuery(const TCHAR* field, TCHAR* termStr, const float_t minSimilarity){ if (field == NULL) { vector<BooleanClause*> clauses; for (int i = 0; fields[i]!=NULL; ++i) { - Query* q = QueryParser::GetFuzzyQuery(fields[i], termStr); //todo: , minSimilarity - if ( q ){ - q = QueryAddedCallback(fields[i], q); - if ( q ){ - clauses.push_back(_CLNEW BooleanClause(q,true,false,false) ); - } - } + Query* q = QueryParser::getFuzzyQuery(fields[i], termStr, minSimilarity); + if (q) clauses.push_back(_CLNEW BooleanClause(q,true, BooleanClause::SHOULD) ); } - return QueryParser::GetBooleanQuery(clauses); - }else{ - Query* q = QueryParser::GetFuzzyQuery(field, termStr);//todo: , minSimilarity - if ( q ) - q = QueryAddedCallback(field,q); - return q; + return QueryParser::getBooleanQuery(clauses, true); } + return QueryParser::getFuzzyQuery(field, termStr, minSimilarity); } -Query* MultiFieldQueryParser::GetPrefixQuery(const TCHAR* field, TCHAR* termStr){ +Query* MultiFieldQueryParser::getPrefixQuery(const TCHAR* field, TCHAR* termStr){ if (field == NULL) { vector<BooleanClause*> clauses; for (int i = 0; fields[i]!=NULL; ++i) { - Query* q = QueryParser::GetPrefixQuery(fields[i], termStr); - if ( q ){ - q = QueryAddedCallback(fields[i],q); - if 
( q ){ - clauses.push_back(_CLNEW BooleanClause(q,true,false,false)); - } - } + Query* q = QueryParser::getPrefixQuery(fields[i], termStr); + if (q) clauses.push_back(_CLNEW BooleanClause(q,true,BooleanClause::SHOULD)); } - return QueryParser::GetBooleanQuery(clauses); - }else{ - Query* q = QueryParser::GetPrefixQuery(field, termStr); - if ( q ) - q = QueryAddedCallback(field,q); - return q; + return QueryParser::getBooleanQuery(clauses, true); } + return QueryParser::getPrefixQuery(field, termStr); } -Query* MultiFieldQueryParser::GetWildcardQuery(const TCHAR* field, TCHAR* termStr){ +Query* MultiFieldQueryParser::getWildcardQuery(const TCHAR* field, TCHAR* termStr){ if (field == NULL) { vector<BooleanClause*> clauses; for (int i = 0; fields[i]!=NULL; ++i) { - Query* q = QueryParser::GetWildcardQuery(fields[i], termStr); - if ( q ){ - q = QueryAddedCallback(fields[i],q); - if ( q ){ - clauses.push_back(_CLNEW BooleanClause(q,true,false,false)); - } - } + Query* q = QueryParser::getWildcardQuery(fields[i], termStr); + if (q) clauses.push_back(_CLNEW BooleanClause(q,true,BooleanClause::SHOULD)); } - return QueryParser::GetBooleanQuery(clauses); - }else{ - Query* q = QueryParser::GetWildcardQuery(field, termStr); - if ( q ) - q = QueryAddedCallback(field,q); - return q; + return QueryParser::getBooleanQuery(clauses, true); } + return QueryParser::getWildcardQuery(field, termStr); } -Query* MultiFieldQueryParser::GetRangeQuery(const TCHAR* field, TCHAR* part1, TCHAR* part2, bool inclusive){ +Query* MultiFieldQueryParser::getRangeQuery(const TCHAR* field, TCHAR* part1, TCHAR* part2, const bool inclusive){ if (field == NULL) { vector<BooleanClause*> clauses; for (int i = 0; fields[i]!=NULL; ++i) { - Query* q = QueryParser::GetRangeQuery(fields[i], part1, part2, inclusive); - if ( q ){ - q = QueryAddedCallback(fields[i],q); - if ( q ){ - clauses.push_back(_CLNEW BooleanClause(q,true,false,false)); - } - } + Query* q = QueryParser::getRangeQuery(fields[i], part1, part2, 
inclusive); + if (q) clauses.push_back(_CLNEW BooleanClause(q,true,BooleanClause::SHOULD)); } - return QueryParser::GetBooleanQuery(clauses); + return QueryParser::getBooleanQuery(clauses, true); }else{ - Query* q = QueryParser::GetRangeQuery(field, part1, part2, inclusive); - if ( q ) - q = QueryAddedCallback(field,q); - return q; + return QueryParser::getRangeQuery(field, part1, part2, inclusive); } } +//static +Query* MultiFieldQueryParser::parse(const TCHAR** _queries, const TCHAR** _fields, Analyzer* analyzer) +{ + BooleanQuery* bQuery = _CLNEW BooleanQuery(); + for (size_t i = 0; _fields[i]!=NULL; i++) + { + if (_queries[i] == NULL) { + _CLLDELETE(bQuery); + _CLTHROWA(CL_ERR_IllegalArgument, "_queries.length != _fields.length"); + } + // TODO: Reuse qp instead of creating it over and over again + QueryParser* qp = _CLNEW QueryParser(_fields[i], analyzer); + Query* q = qp->parse(_queries[i]); + if (q!=NULL && // q never null, just being defensive + (!(q->instanceOf(BooleanQuery::getClassName()) || ((BooleanQuery*)q)->getClauseCount() > 0))) { + bQuery->add(q, true, BooleanClause::SHOULD); + } else + _CLLDELETE(q); + _CLLDELETE(qp); + } + return bQuery; +} +// static +Query* MultiFieldQueryParser::parse(const TCHAR* query, const TCHAR** _fields, const uint8_t* flags, Analyzer* analyzer) { + BooleanQuery* bQuery = _CLNEW BooleanQuery(); + for (size_t i = 0; _fields[i]!=NULL; i++) { + if (flags[i] == NULL) { + _CLLDELETE(bQuery); + _CLTHROWA(CL_ERR_IllegalArgument, "_fields.length != flags.length"); + } + QueryParser* qp = _CLNEW QueryParser(_fields[i], analyzer); + Query* q = qp->parse(query); + if (q!=NULL && // q never null, just being defensive + (!(q->instanceOf(BooleanQuery::getClassName())) || ((BooleanQuery*)q)->getClauseCount()>0)) { + bQuery->add(q, true, (BooleanClause::Occur)flags[i]); + } else + _CLLDELETE(q); + _CLLDELETE(qp); + } + return bQuery; +} + +//static +Query* MultiFieldQueryParser::parse(const TCHAR** _queries, const TCHAR** _fields, 
const uint8_t* flags, Analyzer* analyzer){ + BooleanQuery* bQuery = _CLNEW BooleanQuery(); + for (size_t i = 0; _fields[i]!=NULL; i++) + { + if (_queries[i] == NULL || flags[i] == NULL) { + _CLLDELETE(bQuery); + _CLTHROWA(CL_ERR_IllegalArgument, "_queries, _fields, and flags array have have different length"); + } + QueryParser* qp = _CLNEW QueryParser(_fields[i], analyzer); + Query* q = qp->parse(_queries[i]); + if (q!=NULL && // q never null, just being defensive + (!(q->instanceOf(BooleanQuery::getClassName())) || ((BooleanQuery*)q)->getClauseCount()>0)) { + bQuery->add(q, true, (BooleanClause::Occur)flags[i]); + } else + _CLLDELETE(q); + _CLLDELETE(qp); + } + return bQuery; +} + CL_NS_END Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h 2009-05-03 21:01:38 UTC (rev 3005) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/MultiFieldQueryParser.h 2009-05-04 14:46:40 UTC (rev 3006) @@ -24,112 +24,142 @@ CL_NS(util)::Deletor::DummyFloat > BoostMap; - /** - * A QueryParser which constructs queries to search multiple fields. - * - */ - class CLUCENE_EXPORT MultiFieldQueryParser: public QueryParser - { - protected: - const TCHAR** fields; - BoostMap* boosts; - public: - LUCENE_STATIC_CONSTANT(uint8_t, NORMAL_FIELD=0); - LUCENE_STATIC_CONSTANT(uint8_t, REQUIRED_FIELD=1); - LUCENE_STATIC_CONSTANT(uint8_t, PROHIBITED_FIELD=2); - - /** - * Creates a MultiFieldQueryParser. 
- * - * <p>It will, when parse(String query) - * is called, construct a query like this (assuming the query consists of - * two terms and you specify the two fields <code>title</code> and <code>body</code>):</p> - * - * <code> - * (title:term1 body:term1) (title:term2 body:term2) - * </code> - * - * <p>When setDefaultOperator(AND_OPERATOR) is set, the result will be:</p> - * - * <code> - * +(title:term1 body:term1) +(title:term2 body:term2) - * </code> - * - * <p>In other words, all the query's terms must appear, but it doesn't matter in - * what fields they appear.</p> - */ - MultiFieldQueryParser(const TCHAR** fields, CL_NS(analysis)::Analyzer* a, BoostMap* boosts = NULL); - virtual ~MultiFieldQueryParser(); - - /** - * <p> - * Parses a query which searches on the fields specified. - * <p> - * If x fields are specified, this effectively constructs: - * <pre> - * <code> - * (field1:query) (field2:query) (field3:query)...(fieldx:query) - * </code> - * </pre> - * - * @param query Query string to parse - * @param fields Fields to search on - * @param analyzer Analyzer to use - * @throws ParserException if query parsing fails - * @throws TokenMgrError if query parsing fails - */ - static CL_NS(search)::Query* parse(const TCHAR* query, const TCHAR** fields, CL_NS(analysis)::Analyzer* analyzer); - - /** - * <p> - * Parses a query, searching on the fields specified. - * Use this if you need to specify certain fields as required, - * and others as prohibited. 
- * <p><pre> - * Usage: - * <code> - * TCHAR** fields = {"filename", "contents", "description"}; - * int8_t* flags = {MultiFieldQueryParser::NORMAL FIELD, - * MultiFieldQueryParser::REQUIRED FIELD, - * MultiFieldQueryParser::PROHIBITED FIELD}; - * parse(query, fields, flags, analyzer); - * </code> - * </pre> - *<p> - * The code above would construct a query: - * <pre> - * <code> - * (filename:query) +(contents:query) -(description:query) - * </code> - * </pre> - * - * @param query Query string to parse - * @param fields Fields to search on - * @param flags Flags describing the fields - * @param analyzer Analyzer to use - * @throws ParserException if query parsing fails - * @throws TokenMgrError if query parsing fails - */ - static CL_NS(search)::Query* parse(const TCHAR* query, const TCHAR** fields, const uint8_t* flags, CL_NS(analysis)::Analyzer* analyzer); +/** +* A QueryParser which constructs queries to search multiple fields. +* +*/ +class CLUCENE_EXPORT MultiFieldQueryParser: public QueryParser +{ +protected: + const TCHAR** fields; + BoostMap* boosts; +public: + /** + * Creates a MultiFieldQueryParser. + * Allows passing of a map with term to Boost, and the boost to apply to each term. 
+ * + * <p>It will, when parse(String query) + * is called, construct a query like this (assuming the query consists of + * two terms and you specify the two fields <code>title</code> and <code>body</code>):</p> + * + * <code> + * (title:term1 body:term1) (title:term2 body:term2) + * </code> + * + * <p>When setDefaultOperator(AND_OPERATOR) is set, the result will be:</p> + * + * <code> + * +(title:term1 body:term1) +(title:term2 body:term2) + * </code> + * + * <p>When you pass a boost (title=>5 body=>10) you can get </p> + * + * <code> + * +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0) + * </code> + * + * <p>In other words, all the query's terms must appear, but it doesn't matter in + * what fields they appear.</p> + */ + MultiFieldQueryParser(const TCHAR** _fields, CL_NS(analysis)::Analyzer* a, BoostMap* _boosts = NULL); + virtual ~MultiFieldQueryParser(); - // non-static version of the above - CL_NS(search)::Query* parse(const TCHAR* query); +protected: + CL_NS(search)::Query* getFieldQuery(const TCHAR* field, const TCHAR* queryText, const int32_t slop); + CL_NS(search)::Query* getFieldQuery(const TCHAR* field, const TCHAR* queryText) { return getFieldQuery(field,queryText,0); } + CL_NS(search)::Query* getFuzzyQuery(const TCHAR* field, TCHAR* termStr, const float_t minSimilarity); + CL_NS(search)::Query* getPrefixQuery(const TCHAR* field, TCHAR* termStr); + CL_NS(search)::Query* getWildcardQuery(const TCHAR* field, TCHAR* termStr); + CL_NS(search)::Query* getRangeQuery(const TCHAR* field, TCHAR* part1, TCHAR* part2, const bool inclusive); - protected: - CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, TCHAR* queryText); - CL_NS(search)::Query* GetFieldQuery(const TCHAR* field, TCHAR* queryText, int32_t slop); - CL_NS(search)::Query* GetFuzzyQuery(const TCHAR* field, TCHAR* termStr); - CL_NS(search)::Query* GetRangeQuery(const TCHAR* field, TCHAR* part1, TCHAR* part2, bool inclusive); - CL_NS(search)::Query* GetPrefixQuery(const 
TCHAR* field, TCHAR* termStr); - CL_NS(search)::Query* GetWildcardQuery(const TCHAR* field, TCHAR* termStr); +public: + /** + * Parses a query which searches on the fields specified. + * <p> + * If x fields are specified, this effectively constructs: + * <pre> + * <code> + * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx) + * </code> + * </pre> + * @param queries Queries strings to parse + * @param fields Fields to search on + * @param analyzer Analyzer to use + * @throws ParseException if query parsing fails + * @throws IllegalArgumentException if the length of the queries array differs + * from the length of the fields array + */ + static CL_NS(search)::Query* parse(const TCHAR** _queries, const TCHAR** _fields, + CL_NS(analysis)::Analyzer* analyzer); - /** - * A special virtual function for the MultiFieldQueryParser which can be used - * to clean up queries. Once the field name is known and the query has been - * created, its passed to this function. - * An example of this usage is to set boosts. - */ - virtual CL_NS(search)::Query* QueryAddedCallback(const TCHAR* field, CL_NS(search)::Query* query){ return query; } - }; + /** + * Parses a query, searching on the fields specified. + * Use this if you need to specify certain fields as required, + * and others as prohibited. 
+ * <p><pre> + * Usage: + * <code> + * String[] fields = {"filename", "contents", "description"}; + * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, + * BooleanClause.Occur.MUST, + * BooleanClause.Occur.MUST_NOT}; + * MultiFieldQueryParser.parse("query", fields, flags, analyzer); + * </code> + * </pre> + *<p> + * The code above would construct a query: + * <pre> + * <code> + * (filename:query) +(contents:query) -(description:query) + * </code> + * </pre> + * + * @param query Query string to parse + * @param fields Fields to search on + * @param flags Flags describing the fields + * @param analyzer Analyzer to use + * @throws ParseException if query parsing fails + * @throws IllegalArgumentException if the length of the fields array differs + * from the length of the flags array + */ + static CL_NS(search)::Query* parse(const TCHAR* query, const TCHAR** _fields, + const uint8_t* flags, CL_NS(analysis)::Analyzer* analyzer); + + /** + * Parses a query, searching on the fields specified. + * Use this if you need to specify certain fields as required, + * and others as prohibited. 
+ * <p><pre> + * Usage: + * <code> + * String[] query = {"query1", "query2", "query3"}; + * String[] fields = {"filename", "contents", "description"}; + * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, + * BooleanClause.Occur.MUST, + * BooleanClause.Occur.MUST_NOT}; + * MultiFieldQueryParser.parse(query, fields, flags, analyzer); + * </code> + * </pre> + *<p> + * The code above would construct a query: + * <pre> + * <code> + * (filename:query1) +(contents:query2) -(description:query3) + * </code> + * </pre> + * + * @param queries Queries string to parse + * @param fields Fields to search on + * @param flags Flags describing the fields + * @param analyzer Analyzer to use + * @throws ParseException if query parsing fails + * @throws IllegalArgumentException if the length of the queries, fields, + * and flags array differ + */ + static CL_NS(search)::Query* parse(const TCHAR** _queries, const TCHAR** _fields, const uint8_t* flags, + CL_NS(analysis)::Analyzer* analyzer); + + CL_NS(search)::Query* parse(const TCHAR* _query){return QueryParser::parse(_query);} +}; CL_NS_END #endif Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h 2009-05-03 21:01:38 UTC (rev 3005) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h 2009-05-04 14:46:40 UTC (rev 3006) @@ -322,7 +322,7 @@ /** * @exception ParseException throw in overridden method to disallow */ - CL_NS(search)::Query* getFieldQuery(const TCHAR* _field, const TCHAR* queryText); + virtual CL_NS(search)::Query* getFieldQuery(const TCHAR* _field, const TCHAR* queryText); /** * Base implementation delegates to {@link #getFieldQuery(String,String)}. 
@@ -331,12 +331,12 @@ * * @exception ParseException throw in overridden method to disallow */ - CL_NS(search)::Query* getFieldQuery(const TCHAR* _field, const TCHAR* queryText, const int32_t slop); + virtual CL_NS(search)::Query* getFieldQuery(const TCHAR* _field, const TCHAR* queryText, const int32_t slop); /** * @exception ParseException throw in overridden method to disallow */ - CL_NS(search)::Query* getRangeQuery(const TCHAR* field, TCHAR* part1, TCHAR* part2, const bool inclusive); + virtual CL_NS(search)::Query* getRangeQuery(const TCHAR* field, TCHAR* part1, TCHAR* part2, const bool inclusive); /** * Factory method for generating query, given a set of clauses. @@ -375,7 +375,7 @@ * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - CL_NS(search)::Query* getWildcardQuery(const TCHAR* _field, TCHAR* termStr); + virtual CL_NS(search)::Query* getWildcardQuery(const TCHAR* _field, TCHAR* termStr); /** * Factory method for generating a query (similar to @@ -400,7 +400,7 @@ * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - CL_NS(search)::Query* getPrefixQuery(const TCHAR* _field, TCHAR* _termStr); + virtual CL_NS(search)::Query* getPrefixQuery(const TCHAR* _field, TCHAR* _termStr); /** * Factory method for generating a query (similar to @@ -413,7 +413,7 @@ * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - CL_NS(search)::Query* getFuzzyQuery(const TCHAR* _field, TCHAR* termStr, const float_t minSimilarity); + virtual CL_NS(search)::Query* getFuzzyQuery(const TCHAR* _field, TCHAR* termStr, const float_t minSimilarity); private: /** Modified: branches/lucene2_3_2/src/core/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/core/CMakeLists.txt 2009-05-03 21:01:38 UTC (rev 3005) +++ 
branches/lucene2_3_2/src/core/CMakeLists.txt 2009-05-04 14:46:40 UTC (rev 3006) @@ -29,7 +29,7 @@ ./CLucene/util/StringIntern.cpp ./CLucene/util/BitSet.cpp ./CLucene/queryParser/FastCharStream.cpp - #./CLucene/queryParser/MultiFieldQueryParser.cpp + ./CLucene/queryParser/MultiFieldQueryParser.cpp ./CLucene/queryParser/QueryParser.cpp ./CLucene/queryParser/QueryParserTokenManager.cpp ./CLucene/queryParser/Token.cpp Modified: branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp 2009-05-03 21:01:38 UTC (rev 3005) +++ branches/lucene2_3_2/src/test/queryParser/TestMultiFieldQueryParser.cpp 2009-05-04 14:46:40 UTC (rev 3006) @@ -6,164 +6,167 @@ ------------------------------------------------------------------------------*/ #include "test.h" -//class MQPTestFilter: public TokenFilter { -//public: -// -// bool inPhrase; -// int32_t savedStart, savedEnd; -// -// /** -// * Filter which discards the token 'stop' and which expands the -// * token 'phrase' into 'phrase1 phrase2' -// */ -// MQPTestFilter(TokenStream* in): -// TokenFilter(in,true), -// inPhrase(false), -// savedStart(0), -// savedEnd(0) -// { -// } -// -// bool next(CL_NS(analysis)::Token* token) { -// if (inPhrase) { -// inPhrase = false; -// token->set( _T("phrase2"), savedStart, savedEnd); -// return true; -// }else{ -// while( input->next(token) ){ -// if ( _tcscmp(token->termBuffer(), _T("phrase")) == 0 ) { -// inPhrase = true; -// savedStart = token->startOffset(); -// savedEnd = token->endOffset(); -// token->set( _T("phrase1"), savedStart, savedEnd); -// return true; -// }else if ( _tcscmp(token->termBuffer(), _T("stop") ) !=0 ){ -// return true; -// } -// } -// } -// return false; -// } -//}; -// -//class MQPTestAnalyzer: public Analyzer { -//public: -// MQPTestAnalyzer() { -// } -// -// /** Filters LowerCaseTokenizer with StopFilter. 
*/ -// TokenStream* tokenStream(const TCHAR* fieldName, Reader* reader) { -// return _CLNEW MQPTestFilter(_CLNEW LowerCaseTokenizer(reader)); -// } -//}; -// -//void assertEquals(CuTest *tc,const TCHAR* result, Query* q) { -// if ( q == NULL ) -// return; -// -// const TCHAR* s = q->toString(); -// int ret = _tcscmp(s,result); -// _CLDELETE(q); -// if ( ret != 0 ) { -// TCHAR buf[HUGE_STRING_LEN]; -// _sntprintf(buf, HUGE_STRING_LEN, _T("FAILED Query yielded /%s/, expecting /%s/\n"), s, result); -// _CLDELETE_LCARRAY(s); -// CuFail(tc, buf); -// } -// _CLDELETE_LCARRAY(s); -//} -// -//// verify parsing of query using a stopping analyzer -//void assertStopQueryEquals(CuTest *tc, const TCHAR* qtxt, const TCHAR* expectedRes) { -// const TCHAR* fields[] = {_T("b"), _T("t"), NULL }; -// //Occur occur[] = {Occur.SHOULD, Occur.SHOULD}; -// MQPTestAnalyzer *a = _CLNEW MQPTestAnalyzer(); -// MultiFieldQueryParser mfqp(fields, a); -// -// Query *q = mfqp.parse(qtxt); -// assertEquals(tc, expectedRes, q); -// -// q = MultiFieldQueryParser::parse(qtxt, fields, a); -// assertEquals(tc, expectedRes, q); -// _CLDELETE(a); -//} -// -///** test stop words arsing for both the non static form, and for the -//* corresponding static form (qtxt, fields[]). 
*/ -//void tesStopwordsParsing(CuTest *tc) { -// assertStopQueryEquals(tc, _T("one"), _T("b:one t:one")); -// assertStopQueryEquals(tc, _T("one stop"), _T("b:one t:one")); -// assertStopQueryEquals(tc, _T("one (stop)"), _T("b:one t:one")); -// assertStopQueryEquals(tc, _T("one ((stop))"), _T("b:one t:one")); -// assertStopQueryEquals(tc, _T("stop"), _T("")); -// assertStopQueryEquals(tc, _T("(stop)"), _T("")); -// assertStopQueryEquals(tc, _T("((stop))"), _T("")); -//} -// -//void testMFQPSimple(CuTest *tc) { -// const TCHAR* fields[] = {_T("b"), _T("t"), NULL}; -// Analyzer* a = _CLNEW StandardAnalyzer(); -// MultiFieldQueryParser mfqp(fields, a); -// -// Query *q = mfqp.parse(_T("one")); -// assertEquals(tc, _T("b:one t:one"), q); -// -// q = mfqp.parse(_T("one two")); -// assertEquals(tc, _T("(b:one t:one) (b:two t:two)"),q); -// -// q = mfqp.parse(_T("+one +two")); -// assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); -// -// q = mfqp.parse(_T("+one -two -three")); -// assertEquals(tc, _T("+(b:one t:one) -(b:two t:two) -(b:three t:three)"), q); -// -// q = mfqp.parse(_T("one^2 two")); -// assertEquals(tc, _T("((b:one t:one)^2.0) (b:two t:two)"), q); -// -// q = mfqp.parse(_T("one~ two")); -// assertEquals(tc, _T("(b:one~0.5 t:one~0.5) (b:two t:two)"), q); -// -// q = mfqp.parse(_T("one~0.8 two^2")); -// assertEquals(tc, _T("(b:one~0.8 t:one~0.8) ((b:two t:two)^2.0)"), q); -// -// q = mfqp.parse(_T("one* two*")); -// assertEquals(tc, _T("(b:one* t:one*) (b:two* t:two*)"), q); -// -// q = mfqp.parse(_T("[a TO c] two")); -// assertEquals(tc, _T("(b:[a TO c] t:[a TO c]) (b:two t:two)"), q); -// -// q = mfqp.parse(_T("w?ldcard")); -// assertEquals(tc, _T("b:w?ldcard t:w?ldcard"), q); -// -// q = mfqp.parse(_T("\"foo bar\"")); -// assertEquals(tc, _T("b:\"foo bar\" t:\"foo bar\""), q); -// -// q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); -// assertEquals(tc, _T("(b:\"aa bb cc\" t:\"aa bb cc\") (b:\"dd ee\" t:\"dd ee\")"), q); -// -// q = mfqp.parse(_T("\"foo 
bar\"~4")); -// assertEquals(tc, _T("b:\"foo bar\"~4 t:\"foo bar\"~4"), q); -// -// // make sure that terms which have a field are not touched: -// q = mfqp.parse(_T("one f:two")); -// assertEquals(tc, _T("(b:one t:one) f:two"), q); -// -// // AND mode: -// mfqp.setDefaultOperator(QueryParser::AND_OPERATOR); -// q = mfqp.parse(_T("one two")); -// assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); -// q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); -// assertEquals(tc, _T("+(b:\"aa bb cc\" t:\"aa bb cc\") +(b:\"dd ee\" t:\"dd ee\")"), q); -// -// _CLDELETE(a); -//} -// -//CuSuite *testMultiFieldQueryParser(void) -//{ -// CuSuite *suite = CuSuiteNew(_T("CLucene Multi-Field QP Test")); -// -// SUITE_ADD_TEST(suite, tesStopwordsParsing); -// SUITE_ADD_TEST(suite, testMFQPSimple); -// -// return suite; -//} \ No newline at end of file +class MQPTestFilter: public TokenFilter { +public: + + bool inPhrase; + int32_t savedStart, savedEnd; + + /** + * Filter which discards the token 'stop' and which expands the + * token 'phrase' into 'phrase1 phrase2' + */ + MQPTestFilter(TokenStream* in): + TokenFilter(in,true), + inPhrase(false), + savedStart(0), + savedEnd(0) + { + } + + CL_NS(analysis)::Token* next(CL_NS(analysis)::Token*& token) { + if (inPhrase) { + if (token == NULL) token=_CLNEW CL_NS(analysis)::Token(); + inPhrase = false; + token->set( _T("phrase2"), savedStart, savedEnd); + return token; + }else{ + while( input->next(token) ){ + if ( _tcscmp(token->termBuffer(), _T("phrase")) == 0 ) { + inPhrase = true; + savedStart = token->startOffset(); + savedEnd = token->endOffset(); + token->set( _T("phrase1"), savedStart, savedEnd); + return token; + }else if ( _tcscmp(token->termBuffer(), _T("stop") ) !=0 ){ + return token; + } + } + } + _CLDELETE(token); + return NULL; + } +}; + +class MQPTestAnalyzer: public Analyzer { +public: + MQPTestAnalyzer() { + } + + /** Filters LowerCaseTokenizer with StopFilter. 
*/ + TokenStream* tokenStream(const TCHAR* fieldName, Reader* reader) { + return _CLNEW MQPTestFilter(_CLNEW LowerCaseTokenizer(reader)); + } +}; + +void assertEquals(CuTest *tc,const TCHAR* result, Query* q) { + if ( q == NULL ) + return; + + const TCHAR* s = q->toString(); + int ret = _tcscmp(s,result); + _CLDELETE(q); + if ( ret != 0 ) { + TCHAR buf[HUGE_STRING_LEN]; + _sntprintf(buf, HUGE_STRING_LEN, _T("FAILED Query yielded /%s/, expecting /%s/\n"), s, result); + _CLDELETE_LCARRAY(s); + CuFail(tc, buf); + } + _CLDELETE_LCARRAY(s); +} + +// verify parsing of query using a stopping analyzer +void assertStopQueryEquals(CuTest *tc, const TCHAR* qtxt, const TCHAR* expectedRes) { + const TCHAR* fields[] = {_T("b"), _T("t"), NULL }; + const uint8_t occur[] = {BooleanClause::SHOULD, BooleanClause::SHOULD, NULL}; + MQPTestAnalyzer *a = _CLNEW MQPTestAnalyzer(); + MultiFieldQueryParser mfqp(fields, a); + + Query *q = mfqp.parse(qtxt); + assertEquals(tc, expectedRes, q); + + q = MultiFieldQueryParser::parse(qtxt, reinterpret_cast<const TCHAR**>(&fields), + reinterpret_cast<const uint8_t*>(&occur), a); + assertEquals(tc, expectedRes, q); + _CLDELETE(a); +} + +/** test stop words arsing for both the non static form, and for the +* corresponding static form (qtxt, fields[]). 
*/ +void tesStopwordsParsing(CuTest *tc) { + assertStopQueryEquals(tc, _T("one"), _T("b:one t:one")); + assertStopQueryEquals(tc, _T("one stop"), _T("b:one t:one")); + assertStopQueryEquals(tc, _T("one (stop)"), _T("b:one t:one")); + assertStopQueryEquals(tc, _T("one ((stop))"), _T("b:one t:one")); + assertStopQueryEquals(tc, _T("stop"), _T("")); + assertStopQueryEquals(tc, _T("(stop)"), _T("")); + assertStopQueryEquals(tc, _T("((stop))"), _T("")); +} + +void testMFQPSimple(CuTest *tc) { + const TCHAR* fields[] = {_T("b"), _T("t"), NULL}; + Analyzer* a = _CLNEW StandardAnalyzer(); + MultiFieldQueryParser mfqp(fields, a); + + Query *q = mfqp.parse(_T("one")); + assertEquals(tc, _T("b:one t:one"), q); + + q = mfqp.parse(_T("one two")); + assertEquals(tc, _T("(b:one t:one) (b:two t:two)"),q); + + q = mfqp.parse(_T("+one +two")); + assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); + + q = mfqp.parse(_T("+one -two -three")); + assertEquals(tc, _T("+(b:one t:one) -(b:two t:two) -(b:three t:three)"), q); + + q = mfqp.parse(_T("one^2 two")); + assertEquals(tc, _T("((b:one t:one)^2.0) (b:two t:two)"), q); + + q = mfqp.parse(_T("one~ two")); + assertEquals(tc, _T("(b:one~0.5 t:one~0.5) (b:two t:two)"), q); + + q = mfqp.parse(_T("one~0.8 two^2")); + assertEquals(tc, _T("(b:one~0.8 t:one~0.8) ((b:two t:two)^2.0)"), q); + + q = mfqp.parse(_T("one* two*")); + assertEquals(tc, _T("(b:one* t:one*) (b:two* t:two*)"), q); + + q = mfqp.parse(_T("[a TO c] two")); + assertEquals(tc, _T("(b:[a TO c] t:[a TO c]) (b:two t:two)"), q); + + q = mfqp.parse(_T("w?ldcard")); + assertEquals(tc, _T("b:w?ldcard t:w?ldcard"), q); + + q = mfqp.parse(_T("\"foo bar\"")); + assertEquals(tc, _T("b:\"foo bar\" t:\"foo bar\""), q); + + q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); + assertEquals(tc, _T("(b:\"aa bb cc\" t:\"aa bb cc\") (b:\"dd ee\" t:\"dd ee\")"), q); + + q = mfqp.parse(_T("\"foo bar\"~4")); + assertEquals(tc, _T("b:\"foo bar\"~4 t:\"foo bar\"~4"), q); + + // make sure that terms 
which have a field are not touched: + q = mfqp.parse(_T("one f:two")); + assertEquals(tc, _T("(b:one t:one) f:two"), q); + + // AND mode: + mfqp.setDefaultOperator(QueryParser::AND_OPERATOR); + q = mfqp.parse(_T("one two")); + assertEquals(tc, _T("+(b:one t:one) +(b:two t:two)"), q); + q = mfqp.parse(_T("\"aa bb cc\" \"dd ee\"")); + assertEquals(tc, _T("+(b:\"aa bb cc\" t:\"aa bb cc\") +(b:\"dd ee\" t:\"dd ee\")"), q); + + _CLDELETE(a); +} + +CuSuite *testMultiFieldQueryParser(void) +{ + CuSuite *suite = CuSuiteNew(_T("CLucene Multi-Field QP Test")); + + SUITE_ADD_TEST(suite, tesStopwordsParsing); + SUITE_ADD_TEST(suite, testMFQPSimple); + + return suite; +} \ No newline at end of file Modified: branches/lucene2_3_2/src/test/tests.cpp =================================================================== --- branches/lucene2_3_2/src/test/tests.cpp 2009-05-03 21:01:38 UTC (rev 3005) +++ branches/lucene2_3_2/src/test/tests.cpp 2009-05-04 14:46:40 UTC (rev 3006) @@ -16,7 +16,7 @@ {"highfreq", testhighfreq}, {"priorityqueue", testpriorityqueue}, {"queryparser", testQueryParser}, - //{"mfqueryparser", testMultiFieldQueryParser}, + {"mfqueryparser", testMultiFieldQueryParser}, {"search", testsearch}, {"queries", testqueries}, {"termvector",testtermvector}, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |