From: <ust...@us...> - 2009-03-30 21:39:15
|
Revision: 2977 http://clucene.svn.sourceforge.net/clucene/?rev=2977&view=rev Author: ustramooner Date: 2009-03-30 21:38:58 +0000 (Mon, 30 Mar 2009) Log Message: ----------- various contrib code... Added Paths: ----------- branches/lucene2_3_2/src/contribs/ branches/lucene2_3_2/src/contribs/CMakeLists.txt branches/lucene2_3_2/src/contribs/bashscripts/ branches/lucene2_3_2/src/contribs/bashscripts/findPatchThatBrokeUnitTest.sh branches/lucene2_3_2/src/contribs/bashscripts/simpleupdate.sh branches/lucene2_3_2/src/contribs/bashscripts/twofileupdate.sh branches/lucene2_3_2/src/contribs/benchmarker/ branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.cpp branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.h branches/lucene2_3_2/src/contribs/benchmarker/CMakeLists.txt branches/lucene2_3_2/src/contribs/benchmarker/Main.cpp branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.cpp branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.h branches/lucene2_3_2/src/contribs/benchmarker/Timer.h branches/lucene2_3_2/src/contribs/benchmarker/Unit.cpp branches/lucene2_3_2/src/contribs/benchmarker/Unit.h branches/lucene2_3_2/src/contribs/benchmarker/stdafx.cpp branches/lucene2_3_2/src/contribs/benchmarker/stdafx.h branches/lucene2_3_2/src/contribs/contribs-lib-test/ branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt branches/lucene2_3_2/src/contribs/contribs-lib-test/CuTest.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/CuTest.h branches/lucene2_3_2/src/contribs/contribs-lib-test/TestAnalysis.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/TestHighlight.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/TestSnowball.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/TestStreams.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/TestUtf8.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/contribTests.cpp branches/lucene2_3_2/src/contribs/contribs-lib-test/test.h 
branches/lucene2_3_2/src/contribs/contribs-lib-test/testall.cpp Added: branches/lucene2_3_2/src/contribs/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/contribs/CMakeLists.txt (rev 0) +++ branches/lucene2_3_2/src/contribs/CMakeLists.txt 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,2 @@ +ADD_SUBDIRECTORY (contribs-lib-test EXCLUDE_FROM_ALL) +ADD_SUBDIRECTORY (benchmarker EXCLUDE_FROM_ALL) Added: branches/lucene2_3_2/src/contribs/bashscripts/findPatchThatBrokeUnitTest.sh =================================================================== --- branches/lucene2_3_2/src/contribs/bashscripts/findPatchThatBrokeUnitTest.sh (rev 0) +++ branches/lucene2_3_2/src/contribs/bashscripts/findPatchThatBrokeUnitTest.sh 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,113 @@ +#! /bin/bash +# +# (c) Jos van den Oever <jo...@va...> +# modified by (c) Ben van Klinken <ust...@us... +# +# This script checks out clucen and runs the given unit test. If the unit test +# fails, it goes back to the previous version, compiles and tests again. +# This goes on until the unit test is not present or runs successfully. + +# check the arguments +if (( $# != 1 && $# != 2)); then + echo Usage: $0 testname + echo Note: This script must be run from the base + exit +fi + +# the path to the unit test executable +TESTNAME=$1 + +## Configuration parameters + +# the maximal number of log entries to download +MAXLOGENTRIES=300 + +# the maximal number of steps you wish to take +MAXSTEPS=300 + +# make exectable with arguments +MAKE="make" + +# should we do a drastic cleanup between runs or not? 
+FORCEFULLBUILD=0 + +#get the source dir +SOURCEDIR=`grep CMAKE_HOME_DIRECTORY CMakeCache.txt |perl -pi -e 's/.*=(.*)/\1/'` +if [ "$SOURCEDIR" == "" ]; then + echo "Run cmake before running this script" + exit 1 +fi + +#################### + +# function for testing a particular test in a particular revision +function runTest { + REVISION=$1 + echo Testing revision $REVISION. + + + exit 0 + + # go back to the given revision + cd $TESTDIR + svn update $MODULE -r $REVISION $SOURCEDIR + if (( $? != 0 )); then + # if updating failed, we have to get a fresh version + rm -rf $TESTDIR/$MODULE + svn checkout -r $REVISION $SVNURL $SOURCEDIR + if (( $? != 0 )); then exit; fi + fi + + # configure the code + # if we cannot configure the test, we continue to the next revision number + if (( $FORCEFULLBUILD == 1 )); then + rm -rf $TESTDIR/$MODULE/build + fi + mkdir $TESTDIR/$MODULE/build + cd $TESTDIR/$MODULE/build + cmake $SOURCEDIR + if (( $? != 0 )); then return; fi + + # get the name of the unit test and build it + # if we cannot build the test, we continue to the next revision number + echo $MAKE $TESTNAME + $MAKE $TESTNAME + if (( $? != 0 )); then return; fi + + # find the test executable + TESTPATH=`find -name $TESTNAME -type f -perm -u+x` + + # run the unit test and exit if it ran without error + $TESTPATH + if (( $? == 0 )); then + echo The last revision where the test $TESTNAME worked was $REVISION. + BROKEN=`grep -B 1 $REVISION $TESTDIR/revisions |head -1` + echo The first revision that was broken was $BROKEN: + svn log -r $BROKEN $TESTDIR/$MODULE $SOURCEDIR + exit + fi +} + +# determine the URL of the svn repository +SVNURL=`svn info $SOURCEDIR | grep -m 1 '^URL: ' | cut -b 6-` +if (( $? 
!= 0 )); then exit; fi + +echo $SVNURL + +# determine the module name +MODULE=`basename $SVNURL` + +echo $MODULE + +# get the last 100 relevant version numbers +svn log $MODULE --limit $MAXLOGENTRIES --non-interactive $SOURCEDIR\ + | grep -E '^r[0123456789]+' \ + | perl -pi -e 's/^r(\d+).*/\1/' | head -n $MAXSTEPS > revisions +if (( $? != 0 )); then exit; fi + +for REVISION in `cat revisions`; do + runTest $REVISION; +done + +echo No revision was found in which the unit test worked. + Added: branches/lucene2_3_2/src/contribs/bashscripts/simpleupdate.sh =================================================================== --- branches/lucene2_3_2/src/contribs/bashscripts/simpleupdate.sh (rev 0) +++ branches/lucene2_3_2/src/contribs/bashscripts/simpleupdate.sh 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,41 @@ +#! /bin/bash + +# this test checks if the strigicmd utility properly detect creation and +# deletion of a file + +function fail() { + echo Test failed + exit 1 +} + +STRIGICMD="`find -type f -name strigicmd -type f -perm -o+x` " +echo Using $STRIGICMD +NTHREADS=1 + +rm -r x y 2> /dev/null +mkdir x +touch x/y +touch x/z +echo == $STRIGICMD create -t clucene -d y x == +if ! $STRIGICMD create -j 1 -t clucene -d y x; then + fail +fi +echo == $STRIGICMD listFiles -t clucene -d y == +if ! $STRIGICMD listFiles -t clucene -d y; then + fail +fi +rm x/y +echo == $STRIGICMD update -j $NTHREADS -t clucene -d y x == +if ! $STRIGICMD update -j $NTHREADS -t clucene -d y x; then + fail +fi +echo == $STRIGICMD listFiles -t clucene -d y == +if ! 
$STRIGICMD listFiles -t clucene -d y; then + fail +fi +OUT=`$STRIGICMD listFiles -t clucene -d y` +if [[ $OUT == $'x\nx/z' ]]; then + echo Test succesfull + exit 0 +fi +fail Added: branches/lucene2_3_2/src/contribs/bashscripts/twofileupdate.sh =================================================================== --- branches/lucene2_3_2/src/contribs/bashscripts/twofileupdate.sh (rev 0) +++ branches/lucene2_3_2/src/contribs/bashscripts/twofileupdate.sh 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,42 @@ +#! /bin/bash + +# this test checks if the strigicmd utility properly detect creation and +# deletion of a file + +function fail() { + echo Test failed + exit 1 +} + +#VG="valgrind --db-attach=yes " + +rm -r x y +mkdir x +touch x/y +touch x/z +echo == src/strigicmd/strigicmd create -t clucene -d y x == +if ! $VG src/strigicmd/strigicmd create -t clucene -d y x; then + fail +fi +echo == src/strigicmd/strigicmd listFiles -t clucene -d y == +if ! $VG src/strigicmd/strigicmd listFiles -t clucene -d y; then + fail +fi +sleep 1 +touch x/y +touch x/z +echo == src/strigicmd/strigicmd update -t clucene -d y x == +exit +if ! $VG src/strigicmd/strigicmd update -t clucene -d y x; then + fail +fi +echo == src/strigicmd/strigicmd listFiles -t clucene -d y == +if ! 
$VG src/strigicmd/strigicmd listFiles -t clucene -d y; then + fail +fi +OUT=`$VG src/strigicmd/strigicmd listFiles -t clucene -d y` +if [[ $OUT == 'x/z' ]]; then + echo Test succesfull + exit 0 +fi +fail Added: branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.cpp (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.cpp 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,33 @@ +#include "stdafx.h" +#include "Benchmarker.h" +#include "Unit.h" + +void Benchmarker::Add(Unit* unit){ + tests.push_back(unit); +} +Benchmarker::Benchmarker(void) +{ + reset(); +} +void Benchmarker::reset(){ + timerTotal.reset(); + testsCountTotal=0; + testsCountSuccess=0; + testsRunTotal=0; + testsRunSuccess=0; +} +bool Benchmarker::run(){ + timerTotal.start(); + printf( ">> running tests...\n" ); + for ( int i=0;i<tests.size();i++ ){ + Unit* unit = tests[i]; + unit->start(this); + unit->stop(); + } + printf( "\n>> benchmarker ran a total of %d test cases(%d successes) in %d ms\n", + testsCountTotal,testsCountSuccess, + (int32_t)timerTotal.interval() ); + timerTotal.stop(); + + return testsCountSuccess > 0; +} \ No newline at end of file Added: branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.h =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.h (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/Benchmarker.h 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,17 @@ +#pragma once + +class Benchmarker +{ + lucene::util::CLVector<Unit*> tests; +public: + Timer timerTotal; + int testsCountTotal; + int testsCountSuccess; + int testsRunTotal; + int testsRunSuccess; + + Benchmarker(void); + void Add(Unit* unit); + bool run(); + void reset(); +}; Added: branches/lucene2_3_2/src/contribs/benchmarker/CMakeLists.txt 
=================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/CMakeLists.txt (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/CMakeLists.txt 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,20 @@ +PROJECT(clucene-benchmarker) + +INCLUDE (DefineOptions) +DEFINE_OPTIONS(EXTRA_OPTIONS) +ADD_DEFINITIONS(${EXTRA_OPTIONS}) + +file(GLOB_RECURSE benchmarker_HEADERS ${clucene-benchmarker_SOURCE_DIR}/*.h) + +SET(benchmarker_files + ./Benchmarker.cpp + ./Main.cpp + ./stdafx.cpp + ./Unit.cpp + + ./TestCLString.cpp + ${benchmarker_HEADERS} +) + +ADD_EXECUTABLE(cl_benchmarker EXCLUDE_FROM_ALL ${benchmarker_files} ) +TARGET_LINK_LIBRARIES(cl_benchmarker clucene-core clucene-shared) Added: branches/lucene2_3_2/src/contribs/benchmarker/Main.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Main.cpp (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/Main.cpp 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,103 @@ +#include "stdafx.h" +#include "TestCLString.h" + +#ifdef COMPILER_MSVC +#ifdef _DEBUG + #define CRTDBG_MAP_ALLOC + #include <stdlib.h> + #include <crtdbg.h> +#endif +#endif + +#include <stdlib.h> + +#include <fcntl.h> +#ifdef _CL_HAVE_DIRECT_H + #include <direct.h> +#endif +#ifdef _CL_HAVE_SYS_STAT_H + #include <sys/stat.h> +#endif +#ifdef _CL_HAVE_IO_H + #include <io.h> +#endif + +using namespace std; +using namespace lucene::util; + +const char* cl_tempDir; +char clucene_data_location[1024]; + +int main( int argc, char** argv ){ + //Dumper Debug + #ifdef COMPILER_MSVC + #ifdef _DEBUG + _CrtSetDbgFlag ( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF );//| _CRTDBG_CHECK_ALWAYS_DF | _CRTDBG_CHECK_CRT_DF ); + #endif + #endif + + Benchmarker bench; + TestCLString clstring; + bool ret_result = false; + + cl_tempDir = NULL; + if ( Misc::dir_Exists("/tmp") ) + cl_tempDir = "/tmp"; + if ( getenv("TEMP") != NULL ) + cl_tempDir = getenv("TEMP"); + 
else if ( getenv("TMP") != NULL ) + cl_tempDir = getenv("TMP"); + + char* tmp = _CL_NEWARRAY(char,strlen(cl_tempDir)+9); + strcpy(tmp,cl_tempDir); + strcat(tmp,"/clucene"); + _mkdir(tmp); + if ( Misc::dir_Exists(tmp) ) + cl_tempDir=tmp; + + + clucene_data_location[0]=0; + if ( CL_NS(util)::Misc::dir_Exists(CLUCENE_DATA_LOCATION1 "/reuters-21578-index/segments") ) + strcpy(clucene_data_location, CLUCENE_DATA_LOCATION1); + else if ( CL_NS(util)::Misc::dir_Exists(CLUCENE_DATA_LOCATION2 "/reuters-21578-index/segments") ) + strcpy(clucene_data_location, CLUCENE_DATA_LOCATION2); + else if ( CL_NS(util)::Misc::dir_Exists(CLUCENE_DATA_LOCATION3 "/reuters-21578-index/segments") ) + strcpy(clucene_data_location, CLUCENE_DATA_LOCATION3); + else if ( getenv(CLUCENE_DATA_LOCATIONENV) != NULL ){ + strcpy(clucene_data_location,getenv(CLUCENE_DATA_LOCATIONENV)); + strcat(clucene_data_location,"/data/reuters-21578-index/segments"); + if ( CL_NS(util)::Misc::dir_Exists( clucene_data_location ) ){ + strcpy(clucene_data_location, getenv(CLUCENE_DATA_LOCATIONENV)); + strcat(clucene_data_location, "/data"); + }else + clucene_data_location[0]=0; + } + + /* first check that we are running the test for the correct position */ + //todo: make this configurable + if ( !*clucene_data_location ){ + fprintf(stderr,"%s must be run from a subdirectory of the application's root directory\n",argv[0]); + fprintf(stderr,"ensure that the test data exists in %s or %s or %s\n",CLUCENE_DATA_LOCATION1, CLUCENE_DATA_LOCATION2, CLUCENE_DATA_LOCATION3); + if ( getenv(CLUCENE_DATA_LOCATIONENV) != NULL ) + fprintf(stderr,"%s/data was also checked because of the " CLUCENE_DATA_LOCATIONENV " environment variable", getenv(CLUCENE_DATA_LOCATIONENV)); + ret_result = 1; + goto exit_point; + } + + + bench.Add(&clstring); + ret_result = bench.run(); + + + +exit_point: + _lucene_shutdown(); //clears all static memory + //print lucenebase debug + + return ret_result ? 
0 : 1; + + //Debuggin techniques: + //For msvc, use this for breaking on memory leaks: + // _crtBreakAlloc + //for linux, use valgrind +} Added: branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.cpp (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.cpp 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,51 @@ +#include "stdafx.h" + +using namespace lucene::util; +using namespace lucene::analysis; +using namespace lucene::document; +using namespace lucene::index; +using namespace lucene::store; + +int BenchmarkDocumentWriter(Timer* timerCase) +{ + RAMDirectory ram; + SimpleAnalyzer an; + IndexWriter* ndx = _CLNEW IndexWriter(&ram, &an, true); + ndx->setMaxFieldLength(0x7FFFFFFF); + + char fname[1024]; + strcpy(fname, clucene_data_location); + strcat(fname, "reuters-21578/feldman-cia-worldfactbook-data.txt"); + + timerCase->start(); + for ( int i=0;i<10;i++ ){ + + FileReader* reader = _CLNEW FileReader(fname, "ASCII"); + Document doc; + doc.add(*_CLNEW Field(_T("contents"),reader, Field::STORE_YES | Field::INDEX_TOKENIZED)); + + ndx->addDocument(&doc); + } + ndx->close(); + timerCase->stop(); + + ram.close(); + _CLDELETE(ndx); + return 0; +} + +int BenchmarkTermDocs(Timer* timerCase){ + IndexReader* reader = IndexReader::open("index"); + timerCase->start(); + TermEnum* en = reader->terms(); + while (en->next()){ + Term* term = en->term(); + _CLDECDELETE(term); + } + en->close(); + _CLDELETE(en); + timerCase->stop(); + reader->close(); + _CLDELETE(reader); + return 0; +} Added: branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.h =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.h (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/TestCLString.h 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,17 @@ +#pragma once + +int 
BenchmarkDocumentWriter(Timer*); +int BenchmarkTermDocs(Timer* timerCase); + +class TestCLString:public Unit +{ +protected: + void runTests(){ + this->runTest("BenchmarkDocumentWriter",BenchmarkDocumentWriter,10); + //this->runTest("BenchmarkTermDocs",BenchmarkTermDocs,100); + } +public: + const char* getName(){ + return "TestCLString"; + } +}; Added: branches/lucene2_3_2/src/contribs/benchmarker/Timer.h =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Timer.h (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/Timer.h 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,42 @@ +#pragma once + +class Timer{ +public: + int64_t startTime; + int64_t stopTime; + bool running; + Timer(){ + running=false; + reset(); + } + void reset(){ + startTime=0; + stopTime=0; + running=false; + } + void start(){ + startTime = lucene::util::Misc::currentTimeMillis(); + running=true; + } + int32_t split(){ + return lucene::util::Misc::currentTimeMillis()-startTime; + } + int32_t stop(){ + if ( running ){ + running=false; + stopTime = lucene::util::Misc::currentTimeMillis(); + } + return stopTime-startTime; + } + int32_t interval(){ + if (running) + return lucene::util::Misc::currentTimeMillis()-startTime; + else + return stopTime-startTime; + } + +}; + + +typedef int (*PTEST_ROUTINE)(Timer*); +typedef PTEST_ROUTINE LPTEST_ROUTINE; \ No newline at end of file Added: branches/lucene2_3_2/src/contribs/benchmarker/Unit.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Unit.cpp (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/Unit.cpp 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,88 @@ +#include "stdafx.h" +#include "Unit.h" + +Unit::Unit() +{ + this->bm=NULL; + testsCountTotal=0; + testsCountSuccess=0; + testsRunTotal=0; + testsRunSuccess=0; + timerCase.reset(); + timerTotal.reset(); +} + +void Unit::stop(){ + timerTotal.stop(); + bm=NULL; + + 
printf( "> unit ran a total of %d test cases(%d successes) in %d ms\n", + testsCountTotal,testsCountSuccess, + (int)timerTotal.interval() ); +} + +void Unit::start(Benchmarker* bm){ + this->bm = bm; + timerTotal.start(); + + printf( "> running unit %s\n", getName() ); + runTests(); +} +void Unit::runTest(const char* testName,LPTEST_ROUTINE func, int iterations){ + if ( bm == NULL ) + _CLTHROWA(CL_ERR_NullPointer, "Unit not started with benchmarker!"); + float avg=0; + int32_t min=0; + int32_t max=0; + int count=0; + Timer total; + bool success = false; + + try { + total.start(); + printf("\n > running %s %d times...", testName, iterations); + for ( int i=0;i<iterations;i++ ){ + timerCase.reset(); + success = (func(&timerCase) == 0 ); + int32_t t = timerCase.stop(); + if ( count == 0 ){ + min = t; + max = t; + avg = t; + }else{ + if ( t < min ) + min = t; + if ( t > max ) + max = t; + avg = (avg + t)/2; + } + + testsRunTotal++; + bm->testsRunTotal++; + if ( success ){ + testsRunSuccess++; + bm->testsRunSuccess++; + } + count++; + } + success = true; + }catch(CLuceneError& err){ + printf("\n > error occurred: %s\n", err.what()); + }catch(...){ + printf("\n > unexpected error occurred\n >"); + } + testsCountTotal++; + bm->testsCountTotal++; + if ( success ){ + testsCountSuccess++; + bm->testsCountSuccess++; + } + printf(" it took %d milliseconds",total.stop()); + + if ( iterations > 1 ){ + printf("\n\tmin:%d",min); + printf(" max:%d,",max); + printf(" avg:%0.3f milliseconds",avg); + } + printf("\n"); +} \ No newline at end of file Added: branches/lucene2_3_2/src/contribs/benchmarker/Unit.h =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/Unit.h (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/Unit.h 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,23 @@ +#pragma once +#include "CLucene/util/Misc.h" + + +class Unit +{ +public: + void start(Benchmarker* benchmarker); + void stop(); + virtual 
const char* getName()=0; + Unit(); +protected: + Timer timerCase; + Timer timerTotal; + int testsCountTotal; + int testsCountSuccess; + int testsRunTotal; + int testsRunSuccess; + Benchmarker* bm; + + void runTest(const char* testName,LPTEST_ROUTINE func, int iterations); + virtual void runTests()=0; +}; \ No newline at end of file Added: branches/lucene2_3_2/src/contribs/benchmarker/stdafx.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/stdafx.cpp (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/stdafx.cpp 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,5 @@ +// stdafx.cpp : source file that includes just the standard includes +// demo.pch will be the pre-compiled header +// stdafx.obj will contain the pre-compiled type information + +#include "stdafx.h" Added: branches/lucene2_3_2/src/contribs/benchmarker/stdafx.h =================================================================== --- branches/lucene2_3_2/src/contribs/benchmarker/stdafx.h (rev 0) +++ branches/lucene2_3_2/src/contribs/benchmarker/stdafx.h 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,31 @@ +// stdafx.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// +#ifndef _lucene_examples_benchmark_stdafx_ +#define _lucene_examples_benchmark_stdafx_ + +#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers +#include <stdio.h> +//#include <tchar.h> +#include <string.h> + +#include "CLucene.h" +#include "CLucene/_clucene-config.h" +#include "CLucene/util/Misc.h" +#include "CLucene/store/RAMDirectory.h" + +#define CLUCENE_DATA_LOCATION1 "../../src/test/data/" +#define CLUCENE_DATA_LOCATION2 "../src/test/data/" +#define CLUCENE_DATA_LOCATION3 "../../../src/test/data/" +#define CLUCENE_DATA_LOCATIONENV "srcdir" + +extern const char* cl_tempDir; +extern char clucene_data_location[1024]; + +class Benchmarker; 
+#include "Timer.h" +#include "Unit.h" +#include "Benchmarker.h" + +#endif Added: branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt =================================================================== --- branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt (rev 0) +++ branches/lucene2_3_2/src/contribs/contribs-lib-test/CMakeLists.txt 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,33 @@ +PROJECT(clucene-contribs-lib-test) + +INCLUDE (DefineOptions) +DEFINE_OPTIONS(EXTRA_OPTIONS) +ADD_DEFINITIONS(${EXTRA_OPTIONS}) + +INCLUDE_DIRECTORIES( ${clucene-contribs-lib-test_SOURCE_DIR} ) +INCLUDE_DIRECTORIES( ${clucene-contribs-lib_SOURCE_DIR} ) + +file(GLOB_RECURSE test_HEADERS ${CMAKE_SOURCE_DIR}/test/*.h) + +SET(test_files + ./contribTests.cpp + ./TestHighlight.cpp + ./TestSnowball.cpp + ./TestStreams.cpp + ./TestUtf8.cpp + ./TestAnalysis.cpp + ./CuTest.cpp + ./testall.cpp + ${test_HEADERS} +) +IF ( USE_SHARED_OBJECT_FILES ) + GET_SHARED_FILES(clucene_shared_Files) +ENDIF ( USE_SHARED_OBJECT_FILES ) + +#todo: do glob header and include header files for IDE. 
+ADD_EXECUTABLE(cl_contribs-lib-test EXCLUDE_FROM_ALL ${clucene_shared_Files} ${test_files} ) + +#link the executable against the releavent clucene-shared library (if we aren't using the object files) +IF ( NOT USE_SHARED_OBJECT_FILES ) + TARGET_LINK_LIBRARIES(cl_contribs-lib-test clucene-core clucene-shared clucene-contribs-lib) +ENDIF ( NOT USE_SHARED_OBJECT_FILES ) Added: branches/lucene2_3_2/src/contribs/contribs-lib-test/CuTest.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/contribs-lib-test/CuTest.cpp (rev 0) +++ branches/lucene2_3_2/src/contribs/contribs-lib-test/CuTest.cpp 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,536 @@ +/*------------------------------------------------------------------------------ + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/ +#include "test.h" +#include "CuTest.h" +#include <stdlib.h> +#include <assert.h> + +static int verbose = 0; +static int messyPrinting = 0; + +void CuInit(int argc, char *argv[]) { + int i; + + for (i = 0; i < argc; i++) { + if (!strcmp(argv[i], "-v")) { + verbose = 1; + } + if (!strcmp(argv[i], "-p")) { + messyPrinting = 1; + } + } +} + +/*-------------------------------------------------------------------------* + * CuTcs + *-------------------------------------------------------------------------*/ +TCHAR* CuStrAlloc(int size) { + TCHAR* n = (TCHAR*) malloc(sizeof (TCHAR) * (size)); + return n; +} + +TCHAR* CuTcsCopy(TCHAR* old) { + int len = _tcslen(old); + TCHAR* n = CuStrAlloc(len + 1); + _tcscpy(n, old); + return n; +} + +TCHAR* CuTcsCat(TCHAR* orig, TCHAR* add) { + int len = _tcslen(orig) + _tcslen(add); + TCHAR* n = CuStrAlloc(len + 1); + _tcscpy(n, orig); + _tcscat(n, add); + return n; +} + +/*-------------------------------------------------------------------------* + * CuString + *-------------------------------------------------------------------------*/ + +TCHAR* CuTcsAlloc(int size) { + TCHAR* n = (TCHAR*) malloc(sizeof (TCHAR) * (size)); + return n; +} + +TCHAR* CuTcsCopy(const TCHAR* old) { + int len = _tcslen(old); + TCHAR* n = CuTcsAlloc(len + 1); + _tcscpy(n, old); + return n; +} + +/*-------------------------------------------------------------------------* + * CuString + *-------------------------------------------------------------------------*/ + +void CuStringInit(CuString* str) { + str->length = 0; + str->size = STRING_MAX; + str->buffer = (TCHAR*) malloc(sizeof (TCHAR) * str->size); + str->buffer[0] = '\0'; +} + +CuString* CuStringNew(void) { + CuString* str = (CuString*) malloc(sizeof (CuString)); + str->length = 0; + str->size = STRING_MAX; + str->buffer = (TCHAR*) malloc(sizeof (TCHAR) * str->size); + str->buffer[0] = '\0'; + return str; +} + +void 
CuStringFree(CuString* str) { + free(str->buffer); + free(str); +} + +void CuStringResize(CuString* str, int newSize) { + str->buffer = (TCHAR*) realloc(str->buffer, sizeof (TCHAR) * newSize); + str->size = newSize; +} + +void CuStringAppend(CuString* str, const TCHAR* text) { + int length = _tcslen(text); + if (str->length + length + 1 >= str->size) + CuStringResize(str, str->length + length + 1 + STRING_INC); + str->length += length; + _tcscat(str->buffer, text); +} + +void CuStringAppendChar(CuString* str, TCHAR ch) { + TCHAR text[2]; + text[0] = ch; + text[1] = '\0'; + CuStringAppend(str, text); +} + +void CuStringAppendFormat(CuString* str, const TCHAR* format, ...) { + TCHAR buf[HUGE_STRING_LEN]; + va_list argp; + va_start(argp, format); + _vsntprintf(buf, HUGE_STRING_LEN, format, argp); + va_end(argp); + CuStringAppend(str, buf); +} + +void CuStringRead(CuString *str, TCHAR *path) { + path = NULL; + CU_TDUP(path, str->buffer); +} + +/*-------------------------------------------------------------------------* + * CuTest + *-------------------------------------------------------------------------*/ + +void CuTestInit(CuTest* t, const TCHAR* name, TestFunction function) { + t->name = CuTcsCopy(name); + t->notimpl = 0; + t->failed = 0; + t->ran = 0; + t->message = NULL; + t->function = function; + // t->jumpBuf = NULL; +} + +CuTest* CuTestNew(const TCHAR* name, TestFunction function) { + CuTest* tc = CU_ALLOC(CuTest); + CuTestInit(tc, name, function); + return tc; +} + +void CuTestDelete(CuTest* tst) { + free(tst->name); + if (tst->message != NULL) + free(tst->message); + free(tst); +} + +void CuNotImpl(CuTest* tc, const TCHAR* message) { + CuString* newstr = CuStringNew(); + CuStringAppend(newstr, message); + CuStringAppend(newstr, _T(" not implemented on this platform")); + tc->notimpl = 1; + CuMessage(tc, newstr->buffer); + CuStringFree(newstr); + // if (tc->jumpBuf != 0) longjmp(*(tc->jumpBuf), 0); +} + +void CuFail(CuTest* tc, const TCHAR* format, ...) 
{ + tc->failed = 1; + + TCHAR buf[HUGE_STRING_LEN]; + va_list argp; + va_start(argp, format); + _vsntprintf(buf, HUGE_STRING_LEN, format, argp); + va_end(argp); + + // CuMessage(tc,buf); + _CLTHROWT(CL_ERR_Runtime, buf); +} + +void CuMessageV(CuTest* tc, const TCHAR* format, va_list& argp) { + TCHAR buf[HUGE_STRING_LEN]; + _vsntprintf(buf, HUGE_STRING_LEN, format, argp); + + TCHAR* old = tc->message; + if (messyPrinting) { + _tprintf(_T("%s"), buf); + } else { + if (old == NULL) { + tc->message = CuTcsCopy(buf); + } else { + tc->message = CuTcsCat(old, buf); + free(old); + } + } +} + +void CuMessage(CuTest* tc, const TCHAR* format, ...) { + va_list argp; + va_start(argp, format); + CuMessageV(tc, format, argp); + va_end(argp); +} + +void CuMessageA(CuTest* tc, const char* format, ...) { + va_list argp; + char buf[HUGE_STRING_LEN]; + TCHAR tbuf[HUGE_STRING_LEN]; + va_start(argp, format); + vsprintf(buf, format, argp); + va_end(argp); + + TCHAR* old = tc->message; + STRCPY_AtoT(tbuf, buf, HUGE_STRING_LEN); + if (messyPrinting) { + _tprintf(_T("%s"), buf); + } else { + if (old == NULL) { + tc->message = CuTcsCopy(tbuf); + } else { + tc->message = CuTcsCat(old, tbuf); + free(old); + } + } +} + +void CuAssert(CuTest* tc, const TCHAR* message, int condition) { + if (condition) return; + CuFail(tc, message); +} + +void CuAssertTrue(CuTest* tc, int condition) { + if (condition) return; + CuFail(tc, _T("assert failed")); +} + +void CuAssertStrEquals(CuTest* tc, const TCHAR* preMessage, const TCHAR* expected, const TCHAR* actual) { + CuString* message; + if (_tcscmp(expected, actual) == 0) return; + message = CuStringNew(); + CuStringAppend(message, preMessage); + CuStringAppend(message, _T(" : ")); + CuStringAppend(message, _T("expected\n---->\n")); + CuStringAppend(message, expected); + CuStringAppend(message, _T("\n<----\nbut saw\n---->\n")); + CuStringAppend(message, actual); + CuStringAppend(message, _T("\n<----")); + CuFail(tc, message->buffer); + 
CuStringFree(message); +} + +void CuAssertIntEquals(CuTest* tc, const TCHAR* preMessage, int expected, int actual) { + TCHAR buf[STRING_MAX]; + if (expected == actual) return; + _sntprintf(buf, STRING_MAX, _T("%s : expected <%d> but was <%d>"), preMessage, expected, actual); + CuFail(tc, buf); +} + +void CuAssertPtrEquals(CuTest* tc, const TCHAR* preMessage, const void* expected, const void* actual) { + TCHAR buf[STRING_MAX]; + if (expected == actual) return; + _sntprintf(buf, STRING_MAX, _T("%s : expected pointer <%p> but was <%p>"), preMessage, expected, actual); + CuFail(tc, buf); +} + +void CuAssertPtrNotNull(CuTest* tc, const TCHAR* preMessage, const void* pointer) { + TCHAR buf[STRING_MAX]; + if (pointer != NULL) return; + _sntprintf(buf, STRING_MAX, _T("%s : null pointer unexpected, but was <%p>"), preMessage, pointer); + CuFail(tc, buf); +} + +void CuTestRun(CuTest* tc) { + // jmp_buf buf; + // tc->jumpBuf = &buf; + // if (setjmp(buf) == 0) + // { + tc->ran = 1; + (tc->function)(tc); + // } + // tc->jumpBuf = 0; +} + +/*-------------------------------------------------------------------------* + * CuSuite + *-------------------------------------------------------------------------*/ + +void CuSuiteInit(CuSuite* testSuite, const TCHAR *name) { + testSuite->name = NULL; + CU_TDUP(testSuite->name, name); + testSuite->count = 0; + testSuite->failCount = 0; + testSuite->notimplCount = 0; + testSuite->timeTaken = 0; +} + +CuSuite* CuSuiteNew(const TCHAR *name) { + CuSuite* testSuite = CU_ALLOC(CuSuite); + CuSuiteInit(testSuite, name); + return testSuite; +} + +void CuSuiteDelete(CuSuite* suite) { + free(suite->name); + for (int i = 0; i < suite->count; i++) { + CuTestDelete(suite->list[i]); + } + free(suite); +} + +void CuSuiteAdd(CuSuite* testSuite, CuTest *testCase) { + assert(testSuite->count < MAX_TEST_CASES); + testSuite->list[testSuite->count] = testCase; + testSuite->count++; +} + +void CuSuiteAddSuite(CuSuite* testSuite, CuSuite* testSuite2) { + int i; + 
for (i = 0; i < testSuite2->count; ++i) {
+        CuTest* testCase = testSuite2->list[i];
+        CuSuiteAdd(testSuite, testCase);
+    }
+}
+
+// Runs every test of the suite in registration order and refreshes the
+// suite's failCount, notimplCount and timeTaken (milliseconds) fields.
+void CuSuiteRun(CuSuite* testSuite) {
+    int i;
+    uint64_t start = Misc::currentTimeMillis();
+    // start from zero so that running the same suite twice does not double the totals
+    testSuite->failCount = 0;
+    testSuite->notimplCount = 0;
+    for (i = 0; i < testSuite->count; ++i) {
+        CuTest* testCase = testSuite->list[i];
+        try {
+            CuTestRun(testCase);
+        } catch (CLuceneError& err) {
+            // an escaping CLuceneError fails the test and its text is recorded
+            testCase->failed = 1;
+            // CuMessage takes a format string; pass the error text as data
+            CuMessage(testCase, _T("%s"), err.twhat());
+        }
+        testSuite->timeTaken = Misc::currentTimeMillis() - start;
+        if (testCase->failed) {
+            testSuite->failCount += 1;
+        }
+        if (testCase->notimpl) {
+            testSuite->notimplCount += 1;
+        }
+    }
+}
+
+// Appends one character per test to `summary` ("F" failed, "N" not
+// implemented, "." otherwise), optionally padding and adding the elapsed
+// time, and finishes with a newline.
+void CuSuiteSummary(CuSuite* testSuite, CuString* summary, bool times) {
+    int i;
+    for (i = 0; i < testSuite->count; ++i) {
+        CuTest* testCase = testSuite->list[i];
+        CuStringAppend(summary, testCase->failed ? _T("F") :
+                testCase->notimpl ? _T("N") : _T("."));
+    }
+    if (times) {
+        int bufferLen = 25 - summary->length - 10;
+        for (int i = 0; i < bufferLen; i++)
+            CuStringAppend(summary, _T(" "));
+        // timeTaken is uint64_t; %d consumes an int, so convert explicitly
+        CuStringAppendFormat(summary, _T(" - %dms"), (int) testSuite->timeTaken);
+    }
+    CuStringAppend(summary, _T("\n"));
+}
+
+// Appends the one-line "N tests run: ..." totals of a single suite.
+void CuSuiteOverView(CuSuite* testSuite, CuString* details) {
+    CuStringAppendFormat(details, _T("%d %s run: %d passed, %d failed, ")
+            _T("%d not implemented.\n"),
+            testSuite->count,
+            // the format is a TCHAR string, so its %s argument must be TCHAR text too
+            testSuite->count == 1 ? _T("test") : _T("tests"),
+            testSuite->count - testSuite->failCount -
+            testSuite->notimplCount,
+            testSuite->failCount, testSuite->notimplCount);
+}
+
+// Appends a "Failed tests" and a "Not Implemented tests" section for the
+// suite, each only when the matching counter is non-zero and `verbose` is
+// set. `verbose` is not a parameter here; it is declared outside this
+// function. The running number continues from the first section into the
+// second one.
+void CuSuiteDetails(CuSuite* testSuite, CuString* details) {
+    int i;
+    int failCount = 0;
+
+    if (testSuite->failCount != 0 && verbose) {
+        CuStringAppendFormat(details, _T("\nFailed tests in %s:\n"), testSuite->name);
+        for (i = 0; i < testSuite->count; ++i) {
+            CuTest* testCase = testSuite->list[i];
+            if (testCase->failed) {
+                failCount++;
+                CuStringAppendFormat(details, _T("%d) %s: %s\n"),
+                        failCount, testCase->name, testCase->message);
+            }
+        }
+    }
+    if (testSuite->notimplCount != 0 && verbose) {
+        CuStringAppendFormat(details, _T("\nNot Implemented tests in %s:\n"), testSuite->name);
+        for (i = 0; i < testSuite->count; ++i) {
+            CuTest* testCase = testSuite->list[i];
+            if (testCase->notimpl) {
+                failCount++;
+                CuStringAppendFormat(details, _T("%d) %s: %s\n"),
+                        failCount, testCase->name, testCase->message);
+            }
+        }
+    }
+}
+
+/*-------------------------------------------------------------------------*
+ * CuSuiteList
+ *-------------------------------------------------------------------------*/
+
+// Allocates an empty suite list that owns a malloc'ed copy of `name`.
+CuSuiteList* CuSuiteListNew(const TCHAR *name) {
+    CuSuiteList* testSuite = CU_ALLOC(CuSuiteList);
+    testSuite->name = NULL;
+    CU_TDUP(testSuite->name, name);
+    testSuite->count = 0;
+    return testSuite;
+}
+
+// Releases the list name, every suite in the list and the list itself.
+void CuSuiteListDelete(CuSuiteList* lst) {
+    free(lst->name);
+    for (int i = 0; i < lst->count; i++) {
+        CuSuiteDelete(lst->list[i]);
+    }
+    free(lst);
+}
+
+// Appends a suite pointer to the list; capacity is only checked by assert().
+void CuSuiteListAdd(CuSuiteList *suites, CuSuite *origsuite) {
+    assert(suites->count < MAX_TEST_CASES);
+    suites->list[suites->count] = origsuite;
+    suites->count++;
+}
+
+// Runs every suite of the list without printing anything.
+void CuSuiteListRun(CuSuiteList* testSuite) {
+    int i;
+    for (i = 0; i < testSuite->count; ++i) {
+        CuSuite* testCase = testSuite->list[i];
+        CuSuiteRun(testCase);
+    }
+}
+
+// Returns a malloc'ed string of `i` spaces; the caller frees it.
+static const TCHAR *genspaces(int i) {
+    TCHAR *str = (TCHAR*) malloc((i + 1) * sizeof (TCHAR));
+    for (int j = 0; j < i;
j++)
+        str[j] = _T(' ');
+    str[i] = '\0';
+    return str;
+}
+
+// Runs every suite of the list and prints, per suite, its name followed by
+// the CuSuiteSummary() line. With `verbose`, the recorded message of every
+// test that ran is printed as well, indented line by line.
+void CuSuiteListRunWithSummary(CuSuiteList* testSuite, bool verbose, bool times) {
+    int i;
+
+    _tprintf(_T("%s:\n"), testSuite->name);
+    for (i = 0; i < testSuite->count; ++i) {
+        bool hasprinted = false;
+        CuSuite* testCase = testSuite->list[i];
+        CuString *str = CuStringNew();
+
+        // pad the suite name to a 31 character column; longer names get no padding
+        size_t len = _tcslen(testCase->name);
+        const TCHAR* spaces = len > 31 ? NULL : genspaces(31 - len);
+        _tprintf(_T(" %s:%s"), testCase->name, len > 31 ? _T("") : spaces);
+        free((void*) spaces);
+        fflush(stdout);
+
+        CuSuiteRun(testCase);
+        if (verbose) {
+            for (int i = 0; i < testCase->count; i++) {
+                if (testCase->list[i]->ran) {
+                    if (testCase->list[i]->message != NULL) {
+                        if (!hasprinted)
+                            printf("\n");
+                        _tprintf(_T(" %s:\n"), testCase->list[i]->name);
+
+                        TCHAR* msg = testCase->list[i]->message;
+                        bool nl = true;
+                        // Last character written so far. The header line above ends in a
+                        // newline, so an empty message needs no extra one; this also
+                        // avoids indexing message[-1] for an empty message.
+                        TCHAR last = '\n';
+                        //write out message, indenting on new lines
+                        while (*msg != '\0') {
+                            if (nl) {
+                                printf(" ");
+                                nl = false;
+                            }
+                            if (*msg == '\n')
+                                nl = true;
+                            // NOTE(review): putc writes a single int character; when TCHAR is a
+                            // wide type, non-ASCII text presumably does not print correctly - confirm
+                            putc(*msg, stdout);
+                            last = *msg;
+
+                            msg++;
+                        }
+
+                        if (last != '\n')
+                            printf("\n");
+                        hasprinted = true;
+                    }
+                }
+            }
+        }
+        CuSuiteSummary(testCase, str, times);
+        if (hasprinted)
+            _tprintf(_T(" Result: %s\n"), str->buffer);
+        else
+            _tprintf(_T(" %s"), str->buffer);
+
+        CuStringFree(str);
+    }
+    _tprintf(_T("\n"));
+}
+
+// Appends the totals over all suites to `details` and returns the number of
+// failed tests.
+int CuSuiteListDetails(CuSuiteList* testSuite, CuString* details) {
+    int i;
+    int failCount = 0;
+    int notImplCount = 0;
+    int count = 0;
+
+    for (i = 0; i < testSuite->count; ++i) {
+        failCount += testSuite->list[i]->failCount;
+        notImplCount += testSuite->list[i]->notimplCount;
+        count += testSuite->list[i]->count;
+    }
+    CuStringAppendFormat(details, _T("%d %s run: %d passed, %d failed, ")
+            _T("%d not implemented.\n"),
+            count,
+            count == 1 ?
_T("test") : _T("tests"), + count - failCount - notImplCount, + failCount, notImplCount); + + if (failCount != 0 && verbose) { + for (i = 0; i < testSuite->count; ++i) { + CuString *str = CuStringNew(); + CuSuite* testCase = testSuite->list[i]; + if (testCase->failCount) { + CuSuiteDetails(testCase, str); + CuStringAppend(details, str->buffer); + } + CuStringFree(str); + } + } + if (notImplCount != 0 && verbose) { + for (i = 0; i < testSuite->count; ++i) { + CuString *str = CuStringNew(); + CuSuite* testCase = testSuite->list[i]; + if (testCase->notimplCount) { + CuSuiteDetails(testCase, str); + CuStringAppend(details, str->buffer); + } + CuStringFree(str); + } + } + return failCount; +} + Added: branches/lucene2_3_2/src/contribs/contribs-lib-test/CuTest.h =================================================================== --- branches/lucene2_3_2/src/contribs/contribs-lib-test/CuTest.h (rev 0) +++ branches/lucene2_3_2/src/contribs/contribs-lib-test/CuTest.h 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,122 @@ +/*------------------------------------------------------------------------------ + * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team + * + * Distributable under the terms of either the Apache License (Version 2.0) or + * the GNU Lesser General Public License, as specified in the COPYING file. 
+------------------------------------------------------------------------------*/
+#ifndef CU_TEST_H
+#define CU_TEST_H
+
+/* CuString */
+
+TCHAR* CuWstrAlloc(int size);
+TCHAR* CuWstrCopy(const TCHAR* old);
+
+// CU_ALLOC: malloc storage for one TYPE; no constructor is run.
+#define CU_ALLOC(TYPE) ((TYPE*) malloc(sizeof(TYPE)))
+// CU_TDUP: malloc room for _tcslen(src) characters plus a terminator and copy src into it.
+// NOTE(review): expands to two statements with a trailing ';', so it is unsafe
+// as the body of an unbraced if/else, and it evaluates dest and src twice.
+#define CU_TDUP(dest,src) dest=((TCHAR*)malloc(sizeof(TCHAR)*_tcslen(src)+sizeof(TCHAR)));_tcscpy(dest,src);
+
+#define HUGE_STRING_LEN 8192
+#define STRING_MAX 256
+#define STRING_INC 256
+
+// CLUCENE_ASSERT: forwards to CuAssert with the stringified expression as message;
+// relies on a CuTest* variable named `tc` being in scope at the call site.
+#define CLUCENE_ASSERT(x) CuAssert(tc,_T("Assert Failed: ") _T(#x),x)
+
+// Text buffer that the CuString* functions below operate on.
+typedef struct {
+    int length;
+    int size;
+    TCHAR* buffer;
+} CuString;
+
+void CuStringInit(CuString* str);
+CuString* CuStringNew(void);
+void CuStringFree(CuString* str);
+void CuStringRead(CuString* str, TCHAR* path);
+void CuStringAppend(CuString* str, const TCHAR* text);
+void CuStringAppendChar(CuString* str, TCHAR ch);
+void CuStringAppendFormat(CuString* str, const TCHAR* format, ...);
+void CuStringResize(CuString* str, int newSize);
+
+/* CuTest */
+
+typedef struct CuTest CuTest;
+
+// Signature of a test body; it receives the CuTest it runs under.
+typedef void (*TestFunction)(CuTest *);
+
+// One test case: its name, the function to call, and its result flags/message.
+struct CuTest {
+    TCHAR* name;
+    TestFunction function;
+    int notimpl;
+    int failed;
+    int ran;
+    TCHAR* message;
+    // jmp_buf *jumpBuf;
+};
+
+
+void CuInit(int argc, char *argv[]);
+void CuTestInit(CuTest* t, const TCHAR* name, TestFunction function);
+CuTest* CuTestNew(const TCHAR* name, TestFunction function);
+void CuTestDelete(CuTest* tst);
+void CuFail(CuTest* tc, const TCHAR* format, ...);
+void CuMessage(CuTest* tc, const TCHAR* message, ...);
+void CuMessageV(CuTest* tc, const TCHAR* format, va_list& argp);
+void CuMessageA(CuTest* tc, const char* format, ...);
+void CuNotImpl(CuTest* tc, const TCHAR* message);
+void CuAssert(CuTest* tc, const TCHAR* message, int condition);
+void CuAssertTrue(CuTest* tc, int condition);
+void CuAssertStrEquals(CuTest* tc, const TCHAR* preMessage, const TCHAR* expected, const TCHAR* actual);
+void CuAssertIntEquals(CuTest* tc, const TCHAR* preMessage, int expected, int actual);
+void CuAssertPtrEquals(CuTest* tc, const TCHAR* preMessage, const void* expected, const void* actual);
+void CuAssertPtrNotNull(CuTest* tc, const TCHAR* preMessage, const void* pointer);
+
+void CuTestRun(CuTest* tc);
+
+/* CuSuite */
+
+// Capacity of the fixed list[] arrays in CuSuite and CuSuiteList.
+#define MAX_TEST_CASES 1024
+
+// Registers function TEST in SUITE under its own name (stringified).
+#define SUITE_ADD_TEST(SUITE,TEST) CuSuiteAdd(SUITE, CuTestNew(_T(#TEST), TEST))
+
+extern char clucene_data_location[1024];
+
+// A named group of tests plus the counters that CuSuiteRun fills in.
+typedef struct {
+    TCHAR *name;
+    int count;
+    CuTest * list[MAX_TEST_CASES];
+    int failCount;
+    int notimplCount;
+    uint64_t timeTaken;
+} CuSuite;
+
+
+void CuSuiteInit(CuSuite* testSuite, const TCHAR* name);
+CuSuite* CuSuiteNew(const TCHAR* name);
+void CuSuiteDelete(CuSuite* suite);
+void CuSuiteAdd(CuSuite* testSuite, CuTest *testCase);
+void CuSuiteAddSuite(CuSuite* testSuite, CuSuite* testSuite2);
+void CuSuiteRun(CuSuite* testSuite);
+void CuSuiteSummary(CuSuite* testSuite, CuString* summary, bool times);
+void CuSuiteOverView(CuSuite* testSuite, CuString* details);
+void CuSuiteDetails(CuSuite* testSuite, CuString* details);
+
+// A named group of suites.
+typedef struct {
+    TCHAR *name;
+    int count;
+    CuSuite * list[MAX_TEST_CASES];
+} CuSuiteList;
+
+// Pairs a test name with the factory function that builds its suite.
+struct unittest {
+    const char *testname;
+    CuSuite * (*func)(void);
+};
+
+CuSuiteList* CuSuiteListNew(const TCHAR* name);
+void CuSuiteListDelete(CuSuiteList* lst);
+void CuSuiteListAdd(CuSuiteList* testSuite, CuSuite *testCase);
+void CuSuiteListRun(CuSuiteList* testSuite);
+void CuSuiteListRunWithSummary(CuSuiteList* testSuite, bool verbose, bool times);
+//void CuSuiteListSummary(CuSuiteList* testSuite, CuString* summary);
+/* Print details of test suite results; returns total number of
+ * tests which failed.
*/ +int CuSuiteListDetails(CuSuiteList* testSuite, CuString* details); +#endif /* CU_TEST_H */ Added: branches/lucene2_3_2/src/contribs/contribs-lib-test/TestAnalysis.cpp =================================================================== --- branches/lucene2_3_2/src/contribs/contribs-lib-test/TestAnalysis.cpp (rev 0) +++ branches/lucene2_3_2/src/contribs/contribs-lib-test/TestAnalysis.cpp 2009-03-30 21:38:58 UTC (rev 2977) @@ -0,0 +1,199 @@ +/** + * Copyright 2003-2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "test.h" +#include "CLucene/analysis/cjk/CJKAnalyzer.h" +#include "CLucene/analysis/LanguageBasedAnalyzer.h" +#include "CLucene/snowball/SnowballFilter.h" + +#include <fcntl.h> +#ifdef _CL_HAVE_IO_H +#include <io.h> +#endif +#ifdef _CL_HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef _CL_HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef _CL_HAVE_DIRECT_H +#include <direct.h> +#endif +#include <errno.h> + +CL_NS_USE2(analysis, cjk) +CL_NS_USE2(analysis, snowball) + +void test(CuTest *tc, char* orig, Reader* reader, bool verbose, int64_t bytes) { + StandardAnalyzer analyzer; + TokenStream* stream = analyzer.tokenStream(NULL, reader); + + uint64_t start = Misc::currentTimeMillis(); + + int32_t count = 0; + Token t; + char atmp[LUCENE_MAX_WORD_LEN + 1]; + TCHAR ttmp[LUCENE_MAX_WORD_LEN + 1]; + for (; stream->next(&t);) { + if (verbose) { + CuMessage(tc, _T("Text=%s start=%d end=%d\n"), t.termBuffer(), t.startOffset(), t.endOffset()); + } + int len = t.termLength(); + + //use the lucene strlwr function (so copy to TCHAR first then back) + strncpy(atmp, orig + t.startOffset(), len); + atmp[len] = 0; + STRCPY_AtoT(ttmp, atmp, len + 1); + _tcslwr(ttmp); + + if (_tcsncmp(t.termBuffer(), ttmp, len) != 0) { + TCHAR err[1024]; + _sntprintf(err, 1024, _T("token '%s' didnt match original text at %d-%d"), t.termBuffer(), t.startOffset(), t.endOffset()); + CuAssert(tc, err, false); + } + + // _CLDELETE(t); + count++; + } + + uint64_t end = Misc::currentTimeMillis(); + int64_t time = end - start; + CuMessageA(tc, "%d milliseconds to extract ", time); + CuMessageA(tc, "%d tokens\n", count); + CuMessageA(tc, "%f microseconds/token\n", (time * 1000.0) / count); + CuMessageA(tc, "%f megabytes/hour\n", (bytes * 1000.0 * 60.0 * 60.0) / (time * 1000000.0)); + + _CLDELETE(stream); +} + +void _testFile(CuTest *tc, const char* fname, bool verbose) { + struct fileStat buf; + fileStat(fname, &buf); + int64_t bytes = buf.st_size; + + char* orig = _CL_NEWARRAY(char, bytes); 
+ { + FILE* f = fopen(fname, "rb"); + int64_t r = fread(orig, bytes, 1, f); + fclose(f); + } + + CuMessageA(tc, " Reading test file containing %d bytes.\n", bytes); + jstreams::FileReader fr(fname, "ASCII"); + const TCHAR *start; + size_t total = 0; + int32_t numRead; + do { + numRead = fr.read(start, 1, 0); + if (numRead == -1) + break; + total += numRead; + } while (numRead >= 0); + + jstreams::FileReader reader(fname, "ASCII"); + + test(tc, orig, &reader, verbose, total); + + _CLDELETE_CaARRAY(orig); +} + +void testFile(CuTest *tc) { + char loc[1024]; + strcpy(loc, clucene_data_location); + strcat(loc, "/reuters-21578/feldman-cia-worldfactbook-data.txt"); + CuAssert(tc, _T("reuters-21578/feldman-cia-worldfactbook-data.txt does not exist"), Misc::dir_Exists(loc)); + + _testFile(tc, loc, false); +} + +void _testCJK(CuTest *tc, const char* astr, const char** results, bool ignoreSurrogates = true) { + SimpleInputStreamReader r(new AStringReader(astr), SimpleInputStreamReader::UTF8); + + CJKTokenizer* tokenizer = _CLNEW CJKTokenizer(&r); + tokenizer->setIgnoreSurrogates(ignoreSurrogates); + int pos = 0; + Token tok; + TCHAR tres[LUCENE_MAX_WORD_LEN]; + + while (results[pos] != NULL) { + CLUCENE_ASSERT(tokenizer->next(&tok) == true); + + lucene_utf8towcs(tres, results[pos], LUCENE_MAX_WORD_LEN); + CuAssertStrEquals(tc, _T("unexpected token value"), tres, tok.termBuffer()); + + pos++; + } + CLUCENE_ASSERT(!tokenizer->next(&tok)); + + _CLDELETE(tokenizer); +} + +void testCJK(CuTest *tc) { + //utf16 test + //we have a very large unicode character: + //xEFFFF = utf8(F3 AF BF BF) = utf16(DB7F DFFF) = utf8(ED AD BF, ED BF BF) + static const char* exp3[4] = {"\xED\xAD\xBF\xED\xBF\xBF\xe5\x95\xa4", "\xe5\x95\xa4\xED\xAD\xBF\xED\xBF\xBF", "", NULL}; + _testCJK(tc, "\xED\xAD\xBF\xED\xBF\xBF\xe5\x95\xa4\xED\xAD\xBF\xED\xBF\xBF", exp3, false); + + static const char* exp1[5] = {"test", "t\xc3\xbcrm", "values", NULL}; + _testCJK(tc, "test t\xc3\xbcrm values", exp1); + + static 
const char* exp2[6] = {"a", "\xe5\x95\xa4\xe9\x85\x92", "\xe9\x85\x92\xe5\x95\xa4", "", "x", NULL};
+    _testCJK(tc, "a\xe5\x95\xa4\xe9\x85\x92\xe5\x95\xa4x", exp2);
+}
+
+// Tokenises the same text twice with a LanguageBasedAnalyzer: first as
+// English without stemming, then as Dutch with stemming enabled.
+void testLanguageBasedAnalyzer(CuTest* tc) {
+    LanguageBasedAnalyzer a;
+    CL_NS(util)::StringReader reader(_T("he abhorred accentueren"));
+    reader.mark(50);
+    TokenStream* ts;
+    Token t;
+
+    //test with english
+    a.setLanguage(_T("English"));
+    a.setStem(false);
+    ts = a.tokenStream(_T("contents"), &reader);
+
+    CLUCENE_ASSERT(ts->next(&t));
+    CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("he")) == 0);
+    CLUCENE_ASSERT(ts->next(&t));
+    CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("abhorred")) == 0);
+    _CLDELETE(ts);
+
+    //now test with dutch
+    reader.reset(0);
+    a.setLanguage(_T("Dutch"));
+    a.setStem(true);
+    ts = a.tokenStream(_T("contents"), &reader);
+
+    CLUCENE_ASSERT(ts->next(&t));
+    CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("he")) == 0);
+    CLUCENE_ASSERT(ts->next(&t));
+    CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("abhorred")) == 0);
+    CLUCENE_ASSERT(ts->next(&t));
+    // with stemming on, "accentueren" is expected to come back as "accentuer"
+    CLUCENE_ASSERT(_tcscmp(t.termBuffer(), _T("accentuer")) == 0);
+    _CLDELETE(ts);
+}
+
+// Builds the analysis suite from the three tests of this file.
+CuSuite *testanalysis(void) {
+    CuSuite *suite = CuSuiteNew(_T("CLucene Analysis Test"));
+
+    SUITE_ADD_TEST(suite, testFile);
+    SUITE_ADD_TEST(suite, testCJK);
+    SUITE_ADD_TEST(suite, testLanguageBasedAnalyzer);
+
+    return suite;
+}
+// EOF
Added: branches/lucene2_3_2/src/contribs/contribs-lib-test/TestHighlight.cpp
===================================================================
--- branches/lucene2_3_2/src/contribs/contribs-lib-test/TestHighlight.cpp (rev 0)
+++ branches/lucene2_3_2/src/contribs/contribs-lib-test/TestHighlight.cpp 2009-03-30 21:38:58 UTC (rev 2977)
@@ -0,0 +1,263 @@
+#include "test.h"
+
+#include "CLucene/store/RAMDirectory.h"
+#include "CLucene/highlighter/QueryTermExtractor.h"
+#include "CLucene/highlighter/QueryScorer.h"
+#include "CLucene/highlighter/Highlighter.h"
+#include "CLucene/highlighter/TokenGroup.h"
+#include "CLucene/highlighter/SimpleHTMLFormatter.h"
+#include "CLucene/highlighter/SimpleFragmenter.h"
+
+CL_NS_USE2(search, highlight);
+
+// Shared state of the highlighter tests: every test in this file works on
+// these file-level objects.
+RAMDirectory hl_ramDir;
+StandardAnalyzer hl_analyzer;
+
+
+const TCHAR* hl_FIELD_NAME = _T("contents");
+Query* hl_originalquery = NULL;
+Query* hl_query = NULL;
+Query* hl_rewrittenquery = NULL;
+IndexReader* hl_reader = NULL;
+Searcher* hl_searcher = NULL;
+Hits* hl_hits = NULL;
+
+// Formatter that wraps scoring terms in <b>...</b> and counts how many terms
+// it highlighted, so tests can assert on that count.
+class hl_formatterCls : public Formatter {
+public:
+    int numHighlights;
+
+    hl_formatterCls() {
+        numHighlights = 0;
+    }
+
+    ~hl_formatterCls() {
+    }
+
+    // Returns a newly allocated string: a plain copy of originalText when the
+    // group's total score is <= 0, otherwise the text wrapped in <b></b>.
+    TCHAR* highlightTerm(const TCHAR* originalText, const TokenGroup* group) {
+        if (group->getTotalScore() <= 0) {
+            return STRDUP_TtoT(originalText);
+        }
+        numHighlights++; //update stats used in assertions
+
+        // 7 = length of "<b>" plus length of "</b>"
+        int len = _tcslen(originalText) + 7;
+        TCHAR* ret = _CL_NEWARRAY(TCHAR, len + 1);
+        _tcscpy(ret, _T("<b>"));
+        _tcscat(ret, originalText);
+        _tcscat(ret, _T("</b>"));
+
+        return ret;
+    }
+};
+hl_formatterCls hl_formatter;
+
+
+// NULL-terminated list of sample documents used by the highlighter tests.
+const TCHAR* hl_texts[6] ={
+    _T("Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot"),
+    _T("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy"),
+    _T("JFK has been shot"),
+    _T("John Kennedy has been shot"),
+    _T("This text has a typo in referring to Keneddy"),
+    NULL
+};
+
+// Highlights every current hit with hl_formatter (up to 2 fragments from a
+// SimpleFragmenter(20), joined by "...") and logs the result via CuMessage.
+void doStandardHighlights(CuTest* tc) {
+    QueryScorer scorer(hl_query);
+    Highlighter highlighter(&hl_formatter, &scorer);
+    SimpleFragmenter frag(20);
+    highlighter.setTextFragmenter(&frag);
+
+    for (int i = 0; i < hl_hits->length(); i++) {
+        const TCHAR* text = hl_hits->doc(i).get(hl_FIELD_NAME);
+        int maxNumFragmentsRequired = 2;
+        const TCHAR* fragmentSeparator = _T("...");
+        StringReader reader(text);
+        TokenStream* tokenStream = hl_analyzer.tokenStream(hl_FIELD_NAME, &reader);
+
+        TCHAR*
result =
+                highlighter.getBestFragments(
+                tokenStream,
+                text,
+                maxNumFragmentsRequired,
+                fragmentSeparator);
+
+        CuMessage(tc, _T("%s\n"), result == NULL ? _T("") : result);
+        _CLDELETE_CARRAY(result);
+        _CLDELETE(tokenStream);
+    }
+}
+
+// Parses `queryString` against hl_FIELD_NAME, rewrites it, runs the search
+// and stores the outcome in the hl_* globals. Also resets the highlight
+// counter of hl_formatter so each test starts counting from zero.
+void doSearching(CuTest* tc, const TCHAR* queryString) {
+    // the searcher is created on first use and then reused
+    if (hl_searcher == NULL)
+        hl_searcher = _CLNEW IndexSearcher(&hl_ramDir);
+
+    // drop the queries of the previous call; the rewritten query is deleted
+    // separately only when it is a different object than the original
+    if (hl_rewrittenquery != NULL && hl_originalquery != NULL) {
+        if (hl_originalquery != hl_rewrittenquery)
+            _CLDELETE(hl_rewrittenquery);
+        _CLDELETE(hl_originalquery);
+    }
+    hl_originalquery = QueryParser::parse(queryString, hl_FIELD_NAME, &hl_analyzer);
+
+    //for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) you must use a rewritten query!
+    // NOTE(review): hl_reader is initialised to NULL at file scope; presumably it is
+    // assigned during setup before this runs - confirm
+    hl_rewrittenquery = hl_originalquery->rewrite(hl_reader);
+    hl_query = hl_rewrittenquery;
+
+    TCHAR* s = hl_originalquery->toString(hl_FIELD_NAME);
+    CuMessage(tc, _T("Searching for: %s\n"), s == NULL ? _T("") : s);
+    _CLDELETE_CARRAY(s);
+
+    s = hl_rewrittenquery->toString(hl_FIELD_NAME);
+    CuMessage(tc, _T("Rewritten query: %s\n"), s == NULL ? _T("") : s);
+    _CLDELETE_CARRAY(s);
+
+    if (hl_hits != NULL)
+        _CLDELETE(hl_hits);
+    hl_hits = hl_searcher->search(hl_query);
+    hl_formatter.numHighlights = 0;
+}
+
+// Highlights the hits for "Kennedy" with a Highlighter built from the scorer
+// alone (no custom formatter) and logs the fragments.
+void testSimpleHighlighter(CuTest *tc) {
+    doSearching(tc, _T("Kennedy"));
+    QueryScorer scorer(hl_query);
+    Highlighter highlighter(&scorer);
+    SimpleFragmenter fragmenter(40);
+
+    highlighter.setTextFragmenter(&fragmenter);
+    int maxNumFragmentsRequired = 2;
+    for (int i = 0; i < hl_hits->length(); i++) {
+        const TCHAR* text = hl_hits->doc(i).get(hl_FIELD_NAME);
+        StringReader reader(text);
+        TokenStream* tokenStream = hl_analyzer.tokenStream(hl_FIELD_NAME, &reader);
+
+        TCHAR* result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, _T("..."));
+        CuMessage(tc, _T("%s\n"), result == NULL ?
_T("") : result);
+        _CLDELETE_CARRAY(result);
+        _CLDELETE(tokenStream);
+    }
+    //Not sure we can assert anything here - just running to check we dont throw any exceptions
+}
+
+// Each test below runs one query through doSearching() and
+// doStandardHighlights() and then asserts the number of highlighted terms
+// counted by hl_formatter.
+
+// Fuzzy query "Kinnedy~": expects 5 highlights.
+void testGetFuzzyFragments(CuTest *tc) {
+    doSearching(tc, _T("Kinnedy~"));
+    doStandardHighlights(tc);
+
+    TCHAR msg[1024];
+    _sntprintf(msg, 1024, _T("Failed to find correct number of highlights %d found"), hl_formatter.numHighlights);
+    CuAssert(tc, msg, hl_formatter.numHighlights == 5);
+}
+
+// Single-character wildcard "K?nnedy": expects 4 highlights.
+void testGetWildCardFragments(CuTest *tc) {
+    doSearching(tc, _T("K?nnedy"));
+    doStandardHighlights(tc);
+
+    TCHAR msg[1024];
+    _sntprintf(msg, 1024, _T("Failed to find correct number of highlights %d found"), hl_formatter.numHighlights);
+    CuAssert(tc, msg, hl_formatter.numHighlights == 4);
+}
+
+// Plain term "Kennedy": expects 4 highlights.
+void testGetBestFragmentsSimpleQuery(CuTest *tc) {
+    doSearching(tc, _T("Kennedy"));
+    doStandardHighlights(tc);
+
+    TCHAR msg[1024];
+    _sntprintf(msg, 1024, _T("Failed to find correct number of highlights %d found"), hl_formatter.numHighlights);
+    CuAssert(tc, msg, hl_formatter.numHighlights == 4);
+}
+
+// Wildcard in the middle, "K*dy": expects 5 highlights.
+void testGetMidWildCardFragments(CuTest *tc) {
+    doSearching(tc, _T("K*dy"));
+    doStandardHighlights(tc);
+
+    TCHAR msg[1024];
+    _sntprintf(msg, 1024, _T("Failed to find correct number of highlights %d found"), hl_formatter.numHighlights);
+    CuAssert(tc, msg, hl_formatter.numHighlights == 5);
+}
+
+// Phrase query "John Kennedy": expects 2 highlights.
+void testGetBestFragmentsPhrase(CuTest *tc) {
+    doSearching(tc, _T("\"John Kennedy\""));
+    doStandardHighlights(tc);
+
+    TCHAR msg[1024];
+    _sntprintf(msg, 1024, _T("Failed to find correct number of highlights %d found"), hl_formatter.numHighlights);
+    CuAssert(tc, msg, hl_formatter.numHighlights == 2);
+}
+
+// Term plus prefix query "John Ken*": expects 6 highlights.
+void testGetBestFragmentsMultiTerm(CuTest *tc) {
+    doSearching(tc, _T("John Ken*"));
+    doStandardHighlights(tc);
+
+    TCHAR msg[1024];
+    _sntprintf(msg, 1024, _T("Failed to find correct number of highlights %d found"), hl_formatter.numHighlights);
+    CuAssert(tc, msg, hl_formatter.numHighlights == 6);
+}
+
+// Boolean query "JFK OR Kennedy": runs the search and the standard
+// highlighting pass, then expects 5 highlighted terms.
+void testGetBestFragmentsWithOr(CuTest *tc) {
+    doSearching(tc, _T("JFK OR Kennedy"));
+    doStandardHighlights(tc);
+
+    TCHAR msg[1024];
+    _sntprintf(msg, 1024, _T("Failed to find correct number of highlights %d found"), hl_formatter.numHighlights);
+    CuAssert(tc, msg, hl_formatter.numHighlights == 5);
+}
+
+// Range query on the contents field, [Kannedy TO Kznnedy]: expects 5
+// highlighted terms. The query text is built with the field name prefixed.
+void testGetRangeFragments(CuTest *tc) {
+    TCHAR qry[200];
+    _sntprintf(qry, 200, _T("%s:[Kannedy TO Kznnedy]"), hl_FIELD_NAME); //bug?needs lower case
+
+    doSearching(tc, qry);
+    doStandardHighlights(tc);
+
+    TCHAR msg[1024];
+    _sntprintf(msg, 1024, _T("Failed to find correct number of highlights %d found"), hl_formatter.numHighlights);
+    CuAssert(tc, msg, hl_formatter.numHighlights == 5);
+}
+
+void setupHighlighter(CuTest *tc) {
+    IndexWriter writer(&hl_ramDir, &hl_analyzer, true);
+    for (int i = 0; hl_texts[i] != NULL; i++) {
+        Document d;
+        d.add(*_CLNEW Field(hl_FIELD_NAME, hl_texts[i], Field::STORE_YES | Field::INDEX_TOKENIZED));
+        writer.addD... [truncated message content] |