From: <ust...@us...> - 2009-07-08 10:09:38
|
Revision: 3018 http://clucene.svn.sourceforge.net/clucene/?rev=3018&view=rev Author: ustramooner Date: 2009-07-08 10:09:32 +0000 (Wed, 08 Jul 2009) Log Message: ----------- Fixes mem-leaks in QP, and adds more tests Modified Paths: -------------- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 10:08:46 UTC (rev 3017) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 10:09:32 UTC (rev 3018) @@ -542,6 +542,7 @@ const size_t inputLen = _tcslen(input); bool outputOwned=false; if (output == NULL){ + // TODO: Perhaps we can re-use an inner buffer instead of creating new char arrays here and in several other places output = _CL_NEWARRAY(TCHAR, inputLen + 1); outputOwned=true; } @@ -565,7 +566,12 @@ for (size_t i = 0; i < inputLen; i++) { TCHAR curChar = input[i]; if (codePointMultiplier > 0) { - codePoint += hexToInt(curChar) * codePointMultiplier; + try { + codePoint += hexToInt(curChar) * codePointMultiplier; + } catch (CLuceneError& e) { + if (outputOwned)_CLDELETE_LCARRAY(output); + throw e; + } codePointMultiplier = codePointMultiplier >> 4; if (codePointMultiplier == 0) { output[length++] = (TCHAR)codePoint; @@ -621,7 +627,7 @@ } //static -TCHAR* QueryParser::escape(TCHAR* s) { +TCHAR* QueryParser::escape(const TCHAR* s) { size_t len = _tcslen(s); // Create a StringBuffer object a bit longer from the length of the query (to prevent some reallocations), // and declare we are the owners of the buffer (to save on a copy) @@ -703,15 +709,16 @@ } Query* QueryParser::TopLevelQuery(TCHAR* _field) { - Query* q; + Query* q = NULL;; try { q = fQuery(_field); + jj_consume_token(0); } catch (CLuceneError& e) { if (_field!=field)_CLDELETE_LCARRAY(_field); + _CLLDELETE(q); throw e; } if (_field!=field)_CLDELETE_LCARRAY(_field); - jj_consume_token(0); return q; } Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h 2009-07-08 10:08:46 UTC (rev 3017) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h 2009-07-08 10:09:32 UTC (rev 3018) @@ -447,7 +447,7 @@ * * @memory caller is responsible to free the returned string */ - static TCHAR* escape(TCHAR* s); + static TCHAR* escape(const TCHAR* s); // * Query ::= ( Clause )* // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp =================================================================== --- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp 2009-07-08 10:08:46 UTC (rev 3017) +++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp 2009-07-08 10:09:32 UTC (rev 3018) @@ -1228,6 +1228,7 @@ } // TODO: TokenMgrError.LEXICAL_ERROR ? TCHAR* err = getLexicalError(EOFSeen, curLexState, error_line, error_column, error_after, curChar); + _CLDELETE_LCARRAY(error_after); _CLTHROWT_DEL(CL_ERR_TokenMgr,err); } } @@ -1235,6 +1236,7 @@ TCHAR* QueryParserTokenManager::getLexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine, int32_t errorColumn, TCHAR* errorAfter, TCHAR curChar) { + TCHAR* tmp = NULL; CL_NS(util)::StringBuffer sb(100, false); sb.append(_T("Lexical error at line ")); sb.appendInt(errorLine); @@ -1252,7 +1254,11 @@ sb.append(_T("), ")); } sb.append(_T("after : \"")); - sb.append(addEscapes(errorAfter)); + + tmp = addEscapes(errorAfter); + sb.append(tmp); + _CLDELETE_LCARRAY(tmp); + sb.appendChar(_T('"')); return sb.getBuffer(); } Modified: branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp =================================================================== --- branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2009-07-08 10:08:46 UTC (rev 3017) +++ branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2009-07-08 10:09:32 UTC (rev 3018) @@ -168,7 +168,7 @@ CuAssertTrue(tc,ret); } -void assertParseException(CuTest *tc,TCHAR* queryString) { +void assertParseException(CuTest *tc,const TCHAR* queryString) { try { Query* q = getQuery(tc,queryString, NULL, CL_ERR_Parse); } catch (CLuceneError&){ @@ -177,6 +177,44 @@ CuFail(tc,_T("ParseException expected, not thrown")); } +void assertEscapedQueryEquals(CuTest *tc,const TCHAR* query, Analyzer* a, const TCHAR* result){ + TCHAR* escapedQuery = QueryParser::escape(query); + if (_tcscmp(escapedQuery, result) != 0) { + TCHAR str[CL_MAX_PATH]; + _tcscpy(str,escapedQuery); + _CLDELETE_LCARRAY(escapedQuery); + CuFail(tc, _T("Query /%s/ yielded /%s/, expecting /%s/\n"), query, escapedQuery, result); + } + _CLDELETE_LCARRAY(escapedQuery); +} + +Query* getQueryDOA(const TCHAR* query, Analyzer* a=NULL) { + bool bOwnsAnalyzer=false; + if (a == NULL){ + a = _CLNEW SimpleAnalyzer(); + bOwnsAnalyzer=true; + } + QueryParser* qp = _CLNEW QueryParser(_T("field"), a); + qp->setDefaultOperator(QueryParser::AND_OPERATOR); + Query* q = qp->parse(query); + _CLLDELETE(qp); + if (bOwnsAnalyzer) _CLLDELETE(a); + return q; +} + +void assertQueryEqualsDOA(CuTest *tc,const TCHAR* query, Analyzer* a, TCHAR* result){ + Query* q = getQueryDOA(query, a); + TCHAR* s = q->toString(_T("field")); + _CLLDELETE(q); + if (_tcscmp(s,result)!=0) { + TCHAR str[CL_MAX_PATH]; + _tcscpy(str,s); + _CLDELETE_LCARRAY(s); + CuFail(tc,_T("Query /%s/ yielded /%s/, expecting /%s/"),query, str, result); + } + _CLDELETE_LCARRAY(s); +} + /// END Helper functions and classes void testSimple(CuTest *tc) { @@ -528,6 +566,122 @@ // TODO: testLegacyDateRange, testDateRange +void testQueryStringEscaping(CuTest *tc) { + WhitespaceAnalyzer a; + + assertEscapedQueryEquals(tc, _T("a-b:c"), &a, _T("a\\-b\\:c")); + assertEscapedQueryEquals(tc,_T("a+b:c"), &a, _T("a\\+b\\:c")); + assertEscapedQueryEquals(tc, _T("a:b:c"), &a, _T("a\\:b\\:c")); + assertEscapedQueryEquals(tc, _T("a\\b:c"), &a, _T("a\\\\b\\:c")); + + assertEscapedQueryEquals(tc,_T("a:b-c"), &a, _T("a\\:b\\-c")); + assertEscapedQueryEquals(tc,_T("a:b+c"), &a, _T("a\\:b\\+c")); + assertEscapedQueryEquals(tc,_T("a:b:c"), &a, _T("a\\:b\\:c")); + assertEscapedQueryEquals(tc,_T("a:b\\c"), &a, _T("a\\:b\\\\c")); + + assertEscapedQueryEquals(tc,_T("a:b-c*"), &a, _T("a\\:b\\-c\\*")); + assertEscapedQueryEquals(tc,_T("a:b+c*"), &a, _T("a\\:b\\+c\\*")); + assertEscapedQueryEquals(tc,_T("a:b:c*"), &a, _T("a\\:b\\:c\\*")); + + assertEscapedQueryEquals(tc,_T("a:b\\\\c*"), &a, _T("a\\:b\\\\\\\\c\\*")); + + assertEscapedQueryEquals(tc,_T("a:b-?c"), &a, _T("a\\:b\\-\\?c")); + assertEscapedQueryEquals(tc,_T("a:b+?c"), &a, _T("a\\:b\\+\\?c")); + assertEscapedQueryEquals(tc,_T("a:b:?c"), &a, _T("a\\:b\\:\\?c")); + + assertEscapedQueryEquals(tc,_T("a:b?c"), &a, _T("a\\:b\\?c")); + + assertEscapedQueryEquals(tc,_T("a:b-c~"), &a, _T("a\\:b\\-c\\~")); + assertEscapedQueryEquals(tc,_T("a:b+c~"), &a, _T("a\\:b\\+c\\~")); + assertEscapedQueryEquals(tc,_T("a:b:c~"), &a, _T("a\\:b\\:c\\~")); + assertEscapedQueryEquals(tc,_T("a:b\\c~"), &a, _T("a\\:b\\\\c\\~")); + + assertEscapedQueryEquals(tc,_T("[ a - TO a+ ]"), NULL, _T("\\[ a \\- TO a\\+ \\]")); + assertEscapedQueryEquals(tc,_T("[ a : TO a~ ]"), NULL, _T("\\[ a \\: TO a\\~ \\]")); + assertEscapedQueryEquals(tc,_T("[ a\\ TO a* ]"), NULL, _T("\\[ a\\\\ TO a\\* \\]")); + + // LUCENE-881 + assertEscapedQueryEquals(tc,_T("|| abc ||"), &a, _T("\\|\\| abc \\|\\|")); + assertEscapedQueryEquals(tc,_T("&& abc &&"), &a, _T("\\&\\& abc \\&\\&")); +} + +void testTabNewlineCarriageReturn(CuTest *tc){ + assertQueryEqualsDOA(tc,_T("+weltbank +worlbank"), NULL, + _T("+weltbank +worlbank")); + + assertQueryEqualsDOA(tc,_T("+weltbank\n+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \n+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \n +worlbank"), NULL, + _T("+weltbank +worlbank")); + + assertQueryEqualsDOA(tc,_T("+weltbank\r+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \r+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \r +worlbank"), NULL, + _T("+weltbank +worlbank")); + + assertQueryEqualsDOA(tc,_T("+weltbank\r\n+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \r\n+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \r\n +worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \r \n +worlbank"), NULL, + _T("+weltbank +worlbank")); + + assertQueryEqualsDOA(tc,_T("+weltbank\t+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \t+worlbank"), NULL, + _T("+weltbank +worlbank")); + assertQueryEqualsDOA(tc,_T("weltbank \t +worlbank"), NULL, + _T("+weltbank +worlbank")); +} + +void testSimpleDAO(CuTest *tc){ + assertQueryEqualsDOA(tc,_T("term term term"), NULL, _T("+term +term +term")); + assertQueryEqualsDOA(tc,_T("term +term term"), NULL, _T("+term +term +term")); + assertQueryEqualsDOA(tc,_T("term term +term"), NULL, _T("+term +term +term")); + assertQueryEqualsDOA(tc,_T("term +term +term"), NULL, _T("+term +term +term")); + assertQueryEqualsDOA(tc,_T("-term term term"), NULL, _T("-term +term +term")); +} + +void testBoost(CuTest *tc){ + const TCHAR* stopWords[] = {_T("on"), NULL}; + StandardAnalyzer* oneStopAnalyzer = _CLNEW StandardAnalyzer(reinterpret_cast<const TCHAR**>(&stopWords)); + QueryParser* qp = _CLNEW QueryParser(_T("field"), oneStopAnalyzer); + Query* q = qp->parse(_T("on^1.0")); + CLUCENE_ASSERT(q != NULL); + _CLLDELETE(q); + q = qp->parse(_T("\"hello\"^2.0")); + CLUCENE_ASSERT(q != NULL); + CLUCENE_ASSERT(q->getBoost() == 2.0f); + _CLLDELETE(q); + q = qp->parse(_T("hello^2.0")); + CLUCENE_ASSERT(q != NULL); + CLUCENE_ASSERT(q->getBoost() == 2.0f); + _CLLDELETE(q); + q = qp->parse(_T("\"on\"^1.0")); + CLUCENE_ASSERT(q != NULL); + _CLLDELETE(q); + _CLLDELETE(qp); + _CLLDELETE(oneStopAnalyzer); + + StandardAnalyzer a; + QueryParser* qp2 = _CLNEW QueryParser(_T("field"), &a); + q = qp2->parse(_T("the^3")); + // "the" is a stop word so the result is an empty query: + CLUCENE_ASSERT(q != NULL); + TCHAR* tmp = q->toString(); + CLUCENE_ASSERT( _tcscmp(tmp, _T("")) == 0 ); + _CLDELETE_LCARRAY(tmp); + CLUCENE_ASSERT(1.0f == q->getBoost()); + _CLLDELETE(q); + _CLLDELETE(qp2); +} + void testMatchAllDocs(CuTest *tc) { WhitespaceAnalyzer a; QueryParser* qp = _CLNEW QueryParser(_T("field"), &a); @@ -550,16 +704,19 @@ CuSuite *suite = CuSuiteNew(_T("CLucene Query Parser Test")); SUITE_ADD_TEST(suite, testSimple); + SUITE_ADD_TEST(suite, testPunct); + SUITE_ADD_TEST(suite, testSlop); + SUITE_ADD_TEST(suite, testNumber); + SUITE_ADD_TEST(suite, testWildcard); SUITE_ADD_TEST(suite, testLeadingWildcardType); SUITE_ADD_TEST(suite, testQPA); + SUITE_ADD_TEST(suite, testRange); SUITE_ADD_TEST(suite, testEscaped); - SUITE_ADD_TEST(suite, testNumber); - SUITE_ADD_TEST(suite, testPunct); + SUITE_ADD_TEST(suite, testQueryStringEscaping); + SUITE_ADD_TEST(suite, testTabNewlineCarriageReturn); + SUITE_ADD_TEST(suite, testSimpleDAO); + SUITE_ADD_TEST(suite, testBoost); - SUITE_ADD_TEST(suite, testSlop); - SUITE_ADD_TEST(suite, testRange); - SUITE_ADD_TEST(suite, testWildcard); - SUITE_ADD_TEST(suite, testMatchAllDocs); return suite; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |