|
From: <ust...@us...> - 2009-07-08 10:09:38
|
Revision: 3018
http://clucene.svn.sourceforge.net/clucene/?rev=3018&view=rev
Author: ustramooner
Date: 2009-07-08 10:09:32 +0000 (Wed, 08 Jul 2009)
Log Message:
-----------
Fixes mem-leaks in QP, and adds more tests
Modified Paths:
--------------
branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp
branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h
branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp
branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp
Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 10:08:46 UTC (rev 3017)
+++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.cpp 2009-07-08 10:09:32 UTC (rev 3018)
@@ -542,6 +542,7 @@
const size_t inputLen = _tcslen(input);
bool outputOwned=false;
if (output == NULL){
+ // TODO: Perhaps we can re-use an inner buffer instead of creating new char arrays here and in several other places
output = _CL_NEWARRAY(TCHAR, inputLen + 1);
outputOwned=true;
}
@@ -565,7 +566,12 @@
for (size_t i = 0; i < inputLen; i++) {
TCHAR curChar = input[i];
if (codePointMultiplier > 0) {
- codePoint += hexToInt(curChar) * codePointMultiplier;
+ try {
+ codePoint += hexToInt(curChar) * codePointMultiplier;
+ } catch (CLuceneError& e) {
+ if (outputOwned)_CLDELETE_LCARRAY(output);
+ throw e;
+ }
codePointMultiplier = codePointMultiplier >> 4;
if (codePointMultiplier == 0) {
output[length++] = (TCHAR)codePoint;
@@ -621,7 +627,7 @@
}
//static
-TCHAR* QueryParser::escape(TCHAR* s) {
+TCHAR* QueryParser::escape(const TCHAR* s) {
size_t len = _tcslen(s);
// Create a StringBuffer object a bit longer from the length of the query (to prevent some reallocations),
// and declare we are the owners of the buffer (to save on a copy)
@@ -703,15 +709,16 @@
}
Query* QueryParser::TopLevelQuery(TCHAR* _field) { // Builds the query through fQuery(_field), then consumes token kind 0; _field is freed on every path when it is not the member 'field'.
- Query* q;
+ Query* q = NULL;; // initialised so the catch block never deletes an indeterminate pointer when fQuery() throws
try {
q = fQuery(_field);
+ jj_consume_token(0); // now inside the try: a failure here also releases _field and q
} catch (CLuceneError& e) {
if (_field!=field)_CLDELETE_LCARRAY(_field);
+ _CLLDELETE(q); // drop whatever query was built before the error
throw e;
}
if (_field!=field)_CLDELETE_LCARRAY(_field);
- jj_consume_token(0);
return q;
}
Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h 2009-07-08 10:08:46 UTC (rev 3017)
+++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParser.h 2009-07-08 10:09:32 UTC (rev 3018)
@@ -447,7 +447,7 @@
*
* @memory caller is responsible to free the returned string
*/
- static TCHAR* escape(TCHAR* s);
+ static TCHAR* escape(const TCHAR* s);
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
Modified: branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp 2009-07-08 10:08:46 UTC (rev 3017)
+++ branches/lucene2_3_2/src/core/CLucene/queryParser/QueryParserTokenManager.cpp 2009-07-08 10:09:32 UTC (rev 3018)
@@ -1228,6 +1228,7 @@
}
// TODO: TokenMgrError.LEXICAL_ERROR ?
TCHAR* err = getLexicalError(EOFSeen, curLexState, error_line, error_column, error_after, curChar);
+ _CLDELETE_LCARRAY(error_after);
_CLTHROWT_DEL(CL_ERR_TokenMgr,err);
}
}
@@ -1235,6 +1236,7 @@
TCHAR* QueryParserTokenManager::getLexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine,
int32_t errorColumn, TCHAR* errorAfter, TCHAR curChar)
{
+ TCHAR* tmp = NULL;
CL_NS(util)::StringBuffer sb(100, false);
sb.append(_T("Lexical error at line "));
sb.appendInt(errorLine);
@@ -1252,7 +1254,11 @@
sb.append(_T("), "));
}
sb.append(_T("after : \""));
- sb.append(addEscapes(errorAfter));
+
+ tmp = addEscapes(errorAfter);
+ sb.append(tmp);
+ _CLDELETE_LCARRAY(tmp);
+
sb.appendChar(_T('"'));
return sb.getBuffer();
}
Modified: branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp
===================================================================
--- branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2009-07-08 10:08:46 UTC (rev 3017)
+++ branches/lucene2_3_2/src/test/queryParser/TestQueryParser.cpp 2009-07-08 10:09:32 UTC (rev 3018)
@@ -168,7 +168,7 @@
CuAssertTrue(tc,ret);
}
-void assertParseException(CuTest *tc,TCHAR* queryString) {
+void assertParseException(CuTest *tc,const TCHAR* queryString) {
try {
Query* q = getQuery(tc,queryString, NULL, CL_ERR_Parse);
} catch (CLuceneError&){
@@ -177,6 +177,44 @@
CuFail(tc,_T("ParseException expected, not thrown"));
}
+void assertEscapedQueryEquals(CuTest *tc,const TCHAR* query, Analyzer* a, const TCHAR* result){ // Fails the test unless QueryParser::escape(query) equals 'result'; the analyzer argument 'a' is not used here.
+ TCHAR* escapedQuery = QueryParser::escape(query); // escape() hands ownership of the returned string to the caller
+ if (_tcscmp(escapedQuery, result) != 0) {
+ TCHAR str[CL_MAX_PATH];
+ _tcscpy(str,escapedQuery); // stack copy, so the heap string can be freed before the failure is reported
+ _CLDELETE_LCARRAY(escapedQuery);
+ CuFail(tc, _T("Query /%s/ yielded /%s/, expecting /%s/\n"), query, str, result); // report the copy: escapedQuery was freed on the line above
+ }
+ _CLDELETE_LCARRAY(escapedQuery);
+}
+
+Query* getQueryDOA(const TCHAR* query, Analyzer* a=NULL) { // Parses 'query' on the default field "field" with AND as the default operator and returns the resulting Query.
+ bool bOwnsAnalyzer=false;
+ if (a == NULL){
+ a = _CLNEW SimpleAnalyzer(); // no analyzer supplied: create one here and delete it again before returning
+ bOwnsAnalyzer=true;
+ }
+ QueryParser* qp = _CLNEW QueryParser(_T("field"), a);
+ qp->setDefaultOperator(QueryParser::AND_OPERATOR);
+ Query* q = qp->parse(query); // NOTE(review): if parse() throws, qp and an analyzer created above are not released - confirm whether that matters for these tests
+ _CLLDELETE(qp);
+ if (bOwnsAnalyzer) _CLLDELETE(a);
+ return q;
+}
+
+void assertQueryEqualsDOA(CuTest *tc,const TCHAR* query, Analyzer* a, const TCHAR* result){ // Parses 'query' via getQueryDOA() and fails the test unless its toString("field") form equals 'result'; 'result' is only read, so it is taken as const and string literals bind to it cleanly.
+ Query* q = getQueryDOA(query, a);
+ TCHAR* s = q->toString(_T("field")); // string is released below with _CLDELETE_LCARRAY
+ _CLLDELETE(q); // the query is no longer needed once its string form exists
+ if (_tcscmp(s,result)!=0) {
+ TCHAR str[CL_MAX_PATH];
+ _tcscpy(str,s); // stack copy, so 's' can be freed before the failure is reported
+ _CLDELETE_LCARRAY(s);
+ CuFail(tc,_T("Query /%s/ yielded /%s/, expecting /%s/"),query, str, result);
+ }
+ _CLDELETE_LCARRAY(s);
+}
+
/// END Helper functions and classes
void testSimple(CuTest *tc) {
@@ -528,6 +566,122 @@
// TODO: testLegacyDateRange, testDateRange
+void testQueryStringEscaping(CuTest *tc) { // Checks QueryParser::escape(): each input must come back with its query-syntax characters prefixed by a backslash, exactly as spelled in the expected string.
+ WhitespaceAnalyzer a;
+
+ assertEscapedQueryEquals(tc, _T("a-b:c"), &a, _T("a\\-b\\:c"));
+ assertEscapedQueryEquals(tc,_T("a+b:c"), &a, _T("a\\+b\\:c"));
+ assertEscapedQueryEquals(tc, _T("a:b:c"), &a, _T("a\\:b\\:c"));
+ assertEscapedQueryEquals(tc, _T("a\\b:c"), &a, _T("a\\\\b\\:c")); // a backslash in the input is itself escaped
+
+ assertEscapedQueryEquals(tc,_T("a:b-c"), &a, _T("a\\:b\\-c"));
+ assertEscapedQueryEquals(tc,_T("a:b+c"), &a, _T("a\\:b\\+c"));
+ assertEscapedQueryEquals(tc,_T("a:b:c"), &a, _T("a\\:b\\:c"));
+ assertEscapedQueryEquals(tc,_T("a:b\\c"), &a, _T("a\\:b\\\\c"));
+
+ assertEscapedQueryEquals(tc,_T("a:b-c*"), &a, _T("a\\:b\\-c\\*"));
+ assertEscapedQueryEquals(tc,_T("a:b+c*"), &a, _T("a\\:b\\+c\\*"));
+ assertEscapedQueryEquals(tc,_T("a:b:c*"), &a, _T("a\\:b\\:c\\*"));
+
+ assertEscapedQueryEquals(tc,_T("a:b\\\\c*"), &a, _T("a\\:b\\\\\\\\c\\*")); // two input backslashes become four
+
+ assertEscapedQueryEquals(tc,_T("a:b-?c"), &a, _T("a\\:b\\-\\?c"));
+ assertEscapedQueryEquals(tc,_T("a:b+?c"), &a, _T("a\\:b\\+\\?c"));
+ assertEscapedQueryEquals(tc,_T("a:b:?c"), &a, _T("a\\:b\\:\\?c"));
+
+ assertEscapedQueryEquals(tc,_T("a:b?c"), &a, _T("a\\:b\\?c"));
+
+ assertEscapedQueryEquals(tc,_T("a:b-c~"), &a, _T("a\\:b\\-c\\~"));
+ assertEscapedQueryEquals(tc,_T("a:b+c~"), &a, _T("a\\:b\\+c\\~"));
+ assertEscapedQueryEquals(tc,_T("a:b:c~"), &a, _T("a\\:b\\:c\\~"));
+ assertEscapedQueryEquals(tc,_T("a:b\\c~"), &a, _T("a\\:b\\\\c\\~"));
+
+ assertEscapedQueryEquals(tc,_T("[ a - TO a+ ]"), NULL, _T("\\[ a \\- TO a\\+ \\]")); // the helper never touches its analyzer argument, so NULL is passed for the range-style inputs
+ assertEscapedQueryEquals(tc,_T("[ a : TO a~ ]"), NULL, _T("\\[ a \\: TO a\\~ \\]"));
+ assertEscapedQueryEquals(tc,_T("[ a\\ TO a* ]"), NULL, _T("\\[ a\\\\ TO a\\* \\]"));
+
+ // LUCENE-881
+ assertEscapedQueryEquals(tc,_T("|| abc ||"), &a, _T("\\|\\| abc \\|\\|"));
+ assertEscapedQueryEquals(tc,_T("&& abc &&"), &a, _T("\\&\\& abc \\&\\&"));
+}
+
+void testTabNewlineCarriageReturn(CuTest *tc){ // With AND as the default operator (set by getQueryDOA), two terms separated by any mix of space, newline, carriage return or tab must always print as "+weltbank +worlbank".
+ assertQueryEqualsDOA(tc,_T("+weltbank +worlbank"), NULL, // baseline: a plain space between the terms
+ _T("+weltbank +worlbank"));
+
+ assertQueryEqualsDOA(tc,_T("+weltbank\n+worlbank"), NULL, // newline variants
+ _T("+weltbank +worlbank"));
+ assertQueryEqualsDOA(tc,_T("weltbank \n+worlbank"), NULL,
+ _T("+weltbank +worlbank"));
+ assertQueryEqualsDOA(tc,_T("weltbank \n +worlbank"), NULL,
+ _T("+weltbank +worlbank"));
+
+ assertQueryEqualsDOA(tc,_T("+weltbank\r+worlbank"), NULL, // carriage-return variants
+ _T("+weltbank +worlbank"));
+ assertQueryEqualsDOA(tc,_T("weltbank \r+worlbank"), NULL,
+ _T("+weltbank +worlbank"));
+ assertQueryEqualsDOA(tc,_T("weltbank \r +worlbank"), NULL,
+ _T("+weltbank +worlbank"));
+
+ assertQueryEqualsDOA(tc,_T("+weltbank\r\n+worlbank"), NULL, // CR+LF variants
+ _T("+weltbank +worlbank"));
+ assertQueryEqualsDOA(tc,_T("weltbank \r\n+worlbank"), NULL,
+ _T("+weltbank +worlbank"));
+ assertQueryEqualsDOA(tc,_T("weltbank \r\n +worlbank"), NULL,
+ _T("+weltbank +worlbank"));
+ assertQueryEqualsDOA(tc,_T("weltbank \r \n +worlbank"), NULL,
+ _T("+weltbank +worlbank"));
+
+ assertQueryEqualsDOA(tc,_T("+weltbank\t+worlbank"), NULL, // tab variants
+ _T("+weltbank +worlbank"));
+ assertQueryEqualsDOA(tc,_T("weltbank \t+worlbank"), NULL,
+ _T("+weltbank +worlbank"));
+ assertQueryEqualsDOA(tc,_T("weltbank \t +worlbank"), NULL,
+ _T("+weltbank +worlbank"));
+}
+
+void testSimpleDAO(CuTest *tc){ // With AND as the default operator (set by getQueryDOA), an unprefixed term prints as required ("+term"), while an explicit "+" or "-" prefix is kept as written.
+ assertQueryEqualsDOA(tc,_T("term term term"), NULL, _T("+term +term +term"));
+ assertQueryEqualsDOA(tc,_T("term +term term"), NULL, _T("+term +term +term"));
+ assertQueryEqualsDOA(tc,_T("term term +term"), NULL, _T("+term +term +term"));
+ assertQueryEqualsDOA(tc,_T("term +term +term"), NULL, _T("+term +term +term"));
+ assertQueryEqualsDOA(tc,_T("-term term term"), NULL, _T("-term +term +term")); // the prohibited term keeps its "-" prefix
+}
+
+void testBoost(CuTest *tc){ // Checks that a "^boost" suffix parses to a non-NULL query and, for non-stop-word input, that getBoost() returns the given value.
+ const TCHAR* stopWords[] = {_T("on"), NULL}; // NULL-terminated list holding the single stop word "on"
+ StandardAnalyzer* oneStopAnalyzer = _CLNEW StandardAnalyzer(reinterpret_cast<const TCHAR**>(&stopWords)); // NOTE(review): casts the address of the array itself; passing stopWords directly presumably works without a cast - confirm against the constructor signature
+ QueryParser* qp = _CLNEW QueryParser(_T("field"), oneStopAnalyzer);
+ Query* q = qp->parse(_T("on^1.0")); // boosted stop word: only a non-NULL result is required
+ CLUCENE_ASSERT(q != NULL);
+ _CLLDELETE(q);
+ q = qp->parse(_T("\"hello\"^2.0")); // quoted term with a boost
+ CLUCENE_ASSERT(q != NULL);
+ CLUCENE_ASSERT(q->getBoost() == 2.0f);
+ _CLLDELETE(q);
+ q = qp->parse(_T("hello^2.0")); // bare term with a boost
+ CLUCENE_ASSERT(q != NULL);
+ CLUCENE_ASSERT(q->getBoost() == 2.0f);
+ _CLLDELETE(q);
+ q = qp->parse(_T("\"on\"^1.0")); // quoted stop word: again only non-NULL is required
+ CLUCENE_ASSERT(q != NULL);
+ _CLLDELETE(q);
+ _CLLDELETE(qp);
+ _CLLDELETE(oneStopAnalyzer);
+
+ StandardAnalyzer a; // default-constructed analyzer, used for the "the" case below
+ QueryParser* qp2 = _CLNEW QueryParser(_T("field"), &a);
+ q = qp2->parse(_T("the^3"));
+ // "the" is a stop word so the result is an empty query:
+ CLUCENE_ASSERT(q != NULL);
+ TCHAR* tmp = q->toString();
+ CLUCENE_ASSERT( _tcscmp(tmp, _T("")) == 0 );
+ _CLDELETE_LCARRAY(tmp);
+ CLUCENE_ASSERT(1.0f == q->getBoost()); // the boost of 3 is expected not to be applied to the empty query
+ _CLLDELETE(q);
+ _CLLDELETE(qp2);
+}
+
void testMatchAllDocs(CuTest *tc) {
WhitespaceAnalyzer a;
QueryParser* qp = _CLNEW QueryParser(_T("field"), &a);
@@ -550,16 +704,19 @@
CuSuite *suite = CuSuiteNew(_T("CLucene Query Parser Test"));
SUITE_ADD_TEST(suite, testSimple);
+ SUITE_ADD_TEST(suite, testPunct);
+ SUITE_ADD_TEST(suite, testSlop);
+ SUITE_ADD_TEST(suite, testNumber);
+ SUITE_ADD_TEST(suite, testWildcard);
SUITE_ADD_TEST(suite, testLeadingWildcardType);
SUITE_ADD_TEST(suite, testQPA);
+ SUITE_ADD_TEST(suite, testRange);
SUITE_ADD_TEST(suite, testEscaped);
- SUITE_ADD_TEST(suite, testNumber);
- SUITE_ADD_TEST(suite, testPunct);
+ SUITE_ADD_TEST(suite, testQueryStringEscaping);
+ SUITE_ADD_TEST(suite, testTabNewlineCarriageReturn);
+ SUITE_ADD_TEST(suite, testSimpleDAO);
+ SUITE_ADD_TEST(suite, testBoost);
- SUITE_ADD_TEST(suite, testSlop);
- SUITE_ADD_TEST(suite, testRange);
- SUITE_ADD_TEST(suite, testWildcard);
-
SUITE_ADD_TEST(suite, testMatchAllDocs);
return suite;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|