|
From: <syn...@us...> - 2009-01-03 23:03:21
|
Revision: 2948
http://clucene.svn.sourceforge.net/clucene/?rev=2948&view=rev
Author: synhershko
Date: 2009-01-03 23:03:09 +0000 (Sat, 03 Jan 2009)
Log Message:
-----------
Introducing NumberTools
Tweaked StringBuffer a bit
Modified Paths:
--------------
branches/lucene2_3_2/src/core/CLucene/debug/error.h
branches/lucene2_3_2/src/core/CLucene.h
branches/lucene2_3_2/src/core/CMakeLists.txt
branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h
branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp
branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h
branches/lucene2_3_2/src/test/CMakeLists.txt
branches/lucene2_3_2/src/test/CuTest.cpp
branches/lucene2_3_2/src/test/test.h
branches/lucene2_3_2/src/test/tests.cpp
Added Paths:
-----------
branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp
branches/lucene2_3_2/src/core/CLucene/document/NumberTools.h
branches/lucene2_3_2/src/test/document/TestNumberTools.cpp
Modified: branches/lucene2_3_2/src/core/CLucene/debug/error.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/debug/error.h 2008-12-09 05:17:59 UTC (rev 2947)
+++ branches/lucene2_3_2/src/core/CLucene/debug/error.h 2009-01-03 23:03:09 UTC (rev 2948)
@@ -25,6 +25,7 @@
#define CL_ERR_UnknownOperator 14
#define CL_ERR_ConcurrentModification 15
#define CL_ERR_CorruptIndex 16
+#define CL_ERR_NumberFormat 17
Added: branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp (rev 0)
+++ branches/lucene2_3_2/src/core/CLucene/document/NumberTools.cpp 2009-01-03 23:03:09 UTC (rev 2948)
@@ -0,0 +1,81 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/_ApiHeader.h"
+
+// Defining function macros missing in specific environments
+#ifndef _ttoi
+ #define _ttoi(x) (int)_tcstoi64(x,NULL,10)
+#endif
+
+#include "NumberTools.h"
+#include "CLucene/util/Misc.h"
+#include "CLucene/util/StringBuffer.h"
+
+CL_NS_DEF(document)
+
+const TCHAR* NumberTools::MIN_STRING_VALUE = NEGATIVE_PREFIX _T("0000000000000"); // what longToString() returns for LUCENE_INT64_MIN_SHOULDBE
+const TCHAR* NumberTools::MAX_STRING_VALUE = POSITIVE_PREFIX _T("1y2p0ij32e8e7"); // expected encoding of LUCENE_INT64_MAX_SHOULDBE: prefix plus 13 base-36 digits
+
+/**
+ * Encodes a 64-bit integer as a fixed-width string of STR_SIZE characters:
+ * one sign prefix (NEGATIVE_PREFIX or POSITIVE_PREFIX) followed by the
+ * radix-NUMBERTOOLS_RADIX digits of the value, left-padded with '0'.
+ * Negative values are shifted by LUCENE_INT64_MAX_SHOULDBE + 1 before being
+ * encoded, so the digits that follow the prefix are always non-negative.
+ *
+ * @param l value to encode
+ * @return a null-terminated buffer of STR_SIZE characters allocated with
+ *         _CL_NEWARRAY on every path; the caller owns it
+ */
+TCHAR* NumberTools::longToString(int64_t l)
+{
+    // One allocation shared by every path, so the caller has a single release rule.
+    TCHAR* buf = _CL_NEWARRAY(TCHAR, STR_SIZE + 1);
+
+    if (l == LUCENE_INT64_MIN_SHOULDBE) {
+        // special case, because long is not symmetric around zero
+        _tcscpy(buf, MIN_STRING_VALUE);
+        return buf;
+    }
+
+    if (l < 0) {
+        buf[0] = NEGATIVE_PREFIX[0];
+        l = LUCENE_INT64_MAX_SHOULDBE + l + 1;
+    } else {
+        buf[0] = POSITIVE_PREFIX[0];
+    }
+
+    // l is non-negative here; MAX_STRING_VALUE shows the longest case is
+    // 13 digits, which together with the terminator fits in STR_SIZE elements.
+    TCHAR tmp[STR_SIZE];
+    _i64tot(l, tmp, NUMBERTOOLS_RADIX);
+    const size_t len = _tcslen(tmp);
+    const size_t digitsStart = STR_SIZE - len;
+
+    // Pad between the prefix and the first significant digit.
+    for (size_t i = 1; i < digitsStart; i++)
+        buf[i] = _T('0');
+    _tcscpy(buf + digitsStart, tmp);
+
+    // STR_SIZE is the last valid index of the STR_SIZE + 1 element buffer;
+    // writing at STR_SIZE + 1 would overflow the allocation.
+    buf[STR_SIZE] = 0;
+
+    return buf;
+}
+
+/**
+ * Decodes a string produced by longToString() back into its 64-bit value.
+ *
+ * @param str null-terminated string of exactly STR_SIZE characters; it is
+ *            only read, never modified
+ * @return the decoded value
+ * @throws CL_ERR_NullPointer (raised through _CLTHROWA) if str is NULL
+ * @throws CL_ERR_NumberFormat (raised through _CLTHROWA) if str has the wrong
+ *         length, does not start with NEGATIVE_PREFIX or POSITIVE_PREFIX, or
+ *         the parser stops before the end of the string
+ */
+int64_t NumberTools::stringToLong(TCHAR* str) {
+    if (str == NULL) {
+        _CLTHROWA(CL_ERR_NullPointer,"string cannot be null");
+    }
+    if (_tcslen(str) != STR_SIZE) {
+        _CLTHROWA(CL_ERR_NumberFormat,"string is the wrong size");
+    }
+
+    if (_tcscmp(str, MIN_STRING_VALUE) == 0) {
+        return LUCENE_INT64_MIN_SHOULDBE;
+    }
+
+    // Reject an unknown prefix before spending any work on the digits.
+    const TCHAR prefix = str[0];
+    if (prefix != POSITIVE_PREFIX[0] && prefix != NEGATIVE_PREFIX[0]) {
+        _CLTHROWA(CL_ERR_NumberFormat,"string does not begin with the correct prefix");
+    }
+
+    TCHAR* sentinel = NULL;
+    int64_t l = _tcstoi64(str + 1, &sentinel, NUMBERTOOLS_RADIX);
+
+    // A sentinel that does not rest on the terminator means the parser met a
+    // character that is not a valid digit in this radix.
+    if (sentinel != NULL && *sentinel != 0) {
+        _CLTHROWA(CL_ERR_NumberFormat,"string contains an invalid character");
+    }
+
+    if (prefix == NEGATIVE_PREFIX[0]) {
+        // undo the shift applied by longToString() to negative values
+        l = l - LUCENE_INT64_MAX_SHOULDBE - 1;
+    }
+
+    return l;
+}
+
+// Destructor: the body is empty, nothing is released here.
+NumberTools::~NumberTools() {}
+
+CL_NS_END
Added: branches/lucene2_3_2/src/core/CLucene/document/NumberTools.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene/document/NumberTools.h (rev 0)
+++ branches/lucene2_3_2/src/core/CLucene/document/NumberTools.h 2009-01-03 23:03:09 UTC (rev 2948)
@@ -0,0 +1,75 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_document_NumberTools_
+#define _lucene_document_NumberTools_
+
+CL_NS_DEF(document)
+
+/**
+ * Provides support for converting 64-bit integers (int64_t) to strings, and
+ * back again. The strings are structured so that lexicographic sorting order
+ * is preserved.
+ *
+ * <p>
+ * That is, if l1 is less than l2 for any two values l1 and l2, then
+ * NumberTools::longToString(l1) is lexicographically less than
+ * NumberTools::longToString(l2). (Similarly for "greater than" and "equals".)
+ *
+ * <p>
+ * This class handles <b>all</b> int64_t values (unlike DateField).
+ *
+ *
+ */
+class CLUCENE_EXPORT NumberTools :LUCENE_BASE {
+
+ #define NUMBERTOOLS_RADIX 36
+
+ #define NEGATIVE_PREFIX _T("-")
+ // NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX
+ #define POSITIVE_PREFIX _T("0")
+
+public:
+ //NB(review): the original note here was truncated ("this must be less than"); presumably it concerns the ordering of the constants below - confirm
+ /**
+ * Equivalent to longToString() of the smallest int64_t value; STR_SIZE depends on the length of this string
+ */
+ LUCENE_STATIC_CONSTANT(TCHAR*, MIN_STRING_VALUE);
+
+ /**
+ * Equivalent to longToString() of the largest int64_t value
+ */
+ LUCENE_STATIC_CONSTANT(TCHAR*, MAX_STRING_VALUE);
+
+ /**
+ * The length of (all) strings returned by longToString()
+ */
+ LUCENE_STATIC_CONSTANT (int32_t, STR_SIZE = 14);
+
+ /**
+ * Converts an int64_t to a string suitable for indexing.
+ *
+ * @memory Caller should free the returned buffer
+ */
+ static TCHAR* longToString(int64_t l);
+
+ /**
+ * Converts a string that was returned by longToString() back to an
+ * int64_t.
+ *
+ * @throws CL_ERR_NullPointer
+ * if the input is null
+ * @throws CL_ERR_NumberFormat
+ * if the input does not parse (it was not a string returned by
+ * longToString()).
+ */
+ static int64_t stringToLong(TCHAR* str);
+
+ ~NumberTools();
+
+};
+CL_NS_END
+#endif
Modified: branches/lucene2_3_2/src/core/CLucene.h
===================================================================
--- branches/lucene2_3_2/src/core/CLucene.h 2008-12-09 05:17:59 UTC (rev 2947)
+++ branches/lucene2_3_2/src/core/CLucene.h 2009-01-03 23:03:09 UTC (rev 2948)
@@ -32,6 +32,7 @@
#include "CLucene/document/Field.h"
#include "CLucene/document/DateField.h"
#include "CLucene/document/DateTools.h"
+#include "CLucene/document/NumberTools.h"
#include "CLucene/store/Directory.h"
#include "CLucene/store/FSDirectory.h"
#include "CLucene/queryParser/QueryParser.h"
Modified: branches/lucene2_3_2/src/core/CMakeLists.txt
===================================================================
--- branches/lucene2_3_2/src/core/CMakeLists.txt 2008-12-09 05:17:59 UTC (rev 2947)
+++ branches/lucene2_3_2/src/core/CMakeLists.txt 2009-01-03 23:03:09 UTC (rev 2948)
@@ -52,6 +52,7 @@
./CLucene/document/DateTools.cpp
./CLucene/document/Field.cpp
./CLucene/document/FieldSelector.cpp
+ ./CLucene/document/NumberTools.cpp
./CLucene/index/IndexFileNames.cpp
./CLucene/index/SegmentMergeInfo.cpp
./CLucene/index/SegmentInfos.cpp
Modified: branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h
===================================================================
--- branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h 2008-12-09 05:17:59 UTC (rev 2947)
+++ branches/lucene2_3_2/src/shared/CLucene/_SharedHeader.h 2009-01-03 23:03:09 UTC (rev 2948)
@@ -12,6 +12,9 @@
#include "CLucene/_clucene-config.h"
#include "CLucene/SharedHeader.h"
+#define LUCENE_INT64_MAX_SHOULDBE _ILONGLONG(0x7FFFFFFFFFFFFFFF)
+#define LUCENE_INT64_MIN_SHOULDBE (-LUCENE_INT64_MAX_SHOULDBE - _ILONGLONG(1) )
+
//required globally (internally only)
#include <stdio.h>
#include <stdlib.h>
Modified: branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp
===================================================================
--- branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp 2008-12-09 05:17:59 UTC (rev 2947)
+++ branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.cpp 2009-01-03 23:03:09 UTC (rev 2948)
@@ -30,7 +30,7 @@
bufferOwner = true;
}
- StringBuffer::StringBuffer(const size_t initSize){
+ StringBuffer::StringBuffer(const size_t initSize, const bool consumeBuffer){
//Func - Constructor. Allocates a buffer of length initSize + 1
//Pre - initSize > 0
//Post - A buffer has been allocated of length initSize + 1
@@ -40,7 +40,7 @@
len = 0;
//Allocate a buffer of length bufferLength
buffer = _CL_NEWARRAY(TCHAR,bufferLength);
- bufferOwner = true;
+ bufferOwner = !consumeBuffer;
}
StringBuffer::StringBuffer(const TCHAR* value){
@@ -138,15 +138,15 @@
len += appendedLength;
}
- void StringBuffer::appendInt(const size_t value) {
+ void StringBuffer::appendInt(const int64_t value, const int32_t _Radix) {
//Func - Appends an integer (after conversion to a character string)
//Pre - true
//Post - The converted integer value has been appended to the string in buffer
//instantiate a buffer of 30 charactes for the conversion of the integer
TCHAR buf[30];
- //Convert the integer value to a string buf using the radix 10 (duh)
- _i64tot(value, buf, 10);
+ //Convert the integer value to a string buf using _Radix
+ _i64tot(value, buf, _Radix);
//Have the converted integer now stored in buf appended to the string in buffer
append(buf);
}
Modified: branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h
===================================================================
--- branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h 2008-12-09 05:17:59 UTC (rev 2947)
+++ branches/lucene2_3_2/src/shared/CLucene/util/StringBuffer.h 2009-01-03 23:03:09 UTC (rev 2948)
@@ -13,7 +13,7 @@
///Constructor. Allocates a buffer with the default length.
StringBuffer();
///Constructor. Allocates a buffer of length initSize + 1
- StringBuffer(const size_t initSize);
+ StringBuffer(const size_t initSize, const bool consumeBuffer = true);
///Constructor. Creates an instance of Stringbuffer containing a copy of
///the string value
StringBuffer(const TCHAR* value);
@@ -31,8 +31,8 @@
void append(const TCHAR* value);
///Appends a copy of the string value
void append(const TCHAR* value, size_t appendedLength);
- ///Appends an integer (after conversion to a character string)
- void appendInt(const size_t value);
+ ///Appends an integer (after conversion to a character string) with a default radix of 10. Radixes lower than 10 not supported.
+ void appendInt(const int64_t value, const int32_t _Radix = 10);
///Appends a float_t (after conversion to a character string)
void appendFloat(const float_t value, const size_t digits);
///Puts a copy of the string value in front of the current string in the StringBuffer
Modified: branches/lucene2_3_2/src/test/CMakeLists.txt
===================================================================
--- branches/lucene2_3_2/src/test/CMakeLists.txt 2008-12-09 05:17:59 UTC (rev 2947)
+++ branches/lucene2_3_2/src/test/CMakeLists.txt 2009-01-03 23:03:09 UTC (rev 2948)
@@ -27,6 +27,7 @@
./analysis/TestAnalyzers.cpp
./debug/TestError.cpp
./document/TestDocument.cpp
+./document/TestNumberTools.cpp
./store/TestStore.cpp
./search/TestDateFilter.cpp
./search/TestForDuplicates.cpp
Modified: branches/lucene2_3_2/src/test/CuTest.cpp
===================================================================
--- branches/lucene2_3_2/src/test/CuTest.cpp 2008-12-09 05:17:59 UTC (rev 2947)
+++ branches/lucene2_3_2/src/test/CuTest.cpp 2009-01-03 23:03:09 UTC (rev 2948)
@@ -250,8 +250,10 @@
CuString* message;
if (_tcscmp(expected, actual) == 0) return;
message = CuStringNew();
- CuStringAppend(message, preMessage);
- CuStringAppend(message, _T(" : ") );
+ if (preMessage) {
+ CuStringAppend(message, preMessage);
+ CuStringAppend(message, _T(" : ") );
+ }
CuStringAppend(message, _T("expected\n---->\n"));
CuStringAppend(message, expected);
CuStringAppend(message, _T("\n<----\nbut saw\n---->\n"));
Added: branches/lucene2_3_2/src/test/document/TestNumberTools.cpp
===================================================================
--- branches/lucene2_3_2/src/test/document/TestNumberTools.cpp (rev 0)
+++ branches/lucene2_3_2/src/test/document/TestNumberTools.cpp 2009-01-03 23:03:09 UTC (rev 2948)
@@ -0,0 +1,76 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "test.h"
+
+ // Encodes both values, then checks width, relative ordering and round trip.
+ void subtestTwoLongs(CuTest *tc, int64_t i, int64_t j) {
+     // encode both operands
+     TCHAR* encodedI = NumberTools::longToString(i);
+     TCHAR* encodedJ = NumberTools::longToString(j);
+
+     // every encoded value must have the fixed width
+     CuAssertTrue(tc, _tcslen(encodedI) == NumberTools::STR_SIZE);
+     CuAssertTrue(tc, _tcslen(encodedJ) == NumberTools::STR_SIZE);
+
+     // string order must mirror numeric order
+     const int order = _tcscmp(encodedI, encodedJ);
+     if (i == j) {
+         CuAssertTrue(tc, order == 0);
+     } else if (i < j) {
+         CuAssertTrue(tc, order < 0);
+     } else {
+         CuAssertTrue(tc, order > 0);
+     }
+
+     // decoding must give back the original operands
+     int64_t decodedI = NumberTools::stringToLong(encodedI);
+     int64_t decodedJ = NumberTools::stringToLong(encodedJ);
+
+     CuAssertTrue(tc, decodedI == i);
+     CuAssertTrue(tc, decodedJ == j);
+
+     // NOTE(review): both buffers are never released; the deletes below were
+     // already disabled in the original.
+     //_CLDELETE_ARRAY(encodedI);
+     //_CLDELETE_ARRAY(encodedJ);
+ }
+
+ // Checks every ordered pair of values in [-100, 100].
+ void testNearZero(CuTest *tc) {
+     const int32_t kLimit = 100;
+     for (int32_t first = -kLimit; first <= kLimit; ++first) {
+         for (int32_t second = -kLimit; second <= kLimit; ++second) {
+             subtestTwoLongs(tc, first, second);
+         }
+     }
+ }
+
+ // Checks the minimum constant in both directions, then the 10000 values above it.
+ void testMin(CuTest *tc) {
+     // the constant and the numeric minimum must map onto each other
+     CuAssertTrue(tc, LUCENE_INT64_MIN_SHOULDBE == NumberTools::stringToLong(const_cast<TCHAR*>(NumberTools::MIN_STRING_VALUE)));
+     CuAssertStrEquals(tc, _T("Min value"), NumberTools::MIN_STRING_VALUE, NumberTools::longToString(LUCENE_INT64_MIN_SHOULDBE));
+
+     // walk upwards from the minimum, pairing each value with its successor
+     for (int64_t offset = 0; offset < 10000; ++offset) {
+         const int64_t value = LUCENE_INT64_MIN_SHOULDBE + offset;
+         subtestTwoLongs(tc, value, value + 1);
+     }
+ }
+
+ // Checks the maximum constant in both directions, then the 10000 values below it.
+ void testMax(CuTest *tc) {
+     // the constant and the numeric maximum must map onto each other
+     CuAssertTrue(tc, LUCENE_INT64_MAX_SHOULDBE == NumberTools::stringToLong(const_cast<TCHAR*>(NumberTools::MAX_STRING_VALUE)));
+     CuAssertStrEquals(tc, _T("Max value"), NumberTools::MAX_STRING_VALUE, NumberTools::longToString(LUCENE_INT64_MAX_SHOULDBE));
+
+     // walk downwards from the maximum, pairing each value with its predecessor
+     for (int64_t offset = 0; offset < 10000; ++offset) {
+         const int64_t value = LUCENE_INT64_MAX_SHOULDBE - offset;
+         subtestTwoLongs(tc, value, value - 1);
+     }
+ }
+
+// Builds the suite holding the three NumberTools tests, in registration order.
+CuSuite *testNumberTools(void)
+{
+    CuSuite *numberToolsSuite = CuSuiteNew(_T("CLucene Number Tools Test"));
+
+    SUITE_ADD_TEST(numberToolsSuite, testNearZero);
+    SUITE_ADD_TEST(numberToolsSuite, testMin);
+    SUITE_ADD_TEST(numberToolsSuite, testMax);
+
+    return numberToolsSuite;
+}
Modified: branches/lucene2_3_2/src/test/test.h
===================================================================
--- branches/lucene2_3_2/src/test/test.h 2008-12-09 05:17:59 UTC (rev 2947)
+++ branches/lucene2_3_2/src/test/test.h 2009-01-03 23:03:09 UTC (rev 2948)
@@ -19,6 +19,8 @@
#include "CLucene/index/TermVector.h"
#include "CLucene/queryParser/MultiFieldQueryParser.h"
+#define LUCENE_INT64_MAX_SHOULDBE _ILONGLONG(0x7FFFFFFFFFFFFFFF)
+#define LUCENE_INT64_MIN_SHOULDBE (-LUCENE_INT64_MAX_SHOULDBE - _ILONGLONG(1) )
CL_NS_USE(index)
CL_NS_USE(util)
@@ -51,6 +53,7 @@
CuSuite *testutf8(void);
CuSuite *testreuters(void);
CuSuite *testdocument(void);
+CuSuite *testNumberTools(void);
class English{
public:
Modified: branches/lucene2_3_2/src/test/tests.cpp
===================================================================
--- branches/lucene2_3_2/src/test/tests.cpp 2008-12-09 05:17:59 UTC (rev 2947)
+++ branches/lucene2_3_2/src/test/tests.cpp 2009-01-03 23:03:09 UTC (rev 2948)
@@ -8,6 +8,7 @@
unittest tests[] = {
{"document", testdocument},
+ {"numbertools", testNumberTools},
{"debug", testdebug},
{"analysis", testanalysis},
{"analyzers", testanalyzers},
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|