|
From: <btm...@us...> - 2010-07-23 18:43:07
|
Revision: 3276
http://bigdata.svn.sourceforge.net/bigdata/?rev=3276&view=rev
Author: btmurphy
Date: 2010-07-23 18:43:00 +0000 (Fri, 23 Jul 2010)
Log Message:
-----------
merge -r3143::HEAD(3267) ~/bigdata/trunk ~/bigdata/branches/dev-btm [trunk --> branch dev-btm]
Modified Paths:
--------------
branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/CollatorEnum.java
branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java
branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java
branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/JDKSortKeyGenerator.java
branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java
branches/dev-btm/bigdata/src/java/com/bigdata/service/AbstractFederation.java
branches/dev-btm/bigdata/src/java/com/bigdata/sparse/GlobalRowStoreHelper.java
branches/dev-btm/bigdata/src/java/com/bigdata/sparse/KeyDecoder.java
branches/dev-btm/bigdata/src/java/com/bigdata/sparse/Schema.java
branches/dev-btm/bigdata/src/java/com/bigdata/sparse/SparseRowStore.java
branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/TestICUUnicodeKeyBuilder.java
branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/TestJDKUnicodeKeyBuilder.java
branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java
branches/dev-btm/bigdata/src/test/com/bigdata/cache/TestRingBuffer.java
branches/dev-btm/bigdata/src/test/com/bigdata/sparse/TestAll.java
branches/dev-btm/bigdata/src/test/com/bigdata/sparse/TestKeyEncodeDecode.java
branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/MonitorCreatePhysicalServiceLocksTask.java
branches/dev-btm/bigdata-rdf/src/java/com/bigdata/rdf/inf/TruthMaintenance.java
branches/dev-btm/bigdata-sails/src/test/com/bigdata/rdf/sail/TestNamedGraphs.java
branches/dev-btm/build.xml
branches/dev-btm/src/resources/config/bigdataCluster.config
branches/dev-btm/src/resources/config/bigdataCluster16.config
branches/dev-btm/src/resources/config/standalone/bigdataStandalone.config
Added Paths:
-----------
branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/AbstractUnicodeKeyBuilderTestCase.java
Removed Paths:
-------------
branches/dev-btm/bigdata/src/test/com/bigdata/btree/AbstractUnicodeKeyBuilderTestCase.java
Property Changed:
----------------
branches/dev-btm/
branches/dev-btm/bigdata-sails/src/java/com/bigdata/rdf/sail/sparql/
branches/dev-btm/dsi-utils/
branches/dev-btm/dsi-utils/src/test/
branches/dev-btm/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/
branches/dev-btm/lgpl-utils/src/test/it/unimi/dsi/fastutil/bytes/custom/
branches/dev-btm/src/resources/bin/config/
Property changes on: branches/dev-btm
___________________________________________________________________
Modified: svn:mergeinfo
- /branches/BTREE_BUFFER_BRANCH:2004-2045
/branches/DEV_BRANCH_27_OCT_2009:2270-2546,2548-2782
/branches/bugfix-btm:2594-2779
/trunk:2575-2594,2596-2877,2882-2903,2910-3143
+ /branches/BTREE_BUFFER_BRANCH:2004-2045
/branches/DEV_BRANCH_27_OCT_2009:2270-2546,2548-2782
/branches/bugfix-btm:2594-3237
/branches/fko:3150-3194
/trunk:2575-2594,2596-2877,2882-2903,2910-3269
Modified: branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/CollatorEnum.java
===================================================================
--- branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/CollatorEnum.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/CollatorEnum.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -10,7 +10,8 @@
/**
* The JDK bundles support for generating Unicode sort keys, but that
- * support does NOT include compressed sort keys.
+ * support does NOT include compressed sort keys and embeds <code>nul</code>
+ * bytes into its Unicode sort keys.
*/
JDK,
Modified: branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java
===================================================================
--- branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -144,6 +144,7 @@
/**
* Representation includes all aspects of the {@link Serializable} state.
*/
+ @Override
public String toString() {
StringBuilder sb = new StringBuilder(getClass().getName());
@@ -270,14 +271,10 @@
/*
* Figure out which collator to use.
- *
- * Note: The default depends on whether or not the ICU library is on
- * the class path. When it is, we always default to the ICU library.
*/
collator = CollatorEnum.valueOf(getProperty(properties,
- Options.COLLATOR, (icu_avail ? CollatorEnum.ICU.toString()
- : CollatorEnum.JDK.toString())));
+ Options.COLLATOR, CollatorEnum.ICU.toString()));
// true iff the collator was _explicitly_ specified.
final boolean explicitCollatorChoice = getProperty(properties,
@@ -349,7 +346,7 @@
* Figure out the collator strength.
*/
- Object strength = null;
+ Object tmpStrength = null;
final String val = getProperty(properties, Options.STRENGTH);
@@ -357,24 +354,24 @@
try {
- strength = StrengthEnum.valueOf(val);
+ tmpStrength = StrengthEnum.valueOf(val);
} catch (RuntimeException ex) {
- strength = Integer.parseInt(val);
+ tmpStrength = Integer.parseInt(val);
}
}
if (log.isInfoEnabled())
- log.info(Options.STRENGTH + "=" + strength);
+ log.info(Options.STRENGTH + "=" + tmpStrength);
/*
* Note: MAY be null (when null, does not override the collator's
* default).
*/
- this.strength = strength;
+ this.strength = tmpStrength;
}
Modified: branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java
===================================================================
--- branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -28,7 +28,6 @@
import java.util.Locale;
-import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import com.ibm.icu.text.Collator;
@@ -90,18 +89,6 @@
protected static final Logger log = Logger.getLogger(ICUSortKeyGenerator.class);
/**
- * True iff the {@link #log} level is INFO or less.
- */
- final protected boolean INFO = log.getEffectiveLevel().toInt() <= Level.INFO
- .toInt();
-
- /**
- * True iff the {@link #log} level is DEBUG or less.
- */
- final protected boolean DEBUG = log.getEffectiveLevel().toInt() <= Level.DEBUG
- .toInt();
-
- /**
* Used to encode unicode strings into compact byte[]s that have the same
* sort order (aka sort keys).
*/
@@ -128,7 +115,7 @@
this.locale = locale;
- if(INFO) log.info("locale="+locale);
+ if(log.isInfoEnabled()) log.info("locale="+locale);
this.collator = (RuleBasedCollator) Collator.getInstance(locale);
@@ -138,7 +125,7 @@
final int str = ((Integer) strength).intValue();
- if (INFO)
+ if (log.isInfoEnabled())
log.info("strength=" + str);
collator.setStrength(str);
@@ -147,7 +134,7 @@
StrengthEnum str = (StrengthEnum) strength;
- if (INFO)
+ if (log.isInfoEnabled())
log.info("strength=" + str);
switch (str) {
@@ -184,7 +171,7 @@
if (mode != null) {
- if(INFO) log.info("mode="+mode);
+ if(log.isInfoEnabled()) log.info("mode="+mode);
switch (mode) {
Modified: branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/JDKSortKeyGenerator.java
===================================================================
--- branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/JDKSortKeyGenerator.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/JDKSortKeyGenerator.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -56,8 +56,8 @@
}
- public JDKSortKeyGenerator(Locale locale, Object strength,
- DecompositionEnum mode) {
+ public JDKSortKeyGenerator(final Locale locale, final Object strength,
+ final DecompositionEnum mode) {
if (locale == null)
throw new IllegalArgumentException();
Modified: branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java
===================================================================
--- branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -180,9 +180,9 @@
* The buffer reference is used directly rather than making a
* copy of the data.
*/
- protected KeyBuilder(UnicodeSortKeyGenerator sortKeyGenerator, int len,
- byte[] buf) {
-
+ protected KeyBuilder(final UnicodeSortKeyGenerator sortKeyGenerator,
+ final int len, final byte[] buf) {
+
if (len < 0)
throw new IllegalArgumentException("len");
@@ -350,7 +350,7 @@
* The object responsible for generating sort keys from Unicode strings.
*
* The {@link UnicodeSortKeyGenerator} -or- <code>null</code> if Unicode
- * is not supported by this {@link KeyBuilder} instance.
+ * is not supported by this {@link IKeyBuilder} instance.
*/
final public UnicodeSortKeyGenerator getSortKeyGenerator() {
@@ -391,11 +391,11 @@
public KeyBuilder appendASCII(final String s) {
- int len = s.length();
+ int tmpLen = s.length();
- ensureFree(len);
+ ensureFree(tmpLen);
- for(int j=0; j<len; j++) {
+ for(int j=0; j<tmpLen; j++) {
char ch = s.charAt(j);
@@ -1356,6 +1356,11 @@
}
+ /**
+ * Create an instance for ASCII keys.
+ *
+ * @return The new instance.
+ */
public static IKeyBuilder newInstance() {
return newInstance(DEFAULT_INITIAL_CAPACITY);
@@ -1393,10 +1398,8 @@
/**
* Optional property specifies the library that will be used to generate
- * sort keys from Unicode data. The default always supports Unicode, but
- * the library choice depends on whether or not ICU library is found on
- * the classpath. When the ICU library is present, it is the default.
- * Otherwise the JDK library is the default. You may explicitly specify
+ * sort keys from Unicode data. The ICU library is the default.
+ * You may explicitly specify
* the library choice using one of the {@link CollatorEnum} values. The
* {@link CollatorEnum#ASCII} value may be used to disable Unicode
* support entirely, treating the characters as if they were ASCII. If
Modified: branches/dev-btm/bigdata/src/java/com/bigdata/service/AbstractFederation.java
===================================================================
--- branches/dev-btm/bigdata/src/java/com/bigdata/service/AbstractFederation.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/java/com/bigdata/service/AbstractFederation.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -149,6 +149,7 @@
// allow client requests to finish normally.
new ShutdownHelper(threadPool, 10L/*logTimeout*/, TimeUnit.SECONDS) {
+ @Override
public void logTimeout() {
log.warn("Awaiting thread pool termination: elapsed="
@@ -170,6 +171,7 @@
new ShutdownHelper(scheduledExecutorService, 10L/* logTimeout */,
TimeUnit.SECONDS) {
+ @Override
public void logTimeout() {
log.warn("Awaiting sample service termination: elapsed="
@@ -650,12 +652,7 @@
TimeUnit.MILLISECONDS // unit
);
- addScheduledTask(//
- new StartDeferredTasksTask(),// task to run.
- 150, // initialDelay (ms)
- 2000, // delay
- TimeUnit.MILLISECONDS // unit
- );
+ getExecutorService().execute(new StartDeferredTasksTask());
// Setup locator.
resourceLocator = new DefaultResourceLocator(this,
@@ -888,12 +885,12 @@
*/
public boolean isServiceReady() {
- final AbstractClient<T> client = this.client;
+ final AbstractClient<T> thisClient = this.client;
- if (client == null)
+ if (thisClient == null)
return false;
- final IFederationDelegate<T> delegate = client.getDelegate();
+ final IFederationDelegate<T> delegate = thisClient.getDelegate();
if (delegate == null)
return false;
@@ -963,11 +960,11 @@
}
// @todo really, we should test like this everywhere.
- final AbstractClient client = this.client;
+ final AbstractClient thisClient = this.client;
- if (client != null && client.isConnected()) {
+ if (thisClient != null && thisClient.isConnected()) {
- client.getDelegate().serviceLeave(serviceUUID);
+ thisClient.getDelegate().serviceLeave(serviceUUID);
}
@@ -996,7 +993,7 @@
static private String ERR_SERVICE_NOT_READY = "Service is not ready yet.";
/**
- * This task runs periodically. Once {@link #getServiceUUID()} reports a
+ * This task runs once. Once {@link #getServiceUUID()} reports a
* non-<code>null</code> value, it will start an (optional)
* {@link AbstractStatisticsCollector}, an (optional) httpd service, and
* the (required) {@link ReportTask}.
@@ -1008,8 +1005,6 @@
* {@link LoadBalancer} service know which services exist, which is
* important for some of its functions.
* <p>
- * Once these task(s) have been started, this task will throw an exception
- * in order to prevent it from being re-executed.
*
* FIXME This should explicitly await jini registrar discovery, zookeeper
* client connected, and whatever other preconditions must be satisfied
@@ -1032,8 +1027,7 @@
*/
final long begin = System.currentTimeMillis();
- public StartDeferredTasksTask() {
-
+ private StartDeferredTasksTask() {
}
/**
@@ -1043,8 +1037,6 @@
*/
public void run() {
- final boolean started;
-
try {
// /*
@@ -1061,9 +1053,7 @@
// return;
//
// }
-
- started = startDeferredTasks();
-
+ startDeferredTasks();
} catch (Throwable t) {
log.warn("Problem in report task?", t);
@@ -1072,60 +1062,56 @@
}
- if (started) {
-
- /*
- * Note: This exception is thrown once this task has executed
- * successfully.
- */
-
- throw new RuntimeException("Normal completion.");
-
- }
-
}
/**
* Starts performance counter collection once the service {@link UUID}
* is known.
*
- * @return <code>true</code> iff performance counter collection was
- * started.
- *
* @throws IOException
* if {@link IDataService#getServiceUUID()} throws this
* exception (it never should since it is a local method
* call).
*/
- protected boolean startDeferredTasks() throws IOException {
+ protected void startDeferredTasks() throws IOException {
// elapsed time since we started running this task.
final long elapsed = System.currentTimeMillis() - begin;
- if (getServiceUUID() == null) {
-
+ // Wait for the service ID to become available, trying every
+ // two seconds, while logging failures.
+ while (true) {
+ if (getServiceUUID() != null) {
+ break;
+ }
if (elapsed > 1000 * 10)
log.warn(ERR_NO_SERVICE_UUID + " : iface="
+ getServiceIface() + ", name=" + getServiceName()
+ ", elapsed=" + elapsed);
else if (log.isInfoEnabled())
- log.info(ERR_NO_SERVICE_UUID);
-
- return false;
-
+ log.info(ERR_NO_SERVICE_UUID);
+ try {
+ Thread.sleep(2000);
+ } catch (InterruptedException e) {
+ }
}
- if (!isServiceReady()) {
-
+ // Wait for the service to become ready, trying every
+ // two seconds, while logging failures.
+ while (true) {
+ if (isServiceReady()) {
+ break;
+ }
if (elapsed > 1000 * 10)
log.warn(ERR_SERVICE_NOT_READY + " : iface="
+ getServiceIface() + ", name=" + getServiceName()
+ ", elapsed=" + elapsed);
else if (log.isInfoEnabled())
log.info(ERR_SERVICE_NOT_READY + " : " + elapsed);
-
- return false;
-
+ try {
+ Thread.sleep(2000);
+ } catch (InterruptedException e) {
+ }
}
/*
@@ -1153,9 +1139,6 @@
// notify delegates that deferred startup has occurred.
AbstractFederation.this.didStart();
-
- return true;
-
}
Modified: branches/dev-btm/bigdata/src/java/com/bigdata/sparse/GlobalRowStoreHelper.java
===================================================================
--- branches/dev-btm/bigdata/src/java/com/bigdata/sparse/GlobalRowStoreHelper.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/java/com/bigdata/sparse/GlobalRowStoreHelper.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -32,8 +32,12 @@
import org.apache.log4j.Logger;
+import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.IndexMetadata;
+import com.bigdata.btree.keys.ASCIIKeyBuilderFactory;
+import com.bigdata.btree.keys.CollatorEnum;
+import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.ITx;
import com.bigdata.journal.TimestampUtility;
@@ -99,6 +103,25 @@
indexMetadata
.setSplitHandler(LogicalRowSplitHandler.INSTANCE);
+ if (CollatorEnum.JDK.toString().equals(
+ System.getProperty(KeyBuilder.Options.COLLATOR))) {
+ /*
+ * The JDK RuleBasedCollator embeds nul bytes in the
+ * Unicode sort keys. This makes them unsuitable for the
+ * SparseRowStore, which can not locate the start of the
+ * column name if there are embedded nuls in a Unicode
+ * primary key. As a work around, this forces an ASCII
+ * collation sequence if the JDK collator is the
+ * default. This is not ideal since non-ascii
+ * distinctions will be lost, but it is better than
+ * being unable to decode the column names.
+ */
+ log.warn("Forcing ASCII collator.");
+ indexMetadata
+ .setTupleSerializer(new DefaultTupleSerializer(
+ new ASCIIKeyBuilderFactory()));
+ }
+
// Register the index.
indexManager.registerIndex(indexMetadata);
Modified: branches/dev-btm/bigdata/src/java/com/bigdata/sparse/KeyDecoder.java
===================================================================
--- branches/dev-btm/bigdata/src/java/com/bigdata/sparse/KeyDecoder.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/java/com/bigdata/sparse/KeyDecoder.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -49,17 +49,22 @@
* prefix.
* <p>
* The encoded schema name is followed by the {@link KeyType#getByteCode()} and
- * then by a <code>nul</code> byte. By searching for the <code>nul</code>
- * byte we can identify the end of the encoded schema name and also the data
- * type of the primary key. Most kinds of primary keys have a fixed length
- * encoding, e.g., {@link Long}, {@link Double}, etc. However, Unicode primary
- * keys have a variable length encoding which makes life more ... complex. Since
- * the keys need to reflect the total sort order we can not include the byte
- * count of the primary key in the key itself. The only reasonable approach is
- * to append a byte sequence to the key that never occurs within the generated
- * Unicode sort keys. We use a <code>nul</code> byte for this purpose since it
- * is not emitted by most Unicode collation implementations as it would cause
- * grief for C-language strings.
+ * then by a <code>nul</code> byte. By searching for the <code>nul</code> byte
+ * we can identify the end of the encoded schema name and also the data type of
+ * the primary key. Most kinds of primary keys have a fixed length encoding,
+ * e.g., {@link Long}, {@link Double}, etc.
+ * <p>
+ * Unicode primary keys have a variable length encoding which makes life more
+ * complex. For Unicode primary keys, we break with the collation order and use
+ * the UTF8 encoding of the key. This means that the primary key can be decoded
+ * and preserves hierarchical namespace clustering within the row store but does
+ * not impose a total sort order per Unicode sort key semantics. The only
+ * reasonable approach is to append a byte sequence to the key that never occurs
+ * within the generated Unicode sort keys. Again, we use a <code>nul</code> byte
+ * to mark the end of the Unicode primary key since it is not emitted by most
+ * Unicode collation implementations as it would cause grief for C-language
+ * strings. (However, see {@link SparseRowStore.Options#PRIMARY_KEY_UNICODE_CLEAN} for
+ * information on backward compatibility.)
*
* @see Schema#fromKey(IKeyBuilder, Object)
* @see KeyType#getKeyType(byte)
@@ -166,15 +171,15 @@
* The decoded primary key.
*
* @throws UnsupportedOperationException
- * if the primary key can not be decoded (e.g., for
- * {@link KeyType#Unicode} keys).
+ * if the primary key can not be decoded.
*/
public Object getPrimaryKey() {
if(primaryKey == null) {
-
- throw new UnsupportedOperationException("Can not decode: keyType="+primaryKeyType);
-
+
+ throw new UnsupportedOperationException("Can not decode: keyType="
+ + primaryKeyType);
+
}
return primaryKey;
@@ -220,14 +225,13 @@
* Note: the KeyType byte occurs after the schema name bytes and before
* the [nul].
*/
+ int primaryKeyOffset = 0;
{
boolean found = false;
int schemaBytesLength = 0;
- int primaryKeyOffset = 0;
-
for (int i = 0; i < key.length; i++) {
if (key[i] == (byte) 0) {
@@ -256,7 +260,6 @@
this.primaryKeyTypeOffset = schemaBytesLength;
- this.primaryKeyOffset = primaryKeyOffset;
// note: ArrayIndexOutOfBounds with index==-1 means ICU library not on classpath!
this.primaryKeyType = KeyType.getKeyType(KeyBuilder.decodeByte(key[primaryKeyTypeOffset]));
@@ -273,42 +276,48 @@
primaryKeyLength = primaryKeyType.getEncodedLength();
+ this.primaryKeyOffset = primaryKeyOffset;
+
columnNameOffset = primaryKeyOffset + primaryKeyLength;
} else {
/*
- * Scan for the next [nul] byte.
+ * Scan for the next [nul] byte (ASCII).
*/
boolean found = false;
int primaryKeyLength = 0;
-
+
for (int i = primaryKeyOffset; i < key.length; i++) {
if (key[i] == (byte) 0) {
primaryKeyLength = i - primaryKeyOffset;
-
+
found = true;
-
+
break;
-
+
}
}
-
- if(!found) {
+ if (!found) {
+
throw new RuntimeException(
"Could not locate the end of the encoded schema name: keyType="
- + primaryKeyType+", key="+BytesUtil.toString(key));
+ + primaryKeyType + ", key="
+ + BytesUtil.toString(key));
}
this.primaryKeyLength = primaryKeyLength;
- // Note: also skips the [nul] byte terminating the primary key.
+ this.primaryKeyOffset = primaryKeyOffset;
+
+ // Note: also skips the [nul] byte terminating the primary
+ // key.
this.columnNameOffset = primaryKeyOffset + primaryKeyLength + 1;
}
@@ -327,10 +336,26 @@
primaryKey = KeyBuilder.decodeFloat(key, primaryKeyOffset);
break;
case Unicode:
- /*
- * Note: Decode is not possible for this case.
- */
- primaryKey = null;
+ if (SparseRowStore.primaryKeyUnicodeClean) {
+ final byte[] bytes = new byte[primaryKeyLength];
+ System.arraycopy(key, primaryKeyOffset, bytes, 0, primaryKeyLength);
+ try {
+ primaryKey = new String(bytes, SparseRowStore.UTF8);
+ } catch (UnsupportedEncodingException ex) {
+ throw new RuntimeException(
+ "Could not decode the primary key"
+ + ": primaryKeyOffset="
+ + primaryKeyOffset
+ + ", primaryKeyLength="
+ + primaryKeyLength + ", key="
+ + BytesUtil.toString(key));
+ }
+ } else {
+ /*
+ * Note: Decode is not possible for this case.
+ */
+ primaryKey = null;
+ }
break;
case ASCII:
primaryKey = KeyBuilder.decodeASCII(key, primaryKeyOffset,
Modified: branches/dev-btm/bigdata/src/java/com/bigdata/sparse/Schema.java
===================================================================
--- branches/dev-btm/bigdata/src/java/com/bigdata/sparse/Schema.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/java/com/bigdata/sparse/Schema.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -154,7 +154,7 @@
/*
* Key builder stuff.
*/
-
+
/**
* Helper method appends a typed value to the compound key (this is used to
* get the primary key into the compound key).
@@ -172,8 +172,9 @@
*
* @see KeyDecoder
*/
- final protected IKeyBuilder appendPrimaryKey(IKeyBuilder keyBuilder, Object v, boolean successor) {
-
+ final protected IKeyBuilder appendPrimaryKey(final IKeyBuilder keyBuilder,
+ final Object v, final boolean successor) {
+
final KeyType keyType = getPrimaryKeyType();
if (successor) {
@@ -187,9 +188,27 @@
case Float:
return keyBuilder.append(successor(keyBuilder,((Number) v).floatValue()));
case Double:
- return keyBuilder.append(successor(keyBuilder,((Number) v).doubleValue()));
- case Unicode:
- return keyBuilder.appendText(v.toString(), true/*unicode*/, true/*successor*/).appendNul();
+ return keyBuilder.append(successor(keyBuilder, ((Number) v)
+ .doubleValue()));
+ case Unicode: {
+ final String tmp = v.toString();
+ if (SparseRowStore.primaryKeyUnicodeClean) {
+ try {
+ keyBuilder.append(
+ SuccessorUtil.successor(tmp
+ .getBytes(SparseRowStore.UTF8)))
+ .appendNul();
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException(e);
+ }
+ } else {
+ // primary key in backwards compatibility mode.
+ keyBuilder
+ .appendText(tmp, true/* unicode */, true/* successor */)
+ .appendNul();
+ }
+ return keyBuilder;
+ }
case ASCII:
return keyBuilder.appendText(v.toString(), false/*unicode*/, true/*successor*/).appendNul();
case Date:
@@ -212,8 +231,22 @@
return keyBuilder.append(((Number) v).floatValue());
case Double:
return keyBuilder.append(((Number) v).doubleValue());
- case Unicode:
- return keyBuilder.appendText(v.toString(),true/*unicode*/,false/*successor*/).appendNul();
+ case Unicode: {
+ final String tmp = v.toString();
+ if (SparseRowStore.primaryKeyUnicodeClean) {
+ try {
+ keyBuilder.append(tmp.getBytes(SparseRowStore.UTF8))
+ .appendNul();
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException(e);
+ }
+ } else {
+ // primary key in backwards compatibility mode.
+ keyBuilder.appendText(v.toString(), true/* unicode */,
+ false/* successor */).appendNul();
+ }
+ return keyBuilder;
+ }
case ASCII:
return keyBuilder.appendText(v.toString(),false/*unicode*/,false/*successor*/).appendNul();
case Date:
@@ -243,8 +276,8 @@
* {@link #toKey(Object)}, which correctly forms the successor
* key in all cases.
*/
- final private Object successor(IKeyBuilder keyBuilder,Object v) {
-
+ final private Object successor(final IKeyBuilder keyBuilder, final Object v) {
+
final KeyType keyType = getPrimaryKeyType();
switch(keyType) {
@@ -290,7 +323,8 @@
*
* @see KeyDecoder
*/
- final protected IKeyBuilder fromKey(IKeyBuilder keyBuilder,Object primaryKey) {
+ final protected IKeyBuilder fromKey(final IKeyBuilder keyBuilder,
+ final Object primaryKey) {
keyBuilder.reset();
@@ -319,8 +353,9 @@
*
* @return
*/
- final public byte[] getPrefix(IKeyBuilder keyBuilder,Object primaryKey) {
-
+ final public byte[] getPrefix(final IKeyBuilder keyBuilder,
+ final Object primaryKey) {
+
return fromKey(keyBuilder, primaryKey).getKey();
}
@@ -384,8 +419,8 @@
* @throws IllegalArgumentException
* if <i>col</i> is not valid as the name of a column.
*/
- public byte[] getKey(IKeyBuilder keyBuilder, Object primaryKey, String col,
- long timestamp) {
+ public byte[] getKey(final IKeyBuilder keyBuilder, final Object primaryKey,
+ final String col, final long timestamp) {
if (keyBuilder == null)
throw new IllegalArgumentException();
Modified: branches/dev-btm/bigdata/src/java/com/bigdata/sparse/SparseRowStore.java
===================================================================
--- branches/dev-btm/bigdata/src/java/com/bigdata/sparse/SparseRowStore.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/java/com/bigdata/sparse/SparseRowStore.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -24,6 +24,7 @@
*/
package com.bigdata.sparse;
+import java.text.RuleBasedCollator;
import java.util.Iterator;
import java.util.Map;
@@ -33,7 +34,9 @@
import com.bigdata.btree.IIndex;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITuple;
+import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.filter.FilterConstructor;
+import com.bigdata.btree.keys.CollatorEnum;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.journal.ITimestampService;
@@ -186,6 +189,17 @@
* md.setSplitHandler(LogicalRowSplitHandler.INSTANCE);
* </pre>
*
+ * Note: The JDK {@link RuleBasedCollator} embeds <code>nul</code> bytes in
+ * the Unicode sort keys. This makes them unsuitable for the row store which
+ * can not locate the start of the column name if there are embedded
+ * <code>nul</code>s in the primaryKey. Therefore, if you are using the
+ * {@link CollatorEnum#JDK} as your default collator, then you MUST override
+ * the {@link IndexMetadata} for the row store to use either an ASCII
+ * collator or the ICU collator. In general, the ICU collator is superior to
+ * the JDK collator and will be used by default. The ASCII collator is not
+ * ideal since non-ascii distinctions will be lost, but it is better than
+ * being unable to decode the data in the row store.
+ *
* @param ndx
* The index.
*/
@@ -1029,4 +1043,53 @@
}
+ /**
+ * Options for the {@link SparseRowStore}.
+ *
+ * @author <a href="mailto:tho...@us...">Bryan
+ * Thompson</a>
+ * @version $Id$
+ */
+ public interface Options {
+
+ /**
+ * The primary key was originally written using a Unicode sort key.
+ * However, the JDK generates Unicode sort keys with embedded nuls and
+ * that broke the logic to detect the end of the Unicode primary keys.
+ * In order to accommodate this behavior, the Unicode primary key is now
+ * encoded as UTF8 which also has the advantage that we can decode
+ * Unicode primary keys. Standard prefix compression on the B+Tree
+ * should make up for the larger representation of the Unicode primary
+ * key in the B+Tree.
+ * <p>
+ * This change was introduced on 7/15/2010 in the trunk and breaks
+ * compatibility with earlier revisions of the {@link SparseRowStore}.
+ * This flag may be set to <code>false</code> for backward
+ * compatibility.
+ *
+ * @see Options#DEFAULT_PRIMARY_KEY_UNICODE_CLEAN
+ */
+ String PRIMARY_KEY_UNICODE_CLEAN = Schema.class.getName()
+ + ".primaryKey.unicodeClean";
+
+ /**
+ * FIXME Change over to [true] for the next release.
+ *
+ * @see https://sourceforge.net/apps/trac/bigdata/ticket/107
+ */
+ String DEFAULT_PRIMARY_KEY_UNICODE_CLEAN = "false";
+
+ }
+
+ /**
+ * This is a global option since it was always <code>false</code> for
+ * historical stores.
+ *
+ * @see Options#PRIMARY_KEY_UNICODE_CLEAN
+ */
+ final static transient boolean primaryKeyUnicodeClean = Boolean
+ .valueOf(System.getProperty(
+ SparseRowStore.Options.PRIMARY_KEY_UNICODE_CLEAN,
+ SparseRowStore.Options.DEFAULT_PRIMARY_KEY_UNICODE_CLEAN));
+
}
Deleted: branches/dev-btm/bigdata/src/test/com/bigdata/btree/AbstractUnicodeKeyBuilderTestCase.java
===================================================================
--- branches/dev-btm/bigdata/src/test/com/bigdata/btree/AbstractUnicodeKeyBuilderTestCase.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/test/com/bigdata/btree/AbstractUnicodeKeyBuilderTestCase.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -1,183 +0,0 @@
-/*
-
-Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved.
-
-Contact:
- SYSTAP, LLC
- 4501 Tower Road
- Greensboro, NC 27410
- lic...@bi...
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-*/
-/*
- * Created on Nov 29, 2007
- */
-
-package com.bigdata.btree;
-
-import java.util.Locale;
-import java.util.Properties;
-
-import junit.framework.TestCase2;
-
-import com.bigdata.btree.keys.DefaultKeyBuilderFactory;
-import com.bigdata.btree.keys.IKeyBuilder;
-import com.bigdata.btree.keys.KeyBuilder;
-import com.bigdata.btree.keys.StrengthEnum;
-import com.bigdata.btree.keys.KeyBuilder.Options;
-
-/**
- * Base class for the test suites that examine support for Unicode sort keys in
- * {@link KeyBuilder}.
- *
- * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
- * @version $Id$
- */
-abstract public class AbstractUnicodeKeyBuilderTestCase extends TestCase2 {
-
- /**
- *
- */
- public AbstractUnicodeKeyBuilderTestCase() {
- }
-
- /**
- * @param arg0
- */
- public AbstractUnicodeKeyBuilderTestCase(String arg0) {
- super(arg0);
- }
-
- /**
- * Test ability to encode unicode data into a variable length byte[] that
- * allows direct byte-by-byte comparisons which maintain the local-specific
- * sort order of the original strings.
- */
- public void test_keyBuilder_unicode_string_key_us_primary() {
-
- /*
- * Setup an instance for US English with strength := PRIMARY.
- */
-
- Properties properties = getProperties();
-
- properties.setProperty(Options.USER_LANGUAGE, Locale.US.getLanguage());
-
- properties.setProperty(Options.USER_COUNTRY, Locale.US.getCountry());
-
- properties.setProperty(Options.STRENGTH, StrengthEnum.Primary
- .toString());
-
- final DefaultKeyBuilderFactory factory = new DefaultKeyBuilderFactory(
- properties);
-
- assertEquals(Locale.US.getLanguage(), factory.getLocale().getLanguage());
-
- assertEquals(Locale.US.getCountry(), factory.getLocale().getCountry());
-
- assertEquals(StrengthEnum.Primary, factory.getStrength());
-
- final IKeyBuilder keyBuilder = factory.getKeyBuilder();
-
-// // verify assumption under that configuration.
-// {
-// RuleBasedCollator usCollator = (RuleBasedCollator) Collator
-// .getInstance(Locale.US);
-//
-// usCollator.setStrength(Collator.PRIMARY);
-//
-// assertEquals(0, usCollator.compare("abc", "ABC"));
-// }
-
- byte[] key1 = keyBuilder.reset().append("abc").getKey();
- byte[] key2 = keyBuilder.reset().append("ABC").getKey();
- byte[] key3 = keyBuilder.reset().append("Abc").getKey();
-
- System.err.println("abc: "+BytesUtil.toString(key1));
- System.err.println("ABC: "+BytesUtil.toString(key2));
- System.err.println("Abc: "+BytesUtil.toString(key3));
-
- // all are equal using PRIMARY strength.
- assertEquals(0,BytesUtil.compareBytes(key1, key2));
- assertEquals(0,BytesUtil.compareBytes(key2, key3));
-
- }
-
- public void test_keyBuilder_unicode_string_key_us_identical() {
-
- /*
- * Setup an instance for US English with strength := IDENTICAL.
- */
-
- Properties properties = new Properties();
-
- properties.setProperty(Options.USER_LANGUAGE, Locale.US.getLanguage());
-
- properties.setProperty(Options.USER_COUNTRY, Locale.US.getCountry());
-
- properties.setProperty(Options.STRENGTH, ""+StrengthEnum.Identical);
-
- IKeyBuilder keyBuilder = KeyBuilder.newUnicodeInstance(properties);
-
-// // verify assumption under that configuration.
-// {
-// RuleBasedCollator usCollator = (RuleBasedCollator) Collator
-// .getInstance(Locale.US);
-//
-// usCollator.setStrength(Collator.IDENTICAL);
-//
-// assertNotSame(0, usCollator.compare("abc", "ABC"));
-// }
-
- // IKeyBuilder keyBuilder = new UnicodeKeyBuilder(usCollator,1000);
-
- byte[] key1 = keyBuilder.reset().append("abc").getKey();
- byte[] key2 = keyBuilder.reset().append("ABC").getKey();
- byte[] key3 = keyBuilder.reset().append("Abc").getKey();
-
- System.err.println("abc: "+BytesUtil.toString(key1));
- System.err.println("ABC: "+BytesUtil.toString(key2));
- System.err.println("Abc: "+BytesUtil.toString(key3));
-
- // verify ordering for IDENTICAL comparison.
- assertTrue(BytesUtil.compareBytes(key1, key2)<0);
- assertTrue(BytesUtil.compareBytes(key2, key3)>0);
-
- }
-
- /**
- * Test verifies that the trailing <code>nul</code> byte is not part of
- * the key when a unicode string is appended to an {@link IKeyBuilder}.
- * <p>
- * Note: The trailing <code>nul</code> byte is appended by the ICU library
- * in order to have compatibility with their C library, but it is not of
- * interest for Java processing. However, note that a <code>nul</code>
- * byte MAY be used to separate components of a complex key.
- */
- public void test_keyBuilder_unicode_String_noTrailingNul() {
-
- IKeyBuilder keyBuilder = KeyBuilder.newUnicodeInstance(getProperties());
-
- keyBuilder.append("Hello World!");
-
- byte[] key = keyBuilder.getKey();
-
- assertNotSame("Not expecting a trailing nul byte.", (byte) 0,
- key[key.length - 1]);
-
- }
-
-}
Copied: branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/AbstractUnicodeKeyBuilderTestCase.java (from rev 3269, trunk/bigdata/src/test/com/bigdata/btree/keys/AbstractUnicodeKeyBuilderTestCase.java)
===================================================================
--- branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/AbstractUnicodeKeyBuilderTestCase.java (rev 0)
+++ branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/AbstractUnicodeKeyBuilderTestCase.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -0,0 +1,342 @@
+/*
+
+Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved.
+
+Contact:
+ SYSTAP, LLC
+ 4501 Tower Road
+ Greensboro, NC 27410
+ lic...@bi...
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+*/
+/*
+ * Created on Nov 29, 2007
+ */
+
+package com.bigdata.btree.keys;
+
+import java.util.Arrays;
+import java.util.Locale;
+import java.util.Properties;
+
+import junit.framework.TestCase2;
+
+import com.bigdata.btree.BytesUtil;
+import com.bigdata.btree.keys.DefaultKeyBuilderFactory;
+import com.bigdata.btree.keys.IKeyBuilder;
+import com.bigdata.btree.keys.KeyBuilder;
+import com.bigdata.btree.keys.StrengthEnum;
+import com.bigdata.btree.keys.KeyBuilder.Options;
+
+/**
+ * Base class for the test suites that examine support for Unicode sort keys in
+ * {@link KeyBuilder}.
+ *
+ * @author <a href="mailto:tho...@us...">Bryan Thompson</a>
+ * @version $Id$
+ */
+abstract public class AbstractUnicodeKeyBuilderTestCase extends TestCase2 {
+
+ /**
+ * No-argument constructor; relies on the implicit call to the superclass
+ * no-argument constructor.
+ */
+ public AbstractUnicodeKeyBuilderTestCase() {
+ }
+
+ /**
+ * Constructor which hands its argument to the superclass constructor.
+ *
+ * @param arg0
+ *            Passed through unchanged to the {@link TestCase2} constructor
+ *            (NOTE(review): presumably the name of the test -- confirm).
+ */
+ public AbstractUnicodeKeyBuilderTestCase(String arg0) {
+ super(arg0);
+ }
+
+    /**
+     * Verifies that Unicode text can be encoded into variable length byte[]
+     * keys whose byte-by-byte comparison follows the locale-specific sort
+     * order of the original strings.
+     * <p>
+     * The key builder is configured for US English at
+     * {@link StrengthEnum#Primary} strength and the keys generated for "abc",
+     * "ABC" and "Abc" are required to compare as equal.
+     */
+    public void test_keyBuilder_unicode_string_key_us_primary() {
+
+        // US English with strength := PRIMARY.
+        final Properties config = getProperties();
+        final Locale locale = Locale.US;
+
+        config.setProperty(Options.USER_LANGUAGE, locale.getLanguage());
+        config.setProperty(Options.USER_COUNTRY, locale.getCountry());
+        config.setProperty(Options.STRENGTH, StrengthEnum.Primary.toString());
+
+        final DefaultKeyBuilderFactory factory = new DefaultKeyBuilderFactory(
+                config);
+
+        // The factory must report the locale and strength it was given.
+        assertEquals(locale.getLanguage(), factory.getLocale().getLanguage());
+        assertEquals(locale.getCountry(), factory.getLocale().getCountry());
+        assertEquals(StrengthEnum.Primary, factory.getStrength());
+
+        final IKeyBuilder builder = factory.getKeyBuilder();
+
+        // A single builder is reused: each key is taken before the next reset().
+        final byte[] lower = builder.reset().append("abc").getKey();
+        final byte[] upper = builder.reset().append("ABC").getKey();
+        final byte[] mixed = builder.reset().append("Abc").getKey();
+
+        System.err.println("abc: " + BytesUtil.toString(lower));
+        System.err.println("ABC: " + BytesUtil.toString(upper));
+        System.err.println("Abc: " + BytesUtil.toString(mixed));
+
+        // All three keys are equal when compared at PRIMARY strength.
+        assertEquals(0, BytesUtil.compareBytes(lower, upper));
+        assertEquals(0, BytesUtil.compareBytes(upper, mixed));
+
+    }
+
+    /**
+     * Verifies the relative order of the keys generated for US English at
+     * {@link StrengthEnum#Identical} strength: the key for "abc" must sort
+     * before the key for "ABC", and the key for "ABC" must sort after the key
+     * for "Abc".
+     */
+    public void test_keyBuilder_unicode_string_key_us_identical() {
+
+        /*
+         * US English with strength := IDENTICAL.
+         *
+         * NOTE(review): this starts from an empty Properties object rather
+         * than getProperties(), so settings contributed by a concrete
+         * subclass are not applied here -- confirm that this is intended.
+         */
+        final Properties config = new Properties();
+        final Locale locale = Locale.US;
+
+        config.setProperty(Options.USER_LANGUAGE, locale.getLanguage());
+        config.setProperty(Options.USER_COUNTRY, locale.getCountry());
+        config.setProperty(Options.STRENGTH, "" + StrengthEnum.Identical);
+
+        final IKeyBuilder builder = KeyBuilder.newUnicodeInstance(config);
+
+        // A single builder is reused: each key is taken before the next reset().
+        final byte[] lower = builder.reset().append("abc").getKey();
+        final byte[] upper = builder.reset().append("ABC").getKey();
+        final byte[] mixed = builder.reset().append("Abc").getKey();
+
+        System.err.println("abc: " + BytesUtil.toString(lower));
+        System.err.println("ABC: " + BytesUtil.toString(upper));
+        System.err.println("Abc: " + BytesUtil.toString(mixed));
+
+        // Case is significant at IDENTICAL strength, so the keys are ordered.
+        assertTrue(BytesUtil.compareBytes(lower, upper) < 0);
+        assertTrue(BytesUtil.compareBytes(upper, mixed) > 0);
+
+    }
+
+    /**
+     * Test verifies that the trailing <code>nul</code> byte is not part of
+     * the key when a unicode string is appended to an {@link IKeyBuilder}.
+     * <p>
+     * Note: The trailing <code>nul</code> byte is appended by the ICU library
+     * in order to have compatibility with their C library, but it is not of
+     * interest for Java processing. However, note that a <code>nul</code>
+     * byte MAY be used to separate components of a complex key.
+     */
+    public void test_keyBuilder_unicode_String_noTrailingNul() {
+
+        final IKeyBuilder keyBuilder = KeyBuilder
+                .newUnicodeInstance(getProperties());
+
+        keyBuilder.append("Hello World!");
+
+        final byte[] key = keyBuilder.getKey();
+
+        // Guard the index below so that an empty key is reported as a test
+        // failure rather than as an ArrayIndexOutOfBoundsException.
+        assertTrue("Not expecting an empty key.", key.length > 0);
+
+        // Compare the primitive values. assertNotSame() would box both bytes
+        // and compare object identity, which only gives the right answer
+        // because of the java.lang.Byte cache.
+        assertTrue("Not expecting a trailing nul byte.",
+                key[key.length - 1] != 0);
+
+    }
+
+    /**
+     * Verifies that no byte of the key generated for a unicode string
+     * appended to an {@link IKeyBuilder} is a <code>nul</code>.
+     * <p>
+     * Note: The {@link com.bigdata.sparse.SparseRowStore} assumes that Unicode
+     * sort keys do not contain embedded <code>nul</code>s.
+     */
+    public void test_keyBuilder_unicode_String_noEmbeddedNuls() {
+
+        final IKeyBuilder builder = KeyBuilder
+                .newUnicodeInstance(getProperties());
+
+        builder.append("Hello World!");
+
+        final byte[] key = builder.getKey();
+
+        // Fail on the first nul found anywhere in the key.
+        for (final byte b : key) {
+            if (b == 0) {
+                fail("Embedded nuls: key=" + BytesUtil.toString(key));
+            }
+        }
+
+    }
+
+    /**
+     * Test of the ability to normalize trailing pad characters: text values
+     * which differ only in what follows the last non-pad character must
+     * normalize to equal values, while text which also differs before that
+     * point must not.
+     * <p>
+     * NOTE(review): several of the string literals below look identical to
+     * one another as archived; runs of whitespace inside them may have been
+     * collapsed by the mail archive -- confirm against the repository copy.
+     */
+    public void test_keyBuilder_normalizeTrailingPadCharacters() {
+
+        final KeyBuilder keyBuilder = (KeyBuilder) KeyBuilder
+                .newUnicodeInstance(getProperties());
+
+        assertEquals(
+                keyBuilder.normalizeText(""),
+                keyBuilder.normalizeText(" "));
+        assertEquals(
+                keyBuilder.normalizeText(""),
+                keyBuilder.normalizeText(" "));
+        assertEquals(
+                keyBuilder.normalizeText(""),
+                keyBuilder.normalizeText(" "));
+        assertEquals(
+                keyBuilder.normalizeText(" "),
+                keyBuilder.normalizeText(" "));
+        assertEquals(
+                keyBuilder.normalizeText("abc"),
+                keyBuilder.normalizeText("abc "));
+        assertEquals(
+                keyBuilder.normalizeText(" abc"),
+                keyBuilder.normalizeText(" abc "));
+
+        // Must compare by value: assertNotSame() only checks reference
+        // identity and therefore passes for any two distinct objects, even
+        // when their contents are equal.
+        assertFalse(
+                keyBuilder.normalizeText("abc").equals(
+                        keyBuilder.normalizeText(" abc ")));
+
+    }
+
+    /**
+     * Verifies that very long strings are truncated: appending further
+     * characters to the text returned by
+     * <code>TestKeyBuilder.getMaximumLengthText()</code> must not change the
+     * normalized result.
+     *
+     * @todo verify that trailing whitespace is removed after truncation rather
+     *       than before truncation.
+     */
+    public void test_keyBuilder_normalizeTruncatesVeryLongStrings() {
+
+        final KeyBuilder builder = (KeyBuilder) KeyBuilder
+                .newUnicodeInstance(getProperties());
+
+        final String maxLengthText = TestKeyBuilder.getMaximumLengthText();
+
+        // The extra "abc" suffix must not survive normalization.
+        assertEquals(builder.normalizeText(maxLengthText),
+                builder.normalizeText(maxLengthText + "abc"));
+
+    }
+
+ /**
+ * Test verifies the order among unicode sort keys, including verifying that
+ * the pad byte causes a prefix such as "bro" to sort before a term which
+ * extends that prefix, such as "brown".
+ */
+ public void test_keyBuilder_unicode_order() {
+
+ final KeyBuilder keyBuilder = (KeyBuilder) KeyBuilder.newUnicodeInstance(getProperties());
+
+ final KVO<String>[] a = new KVO[] {
+
+ new KVO<String>(keyBuilder.asSortKey("bro"),null,"bro"),
+ new KVO<String>(keyBuilder.asSortKey("brown"),null,"brown"),
+ new KVO<String>(keyBuilder.asSortKey("bre"),null,"bre"),
+ new KVO<String>(keyBuilder.asSortKey("break"),null,"break"),
+
+ };
+
+ // sort by the assigned sort keys.
+ Arrays.sort(a);
+
+ /*
+ * verify that "bre(ak)" is before "bro(wn)" and that "bre" is before
+ * "break" and "bro" is before "brown".
+ */
+ assertEquals("bre", a[0].obj);
+ assertEquals("break", a[1].obj);
+ assertEquals("bro", a[2].obj);
+ assertEquals("brown", a[3].obj);
+
+ }
+
+    /**
+     * <p>
+     * Test that lexicographic order is maintained when a variable length
+     * Unicode field is followed by another field. This test works by comparing
+     * the original multi-field key with the multi-field key formed from the
+     * successor of the Unicode field followed by the other field:
+     * </p>
+     *
+     * <pre>
+     *
+     * [text][nextValue] LT [successor(text)][nextValue]
+     *
+     * </pre>
+     */
+    public void test_keyBuilder_multiField_unicode() {
+
+        final KeyBuilder keyBuilder = (KeyBuilder) KeyBuilder
+                .newUnicodeInstance(getProperties());
+
+        TestKeyBuilder.doMultiFieldTests(true/* unicode */, keyBuilder);
+
+        /*
+         * Now test a string that contains code points outside of the 8-bit
+         * range, using the same key builder.
+         */
+        final boolean unicode = true;
+
+        // Note: This is "Japanese" written in kanji and then in hiragana.
+        final String text = "\u65E5\u672C\u8A9E / \u306B\u307B\u3093\u3054";
+
+        // The values used for the field which follows the Unicode field.
+        final byte[] nextValues = { (byte) 0, (byte) 1, (byte) -1,
+                Byte.MIN_VALUE, Byte.MAX_VALUE };
+
+        for (final byte nextValue : nextValues) {
+            TestKeyBuilder.doMultiFieldTest(keyBuilder, unicode, text,
+                    nextValue);
+        }
+
+    }
+
+}
Modified: branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/TestICUUnicodeKeyBuilder.java
===================================================================
--- branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/TestICUUnicodeKeyBuilder.java 2010-07-23 18:36:43 UTC (rev 3275)
+++ branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/TestICUUnicodeKeyBuilder.java 2010-07-23 18:43:00 UTC (rev 3276)
@@ -31,7 +31,6 @@
import java.util.Locale;
import java.util.Properties;
-import com.bigdata.btree.AbstractUnicodeKeyBuilderTestCase;
import com.bigdata.btree.BytesUtil;
import com.bigdata.btree.keys.KeyBuilder.Options;
import com.ibm.icu.text.Collator;
@@ -39,15 +38,16 @@
/**
* Tests for Unicode support in {@link KeyBuilder}.
*
- * @todo test w/ and w/o the ICU integration (can be choosen at run time). Note
- * that some tests are specific to the ICU libraries at this time.
+ * @todo test w/ and w/o the ICU integration (can be chosen at run time via
+ * concrete subclasses). Note that some tests are specific to the ICU
+ * libraries at this time.
*
* @todo write performance test for encoding strings, possibly in the context of
* parsed rdf data, and see if there are any easy wins in how the encoding
* to a sort key is handled or in alignment of the apis.
*
* @todo compare performance of the ICU and JDK libraries in some application
- * contexts. compare performance of the JNI ICU library as well.
+ * contexts. compare performance of the JNI ICU library as well.
*
* @author <a href="mailto:tho...@us...">Bryan Thompson</a>
* @version $Id$
@@ -69,7 +69,7 @@
public Properties getProperties() {
- Properties properties = new Properties(super.getProperties());
+ final Properties properties = new Properties(super.getProperties());
properties.setProperty(Options.COLLATOR,CollatorEnum.ICU.toString());
@@ -79,11 +79,12 @@
public void test_correctCollator() {
- Properties properties = getProperties();
-
- log.info("properties="+properties);
-
- KeyBuilder keyBuilder = (KeyBuilder) KeyBuilder
+ final Properties properties = getProperties();
+
+ if (log.isInfoEnabled())
+ log.info("properties=" + properties);
+
+ final KeyBuilder keyBuilder = (KeyBuilder) KeyBuilder
.newUnicodeInstance(properties);
assertEquals(ICUSortKeyGenerator.class, keyBuilder
@@ -109,13 +110,13 @@
* Setup for US English.
*/
- Properties properties = new Properties();
+ final Properties properties = new Properties();
properties.setProperty(Options.USER_LANGUAGE, Locale.US.getLanguage());
properties.setProperty(Options.USER_COUNTRY, Locale.US.getCountry());
- int[] strengths = new int[] {
+ final int[] strengths = new int[] {
Collator.PRIMARY,
Collator.SECONDARY,
Collator.TERTIARY,
@@ -174,7 +175,7 @@
* @param collator
* The collator.
*
- * @return True iff the collector differenties between the string and its
+ * @return True iff the collator differentiates between the string and its
* successor (formed by appending a nul character) in its generated
* sort keys.
*/
Modified: branches/dev-btm/bigdata/src/test/com/bigdata/btree/keys/TestJDKUnicodeKeyBuilder.java
===========...
[truncated message content] |