From: <tho...@us...> - 2010-07-27 19:12:21
|
Revision: 3320 http://bigdata.svn.sourceforge.net/bigdata/?rev=3320&view=rev Author: thompsonbry Date: 2010-07-27 19:12:15 +0000 (Tue, 27 Jul 2010) Log Message: ----------- It turns out that the StoreManager for scale-out has a dependency on the specific class used to represent the performance counters on the DiskOnlyStrategy. I therefore had to restore the ability to select the DiskOnlyStrategy class (rather than WORMStrategy) using BufferMode.Disk. Modified Paths: -------------- trunk/bigdata/src/java/com/bigdata/journal/AbstractJournal.java trunk/bigdata/src/java/com/bigdata/resources/StoreManager.java Modified: trunk/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2010-07-27 18:51:29 UTC (rev 3319) +++ trunk/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2010-07-27 19:12:15 UTC (rev 3320) @@ -1027,33 +1027,33 @@ } -// case Disk: { -// -// /* -// * Setup the buffer strategy. -// */ -// -// fileMetadata = new FileMetadata(file, BufferMode.Disk, -// useDirectBuffers, initialExtent, maximumExtent, create, -// isEmptyFile, deleteOnExit, readOnly, forceWrites, -// offsetBits, //readCacheCapacity, readCacheMaxRecordSize, -// //readOnly ? null : writeCache, -// writeCacheEnabled, -// validateChecksum, -// createTime, checker, alternateRootBlock); -// -// _bufferStrategy = new DiskOnlyStrategy( -// 0L/* soft limit for maximumExtent */, -//// minimumExtension, -// fileMetadata); -// -// this._rootBlock = fileMetadata.rootBlock; -// -// break; -// -// } + case Disk: { - case Disk: + /* + * Setup the buffer strategy. + */ + + fileMetadata = new FileMetadata(file, BufferMode.Disk, + useDirectBuffers, initialExtent, maximumExtent, create, + isEmptyFile, deleteOnExit, readOnly, forceWrites, + offsetBits, //readCacheCapacity, readCacheMaxRecordSize, + //readOnly ? 
null : writeCache, + writeCacheEnabled, + validateChecksum, + createTime, checker, alternateRootBlock); + + _bufferStrategy = new DiskOnlyStrategy( + 0L/* soft limit for maximumExtent */, +// minimumExtension, + fileMetadata); + + this._rootBlock = fileMetadata.rootBlock; + + break; + + } + +// case Disk: case DiskWORM: { /* Modified: trunk/bigdata/src/java/com/bigdata/resources/StoreManager.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/resources/StoreManager.java 2010-07-27 18:51:29 UTC (rev 3319) +++ trunk/bigdata/src/java/com/bigdata/resources/StoreManager.java 2010-07-27 19:12:15 UTC (rev 3320) @@ -2446,7 +2446,8 @@ * the "historical" journals managed by this data service. * * FIXME Must also roll the counters forward for the other journal - * buffer strategies! + * buffer strategies! (The implementation class is different for the + * WORMStrategy, which is causing complications right now.) */ if (getBufferStrategy() instanceof DiskOnlyStrategy) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-07-30 00:37:57
|
Revision: 3366 http://bigdata.svn.sourceforge.net/bigdata/?rev=3366&view=rev Author: thompsonbry Date: 2010-07-30 00:37:51 +0000 (Fri, 30 Jul 2010) Log Message: ----------- Deprecated KeyBuilder.asSortKey(Object). This method is used heavily by the unit tests. However, is not suitable for the core code base for two reasons. First, it uses whatever the default KeyBuilder configuration happens to be which is perfectly Ok unless you are using Unicode sort key components in a key. Second, it uses a static instances protected by the monitor of that instance which causes the lock to be a bottleneck. The correct pattern is to use a thread-local or per task IKeyBuilder instance configured for a specific index or task. Modified Paths: -------------- trunk/bigdata/src/java/com/bigdata/bfs/BlobOverflowHandler.java trunk/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java Modified: trunk/bigdata/src/java/com/bigdata/bfs/BlobOverflowHandler.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/bfs/BlobOverflowHandler.java 2010-07-30 00:26:55 UTC (rev 3365) +++ trunk/bigdata/src/java/com/bigdata/bfs/BlobOverflowHandler.java 2010-07-30 00:37:51 UTC (rev 3366) @@ -6,8 +6,10 @@ import com.bigdata.btree.IOverflowHandler; import com.bigdata.btree.ITuple; +import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.io.DataOutputBuffer; +import com.bigdata.rawstore.Bytes; import com.bigdata.rawstore.IBlock; import com.bigdata.rawstore.IRawStore; @@ -33,7 +35,7 @@ } - DataOutputBuffer buf; + private transient DataOutputBuffer buf; public void close() { @@ -62,6 +64,8 @@ } + final IKeyBuilder keyBuilder = new KeyBuilder(Bytes.SIZEOF_LONG); + if (addr == 0L) { /* @@ -69,7 +73,7 @@ * their address. */ - return KeyBuilder.asSortKey(0L); + return keyBuilder.append(0L).getKey(); } @@ -143,7 +147,7 @@ } // the address of the block on the target store. 
- return KeyBuilder.asSortKey(addr2); + return keyBuilder.append(addr2).getKey(); } Modified: trunk/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-07-30 00:26:55 UTC (rev 3365) +++ trunk/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-07-30 00:37:51 UTC (rev 3366) @@ -1081,17 +1081,25 @@ * Note: This method is thread-safe. * <p> * Note: Strings are Unicode safe for the default locale. See - * {@link Locale#getDefault()}. If you require a specific local or - * different locals at different times or for different indices then you - * MUST provision and apply your own {@link KeyBuilder}. + * {@link Locale#getDefault()}. If you require a specific local or different + * locals at different times or for different indices then you MUST + * provision and apply your own {@link KeyBuilder}. * * @param val * An application key. * - * @return The unsigned byte[] equivilent of that key. This will be - * <code>null</code> iff the <i>key</i> is <code>null</code>. - * If the <i>key</i> is a byte[], then the byte[] itself will be - * returned. + * @return The unsigned byte[] equivalent of that key. This will be + * <code>null</code> iff the <i>key</i> is <code>null</code>. If the + * <i>key</i> is a byte[], then the byte[] itself will be returned. + * + * @deprecated This method circumvents explicit configuration of the + * {@link KeyBuilder} and is used nearly exclusively by unit + * tests. While explicit configuration is not required for keys + * which do not include Unicode sort key components, this method + * also relies on a single global {@link KeyBuilder} instance + * protected by a lock. That lock is therefore a bottleneck. The + * correct practice is to use thread-local or per task + * {@link IKeyBuilder}s to avoid lock contention. 
*/ @SuppressWarnings("unchecked") public static final byte[] asSortKey(Object val) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-08-02 14:48:19
|
Revision: 3390 http://bigdata.svn.sourceforge.net/bigdata/?rev=3390&view=rev Author: thompsonbry Date: 2010-08-02 14:48:13 +0000 (Mon, 02 Aug 2010) Log Message: ----------- Bug fix to DefaultKeyBuilderFactory, which was passing the default into properties.getProperty(key,def) and therefore was never testing System.getProperty(key,def). This was causing the ICU collator to always be chosen by this code path even when the collator had been explicitly set as a JVM property. Added final in a bunch of places to ICUSortKeyGenerator. Removed logic in the GlobalRowStoreHelper which was forcing the ASCII collator when the JDK collator had been requested due to a historical problem with the JDK collator and the global row store. In order to use the JDK collator you must now turn on unicode clean support in the SparseRowStore class. Since this breaks binary compatibility (for both the JDK and ICU collator options), I plan to do this as part of our next release. Improved the inline comments in KeyDecoder regarding the ArrayIndexOutOfBoundsException. 
Modified Paths: -------------- trunk/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java trunk/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java trunk/bigdata/src/java/com/bigdata/sparse/GlobalRowStoreHelper.java trunk/bigdata/src/java/com/bigdata/sparse/KeyDecoder.java Modified: trunk/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java 2010-08-02 12:54:49 UTC (rev 3389) +++ trunk/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java 2010-08-02 14:48:13 UTC (rev 3390) @@ -207,7 +207,7 @@ if (properties != null) { - val = properties.getProperty(key, def); + val = properties.getProperty(key);//, def); } Modified: trunk/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java 2010-08-02 12:54:49 UTC (rev 3389) +++ trunk/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java 2010-08-02 14:48:13 UTC (rev 3390) @@ -108,7 +108,7 @@ } - ICUSortKeyGenerator(Locale locale, Object strength, DecompositionEnum mode) { + ICUSortKeyGenerator(final Locale locale, final Object strength, final DecompositionEnum mode) { if (locale == null) throw new IllegalArgumentException(); @@ -132,7 +132,7 @@ } else { - StrengthEnum str = (StrengthEnum) strength; + final StrengthEnum str = (StrengthEnum) strength; if (log.isInfoEnabled()) log.info("strength=" + str); @@ -200,9 +200,9 @@ * Buffer is reused for each {@link String} from which a sort key is * derived. 
*/ - private RawCollationKey raw = new RawCollationKey(128); + final private RawCollationKey raw = new RawCollationKey(128); - public void appendSortKey(KeyBuilder keyBuilder, String s) { + public void appendSortKey(final KeyBuilder keyBuilder, final String s) { // RawCollationKey raw = collator.getRawCollationKey(s, null); Modified: trunk/bigdata/src/java/com/bigdata/sparse/GlobalRowStoreHelper.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/sparse/GlobalRowStoreHelper.java 2010-08-02 12:54:49 UTC (rev 3389) +++ trunk/bigdata/src/java/com/bigdata/sparse/GlobalRowStoreHelper.java 2010-08-02 14:48:13 UTC (rev 3390) @@ -103,24 +103,28 @@ indexMetadata .setSplitHandler(LogicalRowSplitHandler.INSTANCE); - if (CollatorEnum.JDK.toString().equals( - System.getProperty(KeyBuilder.Options.COLLATOR))) { - /* - * The JDK RulesBasedCollator embeds nul bytes in the - * Unicode sort keys. This makes them unsuitable for the - * SparseRowStore, which can not locate the start of the - * column name if there are embedded nuls in a Unicode - * primary key. As a work around, this forces an ASCII - * collation sequence if the JDK collator is the - * default. This is not ideal since non-ascii - * distinctions will be lost, but it is better than - * being unable to decode the column names. - */ - log.warn("Forcing ASCII collator."); - indexMetadata - .setTupleSerializer(new DefaultTupleSerializer( - new ASCIIKeyBuilderFactory())); - } +/* + * This is now handled by using the UTF8 encoding of the primary key regardless + * of the collator mode chosen (this fixes the problem with embedded nuls). + */ +// if (CollatorEnum.JDK.toString().equals( +// System.getProperty(KeyBuilder.Options.COLLATOR))) { +// /* +// * The JDK RulesBasedCollator embeds nul bytes in the +// * Unicode sort keys. 
This makes them unsuitable for the +// * SparseRowStore, which can not locate the start of the +// * column name if there are embedded nuls in a Unicode +// * primary key. As a work around, this forces an ASCII +// * collation sequence if the JDK collator is the +// * default. This is not ideal since non-ascii +// * distinctions will be lost, but it is better than +// * being unable to decode the column names. +// */ +// log.warn("Forcing ASCII collator."); +// indexMetadata +// .setTupleSerializer(new DefaultTupleSerializer( +// new ASCIIKeyBuilderFactory())); +// } // Register the index. indexManager.registerIndex(indexMetadata); Modified: trunk/bigdata/src/java/com/bigdata/sparse/KeyDecoder.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/sparse/KeyDecoder.java 2010-08-02 12:54:49 UTC (rev 3389) +++ trunk/bigdata/src/java/com/bigdata/sparse/KeyDecoder.java 2010-08-02 14:48:13 UTC (rev 3390) @@ -278,10 +278,23 @@ this.schemaBytesLength = schemaBytesLength; this.primaryKeyTypeOffset = schemaBytesLength; + + /* + * Note: ArrayIndexOutOfBounds with index==-1 is an indication that + * the schema name or a Unicode primary key contained embedded nul + * bytes. This should no longer be possible when using the unicode + * clean options on the SparseRowStore which encoded those data as + * UTF8 rather than as Unicode sort keys. Historically, these were + * encoded as Unicode sort keys. However, the JDK CollatorEnum + * option does not support compressed Unicode sort keys and embeds + * nul bytes in its generated sort keys. We rely on nul bytes as + * boundary markers when decoding the row store keys. The presence + * of those nul byte within the scheme and and/or the a Unicode + * primary key was causing the ArrayIndexOutOfBoundsException here. 
+ */ + this.primaryKeyType = KeyType.getKeyType(KeyBuilder + .decodeByte(key[primaryKeyTypeOffset])); - // note: ArrayIndexOutOfBounds with index==-1 means ICU library not on classpath! - this.primaryKeyType = KeyType.getKeyType(KeyBuilder.decodeByte(key[primaryKeyTypeOffset])); - } /* This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-03-14 14:24:59
|
Revision: 4293 http://bigdata.svn.sourceforge.net/bigdata/?rev=4293&view=rev Author: thompsonbry Date: 2011-03-14 14:24:52 +0000 (Mon, 14 Mar 2011) Log Message: ----------- Working on https://sourceforge.net/apps/trac/bigdata/ticket/193 (Problem moving journal between machines). DefaultTupleSerializer - deprecated a public method only used by the unit tests. DefaultKeyBuilderFactory - made logger private, added assert that Locale != null, moved error message into class where it is used. ICUSortKeyGenerator - made logger private, added toString() method to show the as-configured Collator state, added static methods to decode the StrengthEnum and DecompositionEnum from the ICU collator's self-reported values. JDKSortKeyGenerator - added toString() method to show the as-configured Collator state, added static methods to decode the StrengthEnum and DecompositionEnum from the JDK collator's self-reported values. Made some variables and method arguments final. KeyBuilder - made logger private, added toString() method which shows the state of the UnicodeSortKeyGenerator (ICU or JDK), moved error message here from the DefaultKeyBuilderFactory, javadoc edit to STRENGTH Option. StrengthEnum - javadoc edit. ThreadLocalKeyBuilderFactory - added toString() showing the state of the delegate IKeyBuilderFactory. DumpJournal - made logger private, added some logic to detect inconsistencies in the Name2Addr index. JournalMoveDiagnostic - added utility class which examines the Name2Addr index in depth looking for inconsistencies and trying to find an explanation for how the original Name2Addr index keys were generated. 
Modified Paths: -------------- trunk/bigdata/src/java/com/bigdata/btree/DefaultTupleSerializer.java trunk/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java trunk/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java trunk/bigdata/src/java/com/bigdata/btree/keys/JDKSortKeyGenerator.java trunk/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java trunk/bigdata/src/java/com/bigdata/btree/keys/StrengthEnum.java trunk/bigdata/src/java/com/bigdata/btree/keys/ThreadLocalKeyBuilderFactory.java trunk/bigdata/src/java/com/bigdata/journal/DumpJournal.java Added Paths: ----------- trunk/bigdata/src/java/com/bigdata/journal/JournalMoveDiagnostic.java Modified: trunk/bigdata/src/java/com/bigdata/btree/DefaultTupleSerializer.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/DefaultTupleSerializer.java 2011-03-14 12:44:07 UTC (rev 4292) +++ trunk/bigdata/src/java/com/bigdata/btree/DefaultTupleSerializer.java 2011-03-14 14:24:52 UTC (rev 4293) @@ -152,6 +152,9 @@ * Factory for a new instance using default values for the * {@link #getKeyBuilder()}, the {@link #getLeafKeysCoder()}, and the * {@link #getLeafValuesCoder()}. + * + * @deprecated This is only used by unit tests and they should do something + * explicit, even if it is encapsulated by a test helper method. 
*/ public static ITupleSerializer newInstance() { Modified: trunk/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java 2011-03-14 12:44:07 UTC (rev 4292) +++ trunk/bigdata/src/java/com/bigdata/btree/keys/DefaultKeyBuilderFactory.java 2011-03-14 14:24:52 UTC (rev 4293) @@ -51,12 +51,8 @@ */ public class DefaultKeyBuilderFactory implements IKeyBuilderFactory, Serializable { - protected static final transient Logger log = Logger.getLogger(DefaultKeyBuilderFactory.class); + private static final transient Logger log = Logger.getLogger(DefaultKeyBuilderFactory.class); -// protected static final transient boolean INFO = log.isInfoEnabled(); -// -// protected static final transient boolean log.isDebugEnabled() = log.isDebugEnabled(); - /** * */ @@ -313,7 +309,10 @@ if (language == null) { locale = Locale.getDefault(); - + + if( locale == null) + throw new AssertionError(); + } else { if (country == null) { @@ -423,12 +422,6 @@ } /** - * Text of the exception thrown when the ICU library is required but is not - * available. - */ - final public static String ICU_NOT_AVAILABLE = "The ICU library is not available."; - - /** * Figures out whether or not the ICU library is available. * * @return <code>true</code> iff the ICU library is available. 
Modified: trunk/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java 2011-03-14 12:44:07 UTC (rev 4292) +++ trunk/bigdata/src/java/com/bigdata/btree/keys/ICUSortKeyGenerator.java 2011-03-14 14:24:52 UTC (rev 4293) @@ -86,7 +86,8 @@ */ class ICUSortKeyGenerator implements UnicodeSortKeyGenerator { - protected static final Logger log = Logger.getLogger(ICUSortKeyGenerator.class); + transient private static final Logger log = Logger + .getLogger(ICUSortKeyGenerator.class); /** * Used to encode unicode strings into compact byte[]s that have the same @@ -167,6 +168,17 @@ } + } else { + + /* + * Note: This is the default strength per the ICU documentation. + */ + if (collator.getStrength() != Collator.TERTIARY) { + throw new AssertionError("Strength: " + collator.getStrength() + + ", but expected: " + Collator.TERTIARY); + } + //collator.setStrength(Collator.TERTIARY); + } if (mode != null) { @@ -192,6 +204,16 @@ } + } else { + /* + * Note: This is the default decomposition per the ICU documentation. + */ + if (collator.getDecomposition() != Collator.NO_DECOMPOSITION) { + throw new AssertionError("Decomposition: " + + collator.getDecomposition() + ", but expected: " + + Collator.NO_DECOMPOSITION); + } + //collator.setDecomposition(Collator.NO_DECOMPOSITION); } } @@ -213,4 +235,75 @@ } + /** + * Human readable representation, including the {@link Locale}, Strength, + * and Decomposition mode for the backing ICU collator. + */ + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(getClass().getName()); + sb.append("{locale=" + getLocale()); // Note: Not self-reported by Collator. 
+ sb.append(",strength=" + collator.getStrength() + "(" + + getStrength(collator.getStrength()) + ")"); + sb.append(",decomposition=" + collator.getDecomposition() + "(" + + getDecomposition(collator.getDecomposition()) + ")"); + sb.append("}"); + return sb.toString(); + } + + /** + * Decode an ICU collator strength, returning the corresponding type safe + * enumeration value. + * + * @param strength + * The ICU collator strength. + * + * @return The type safe enumeration value. + * + * @throws IllegalArgumentException + * if <i>strength</i> is not a known value. + */ + public static StrengthEnum getStrength(final int strength) { + switch (strength) { + case Collator.PRIMARY: + return StrengthEnum.Primary; + case Collator.SECONDARY: + return StrengthEnum.Secondary; + case Collator.TERTIARY: + return StrengthEnum.Tertiary; + case Collator.QUATERNARY: + return StrengthEnum.Quaternary; + case Collator.IDENTICAL: + return StrengthEnum.Identical; + default: + throw new IllegalArgumentException("Unknown value: " + strength); + } + } + + /** + * Decode an ICU collator decomposition mode, returning the corresponding type safe + * enumeration value. + * + * @param decomposition + * The ICU collator decomposition mode. + * + * @return The type safe enumeration value. + * + * @throws IllegalArgumentException + * if <i>decomposition</i> is not a known value. 
+ */ + public static DecompositionEnum getDecomposition(final int decomposition) { + switch (decomposition) { + case Collator.CANONICAL_DECOMPOSITION: + return DecompositionEnum.Canonical; + case Collator.NO_DECOMPOSITION: + return DecompositionEnum.None; + case Collator.FULL_DECOMPOSITION: + return DecompositionEnum.Full; + default: + throw new IllegalArgumentException("Unknown value: " + + decomposition); + } + } + } Modified: trunk/bigdata/src/java/com/bigdata/btree/keys/JDKSortKeyGenerator.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/keys/JDKSortKeyGenerator.java 2011-03-14 12:44:07 UTC (rev 4292) +++ trunk/bigdata/src/java/com/bigdata/btree/keys/JDKSortKeyGenerator.java 2011-03-14 14:24:52 UTC (rev 4293) @@ -26,8 +26,6 @@ import java.text.Collator; import java.util.Locale; - - /** * Implementation that uses the JDK library (does not support compressed sort * keys). @@ -74,7 +72,7 @@ } else { - StrengthEnum str = (StrengthEnum) strength; + final StrengthEnum str = (StrengthEnum) strength; switch (str) { @@ -133,7 +131,7 @@ } - public void appendSortKey(KeyBuilder keyBuilder, String s) { + public void appendSortKey(final KeyBuilder keyBuilder, final String s) { /* * Note: the collation key is expressed as signed bytes since that @@ -148,4 +146,69 @@ } + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(getClass().getName()); + sb.append("{locale=" + getLocale()); // Note: Not self-reported by Collator. + sb.append(",strength=" + collator.getStrength() + "(" + + getStrength(collator.getStrength()) + ")"); + sb.append(",decomposition=" + collator.getDecomposition() + "(" + + getDecomposition(collator.getDecomposition()) + ")"); + sb.append("}"); + return sb.toString(); + } + + /** + * Decode a JDK collator strength, returning the corresponding type safe + * enumeration value. + * + * @param strength + * The JDK collator strength. 
+ * + * @return The type safe enumeration value. + * + * @throws IllegalArgumentException + * if <i>strength</i> is not a known value. + */ + public static StrengthEnum getStrength(final int strength) { + switch (strength) { + case Collator.PRIMARY: + return StrengthEnum.Primary; + case Collator.SECONDARY: + return StrengthEnum.Secondary; + case Collator.TERTIARY: + return StrengthEnum.Tertiary; + case Collator.IDENTICAL: + return StrengthEnum.Identical; + default: + throw new IllegalArgumentException("Unknown value: " + strength); + } + } + + /** + * Decode a JDK collator decomposition mode, returning the corresponding + * type safe enumeration value. + * + * @param decomposition + * The JDK collator decomposition mode. + * + * @return The type safe enumeration value. + * + * @throws IllegalArgumentException + * if <i>decomposition</i> is not a known value. + */ + public static DecompositionEnum getDecomposition(final int decomposition) { + switch (decomposition) { + case Collator.CANONICAL_DECOMPOSITION: + return DecompositionEnum.Canonical; + case Collator.NO_DECOMPOSITION: + return DecompositionEnum.None; + case Collator.FULL_DECOMPOSITION: + return DecompositionEnum.Full; + default: + throw new IllegalArgumentException("Unknown value: " + + decomposition); + } + } + } Modified: trunk/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2011-03-14 12:44:07 UTC (rev 4292) +++ trunk/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2011-03-14 14:24:52 UTC (rev 4293) @@ -66,13 +66,16 @@ */ public class KeyBuilder implements IKeyBuilder { - protected static final Logger log = Logger.getLogger(KeyBuilder.class); - -// protected static final boolean INFO = log.isInfoEnabled(); + private static final transient Logger log = Logger + .getLogger(KeyBuilder.class); -// protected static final boolean DEBUG = log.isDebugEnabled(); - 
/** + * Text of the exception thrown when the ICU library is required but is not + * available. + */ + final private static transient String ERR_ICU_NOT_AVAILABLE = "The ICU library is not available."; + + /** * The default capacity of the key buffer. */ final public static int DEFAULT_INITIAL_CAPACITY = 1024; @@ -1595,6 +1598,8 @@ * While both libraries define <strong>IDENTICAL</strong> they use * different values for this strength, hence the use of the type safe * enums is recommended. + * + * @see StrengthEnum */ public String STRENGTH = KeyBuilder.class.getName()+".collator.strength"; @@ -1779,7 +1784,7 @@ * Windows and Un*x. */ - throw new UnsupportedOperationException(DefaultKeyBuilderFactory.ICU_NOT_AVAILABLE); + throw new UnsupportedOperationException(ERR_ICU_NOT_AVAILABLE); } @@ -1809,5 +1814,13 @@ } } - + + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(getClass().getName()); + sb.append("{sortKeyGenerator=" + getSortKeyGenerator()); + sb.append("}"); + return sb.toString(); + } + } Modified: trunk/bigdata/src/java/com/bigdata/btree/keys/StrengthEnum.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/keys/StrengthEnum.java 2011-03-14 12:44:07 UTC (rev 4292) +++ trunk/bigdata/src/java/com/bigdata/btree/keys/StrengthEnum.java 2011-03-14 14:24:52 UTC (rev 4293) @@ -6,7 +6,8 @@ * Type safe enumeration for the strength. * <p> * Note: ICU and the JDK use different integer constants for the - * #IDENTICAL strength + * <code>IDENTICAL</code> strength. The appropriate integer constant will be + * used in each case if you specify the symbolic value {@link #Identical}. 
* * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ Modified: trunk/bigdata/src/java/com/bigdata/btree/keys/ThreadLocalKeyBuilderFactory.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/keys/ThreadLocalKeyBuilderFactory.java 2011-03-14 12:44:07 UTC (rev 4292) +++ trunk/bigdata/src/java/com/bigdata/btree/keys/ThreadLocalKeyBuilderFactory.java 2011-03-14 14:24:52 UTC (rev 4293) @@ -76,4 +76,10 @@ } + public String toString() { + + return getClass().getName() + "{" + delegate + "}"; + + } + } Modified: trunk/bigdata/src/java/com/bigdata/journal/DumpJournal.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/journal/DumpJournal.java 2011-03-14 12:44:07 UTC (rev 4292) +++ trunk/bigdata/src/java/com/bigdata/journal/DumpJournal.java 2011-03-14 14:24:52 UTC (rev 4293) @@ -39,6 +39,7 @@ import com.bigdata.btree.AbstractBTree; import com.bigdata.btree.BTree; import com.bigdata.btree.BytesUtil; +import com.bigdata.btree.DefaultTupleSerializer; import com.bigdata.btree.IIndex; import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleIterator; @@ -48,7 +49,6 @@ import com.bigdata.relation.IRelation; import com.bigdata.relation.RelationSchema; import com.bigdata.sparse.GlobalRowStoreHelper; -import com.bigdata.sparse.GlobalRowStoreSchema; import com.bigdata.sparse.ITPS; import com.bigdata.sparse.ITPV; import com.bigdata.sparse.Schema; @@ -84,7 +84,7 @@ */ public class DumpJournal { - protected static final Logger log = Logger.getLogger(DumpJournal.class); + private static final Logger log = Logger.getLogger(DumpJournal.class); // protected static final boolean INFO = log.isInfoEnabled(); @@ -378,19 +378,58 @@ } } + + System.err.println(name2Addr.getIndexMetadata().getTupleSerializer().toString()); // the named indices final ITupleIterator<?> itr = name2Addr.rangeIterator(); while (itr.hasNext()) { - // a registered 
index. + final ITuple<?> tuple = itr.next(); + + /* + * A registered index. Entry.name is the actual name for the + * index and is serialized using Java default serialization as a + * String. The key for the entry in the Name2Addr index should + * be the Unicode sort key for Entry.name. That Unicode sort key + * should be generated by the collation rules as defined by the + * IndexMetadata record for the Name2Addr index. + */ final Name2Addr.Entry entry = Name2Addr.EntrySerializer.INSTANCE - .deserialize(itr.next().getValueStream()); + .deserialize(tuple.getValueStream()); + /* + * Using the TupleSerializer for the Name2Addr index, generate the + * Unicode sort key for Entry.name. This *should* be the same as the + * unsigned byte[] key for the tuple in the Name2Addr index. If it + * is NOT the same, then there is a problem with the preservation of + * the Unicode collation rules such that the same input string + * (Entry.name) is resulting in a different unsigned byte[] key. If + * this happens, then the indices can appear to become "lost" + * because the "spelling rules" for the Name2Addr index have + * changed. + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/193 + */ + final byte[] b = name2Addr.getIndexMetadata().getTupleSerializer().serializeKey(entry.name); + System.err.println("name=" + entry.name + ", addr=" + journal.toString(entry.checkpointAddr)); + if(!BytesUtil.bytesEqual(b, tuple.getKey())) { + /* + * The Name2Addr index has an entry which we will be unable to + * locate when given the name of the index because the generated + * unsigned byte[] key is NOT the same as the unsigned byte[] + * key under which the Entry is stored in the index. + */ + System.err.println("ERROR : Name2Addr inconsistent: Entry.name="+entry.name); + System.err.println("tuple : "+BytesUtil.toString(tuple.getKey())); + System.err.println("recode: "+BytesUtil.toString(b)); + System.err.println("-----"); + } + // load B+Tree from its checkpoint record. 
final BTree ndx; try { Added: trunk/bigdata/src/java/com/bigdata/journal/JournalMoveDiagnostic.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/journal/JournalMoveDiagnostic.java (rev 0) +++ trunk/bigdata/src/java/com/bigdata/journal/JournalMoveDiagnostic.java 2011-03-14 14:24:52 UTC (rev 4293) @@ -0,0 +1,457 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Mar 13, 2011 + */ + +package com.bigdata.journal; + +import java.io.File; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Locale; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.BytesUtil; +import com.bigdata.btree.IIndex; +import com.bigdata.btree.ITuple; +import com.bigdata.btree.ITupleIterator; +import com.bigdata.btree.keys.CollatorEnum; +import com.bigdata.btree.keys.DecompositionEnum; +import com.bigdata.btree.keys.DefaultKeyBuilderFactory; +import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.IKeyBuilderFactory; +import com.bigdata.btree.keys.KeyBuilder; +import 
com.bigdata.btree.keys.StrengthEnum; + +/** + * A diagnostic utility for problems with Unicode collation issues which can + * appear when a journal is moved to another machine. This utility is designed + * to be run on both the source machine and the target machine. It reports back + * specific key values from the {@link Name2Addr} index, metadata about the + * Unicode collation rules in use for that index, and metadata about the + * {@link Locale} as self-reported by the JVM. This information is intended + * for analysis in support of a trouble ticket. + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/193 + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class JournalMoveDiagnostic { + + private static final Logger log = Logger + .getLogger(JournalMoveDiagnostic.class); + + /** + * You must specify the name of the Journal file. In addition, you may + * specify one or more index names. If no index names are specified, it will + * report metadata for all Name2Addr entries. + * + * @param args + * <code> + * journalFile (indexName)* + * </code> + */ + public static void main(final String[] args) { + + if (args.length == 0) { + + System.err + .println("usage: <filename> (indexName)*"); + + System.exit(1); + + } + + final File journalFile = new File(args[0]); + + if(!journalFile.exists()) { + + System.err.println("Not found: "+journalFile); + + System.exit(1); + + } + + { + + System.err.println("Default Locale: " + dumpLocale(Locale.getDefault())); + +// for (Locale tmp : Locale.getAvailableLocales()) +// System.err.println("Available Locale: " + tmp);\ + + } + + // collect the set of index names on which we will report. 
+ final Set<String> indexNames = new LinkedHashSet<String>(); + + for (int i = 1; i < args.length; i++) { + + indexNames.add(args[i]); + + } + + final Properties properties = new Properties(); + + { + + properties.setProperty(Options.FILE, journalFile.toString()); + + properties.setProperty(Options.READ_ONLY, "" + true); + + // FIXME We should auto-discover this from the root blocks! + properties.setProperty(Options.BUFFER_MODE,BufferMode.Disk.toString()); + + } + + System.err.println("Opening (read-only): " + journalFile); + + final Journal jnl = new Journal(properties); + + try { + + dumpName2Addr(jnl, indexNames, jnl.getLastCommitTime()); + + } finally { + + jnl.shutdownNow(); + + } + + } + + /** + * Dump out all data associated with the {@link Locale}. + * + * @param l + * The {@link Locale}. + * + * @return A string representation of its data. + */ + private static final String dumpLocale(final Locale l) { + + final StringBuilder sb = new StringBuilder(); + + sb.append("\n Locale : [" + l + "]"); + sb.append("\n Country : [" + l.getCountry() + "]"); + sb.append("\n Language : [" + l.getLanguage() + "]"); + sb.append("\n Variant : [" + l.getVariant() + "]"); + sb.append("\n ISO3 Country : [" + l.getISO3Country() + "]"); + sb.append("\n ISO3 Language: [" + l.getISO3Language() + "]"); + sb.append("\n"); + + return sb.toString(); + + } + + /** + * Dump out some detailed information about the {@link Name2Addr} index, the + * manner in which it should be encoding Unicode Strings into unsigned + * byte[] keys, and, for each named index, the actual index name, the actual + * unsigned byte[] key found in the Name2Addr index, and the unsigned byte[] + * key under which the machine on which this utility is running would + * attempt to resolve the index name - this last key SHOULD be the same as + * the key under which the index entry was found. 
If it is NOT the same then + * this indicates an error in the way in which the keys are being generated + * from the index names. Information is written onto stderr. + * + * @param jnl + * The journal. + * @param indexNames + * The name of one or more indices on which the per-index + * metadata will be reported. + * @param timestamp + * The timestamp of the commit record for which this information + * will be reported. + */ + private static final void dumpName2Addr(final Journal jnl, + final Set<String> indexNames, final long timestamp) { + + final IIndex name2Addr = jnl.getName2Addr(timestamp); + + // The key builder actually used by Name2Addr. + final IKeyBuilder theKeyBuilder; + // A key builder from the default factory. + final IKeyBuilder aKeyBuilder; + { + /* + * Show the key builder factory that the Name2Addr instance is + * actually using (this shows the tupleSerializer, but that shows + * the key builder factory which is what we really care about). + */ + theKeyBuilder = name2Addr.getIndexMetadata().getKeyBuilder(); + + /* + * A key builder factory as it would be configured on this machine + * for a new Name2Addr index, e.g., if we created a new Journal. + */ + final IKeyBuilderFactory aKeyBuilderFactory = new DefaultKeyBuilderFactory( + new Properties()); + + System.err.println("KeyBuilderFactory if created new:\n" + + aKeyBuilderFactory); + + /* + * A key builder generated by that factory. This key builder should + * have the same behavior that we observe for Name2Addr IF the + * KeyBuilderFactory inherits the same Locale, [collator], + * [strength], and [decompositionMode] attributes which were used to + * create the Journal. 
Differences in Locale (e.g., language), + * collator (e.g., JDK versus ICU), strength (e.g., IDENTICAL vs + * PRIMARY), or decompositionMode (e.g., None versus Full) can all + * cause the unsigned byte[] keys generated by this key builder to + * differ from those generated on the machine where (and when) the + * journal was originally created. + */ + aKeyBuilder = aKeyBuilderFactory.getKeyBuilder(); + + System.err.println("Name2Addr effective key builder:\n" + + theKeyBuilder); + + System.err.println("Name2Addr if-new key builder:\n" + + aKeyBuilder); + } + + // Names of indices and the #of times they were found. + final Map<String, AtomicInteger> dups = new LinkedHashMap<String, AtomicInteger>(); + + // the named indices + final ITupleIterator<?> itr = name2Addr.rangeIterator(); + + while (itr.hasNext()) { + + final ITuple<?> tuple = itr.next(); + + /* + * A registered index. Entry.name is the actual name for the index + * and is serialized using Java default serialization as a String. + * The key for the entry in the Name2Addr index should be the + * Unicode sort key for Entry.name. That Unicode sort key should be + * generated by the collation rules as defined by the IndexMetadata + * record for the Name2Addr index. + */ + final Name2Addr.Entry entry = Name2Addr.EntrySerializer.INSTANCE + .deserialize(tuple.getValueStream()); + + // Track #of times we visit an index having this name. + { + + AtomicInteger tmp = dups.get(entry.name); + + if (tmp == null) { + + dups.put(entry.name, tmp = new AtomicInteger(0)); + + } + + tmp.incrementAndGet(); + + } + + if (!indexNames.isEmpty() && !indexNames.contains(entry.name)) { + /* + * A specific set of index names was given and this is not one + * of those indices. + */ + continue; + } + + System.err.println("-----"); + + System.err.println("Considering: " + tuple); + + /* + * The actual unsigned byte[] under which the Name2Addr entry is + * indexed. 
+ */ + final byte[] theKey = tuple.getKey(); + + /* + * Using the TupleSerializer for the Name2Addr index, generate the + * Unicode sort key for Entry.name. This *should* be the same as the + * unsigned byte[] key for the tuple in the Name2Addr index. If it + * is NOT the same, then there is a problem with the preservation of + * the Unicode collation rules such that the same input string + * (Entry.name) is resulting in a different unsigned byte[] key. If + * this happens, then the indices can appear to become "lost" + * because the "spelling rules" for the Name2Addr index have + * changed. + * + * @see https://sourceforge.net/apps/trac/bigdata/ticket/193 + */ + final byte[] b = name2Addr.getIndexMetadata().getTupleSerializer() + .serializeKey(entry.name); + final byte[] b2 = theKeyBuilder.reset().append(entry.name).getKey(); + if(!BytesUtil.bytesEqual(b, b2)) { + System.err.println("ERROR: tupleSer and keyBuilder do not agree"); + } + +// /* +// * This uses the key builder which would be created for a new +// * Name2Addr instance on this host. +// */ +// final byte[] c = aKeyBuilder.reset().append(entry.name).getKey(); + + System.err.println("name=" + entry.name); + + System.err.println("tuple : " + BytesUtil.toString(theKey)); + + final boolean consistent = BytesUtil.bytesEqual(theKey, b); + +// final boolean consistent2 = BytesUtil.bytesEqual(theKey,c); + + if (!consistent) { + /* + * The Name2Addr index has an entry which we will be unable to + * locate when given the name of the index because the generated + * unsigned byte[] key is NOT the same as the unsigned byte[] + * key under which the Entry is stored in the index. + */ + System.err.println("recode: " + BytesUtil.toString(b)); + System.err.println("ERROR : Name2Addr inconsistent for [" + + entry.name + "]"); + searchForConsistentConfiguration(entry.name, theKey); + } +// if (!consistent2) { +// /* +// * @todo javadoc. 
+// */ +// System.err.println("recod2: " + BytesUtil.toString(c)); +// System.err.println("ERROR : Name2Addr inconsistent for [" +// + entry.name + "]"); +// } + + } + + System.err.println("\n==========="); + + /* + * Show any indices which have more than one entry. There is + * an encoding problem for the names of any such indices. + */ + for (Map.Entry<String, AtomicInteger> e : dups.entrySet()) { + + if (e.getValue().get() != 1) { + + System.err.println("ERROR: name=[" + e.getKey() + "] has " + + e.getValue().get() + " Name2Addr entries."); + + } + + } + + } // dumpName2Addr + + /** + * Search for a configuration of an {@link IKeyBuilderFactory} which is + * consistent with the given key when encoding the given string into an + * unsigned byte[]. + * + * @param str + * The given string. + * @param expected + * The given key. + */ + private static void searchForConsistentConfiguration(final String str, + final byte[] expected) { + +// final byte[] expected = keyBuilder.reset().append(str).getKey(); + + // To test all. + final Locale[] locales = Locale.getAvailableLocales(); + // To test just the default locale. +// final Locale[] locales = new Locale[]{Locale.getDefault()}; + + int nconsistent = 0; + + // Consider each Locale + for(Locale l : locales) { + + // Consider all Collator implementations (JDK, ICU, ICU4JNI) + for(CollatorEnum c : CollatorEnum.values()) { + + // Consider all Collator strengths. + for(StrengthEnum s : StrengthEnum.values()) { + + // Consider all Collator decomposition modes. + for(DecompositionEnum d : DecompositionEnum.values()) { + + // Setup the collator. 
+ final Properties p = new Properties(); + p.setProperty(KeyBuilder.Options.USER_COUNTRY, l.getCountry()); + p.setProperty(KeyBuilder.Options.USER_LANGUAGE, l.getLanguage()); + p.setProperty(KeyBuilder.Options.USER_VARIANT, l.getVariant()); + p.setProperty(KeyBuilder.Options.COLLATOR, c.toString()); + p.setProperty(KeyBuilder.Options.STRENGTH, s.toString()); + p.setProperty(KeyBuilder.Options.DECOMPOSITION, d.toString()); + + final IKeyBuilderFactory f; + final IKeyBuilder tmp; + try { + f = new DefaultKeyBuilderFactory(p); + tmp = f.getKeyBuilder(); + } catch (IllegalArgumentException t) { + if (log.isDebugEnabled()) + log.debug("Illegal configuration: " + t); + continue; + } catch (UnsupportedOperationException t) { + if (log.isDebugEnabled()) + log.debug("Illegal configuration: " + t); + continue; + } + + final byte[] actual = tmp.reset().append(str).getKey(); + + if (BytesUtil.bytesEqual(expected, actual)) { + + System.out + .println("Consistent configuration: " + p); + + nconsistent++; + + } + + } + + } + + } + + } + + if (nconsistent == 0) { + + System.err.println("No consistent configuration was found."); + + } + + } // searchForConsistentConfiguration() + +} Property changes on: trunk/bigdata/src/java/com/bigdata/journal/JournalMoveDiagnostic.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-05-04 18:59:11
|
Revision: 4447 http://bigdata.svn.sourceforge.net/bigdata/?rev=4447&view=rev Author: thompsonbry Date: 2011-05-04 18:59:04 +0000 (Wed, 04 May 2011) Log Message: ----------- Modified the UnisolatedReadWriteIndex constructor to strongly type the parameter as a BTree and modified AbstractRelation to explicitly cast to a BTree when invoking the UnisolatedReadWriteIndex ctor. This change already exists in the QUADS branch. Modified Paths: -------------- trunk/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java trunk/bigdata/src/java/com/bigdata/relation/AbstractRelation.java Modified: trunk/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java 2011-05-04 15:08:38 UTC (rev 4446) +++ trunk/bigdata/src/java/com/bigdata/btree/UnisolatedReadWriteIndex.java 2011-05-04 18:59:04 UTC (rev 4447) @@ -293,7 +293,7 @@ * @throws IllegalArgumentException * if the index is <code>null</code>. */ - public UnisolatedReadWriteIndex(IIndex ndx) { + public UnisolatedReadWriteIndex(BTree ndx) { this(ndx, DEFAULT_CAPACITY); @@ -329,7 +329,7 @@ * the computed solutions onto the relations. It is likely that a * read-write lock will do well for this situation. 
*/ - public UnisolatedReadWriteIndex(final IIndex ndx, final int capacity) { + public UnisolatedReadWriteIndex(final BTree ndx, final int capacity) { if (ndx == null) throw new IllegalArgumentException(); Modified: trunk/bigdata/src/java/com/bigdata/relation/AbstractRelation.java =================================================================== --- trunk/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2011-05-04 15:08:38 UTC (rev 4446) +++ trunk/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2011-05-04 18:59:04 UTC (rev 4447) @@ -31,6 +31,7 @@ import java.util.Properties; import java.util.UUID; +import com.bigdata.btree.BTree; import com.bigdata.btree.IIndex; import com.bigdata.btree.IndexMetadata; import com.bigdata.btree.UnisolatedReadWriteIndex; @@ -159,7 +160,7 @@ } - ndx = new UnisolatedReadWriteIndex(ndx); + ndx = new UnisolatedReadWriteIndex((BTree) ndx); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |