From: <tho...@us...> - 2011-06-28 16:31:47
|
Revision: 4810 http://bigdata.svn.sourceforge.net/bigdata/?rev=4810&view=rev Author: thompsonbry Date: 2011-06-28 16:31:38 +0000 (Tue, 28 Jun 2011) Log Message: ----------- Working on the htree Modified Paths: -------------- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HTree.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashFunction.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/MutableDirectoryPageData.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractMockNodeData.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/htree/TestExtensibleHashing.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/htree/TestHTree.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java Added Paths: ----------- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/raba/MutableKeyBuffer.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/htree/raba/ branches/TERMS_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/htree/raba/TestAll.java branches/TERMS_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/htree/raba/TestMutableKeyBuffer.java Removed Paths: ------------- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java Modified: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java 2011-06-28 16:15:49 UTC (rev 4809) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -32,10 +32,16 @@ import java.io.ObjectOutput; import java.util.UUID; +import com.bigdata.btree.IndexMetadata; +import com.sun.org.apache.xml.internal.utils.Hashtree2Node; + /** * Configuration options. * * @todo Reconcile with IndexMetadata. + * + * @deprecated with the {@link HashTree} but we still need to reconcile the + * {@link HTree} with {@link IndexMetadata}. */ public class HTableMetadata implements Externalizable { Modified: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HTree.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HTree.java 2011-06-28 16:15:49 UTC (rev 4809) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HTree.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -45,14 +45,13 @@ import com.bigdata.btree.ITupleSerializer; import com.bigdata.btree.Leaf; import com.bigdata.btree.LeafTupleIterator; -import com.bigdata.btree.MutableLeafData; import com.bigdata.btree.Node; import com.bigdata.btree.PO; import com.bigdata.btree.data.DefaultLeafCoder; import com.bigdata.btree.data.IAbstractNodeData; import com.bigdata.btree.data.ILeafData; import com.bigdata.btree.raba.IRaba; -import com.bigdata.btree.raba.MutableKeyBuffer; +import com.bigdata.htree.raba.MutableKeyBuffer; import com.bigdata.btree.raba.MutableValueBuffer; import com.bigdata.cache.HardReferenceQueue; import com.bigdata.htree.data.IDirectoryData; @@ -80,6 +79,10 @@ * hash tree. You have to use an order preserving hash function, which * is external to the HTree implementation. Internally, the HTree must * either double-link the pages or crawl the directory structure. + * + * TODO The keys should be declared as a computed key based on the data + * fields in the record. The {@link HTree} supports arbitrary bit + * length keys, but can be optimized for int32 keys easily enough. */ public class HTree extends AbstractHTree // implements @@ -185,7 +188,12 @@ * an expected page size of 8k before compression. */ public HTree(final IRawStore store, final int addressBits) { + this(store, addressBits, true/* rawRecords */); + } + public HTree(final IRawStore store, final int addressBits, + final boolean rawRecords) { + // super(store, nodeFactory, readOnly, addressBits, metadata, recordCompressorFactory); super(store, false/*readOnly*/, addressBits); @@ -194,7 +202,7 @@ // @todo from IndexMetadata this.versionTimestamps = false; this.deleteMarkers = false; - this.rawRecords = true; + this.rawRecords = rawRecords; /* * The initial setup of the hash tree is a root directory page whose @@ -618,8 +626,9 @@ * if the parent of the <i>oldChild</i> is not the given * <i>parent</i>. */ - private void splitBucketsOnPage(final DirectoryPage parent, - final int buddyOffset, final BucketPage oldChild) { + // Note: package private for unit tests. + void splitBucketsOnPage(final DirectoryPage parent, final int buddyOffset, + final BucketPage oldChild) { if (parent == null) throw new IllegalArgumentException(); @@ -866,11 +875,17 @@ + dstBuddyIndex + ")" + ", slot(" + srcSlot + "=>" + dstSlot + ")"); - // Move the tuple at that slot TODO move metadata also. + // Move the tuple at that slot TODO move metadata also. + if(srcKeys.keys[srcSlot] == null) + continue; dstKeys.keys[dstSlot] = srcKeys.keys[srcSlot]; dstVals.values[dstSlot] = srcVals.values[srcSlot]; srcKeys.keys[srcSlot] = null; srcVals.values[srcSlot] = null; + dstKeys.nkeys++; // one more in that page. + srcKeys.nkeys--; // one less in this page. + dstVals.nvalues++; // one more in that page. + srcVals.nvalues--; // one less in this page. } @@ -935,10 +950,13 @@ + "=>" + dstSlot + ")"); // Move the tuple at that slot TODO move metadata also. + if(srcKeys.keys[srcSlot] == null) + continue; dstKeys.keys[dstSlot] = srcKeys.keys[srcSlot]; dstVals.values[dstSlot] = srcVals.values[srcSlot]; srcKeys.keys[srcSlot] = null; srcVals.values[srcSlot] = null; + // Note: movement within same page: nkeys/nvals don't change } @@ -947,7 +965,7 @@ } } - + /** * Adds a new {@link DirectoryPage} when we need to split a child but * <code>globalDepth == localDepth</code>. The caller must retry the insert @@ -991,8 +1009,8 @@ * <pre> * root := [2] (d,d,b,b) * d := [1] (a,a;c,c) // two ptrs to (d) so 2 buddies on the page - * a := [0] (1,2;3,4) // depth changes since now 2 ptrs to (a) - * c := [0] (-,-;-,-) // depth changes since now 2 ptrs to (c) + * a := [0] (1;2;3;4) // depth changes since now 2 ptrs to (a) + * c := [0] (-;-;-;-) // depth changes since now 2 ptrs to (c) * b := [1] (-,-;-,-) * </pre> * @@ -2048,10 +2066,10 @@ static class BucketPage extends AbstractPage implements ILeafData { // TODO IBucketData /** - * The data record. {@link MutableLeafData} is used for all mutation + * The data record. {@link MutableBucketData} is used for all mutation * operations. {@link ReadOnlyLeafData} is used when the {@link Leaf} is * made persistent. A read-only data record is automatically converted into - * a {@link MutableLeafData} record when a mutation operation is requested. + * a {@link MutableBucketData} record when a mutation operation is requested. * <p> * Note: This is package private in order to expose it to {@link Node}. */ @@ -2158,14 +2176,12 @@ super(htree, true/* dirty */, globalDepth); - // TODO keysRaba must allow nulls! - data = new MutableLeafData(// + data = new MutableBucketData(// (1<<htree.addressBits), // fan-out htree.versionTimestamps,// htree.deleteMarkers,// htree.rawRecords// ); -// new MutableBucketData(data) } @@ -2351,29 +2367,26 @@ // TODO if(!mutable) copyOnWrite().insert(key,val,parent,buddyOffset); - /* - * Locate the first unassigned tuple in the buddy bucket. - * - * Note: Given the IRaba data structure, this will require us to - * examine the keys for a null. The "keys" rabas do not allow nulls, - * so we will need to use a "values" raba (nulls allowed) for the - * bucket keys. Unless we keep the entries in a buddy bucket dense - * (maybe making them dense when they are persisted for faster - * scans, but why bother for mutable buckets?) we will have to scan - * the entire buddy bucket to find an open slot (or just to count - * the #of slots which are currently in use). - * - * FIXME We need a modified MutableKeysRaba for this purpose. It - * will have to allow nulls in the middle (unless we compact - * everything). [We will have to compact prior to coding in order - * for the assumptions of the values coder to be maintained.] It - * would be pretty easy to just swap in the first undeleted tuple - * but that will not keep things dense. Likewise, Monet style - * cracking could only be done within a buddy bucket rather than - * across all buddies on a page. - */ - final MutableKeyBuffer keys = (MutableKeyBuffer)getKeys(); - final MutableValueBuffer vals = (MutableValueBuffer)getValues(); + /* + * Locate the first unassigned tuple in the buddy bucket. + * + * Note: Given the IRaba data structure, this will require us to + * examine the keys for a null. The "keys" rabas do not allow nulls, + * so we will need to use a "values" raba (nulls allowed) for the + * bucket keys. Unless we keep the entries in a buddy bucket dense + * (maybe making them dense when they are persisted for faster + * scans, but why bother for mutable buckets?) we will have to scan + * the entire buddy bucket to find an open slot (or just to count + * the #of slots which are currently in use). + * + * FIXME The current raba coders assume that the keys and values are + * compact. That assumption is violate for an htree since the buddy + * buckets are inherently sparse. We can not really compact a bucket + * page unless we change its coding to indicate the run length of + * the non-null tuples within each buddy bucket. + */ + final MutableKeyBuffer keys = (MutableKeyBuffer) getKeys(); + final MutableValueBuffer vals = (MutableValueBuffer) getValues(); for (int i = buddyOffset; i < lastSlot; i++) { if (keys.isNull(i)) { Modified: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2011-06-28 16:15:49 UTC (rev 4809) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -84,6 +84,8 @@ * During a bulk index build, the raw record must be copied to the target * index store, e.g., an {@link IndexSegment} using an * {@link IOverflowHandler}. + * + * @deprecated with the {@link HashTree}. */ public class HashBucket extends AbstractHashPage<HashBucket>// // implements IBucketData// Modified: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2011-06-28 16:15:49 UTC (rev 4809) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -43,6 +43,8 @@ /** * A simple (flat) directory for an extensible hashing. + * + * @deprecated with the {@link HashTree}. */ public class HashDirectory extends AbstractHashPage<HashDirectory> { Modified: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashFunction.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashFunction.java 2011-06-28 16:15:49 UTC (rev 4809) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashFunction.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -49,6 +49,10 @@ * * @todo The hash function should be a configuration option store with the index * metadata. + * + * @deprecated with {@link HTree}. what we really need is a computed key based + * on the data fields in the record. that key can be any bit length + * as the {@link HTree} supports arbitrary bit length keys. */ public interface HashFunction { Modified: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java 2011-06-28 16:15:49 UTC (rev 4809) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -111,6 +111,8 @@ * * @todo Integrate a ring buffer for retention of frequently accessed pages per * the weak reference policy observed by the BTree with touch() + * + * @deprecated This has been replaced by the {@link HTree}. */ public class HashTree implements ISimpleBTree // @todo rename ISimpleBTree interface Deleted: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java 2011-06-28 16:15:49 UTC (rev 4809) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -1,32 +0,0 @@ -package com.bigdata.htree; - -import com.bigdata.btree.ITuple; - -/** - * Extended interface to support hash buckets. - * - * @author tho...@us... - * - * @param <E> - * - * @todo The reason for having this on ITuple is to make it practical for the - * hash code to be defined in terms of application specific data types - * rather than the unsigned byte[] key (but the latter could of course be - * decoded by the hash function before computing the hash of the - * application data type, except for things like Unicode keys). - * <p> - * This should probably be lifted onto {@link ITuple} and - * {@link #getKeyHash()} should be declared to throw an - * {@link UnsupportedOperationException} if the hash code of the key is - * not being stored. - */ -public interface IHashTuple<E> extends ITuple<E> { - -// int getHashBitLength(); - - /** - * The int32 hash value of the key. - */ - int getKeyHash(); - -} Modified: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java 2011-06-28 16:15:49 UTC (rev 4809) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -27,30 +27,30 @@ package com.bigdata.htree; -import java.util.Iterator; - -import cern.colt.map.OpenIntIntHashMap; - +import com.bigdata.btree.AbstractBTree; import com.bigdata.btree.ITuple; +import com.bigdata.btree.IndexSegment; import com.bigdata.btree.MutableLeafData; +import com.bigdata.btree.data.ILeafData; import com.bigdata.btree.raba.IRaba; +import com.bigdata.btree.raba.MutableValueBuffer; import com.bigdata.htree.data.IBucketData; +import com.bigdata.htree.raba.MutableKeyBuffer; import com.bigdata.io.AbstractFixedByteArrayBuffer; -import com.bigdata.io.ByteArrayBuffer; import com.bigdata.io.IDataRecord; -import com.bigdata.rawstore.Bytes; +import com.bigdata.rawstore.IRawStore; /** * Implementation maintains Java objects corresponding to the persistent data * and defines methods for a variety of mutations on the {@link IBucketData} - * record which operate by direct manipulation of the Java objects. + * record which operate by direct manipulation of the Java objects. The bucket + * page is logically divided into buddy hash buckets. Unlike a B+Tree, the + * tuples are neither ordered nor dense and the same key MAY appear multiple + * times within a given buddy bucket. * <p> - * Note: The primary differences between maintaining a bucket and maintaining a - * B+Tree leaf are that: (a) tuples in the bucket do not have an intrinsic - * order; (b) tuples with the same key are permitted; and (c) overflow pages - * must be provided for when the #of duplicates in a bucket exceeds the capacity - * of the bucket (this can be done by conversion of the bucket to a blob record - * or by chaining the bucket). + * Note: Overflow pages must be provided for when the #of duplicates in a bucket + * exceeds the capacity of the bucket (this can be done by conversion of the + * bucket to a blob record or by chaining the bucket). * <p> * Note: package private fields are used so that they may be directly accessed * by the {@link HashBucket} class. @@ -58,510 +58,372 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: MutableLeafData.java 2265 2009-10-26 12:51:06Z thompsonbry $ * - * @todo Consider mutable implementation based on a compacting record ala GOM. - * This is especially attractive for the hash tree. The implementation - * would have to be wholly different from the {@link MutableLeafData} - * class. Instead of managing the {@link IRaba} for the keys and values - * separately, each {@link ITuple} would be appended into a byte[] (or - * {@link IDataRecord}). There would be a budget for that backing buffer - * which is the maximum in-memory size of the bucket. An index would - * provide random access into the buffer for only those entries which are - * "live" and a counter is maintain of the #of entries in the buffer which - * are no longer in use. When the buffer capacity is reached, the buffer - * is compacted by copying all of the entries accessible from the index - * into a new buffer and the old buffer is released. - * <p> - * Records which are too large for the buffer should be moved out of line. - * <p> - * This can be used in combination with a dynamically maintained trie for - * fast hash lookup, or we could just scan the entries. - * <p> - * Lexicon key search can scan the entries using the index. Scanning can - * have a side-effect in which the position of the entry offsets in the - * index is swapped if the keys are out of order. This would give us - * MonetDB style "cracking". The keys would have to be put into full order - * no later than when the record is made persistent. - * <p> - * Even though mutation is not thread safe, compacting the data record - * must not cause the assignment of indices to tuples to change when the - * caller is iterating through the entries by index. + * TODO Consider mutable implementation based on a compacting record + * ala GOM. This is especially attractive for the hash tree. The + * implementation would have to be wholly different from the + * {@link MutableLeafData} class. Instead of managing the {@link IRaba} + * for the keys and values separately, each {@link ITuple} would be + * appended into a byte[] (or {@link IDataRecord}). There would be a + * budget for that backing buffer which is the maximum in-memory size + * of the bucket. An index would provide random access into the buffer + * for only those entries which are "live" and a counter is maintain of + * the #of entries in the buffer which are no longer in use. When the + * buffer capacity is reached, the buffer is compacted by copying all + * of the entries accessible from the index into a new buffer and the + * old buffer is released. + * <p> + * Records which are too large for the buffer should be moved out of + * line. + * <p> + * This can be used in combination with a dynamically maintained trie + * for fast hash lookup, or we could just scan the entries. + * <p> + * Lexicon key search can scan the entries using the index. Scanning + * can have a side-effect in which the position of the entry offsets in + * the index is swapped if the keys are out of order. This would give + * us MonetDB style "cracking". The keys would have to be put into full + * order no later than when the record is made persistent. + * <p> + * Even though mutation is not thread safe, compacting the data record + * must not cause the assignment of indices to tuples to change when + * the caller is iterating through the entries by index. * - * @todo When the record is serialized, do we need to allow a decision function - * to examine the record and decide whether it must be split? Since we do - * not have a fixed target for the page size, but only a budget, and since - * compression of keys, values, metadata, and the encoded record can all - * be applied, it seems that the decision function should be in terms of - * the buffer budget and a maximum #of entries (e.g., B+Tree branching - * factor or an equivalent hash bucket threshold). + * TODO When the record is serialized, do we need to allow a decision + * function to examine the record and decide whether it must be split? + * Since we do not have a fixed target for the page size, but only a + * budget, and since compression of keys, values, metadata, and the + * encoded record can all be applied, it seems that the decision + * function should be in terms of the buffer budget and a maximum #of + * entries (e.g., B+Tree branching factor or an equivalent hash bucket + * threshold). + * + * TODO Consider disallowing delete markers and version timestamps for + * the {@link HTree} if it is to be used only with in unisolated + * contexts in which case we can drop support for that stuff from this + * class. */ -public class MutableBucketData implements IBucketData { +public class MutableBucketData implements ILeafData { // IBucketData { - private IDataRecord buf; + /** + * A keys associated with each tuple in the bucket page. Each key is a + * variable length unsigned byte[]. The keys are neither ordered nor dense. + */ + final MutableKeyBuffer keys; - private /*@todo final*/ OpenIntIntHashMap index; - - /** - * Constructor used when converting a persistent data record into a mutable - * one. - * - * @param data - */ - public MutableBucketData(final IBucketData data) { + /** + * The values associated with each tuple in the bucket page. Each value is a + * variable length byte[].The indices into the values are correlated with + * the indices into the {@link #keys}. + */ + final MutableValueBuffer vals; - /* - * @todo this is the initial capacity. the buffer can grow as necessary - * until we have reached the maximum number of entries permitted by the - * branching factor (and for HTree we should let the branching factor - * grow as well if we are under the target page size!). - * - * The caller should be passing in the estimated bufferSize to achieve a - * target page size. - */ - buf = new ByteArrayBuffer(Bytes.kilobyte32*4); + /** + * The deletion markers IFF isolation is supported by the index. The indices + * into the delete markers are correlated with the indices into the + * {@link #keys}. + */ + final boolean[] deleteMarkers; - /* - * @todo this is an exact fit. The caller should be passing in the - * configured branchingFactor. - * - * Actually, an ArrayList or simply an extendable or pre-extended array - * would do fine here. All we need to do is translate the index of the - * entry into the offset into the buffer. There are very specialized - * data structures which can do better for this than an array by leaving - * "holes" to minimize the amount of copying when inserting, but that - * should not be a problem with a compacting pass over the buffer. We - * can either compact over each hole, copy down things into "good" fits - * in the holes, or copy things into a new array. This would be a pretty - * natural fit with a single free buffer which is available on the - * HTree/BTree and swapped with the buffer in a leaf/bucket each time it - * is compacted. Any of these schemes will do less copying that the - * current B+Tree scheme, which copies up/down all the time. - */ - index = new OpenIntIntHashMap(data.getKeyCount()); + /** + * The version timestamps IFF isolation is supported by the index. The + * indices into the version timestamps are correlated with the indices into + * the {@link #keys} . + */ + final long[] versionTimestamps; - } + /** + * The minimum version timestamp. + * + * @todo these fields at 16 bytes to each {@link MutableBucketData} object + * even when we do not use them. It would be better to use a subclass + * or tack them onto the end of the {@link #versionTimestamps} array. + */ + long minimumVersionTimestamp; + long maximumVersionTimestamp; - /** - * - * @param bufferSize - * The initial size of the backing byte[]. - * @param branchingFactor - * The maximum #of tuples which may be stored in the data record. - * - * @todo The buffer must be permitted grow until it is sufficient to encode - * approximately one page worth of tuples. - * <p> - * is typically on the order of the size of a page, e.g., 4k. Since - * the data record will be encoded and possible compressed before it - * is written onto the store, this can be larger than the target. - * <p> - * In order to avoid problems where the objects are much smaller than - * expected, we should allow the backing buffer to grow or we should - * fit a model which estimates the size of the resulting page based on - * the size of the buffer and then grow the buffer until we can - * satisfy the target page size. - * - * FIXME The buffer should be LARGER than the target value required to - * model a full page in order to have some efficiency with update - * operations when the page is nearly full. Probably we should use - * 1.5x to 2x the target page size for the bufferSize. - */ - public MutableBucketData(final int bufferSize, final int branchingFactor) { + /** + * Bit markers indicating whether the value associated with the tuple is a + * raw record or an inline byte[] stored within {@link #getValues() values + * raba}. + */ + final boolean[] rawRecords; - /* - * Initialize the buffer and the index using the given values. - * - * The bufferSize should be adaptive. - * - * The branchingFactor could be adaptive as well (but the B+Tree logic - * would have to be updated to handle leaves which are over or under the - * nominal branching factor). - */ - buf = new ByteArrayBuffer(bufferSize); + /** + * Create an empty data record with internal arrays dimensioned for the + * specified branching factor. + * + * @param branchingFactor + * The branching factor for the owning B+Tree. + * @param hasVersionTimestamps + * <code>true</code> iff version timestamps will be maintained. + * @param hasDeleteMarkers + * <code>true</code> iff delete markers will be maintained. + * @param hasRawRecords + * <code>true</code> iff raw record markers will be maintained. + */ + public MutableBucketData(final int branchingFactor, + final boolean hasVersionTimestamps, final boolean hasDeleteMarkers, + final boolean hasRawRecords) { - index = new OpenIntIntHashMap(branchingFactor/* initialCapacity */); + keys = new MutableKeyBuffer(branchingFactor /*+ 1*/); - } - - public int getHash(int index) { - // TODO Auto-generated method stub - return 0; - } + vals = new MutableValueBuffer(branchingFactor /*+ 1*/); - public int getKeyCount() { - // TODO Auto-generated method stub - return 0; - } + versionTimestamps = (hasVersionTimestamps ? new long[branchingFactor /*+ 1*/] + : null); - public int getLengthMSB() { - // TODO Auto-generated method stub - return 0; - } + // init per API specification. + minimumVersionTimestamp = Long.MAX_VALUE; + maximumVersionTimestamp = Long.MIN_VALUE; + + deleteMarkers = (hasDeleteMarkers ? new boolean[branchingFactor /*+ 1*/] + : null); + + rawRecords = (hasRawRecords ? new boolean[branchingFactor /*+ 1*/] : null); - public Iterator<Integer> hashIterator(int h) { - // TODO Auto-generated method stub - return null; - } + } - public boolean getDeleteMarker(int index) { - // TODO Auto-generated method stub - return false; - } + /** + * Copy ctor. + * + * @param branchingFactor + * The branching factor for the owning B+Tree. + * @param src + * The source leaf. + */ + public MutableBucketData(final int branchingFactor, final ILeafData src) { - public long getNextAddr() { - // TODO Auto-generated method stub - return 0; - } + keys = new MutableKeyBuffer(branchingFactor /*+ 1*/, src.getKeys()); - public long getPriorAddr() { - // TODO Auto-generated method stub - return 0; - } + vals = new MutableValueBuffer(branchingFactor /*+ 1*/, src.getValues()); - public int getValueCount() { - // TODO Auto-generated method stub - return 0; - } + versionTimestamps = (src.hasVersionTimestamps() ? new long[branchingFactor /*+ 1*/] + : null); - public IRaba getValues() { - // TODO Auto-generated method stub - return null; - } + deleteMarkers = (src.hasDeleteMarkers() ? new boolean[branchingFactor /*+ 1*/] + : null); - public long getVersionTimestamp(int index) { - // TODO Auto-generated method stub - return 0; - } + rawRecords = (src.hasRawRecords() ? new boolean[branchingFactor /*+ 1*/] + : null); - public boolean hasDeleteMarkers() { - // TODO Auto-generated method stub - return false; - } +// final int nkeys = keys.size(); + + if (versionTimestamps != null) { - public boolean hasVersionTimestamps() { - // TODO Auto-generated method stub - return false; - } + for (int i = 0; i < branchingFactor; i++) { - public boolean isDoubleLinked() { - // TODO Auto-generated method stub - return false; - } + versionTimestamps[i] = src.getVersionTimestamp(i); + + } - public AbstractFixedByteArrayBuffer data() { - // TODO Auto-generated method stub - return null; - } + minimumVersionTimestamp = src.getMinimumVersionTimestamp(); - public IRaba getKeys() { - // TODO Auto-generated method stub - return null; - } + maximumVersionTimestamp = src.getMaximumVersionTimestamp(); - public long getMaximumVersionTimestamp() { - // TODO Auto-generated method stub - return 0; - } + } else { - public long getMinimumVersionTimestamp() { - // TODO Auto-generated method stub - return 0; - } + minimumVersionTimestamp = Long.MAX_VALUE; -// public int getSpannedTupleCount() { -// // TODO Auto-generated method stub -// return 0; -// } + maximumVersionTimestamp = Long.MIN_VALUE; - public boolean isCoded() { - // TODO Auto-generated method stub - return false; - } + } + + if (deleteMarkers != null) { - public boolean isLeaf() { - // TODO Auto-generated method stub - return false; - } + for (int i = 0; i < branchingFactor; i++) { - public boolean isReadOnly() { - // TODO Auto-generated method stub - return false; - } + deleteMarkers[i] = src.getDeleteMarker(i); + + } + + } - public long getRawRecord(int index) { - // TODO Auto-generated method stub - return 0; - } + if (rawRecords != null) { - public boolean hasRawRecords() { - // TODO Auto-generated method stub - return false; - } - -// /** -// * The keys for the entries in the bucket. Unlike a B+Tree, the keys are NOT -// * maintained in a sorted order. Search proceeds by scanning for matching -// * hash codes and filtering for matching keys. -// */ -// final MutableKeyBuffer keys; -// -// /** -// * The values for the entries in the bucket. There is one value per key. The -// * value MAY be null. -// */ -// final MutableValueBuffer vals; -// + for (int i = 0; i < branchingFactor; i++) { + + rawRecords[i] = src.getRawRecord(i) != IRawStore.NULL; + + } + + } + + } + + /** + * Ctor based on just "data" -- used by unit tests. + * + * @param keys + * A representation of the defined keys in the leaf. + * @param values + * An array containing the values found in the leaf. + * @param versionTimestamps + * An array of the version timestamps (iff the version metadata + * is being maintained). + * @param deleteMarkers + * An array of the delete markers (iff the version metadata is + * being maintained). + */ + MutableBucketData(final MutableKeyBuffer keys, + final MutableValueBuffer values, final long[] versionTimestamps, + final boolean[] deleteMarkers, final boolean[] rawRecords) { + + assert keys != null; + assert values != null; + assert keys.capacity() == values.capacity(); + if (versionTimestamps != null) { + assert versionTimestamps.length == keys.capacity(); + } + if (deleteMarkers != null) { + assert deleteMarkers.length == keys.capacity(); + } + if (rawRecords != null) { + assert rawRecords.length == keys.capacity(); + } + + this.keys = keys; + this.vals = values; + this.versionTimestamps = versionTimestamps; + this.deleteMarkers = deleteMarkers; + this.rawRecords = rawRecords; + + if (versionTimestamps != null) + recalcMinMaxVersionTimestamp(); + + } + + /** + * The capacity of the bucket page (<code>2^addressBits</code>). + */ + final public int capacity() { + + return keys.capacity(); + + } + + /** + * Range check a tuple index. + * + * @param index + * The index of a tuple in [0:#capacity()-1]. + * @return <code>true</code> + * + * @throws IndexOutOfBoundsException + * if the index is not in the legal range. + */ + final protected boolean rangeCheckTupleIndex(final int index) { + + if (index < 0 || index > keys.capacity()) + throw new IndexOutOfBoundsException(); + + return true; + + } + + /** + * No - this is a mutable data record. + */ + final public boolean isReadOnly() { + + return false; + + } + + /** + * No. + */ + final public boolean isCoded() { + + return false; + + } + + final public AbstractFixedByteArrayBuffer data() { + + throw new UnsupportedOperationException(); + + } + + public final long getVersionTimestamp(final int index) { + + if (versionTimestamps == null) + throw new UnsupportedOperationException(); + + assert rangeCheckTupleIndex(index); + + return versionTimestamps[index]; + + } + + final public long getMinimumVersionTimestamp() { + + if (versionTimestamps == null) + throw new UnsupportedOperationException(); + + return minimumVersionTimestamp; + + } + + final public long getMaximumVersionTimestamp() { + + if (versionTimestamps == null) + throw new UnsupportedOperationException(); + + return maximumVersionTimestamp; + + } + + public final boolean getDeleteMarker(final int index) { + + if (deleteMarkers == null) + throw new UnsupportedOperationException(); + + assert rangeCheckTupleIndex(index); + + return deleteMarkers[index]; + + } + + public final long getRawRecord(final int index) { + + if (rawRecords == null) + throw new UnsupportedOperationException(); + + assert rangeCheckTupleIndex(index); + + if (!rawRecords[index]) + return IRawStore.NULL; + + final byte[] val = vals.get(index); + + final long addr = AbstractBTree.decodeRecordAddr(val); + + return addr; + + } + + final public IRaba getValues() { + + return vals; + + } + + final public IRaba getKeys() { + + return keys; + + } + + /** + * Always returns <code>true</code>. + */ + final public boolean isLeaf() { + + return true; + + } + // /** -// * The deletion markers IFF isolation is supported by the {@link HTree}. -// */ -// final boolean[] deleteMarkers; -// -// /** -// * The version timestamps IFF isolation is supported by the {@link HTree}. -// */ -// final long[] versionTimestamps; -// -// /** -// * The minimum version timestamp. -// * -// * @todo these fields add 16 bytes to each {@link MutableBucketData} object -// * even when we do not use them. It would be better to use a subclass -// * or tack them onto the end of the {@link #versionTimestamps} array. -// */ -// long minimumVersionTimestamp; -// long maximumVersionTimestamp; -// -// /** -// * Create an empty data record with internal arrays dimensioned for the -// * specified branching factor. -// * -// * @param branchingFactor -// * The maximum #of entries in the hash bucket before it will -// * overflow or be split. Since the goal is to manage the size -// * of the bucket on the disk and since we do not known the size -// * of the bucket's data record until it is being evicted, this -// * value places an upper bound after which the bucket will be -// * @param hasVersionTimestamps -// * <code>true</code> iff version timestamps will be maintained. -// * @param hasDeleteMarkers -// * <code>true</code> iff delete markers will be maintained. -// */ -// public MutableBucketData(final int branchingFactor, -// final boolean hasVersionTimestamps, final boolean hasDeleteMarkers) { -// -// keys = new MutableKeyBuffer(branchingFactor + 1); -// -// vals = new MutableValueBuffer(branchingFactor + 1); -// -// versionTimestamps = (hasVersionTimestamps ? new long[branchingFactor + 1] -// : null); -// -// // init per API specification. -// minimumVersionTimestamp = Long.MAX_VALUE; -// maximumVersionTimestamp = Long.MIN_VALUE; -// -// deleteMarkers = (hasDeleteMarkers ? new boolean[branchingFactor + 1] -// : null); -// -// } -// -// /** -// * Copy ctor. -// * -// * @param branchingFactor -// * The branching factor for the owning B+Tree. -// * @param src -// * The source leaf. -// */ -// public MutableBucketData(final int branchingFactor, final ILeafData src) { -// -// keys = new MutableKeyBuffer(branchingFactor + 1, src.getKeys()); -// -// vals = new MutableValueBuffer(branchingFactor + 1, src.getValues()); -// -// versionTimestamps = (src.hasVersionTimestamps() ? new long[branchingFactor + 1] -// : null); -// -// deleteMarkers = (src.hasDeleteMarkers() ? new boolean[branchingFactor + 1] -// : null); -// -// final int nkeys = keys.size(); -// -// if (versionTimestamps != null) { -// -// for (int i = 0; i < nkeys; i++) { -// -// versionTimestamps[i] = src.getVersionTimestamp(i); -// -// } -// -// minimumVersionTimestamp = src.getMinimumVersionTimestamp(); -// -// maximumVersionTimestamp = src.getMaximumVersionTimestamp(); -// -// } else { -// -// minimumVersionTimestamp = Long.MAX_VALUE; -// -// maximumVersionTimestamp = Long.MIN_VALUE; -// -// -// } -// -// if (deleteMarkers != null) { -// -// for (int i = 0; i < nkeys; i++) { -// -// deleteMarkers[i] = src.getDeleteMarker(i); -// -// } -// -// } -// -// } -// -// /** -// * Ctor based on just "data" -- used by unit tests. -// * -// * @param keys -// * A representation of the defined keys in the leaf. -// * @param values -// * An array containing the values found in the leaf. -// * @param versionTimestamps -// * An array of the version timestamps (iff the version metadata -// * is being maintained). -// * @param deleteMarkers -// * An array of the delete markers (iff the version metadata is -// * being maintained). -// */ -// public MutableBucketData(final MutableKeyBuffer keys, -// final MutableValueBuffer values, final long[] versionTimestamps, -// final boolean[] deleteMarkers) { -// -// assert keys != null; -// assert values != null; -// assert keys.capacity() == values.capacity(); -// if (versionTimestamps != null) { -// assert versionTimestamps.length == keys.capacity(); -// } -// if (deleteMarkers != null) { -// assert deleteMarkers.length == keys.capacity(); -// } -// -// this.keys = keys; -// this.vals = values; -// this.versionTimestamps = versionTimestamps; -// this.deleteMarkers = deleteMarkers; -// -// if (versionTimestamps != null) -// recalcMinMaxVersionTimestamp(); -// -// } -// -// /** -// * Range check a tuple index. -// * -// * @param index -// * The index of a tuple in [0:nkeys]. -// * @return <code>true</code> -// * -// * @throws IndexOutOfBoundsException -// * if the index is not in the legal range. -// */ -// final protected boolean rangeCheckTupleIndex(final int index) { -// -// if (index < 0 || index > getKeys().size()) -// throw new IndexOutOfBoundsException(); -// -// return true; -// -// } -// -// /** -// * No - this is a mutable data record. -// */ -// final public boolean isReadOnly() { -// -// return false; -// -// } -// -// /** -// * No. -// */ -// final public boolean isCoded() { -// -// return false; -// -// } -// -// final public AbstractFixedByteArrayBuffer data() { -// -// throw new UnsupportedOperationException(); -// -// } -// -// public final long getVersionTimestamp(final int index) { -// -// if (versionTimestamps == null) -// throw new UnsupportedOperationException(); -// -// assert rangeCheckTupleIndex(index); -// -// return versionTimestamps[index]; -// -// } -// -// final public long getMinimumVersionTimestamp() { -// -// if (versionTimestamps == null) -// throw new UnsupportedOperationException(); -// -// return minimumVersionTimestamp; -// -// } -// -// final public long getMaximumVersionTimestamp() { -// -// if (versionTimestamps == null) -// throw new UnsupportedOperationException(); -// -// return maximumVersionTimestamp; -// -// } -// -// public final boolean getDeleteMarker(final int index) { -// -// if (deleteMarkers == null) -// throw new UnsupportedOperationException(); -// -// assert rangeCheckTupleIndex(index); -// -// return deleteMarkers[index]; -// -// } -// -// final public IRaba getValues() { -// -// return vals; -// -// } -// -// final public IRaba getKeys() { -// -// return keys; -// -// } -// -// /** -// * Always returns <code>true</code>. -// */ -// final public boolean isLeaf() { -// -// return true; -// -// } -// -// /** // * For a leaf the #of entries is always the #of keys. // */ // final public int getSpannedTupleCount() { @@ -569,86 +431,92 @@ // return getKeys().size(); // // } -// -// final public int getValueCount() { -// -// return vals.size(); -// -// } -// -// final public boolean hasDeleteMarkers() { -// -// return deleteMarkers != null; -// -// } -// -// final public boolean hasVersionTimestamps() { -// -// return versionTimestamps != null; -// -// } -// -// final public int getKeyCount() { -// -// return keys.size(); -// -// } -// -// /** -// * No - this class does not support double-linked leaves (only the -// * {@link IndexSegment} actually uses double-linked leaves). -// */ -// final public boolean isDoubleLinked() { -// -// return false; -// -// } -// -// final public long getNextAddr() { -// -// throw new UnsupportedOperationException(); -// -// } -// -// final public long getPriorAddr() { -// -// throw new UnsupportedOperationException(); -// -// } -// -// /** -// * Recalculate the min/max version timestamp on the leaf. The caller is -// * responsible for propagating the new min/max to the ancestors of the leaf. -// * -// * @throws UnsupportedOperationException -// * if the leaf is not maintaining per-tuple version timestamps. -// */ -// void recalcMinMaxVersionTimestamp() { -// -// // must be maintaining version timestamps. -// if (versionTimestamps == null) -// throw new UnsupportedOperationException(); -// -// final int nkeys = keys.nkeys; -// -// long min = Long.MAX_VALUE; -// long max = Long.MIN_VALUE; -// -// for (int i = 0; i < nkeys; i++) { -// -// final long t = versionTimestamps[i]; -// -// if (t < min) -// min = t; -// -// if (t > max) -// max = t; -// -// } -// -// minimumVersionTimestamp = min; -// maximumVersionTimestamp = max; -// -// } + + final public int getValueCount() { + + return vals.size(); + + } + + final public boolean hasRawRecords() { + + return rawRecords != null; + + } + + final public boolean hasDeleteMarkers() { + + return deleteMarkers != null; + + } + + final public boolean hasVersionTimestamps() { + + return versionTimestamps != null; + + } + final public int getKeyCount() { + + return keys.size(); + + } + + /** + * No - this class does not support double-linked leaves (only the + * {@link IndexSegment} actually uses double-linked leaves). + */ + final public boolean isDoubleLinked() { + + return false; + + } + + final public long getNextAddr() { + + throw new UnsupportedOperationException(); + + } + + final public long getPriorAddr() { + + throw new UnsupportedOperationException(); + + } + + /** + * Recalculate the min/max version timestamp on the leaf. The caller is + * responsible for propagating the new min/max to the ancestors of the leaf. + * + * @throws UnsupportedOperationException + * if the leaf is not maintaining per-tuple version timestamps. + */ + void recalcMinMaxVersionTimestamp() { + + // must be maintaining version timestamps. + if (versionTimestamps == null) + throw new UnsupportedOperationException(); + + final int nkeys = keys.nkeys; + + long min = Long.MAX_VALUE; + long max = Long.MIN_VALUE; + + for (int i = 0; i < nkeys; i++) { + + final long t = versionTimestamps[i]; + + if (t < min) + min = t; + + if (t > max) + max = t; + + } + + minimumVersionTimestamp = min; + maximumVersionTimestamp = max; + + } + } Modified: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/MutableDirectoryPageData.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/MutableDirectoryPageData.java 2011-06-28 16:15:49 UTC (rev 4809) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/MutableDirectoryPageData.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -17,6 +17,8 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: MutableNodeData.java 2265 2009-10-26 12:51:06Z thompsonbry $ + * + * @deprecated with the {@link HashTree}. */ public class MutableDirectoryPageData implements IDirectoryData { Modified: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java 2011-06-28 16:15:49 UTC (rev 4809) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -32,6 +32,8 @@ import com.bigdata.btree.Checkpoint; import com.bigdata.btree.IndexMetadata; import com.bigdata.btree.data.ILeafData; +import com.bigdata.htree.HTree; +import com.bigdata.htree.HashTree; /** * Interface for the data record of a hash bucket. The hash bucket extends the @@ -51,70 +53,77 @@ */ public interface IBucketData extends ILeafData { - /** - * {@inheritDoc} - * <p> - * For clarification, this returns the #of entries in the hash bucket (not - * just the number of distinct keys). - */ + /** + * {@inheritDoc} + * <p> + * For clarification, this returns the #of entries in the hash bucket (not + * the number of distinct keys since duplicate keys are permitted in a hash + * bucket). + */ int getKeyCount(); /** * The length (in bits) of the MSB prefix shared by the hash values of the * keys on this page. + * + * @deprecated with {@link HashTree} */ int getLengthMSB(); - /** - * Return the hash value of the key at the given index. - * - * @param index - * The index of the key. - * - * @return The hash value of that key. - * - * @deprecated This is just the int32 key for that tuple. It should be - * generalized as a byte[]. This will require us to generalize - * the handle of the LSB and MSB bits as well, all of which must - * be efficient and compact. For a {@link HTree}, we need to - * declare the bits length the key. That might be part of the - * {@link Checkpoint} or the {@link HTree}'s - * {@link IndexMetadata}. {@link HTree} should be usable with - * int32 hash codes, int64, and {@link MessageDigest} based hash - * codes, which are 20+ bytes as well as with a fixed prefix of - * a {@link MessageDigest} for shorter keys which still have - * strongly random distributions, plus order-preserving hash - * codes. - */ + /** + * Return the hash value of the key at the given index. + * + * @param index + * The index of the key. + * + * @return The hash value of that key. + * + * @deprecated with {@link HashTree}. + * <p> + * This is just the int32 key for that tuple. It should be + * generalized as a byte[]. This will require us to generalize + * the handle of the LSB and MSB bits as well, all of which must + * be efficient and compact. For a {@link HTree}, we need to + * declare the bits length the key. That might be part of the + * {@link Checkpoint} or the {@link HTree}'s + * {@link IndexMetadata}. {@link HTree} should be usable with + * int32 hash codes, int64, and {@link MessageDigest} based hash + * codes, which are 20+ bytes as well as with a fixed prefix of + * a {@link MessageDigest} for shorter keys which still have + * strongly random distributions, plus order-preserving hash + * codes. + */ int getHash(int index); - /** - * Return an {@link Iterator} which visits the index of each entry in the - * hash bucket having the given hash code. - * - * @param h - * The hash code. - * - * @todo Note: There is a design tradeoff between autoboxing of the - * <code>int</code> index and allowing the {@link IBucketData} class - * to encapsulate the iterator pattern together with any setup which - * can be done once per scan for a given hash code. For example, using - * a BitInputStream. The iterator allows us to amortize the cost of - * that setup, but we pay for the autoboxing of the index values. - * However, autobox primitives tend to be quite cheap as they are - * rapidly reclaimed by GC. - * <p> - * It is possible to implement an extension interface which returns - * the [int] index without autoboxing. If this method signature is - * modified to return that interface then the implementation can avoid - * autoboxing. - * - * @deprecated Since hash codes will be unsigned byte[] keys, this will have - * to be visit a read-only byte[] slice or a copy of the byte[] - * key. We already have methods for this. Those methods (and the - * leaf coder) might be more compact, e.g., for int32 keys, - * since we code a (short) fixed length key in more simply. - */ + /** + * Return an {@link Iterator} which visits the index of each entry in the + * hash bucket having the given hash code. + * + * @param h + * The hash code. + * + * @todo Note: There is a design tradeoff between autoboxing of the + * <code>int</code> index and allowing the {@link IBucketData} class + * to encapsulate the iterator pattern together with any setup which + * can be done once per scan for a given hash code. For example, using + * a BitInputStream. The iterator allows us to amortize the cost of + * that setup, but we pay for the autoboxing of the index values. + * However, autobox primitives tend to be quite cheap as they are + * rapidly reclaimed by GC. + * <p> + * It is possible to implement an extension interface which returns + * the [int] index without autoboxing. If this method signature is + * modified to return that interface then the implementation can avoid + * autoboxing. + * + * @deprecated With {@link HashTree}. + * <p> + * Since hash codes will be unsigned byte[] keys, this will have + * to be visit a read-only byte[] slice or a copy of the byte[] + * key. We already have methods for this. Those methods (and the + * leaf coder) might be more compact, e.g., for int32 keys, + * since we code a (short) fixed length key in more simply. + */ Iterator<Integer> hashIterator(int h); // /** Added: branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/raba/MutableKeyBuffer.java =================================================================== --- branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/raba/MutableKeyBuffer.java (rev 0) +++ branches/TERMS_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/htree/raba/MutableKeyBuffer.java 2011-06-28 16:31:38 UTC (rev 4810) @@ -0,0 +1,399 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.htree.raba; + +import java.io.DataInput; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Iterator; + +import com.bigdata.btree.raba.AbstractRaba; +import com.bigdata.btree.raba.IRaba; +import com.bigdata.htree.HTree; + +/** + * A flyweight mutable implementation for an {@link HTree} bucket page using a + * backing <code>byte[][]</code>. Unlike the keys in a B+Tree, the {@link HTree} + * keys are NOT ordered and need not be dense. Further, each bucket page is + * logically divided into a set of buddy hash buckets. All operations therefore + * take place within a buddy bucket. The buddy bucket is identified by its + * offset and its extent is identified by the global depth of the bucket page. + * <p> + * While the total #of non-null keys is reported by {@link #size()}, this is the + * value for the bucket page as a whole. The {@link HTree} must explicitly + * examine a buddy hash bucket and count the non-<code>null</code> keys in order + * to know the "size" of a given buddy hash buck... [truncated message content] |