From: <tho...@us...> - 2010-12-01 21:43:44
Revision: 3990 http://bigdata.svn.sourceforge.net/bigdata/?rev=3990&view=rev Author: thompsonbry Date: 2010-12-01 21:43:35 +0000 (Wed, 01 Dec 2010) Log Message: ----------- Checkpoint for the extendible hash index. This begins to migrate the code to a persistence capable index using an approach similar to the B+Tree to manage the directory page and buckets (data pages). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashDirectory.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashFunction.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleExtensibleHashMap.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java 2010-12-01 21:43:35 UTC (rev 3990) @@ -0,0 +1,100 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htbl; + +import java.lang.ref.Reference; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.PO; + +/** + * Abstract class for both directory and data pages for a hash index. + */ +abstract public class AbstractHashPage <T extends AbstractHashPage +/* + * DO-NOT-USE-GENERIC-HERE. The compiler will fail under Linux (JDK 1.6.0_14, + * _16). + */ +> extends PO //implements IAbstractNode, IAbstractNodeData +{ + + private final static transient Logger log = Logger + .getLogger(AbstractHashPage.class); + + /** + * Transient back reference to the index to which this directory belongs. + */ + protected transient ExtensibleHashMap htbl; + + /** + * <p> + * A {@link Reference} to this page. This is created when the page is + * created and effectively provides a canonical {@link Reference} object for + * any given page. + * </p> + * + * @todo Do we need back references for recursive directories? + */ + transient protected final Reference<? extends AbstractHashPage<T>> self; + + /** + * Disallowed. 
+ */ + private AbstractHashPage() { + + throw new UnsupportedOperationException(); + + } + + protected AbstractHashPage(final ExtensibleHashMap htbl, final boolean dirty) { + + if(htbl == null) + throw new IllegalArgumentException(); + + this.htbl = htbl; + + // reference to self: reused to link parents and children. + this.self = htbl.newRef(this); + + if (!dirty) { + + /* + * Nodes default to being dirty, so we explicitly mark this as + * clean. This is ONLY done for the de-serialization constructors. + */ + + setDirty(false); + + } + +// @todo Add to the hard reference queue. +// btree.touch(this); + + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java (from rev 3988, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleExtensibleHashMap.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java 2010-12-01 21:43:35 UTC (rev 3990) @@ -0,0 +1,849 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 29, 2010 + */ +package com.bigdata.htbl; + +import java.lang.ref.Reference; +import java.lang.ref.SoftReference; +import java.lang.ref.WeakReference; +import java.util.Iterator; +import java.util.Map; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.AbstractBTree; +import com.bigdata.btree.AbstractNode; +import com.bigdata.btree.ISimpleBTree; +import com.bigdata.btree.Node; +import com.bigdata.io.SerializerUtil; +import com.bigdata.rawstore.Bytes; +import com.bigdata.rawstore.IRawStore; +import com.bigdata.rawstore.SimpleMemoryRawStore; + +import cutthecrap.utils.striterators.Expander; +import cutthecrap.utils.striterators.IStriterator; +import cutthecrap.utils.striterators.Striterator; + +/** + * <p> + * An implementation of an extensible hash map using a 32 bit hash code and a + * fixed length int[] for the bucket. The keys are int32 values. The data stored + * in the hash map is just the key. Buckets provide a perfect fit for N keys. + * This is used to explore the dynamics of the extensible hashing algorithm + * using some well known examples. + * </p> + * <h2>Extensible Hashing</h2> + * <p> + * Extensible (or extendable) hashing uses a directory and pages (or buckets) to + * store the data. Directories make it possible to share pages, which can + * improve the storage efficiency, but directory schemes can suffer from + * exponential growth when the hash values are not uniformly distributed (a + * variety of schemes may be used to compensate for that growth, including + * overflow pages, multi-level directories, better hash functions, etc). 
+ * Extensible hashing uses a global depth, <i>d</i>, which corresponds to the + * <i>d</i>-bits of the hash value used to select the directory entry. Each + * bucket has a local depth which is used to track the #of directory entries + * which target the same bucket. When an insert would fail due to insufficient + * space in the target bucket, the bucket must either be split, increased in + * size, or linked with an overflow page. If the directory has only a single + * reference to a given page, then the directory must be doubled when the page + * is split. Overflow pages can be used to defer doubling the size of the + * directory (and are required when all hash bits are the same, such as when the + * keys are identical), but overflow pages can degrade point test performance + * since a miss on the primary bucket will cause a read through to the page(s) + * of the overflow chain. Increasing the size of the bucket can also be used to + * defer the doubling of the address space (or handle duplicate keys) and can + * translate into better IO efficiency. Extensible hash trees use recursive + * directory structures. Recursive directories essentially allow a hash table to + * be imposed on the overflow pages which would otherwise be associated with the + * primary page and thereby increase the efficiency of access to hash values + * with skewed distributions. Order preserving hash functions may be used to + * create hash indices which cluster keys (which can improve efficiency at all + * levels of the memory hierarchy) and support range scans. Order preserving + * multi-dimensional extensible hashing is a generalization of extensible + * hashing suitable for spatial data and clustered aggregations. The design of + * the directory (and handling of split/overflow decision) for both single and + * multi-dimensional extensible hashing can have an enormous impact on the size + * of the directory and the storage and retrieval efficiency of the index. + * </p> + * <h2>Implementation Design</h2> + * <p> + * </p> + * <p> + * This implementation is not thread-safe. I have not attempted to provide for + * visibility guarantees when resizing the map and I have not attempted to + * provide for concurrent updates. The implementation exists solely to explore + * the extensible hashing algorithm. + * </p> + * + * @todo Name htable implies single level directory where htree implies a + * multi-level directory. Figure out the directory scheme and lock in the + * name for the class. + * + * @todo Backfill in the notes on the implementation design as it gets locked + * in. + * + * @todo We can not directly implement {@link Map} unless the hash table is + * configured to NOT permit duplicates. + * + * @todo Integrate a ring buffer for retention of frequently accessed pages per + * the weak reference policy observed by the BTree with touch() + */ +public class ExtensibleHashMap +implements ISimpleBTree // @todo rename ISimpleBTree interface +// @todo implement IAutoBoxBTree and rename IAutoBoxBTree interface. +// @todo implements IRangeQuery (iff using order preserving hashing) +, HashFunction +{ + + private final transient static Logger log = Logger + .getLogger(ExtensibleHashMap.class); + +// /** +// * The buckets. The first bucket is pre-allocated when the address table is +// * setup and all addresses in the table are initialized to point to that +// * bucket. Thereafter, buckets are allocated when a bucket is split. 
+// * +// * @todo This needs to become an {@link IRawStore} reference, but first we +// * need to provide {@link Reference}s from the directory to the +// * buckets and then we can allow for persistence of dirty pages. +// */ +// protected final ArrayList<SimpleBucket> buckets; + + /** + * The backing store. + */ + private final IRawStore store; + + /** + * The root directory. This is replaced each time copy-on-write triggers a + * cascade of updates. + * <p> + * This hard reference is cleared to <code>null</code> if an index is + * {@link #close() closed}. {@link #getRoot()} automatically uses + * {@link #reopen()} to reload the root so that closed indices may be + * transparently made ready for further use (indices are closed to reduce + * their resource burden, not to make their references invalid). The + * {@link AbstractHashPage} and derived classes <em>assume</em> that the + * root is non-null. This assumption is valid if {@link #close()} is invoked + * by the application in a manner consistent with the single-threaded + * contract for the index. + * <p> + * Note: This field MUST be marked as [volatile] in order to guarantee + * correct semantics for double-checked locking in {@link #reopen()}. + * + * @see http://en.wikipedia.org/wiki/Double-checked_locking + */ + protected volatile HashDirectory root; + + /** + * The root directory. The root is replaced each time copy-on-write triggers + * a cascade of updates. + * <p> + * The hard reference to the root node is cleared if the index is + * {@link #close() closed}. This method automatically {@link #reopen()}s the + * index if it is closed, making it available for use. + */ + final protected HashDirectory getRoot() { + + // make sure that the root is defined. + if (root == null) + reopen(); + + return root; + + } + + /** + * This is part of a {@link #close()}/{@link #reopen()} protocol that may + * be used to reduce the resource burden of an {@link AbstractBTree}. The + * method delegates to {@link #_reopen()} if double-checked locking + * demonstrates that the {@link #root} is <code>null</code> (indicating + * that the index has been closed). This method is automatically invoked by + * a variety of methods that need to ensure that the index is available for + * use. + * + * @see #close() + * @see #isOpen() + * @see #getRoot() + * + * @todo import the rest of this protocol. + */ + final protected void reopen() { + + if (root == null) { + + /* + * reload the root node. + * + * Note: This is synchronized to avoid race conditions when + * re-opening the index from the backing store. + * + * Note: [root] MUST be marked as [volatile] to guarantee correct + * semantics. + * + * See http://en.wikipedia.org/wiki/Double-checked_locking + */ + + synchronized(this) { + + if (root == null) { + + // invoke with lock on [this]. + _reopen(); + + } + + } + + } + + } + + /** + * This method is invoked by {@link #reopen()} once {@link #root} has been + * show to be <code>null</code> with double-checked locking. When invoked in + * this context, the caller is guaranteed to hold a lock on <i>this</i>. + * This is done to ensure that at most one thread gets to re-open the index + * from the backing store. + */ + /*abstract*/ protected void _reopen() { + // @todo ... + } + + /* + * Checkpoint metadata. + */ + + /** + * The height of the index. The height is the #of levels minus one. + */ + final public int getHeight() { + + return 1; + + } + + /** + * The #of directory pages in the index. 
+ */ + final public int getDirectoryCount() { + + return 1; + + } + + /** + * The #of buckets in the index. + */ + final public int getBucketCount() { + + return nbuckets; +// return buckets.size(); + + } + protected int nbuckets; // @todo init from checkpoint + + /** + * The #of entries (aka tuples) in the index. When the index supports delete + * markers, this value also includes tuples which have been marked as + * deleted but not yet purged from the index. + */ + final public int getEntryCount() { + + return nentries; + + } + protected int nentries; // @todo init from checkpoint + + /** + * The backing store. + */ + final public IRawStore getStore() { + + return store; + + } + + /* + * Static utility methods and data. + */ + + /** + * An array of mask values. The index in the array is the #of bits of the + * hash code to be considered. The value at that index in the array is the + * mask to be applied to mask off to zero the high bits of the hash code + * which are to be ignored. + */ + static private final int[] masks; + static { + + masks = new int[32]; + + // Populate the array of masking values. + for (int i = 0; i < 32; i++) { + + masks[i] = getMaskBits(i); + + } + } + + /** + * Return a bit mask which reveals only the low N bits of an int32 value. + * + * @param nbits + * The #of bits to be revealed. + * @return The mask. + */ + static int getMaskBits(final int nbits) { + + if (nbits < 0 || nbits > 32) + throw new IllegalArgumentException(); + + int mask = 0; + int bit; + + for (int i = 0; i < nbits; i++) { + + bit = (1 << i); + + mask |= bit; + + } + + // System.err.println(nbits +" : "+Integer.toBinaryString(mask)); + + return mask; + + } + + /** + * Find the first power of two which is GTE the given value. This is used to + * compute the size of the address space (in bits) which is required to + * address a hash table with that many buckets. + */ + static int getMapSize(final int initialCapacity) { + + if (initialCapacity <= 0) + throw new IllegalArgumentException(); + + int i = 1; + + while ((1 << i) < initialCapacity) + i++; + + return i; + + } + + /** + * Mask off all but the lower <i>nbits</i> of the hash value. + * + * @param h + * The hash value. + * @param nbits + * The #of bits to consider. + * + * @return The hash value considering only the lower <i>nbits</i>. + */ + static protected int maskOff(final int h, final int nbits) { + + if (nbits < 0 || nbits > 32) + throw new IllegalArgumentException(); + + final int v = h & masks[nbits]; + + return v; + + } + + /** + * Return <code>2^n</code>. + * + * @param n + * The exponent. + * + * @return The result. + */ + static protected int pow2(final int n) { + +// return (int) Math.pow(2d, n); + return 1 << n; + + } + + /** + * Return the #of entries in the address map for a page having the given + * local depth. This is <code>2^(globalHashBits - localHashBits)</code>. The + * following table shows the relationship between the global hash bits (gb), + * the local hash bits (lb) for a page, and the #of directory entries for + * that page (nentries). + * + * <pre> + * gb lb nentries + * 1 0 2 + * 1 1 1 + * 2 0 4 + * 2 1 2 + * 2 2 1 + * 3 0 8 + * 3 1 4 + * 3 2 2 + * 3 3 1 + * 4 0 16 + * 4 1 8 + * 4 2 4 + * 4 3 2 + * 4 4 1 + * </pre> + * + * @param localHashBits + * The local depth of the page in [0:{@link #globalHashBits}]. + * + * @return The #of directory entries for that page. + * + * @throws IllegalArgumentException + * if either argument is less than ZERO (0). 
+ * @throws IllegalArgumentException + * if <i>localHashBits</i> is greater than + * <i>globalHashBits</i>. + */ + static protected int getSlotsForPage(final int globalHashBits, + final int localHashBits) { + + if(localHashBits < 0) + throw new IllegalArgumentException(); + + if(globalHashBits < 0) + throw new IllegalArgumentException(); + + if(localHashBits > globalHashBits) + throw new IllegalArgumentException(); + + // The #of address map entries for this page. + final int numSlotsForPage = pow2(globalHashBits - localHashBits); + + return numSlotsForPage; + + } + + /** + * Human friendly representation. + */ + public String toString() { + + final StringBuilder sb = new StringBuilder(); + + sb.append(getClass().getName()); + + sb.append("{globalHashBits=" + getGlobalHashBits()); + + sb.append(",addrSpaceSize=" + getAddressSpaceSize()); + + sb.append(",entryCount=" + getEntryCount()); + + sb.append(",bucketCount=" + getBucketCount()); + + // @todo checkpoint record. + + // @todo index metadata record. + + sb.append("}"); + + return sb.toString(); + + } + + /** + * Dump a representation of the hash index. + */ + public String dump() { + + final StringBuilder sb = new StringBuilder(); + + // basic information. + sb.append(toString()); + + // Dump the data in the index. + getRoot().dump(sb); + + return sb.toString(); + + } + + /** + * + * @param initialCapacity + * The initial capacity is the #of buckets which may be stored in + * the hash table before it must be resized. It is expressed in + * buckets and not tuples because there is not (in general) a + * fixed relationship between the size of a bucket and the #of + * tuples which can be stored in that bucket. This will be + * rounded up to the nearest power of two. + * @param bucketSize + * The #of int tuples which may be stored in a bucket. + * + * @todo Configuration options: + * <p> + * Split, Grow, or Overflow decision function (this subsumes the + * bucketSize parameter since a decision function could consider only + * the #of keys on the page). + * <p> + * Initial and maximum directory size (the latter only for hash tables + * rather than hash trees). [It is perfectly Ok to set the #of global + * bits initially to the #of distinctions which can be persisted in a + * directory page.] + * <p> + * Hash function. Order preserving hashing requires more than just an + * appropriate hashing function. The directory may need to be managed + * differently and pages may need to be chained. Also, hash functions + * with more than 32-bits will require other changes (hash:int to + * hash:Object, some tables must be larger, etc.) + * <p> + * Decision function to inline the key/value of the tuple or to write + * them as a raw record and link to that record using its in-store + * address. + * <p> + * Directory and page encoders. + * <p> + * IRaba implementations for the keys and values (including an option + * for a pure append binary representation with compaction of deleted + * tuples). [Some hash table schemes depend on page transparency for + * the dictionary.] + */ + public ExtensibleHashMap(final int initialCapacity, + final int bucketSize) { + + // @todo pass in the store reference per AbstractBTree. + this.store = new SimpleMemoryRawStore(); + + root = new HashDirectory(this, initialCapacity, + Bytes.kilobyte32 * 4/* maximumCapacity */, bucketSize); + +// /* +// * Now work backwards to determine the size of the address space (in +// * buckets). 
+// */ +// final int addressSpaceSize = SimpleExtensibleHashMap.pow2(root +// .getGlobalHashBits()); +// +// buckets = new ArrayList<SimpleBucket>(addressSpaceSize/* initialCapacity */); +// +// // Note: the local bits of the first bucket is set to ZERO (0). +// buckets.add(new SimpleBucket(this, 0/* localHashBits */, bucketSize)); + + } + + /** + * @todo Generalize w/ hash function from index metadata (the current + * implementation assumes that the hash of an int key is that int). + */ + public int hash(final Object o) { + return ((Integer)o).intValue(); + } + + /** + * @deprecated by {@link #hash(Object)} or maybe hash(byte[]). + */ + public int hash(final int key) { + + return key; + + } + + /** + * Return the pre-allocated bucket having the given address. + * <p> + * Note: The caller is responsible for ensuring that duplicate instances of + * a given bucket or directory are not loaded from the backing store for the + * same hash index object. + * + * @param addr + * The address of the bucket on the backing store. + * + * @return The bucket. + */ + protected HashBucket getBucketAtStoreAddr(final long addr) { + + // @todo various timing and counter stuff. + + return (HashBucket) SerializerUtil.deserialize(store.read(addr)); + + } + + /** + * The #of hash bits which are being used by the address table. + */ + public int getGlobalHashBits() { + + return getRoot().getGlobalHashBits(); + + } + + /** + * The size of the address space is <code>2^{@link #globalHashBits}</code>. + */ + public int getAddressSpaceSize() { + + return pow2(getGlobalHashBits()); + + } + + /* + * IAutoBoxBTree + * + * @todo API Alignment with IAutoBoxBTree + */ + + /** + * Return <code>true</code> iff the hash table contains the key. + * <p> + * Lookup: Compute h(K) and right shift (w/o sign extension) by i bits. Use + * this to index into the bucket address table. The address in the table is + * the bucket address and may be used to directly read the bucket. + * + * @param key + * The key. + * + * @return <code>true</code> iff the key was found. + */ + public boolean contains(final int key) { + + return getRoot().contains(key); + + } + + /** + * Insert the key into the hash table. Duplicates are allowed. + * <p> + * Insert: Per lookup. On overflow, we need to split the bucket moving the + * existing records (and the new record) into new buckets. + * + * @see #split(int, int, HashBucket) + * + * @param key + * The key. + * + * @todo rename as append() method. insert() should retain the semantics of + * replacing the existing tuple for the key. might rename insert to + * put(). + */ + public void insert(final int key) { + + getRoot().insert(key); + + } + + /** + * Delete the key from the hash table (in the case of duplicates, a random + * entry having that key is deleted). + * <p> + * Delete: Buckets may be removed no later than when they become empty and + * doing this is a local operation with costs similar to splitting a bucket. + * Likewise, it is clearly possible to coalesce buckets which underflow + * before they become empty by scanning the 2^(i-j) buckets indexed from the + * entries in the bucket address table using i bits from h(K). [I need to + * research handling deletes a little more, including under what conditions + * it is cost effective to reduce the size of the bucket address table + * itself.] + * + * @param key + * The key. + * + * @return <code>true</code> iff a tuple having that key was deleted. + * + * @todo return the deleted tuple. + * + * @todo merge buckets when they underflow/become empty? 
(but note that we + * do not delete anything from the hash map for a hash join, just + * insert, insert, insert). + */ + public boolean delete(final int key) { + + return getRoot().delete(key); + + } + + /* + * ISimpleBTree + */ + + @Override + public boolean contains(byte[] key) { + // TODO Auto-generated method stub + return false; + } + + @Override + public byte[] insert(byte[] key, byte[] value) { + // TODO Auto-generated method stub + return null; + } + + @Override + public byte[] lookup(byte[] key) { + // TODO Auto-generated method stub + return null; + } + + @Override + public byte[] remove(byte[] key) { + // TODO Auto-generated method stub + return null; + } + + /* + * Core CRUD methods. + * + * @todo The core B+Tree methods accept and return ITuple objects. The core + * hash index methods should do the same thing. + */ + + /** + * Visit the buckets. + * <p> + * Note: This is NOT thread-safe! + */ + public Iterator<HashBucket> buckets() { + + return getRoot().buckets(); + + } + + /** + * Return an iterator which visits all entries in the hash table. + */ + @SuppressWarnings("unchecked") + public Iterator<Integer> getEntries() { + final IStriterator sitr = new Striterator(buckets()) + .addFilter(new Expander() { + private static final long serialVersionUID = 1L; + + @Override + protected Iterator expand(final Object obj) { + return ((HashBucket) obj).getEntries(); + } + }); + return (Iterator<Integer>) sitr; + } + + /** + * Create the reference that will be used by a {@link Node} to refer to its + * children (nodes or leaves). + * + * @param child + * A node. + * + * @return A reference to that node. + * + * @see AbstractNode#self + * @see SoftReference + * @see WeakReference + */ + final <T extends AbstractHashPage<T>> Reference<AbstractHashPage<T>> newRef( + final AbstractHashPage<T> child) { + + /* + * Note: If the parent refers to its children using soft references the + * the presence of the parent will tend to keep the children wired into + * memory until the garbage collector is forced to sweep soft references + * in order to make room on the heap. Such major garbage collections + * tend to make the application "hesitate". + * + * @todo it may be that frequently used access paths in the btree should + * be converted dynamically from a weak reference to soft reference in + * order to bias the garbage collector to leave those paths alone. if we + * play this game then we should limit the #of soft references and make + * the node choose among its children for those it will hold with a soft + * reference so that the notion of frequent access is dynamic and can + * change as the access patterns on the index change. + */ + + if (store == null) { + + /* + * Note: Used for transient BTrees. + */ + + return new HardReference<AbstractHashPage<T>>(child); + + } else { + + return new WeakReference<AbstractHashPage<T>>( child ); +// return new SoftReference<AbstractNode>( child ); // causes significant GC "hesitations". + } + + + } + + /** + * A class that provides hard reference semantics for use with transient + * indices. While the class extends {@link WeakReference}, it internally + * holds a hard reference and thereby prevents the reference from being + * cleared. This approach is necessitated on the one hand by the use of + * {@link Reference} objects for linking directories and buckets, etc. and + * on the other hand by the impossibility of defining your own direct + * subclass of {@link Reference} (a runtime security manager exception will + * result). 
+ * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * + * @param <T> + */ + static class HardReference<T> extends WeakReference<T> { + + final private T ref; + + HardReference(final T ref) { + + super(null); + + this.ref = ref; + + } + + /** + * Returns the hard reference. + */ + public T get() { + + return ref; + + } + + /** + * Overridden as a NOP. + */ + public void clear() { + + // NOP + + } + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java 2010-12-01 21:43:35 UTC (rev 3990) @@ -0,0 +1,468 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htbl; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.ByteBuffer; + +import com.bigdata.btree.BTree; +import com.bigdata.btree.IBloomFilter; +import com.bigdata.btree.IndexMetadata; +import com.bigdata.io.SerializerUtil; +import com.bigdata.rawstore.IRawStore; + +/** + * A checkpoint record is written each time the hash index is flushed to the + * store. + * + * @todo Update for hash table, potentially using the same checkpoint record for + * the B+Tree and hash tables. + * <p> + * Note: In order to create a btree use + * {@link BTree#create(IRawStore, IndexMetadata)} to write the initial + * {@link IndexMetadata} record and the initial check point on the store. + * It will then load the {@link BTree} from the {@link Checkpoint} record + * and you can start using the index. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: Checkpoint.java 3318 2010-07-27 18:36:58Z thompsonbry $ + */ +public class HTableCheckpoint implements Externalizable { + + // transient and set on write or read-back. + transient long addrCheckpoint; + + // persistent and immutable. + private long addrMetadata; + private long addrRoot; + private int height; + private int nnodes; + private int nleaves; + private int nentries; + private long counter; + + /** Note: added in {@link #VERSION1} and presumed 0L in earlier versions. */ + private long addrBloomFilter; + + /** + * The address used to read this {@link Checkpoint} record from the + * store. + * <p> + * Note: This is set as a side-effect by {@link #write(IRawStore)}. + * + * @throws IllegalStateException + * if the {@link Checkpoint} record has not been written on + * a store. 
+ */ + final public long getCheckpointAddr() { + + if (addrCheckpoint == 0L) { + + throw new IllegalStateException(); + + } + + return addrCheckpoint; + + } + + /** + * Address that can be used to read the {@link IndexMetadata} record for + * the index from the store. + */ + final public long getMetadataAddr() { + + return addrMetadata; + + } + + /** + * Address of the root node or leaf of the {@link BTree}. + * + * @return The address of the root -or- <code>0L</code> iff the btree + * does not have a root. + */ + final public long getRootAddr() { + + return addrRoot; + + } + + /** + * Address of the {@link IBloomFilter}. + * + * @return The address of the bloom filter -or- <code>0L</code> iff the + * btree does not have a bloom filter. + */ + final public long getBloomFilterAddr() { + + return addrBloomFilter; + + } + + /** + * The height of the tree - ZERO(0) means just a root leaf. Values + * greater than zero give the #of levels of abstract nodes. There is + * always one layer of leaves which is not included in this value. + */ + public final int getHeight() { + + return height; + + } + + /** + * The #of non-leaf nodes. + */ + public final int getNodeCount() { + + return nnodes; + + } + + /** + * The #of leaves. + */ + public final int getLeafCount() { + + return nleaves; + + } + + /** + * The #of index entries. + */ + public final int getEntryCount() { + + return nentries; + + } + + /** + * Return the value of the counter stored in the {@link Checkpoint} + * record. + */ + public final long getCounter() { + + return counter; + + } + + /** + * A human readable representation of the state of the {@link Checkpoint} + * record. + */ + public final String toString() { + + return "Checkpoint" + // + "{height=" + height + // + ",nnodes=" + nnodes + // + ",nleaves=" + nleaves + // + ",nentries=" + nentries + // + ",counter=" + counter + // + ",addrRoot=" + addrRoot + // + ",addrMetadata=" + addrMetadata + // + ",addrBloomFilter=" + addrBloomFilter + // + ",addrCheckpoint=" + addrCheckpoint + // + "}"; + + } + + /** + * De-serialization ctor. + */ + public HTableCheckpoint() { + + } + + /** + * Create the first checkpoint record for a new {@link BTree} from a + * {@link IndexMetadata} record. The root of the {@link BTree} will NOT + * exist (its address will be <code>0L</code>). Once written on the store + * the {@link Checkpoint} record may be used to obtain a corresponding + * instance of a {@link BTree} object. + * + * @param metadata + * The index metadata record. + */ + public HTableCheckpoint(final IndexMetadata metadata ) { + + this( // + metadata.getMetadataAddr(), // + 0L,// No root yet. + 0L,// No bloom filter yet. + 0, // height + 0, // nnodes + 0, // nleaves + 0, // nentries + 0L // counter + ); + + } + + /** + * Create the first checkpoint record for an existing {@link BTree} when it + * is propagated on overflow onto a new backing {@link IRawStore}. The + * {@link #counter} is propagated to the new {@link Checkpoint} but + * otherwise the initialization is as if for an empty {@link BTree}. + * + * @param metadata + * The index metadata record. + * @param oldCheckpoint + * The last {@link Checkpoint} for the index on the old backing + * store. The {@link Checkpoint#counter} is propagated to the new + * {@link Checkpoint} record. + */ + public HTableCheckpoint(final IndexMetadata metadata, final HTableCheckpoint oldCheckpoint ) { + + this( // + metadata.getMetadataAddr(), // + 0L,// No root yet. + 0L,// No bloom filter yet. 
+ 0, // height + 0, // nnodes + 0, // nleaves + 0, // nentries + oldCheckpoint.counter + ); + + } + +// /** +// * Creates a {@link Checkpoint} record from a {@link BTree}. +// * <p> +// * Pre-conditions: +// * <ul> +// * <li>The root is clean.</li> +// * <li>The metadata record is clean.</li> +// * <li>The optional bloom filter is clean if it is defined.</li> +// * </ul> +// * Note: if the root is <code>null</code> then the root is assumed to be +// * clean and the root address from the last {@link Checkpoint} record is +// * used. Otherwise the address of the root is used (in which case it MUST be +// * defined). +// * +// * @param btree +// * The btree. +// */ +// public HTableCheckpoint(final BTree btree) { +// +// this(btree.metadata.getMetadataAddr(),// +// /* +// * root node or leaf. +// * +// * Note: if the [root] reference is not defined then we use the +// * address in the last checkpoint record. if that is 0L then +// * there is no root and a new root leaf will be created on +// * demand. +// */ +// (btree.root == null ? btree.getCheckpoint().getRootAddr() +// : btree.root.getIdentity()),// +// /* +// * optional bloom filter. +// * +// * Note: if the [bloomFilter] reference is not defined then we +// * use the address in the last checkpoint record. if that is 0L +// * then there is no bloom filter. If the [bloomFilter] reference +// * is defined but the bloom filter has been disabled, then we +// * also write a 0L so that the bloom filter is no longer +// * reachable from the new checkpoint. +// */ +// (btree.bloomFilter == null ? btree.getCheckpoint() +// .getBloomFilterAddr() +// : btree.bloomFilter.isEnabled() ? btree.bloomFilter +// .getAddr() : 0L),// +// btree.height,// +// btree.nnodes,// +// btree.nleaves,// +// btree.nentries,// +// btree.counter.get()// +// ); +// +// } + + private HTableCheckpoint(final long addrMetadata, final long addrRoot, + final long addrBloomFilter, final int height, final int nnodes, + final int nleaves, final int nentries, final long counter) { + + /* + * Note: The constraint on [addrMetadata] is relaxed in order to permit + * a transient BTree (no backing store). + */ +// assert addrMetadata != 0L; + // MUST be valid addr. + this.addrMetadata = addrMetadata; + + // MAY be 0L (tree initially has no root) + this.addrRoot = addrRoot; + + /* + * MAY be 0L (bloom filter is optional and an new bloom filter is clear, + * so it will not be written out until something is written on the + * index). + */ + this.addrBloomFilter = addrBloomFilter; + + this.height = height; + + this.nnodes = nnodes; + + this.nleaves = nleaves; + + this.nentries = nentries; + + this.counter = counter; + + } + + /** + * Initial serialization version. + * <p> + * Note: The fields of the {@link Checkpoint} record use fixed length + * representations in order to support the possibility that we might do an + * in place update of a {@link Checkpoint} record as part of a data + * migration strategy. For the same reason, the {@link Checkpoint} record + * includes some unused fields. Those fields are available for future + * version changes without requiring us to change the length of the + * {@link Checkpoint} record. + */ + private static transient final int VERSION0 = 0x0; + + /** + * The current version. + */ + private static transient final int VERSION = VERSION0; + + /** + * Write the {@link Checkpoint} record on the store, setting + * {@link #addrCheckpoint} as a side effect. 
+ * + * @param store + * + * @throws IllegalStateException + * if the {@link Checkpoint} record has already been + * written. + */ + final public void write(final IRawStore store) { + + if (addrCheckpoint != 0L) { + + throw new IllegalStateException(); + + } + + final byte[] data = SerializerUtil.serialize(this); + + addrCheckpoint = store.write(ByteBuffer.wrap(data)); + + } + + /** + * Read a {@link Checkpoint} record from a store. + * + * @param store + * The store. + * @param addrCheckpoint + * The address from which to read the {@link Checkpoint} + * record. This address is set on the {@link Checkpoint} + * record as a side-effect. + */ + public static HTableCheckpoint load(final IRawStore store, final long addrCheckpoint) { + + if (store == null) + throw new IllegalArgumentException(); + + final ByteBuffer buf = store.read(addrCheckpoint); + + final HTableCheckpoint checkpoint = (HTableCheckpoint) SerializerUtil.deserialize(buf); + + checkpoint.addrCheckpoint = addrCheckpoint; + + return checkpoint; + + } + + public void readExternal(final ObjectInput in) throws IOException, ClassNotFoundException { + + final int version = in.readInt(); + + if (version != VERSION0) + throw new IOException("Unknown version: " + version); + + this.addrMetadata = in.readLong(); + + this.addrRoot = in.readLong(); + + this.addrBloomFilter = in.readLong(); + + this.height = in.readInt(); + + this.nnodes = in.readInt(); + + this.nleaves = in.readInt(); + + this.nentries = in.readInt(); + + this.counter = in.readLong(); + + in.readLong(); // unused. + + in.readLong(); // unused. + + } + + public void writeExternal(final ObjectOutput out) throws IOException { + + out.writeInt(VERSION); + + out.writeLong(addrMetadata); + + out.writeLong(addrRoot); + + out.writeLong(addrBloomFilter); + + out.writeInt(height); + + out.writeInt(nnodes); + + out.writeInt(nleaves); + + out.writeInt(nentries); + + out.writeLong(counter); + + out.writeLong(0L/*unused*/); + + out.writeLong(0L/*unused*/); + + } + +} + Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java 2010-12-01 21:43:35 UTC (rev 3990) @@ -0,0 +1,102 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htbl; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.UUID; + +/** + * Configuration options. + * + * @todo Reconcile with IndexMetadata. 
+ */ +public class HTableMetadata implements Externalizable { + + /** + * The unique identifier for the index. + */ + private UUID uuid; + + /** + * Function used to generate hash values from keys. + */ + private HashFunction hashFunction; + + /** + * Function decides whether to split a page, link an overflow page, or + * expand the size of a page. + */ + // private SplitFunction splitFunction; + + /** + * De-serialization constructor. + */ + public HTableMetadata() { + + } + + /** + * Anonymous hash index. + * + * @param uuid + * The unique index identifier. + */ + public HTableMetadata(final UUID uuid) { + + this(null/* name */, uuid); + + } + + /** + * Named hash index + * + * @param name + * The index name. + * @param uuid + * The unique index identifier. + */ + public HTableMetadata(final String name, final UUID uuid) { + + } + + @Override + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java (from rev 3988, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/SimpleBucket.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java 2010-12-01 21:43:35 UTC (rev 3990) @@ -0,0 +1,518 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 29, 2010 + */ +package com.bigdata.htbl; + +import java.util.Iterator; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.data.ILeafData; + +/** + * A (very) simple hash bucket. The bucket holds N int32 keys. + * + * @todo Share the {@link ILeafData} interface? Or define a common base + * interface for hash buckets and b+tree leaves and then specialize it for + * the different kinds of indices? + */ +public class HashBucket extends AbstractHashPage<HashBucket> { + + private final transient static Logger log = Logger + .getLogger(HashBucket.class); + + /** + * The #of hash code bits which are in use by this {@link HashBucket}. + * <p> + * Note: There are <code>2^(globalBits-localBits)</code> dictionary entries + * which address a given page. Initially, globalBits := 1 and localBits := + * 0. For these values, we have <code>2^(1-0) == 2</code> references to the + * initial page of the hash table. 
+ * + * @todo If we need to examine this when we change the size of the address + * space then it makes more sense to have this as local metadata in + * the address table than as local data in the bucket (the latter + * would require us to visit each bucket when expanding the address + * space). This only needs to be 4 bits to express values in [0:31]. + * + * @todo When overflow buckets are chained together, does each bucket have + * {@link #localHashBits}? If they do, then we need to make sure that + * all buckets in the chain are updated. If {@link #localHashBits} is + * only marked on the first bucket in the chain then we need to + * correctly ignore it on overflow buckets. + * + * @todo adjusting this dirties the bucket (unless the #of local bits its + * stored in the address table entry, but that increases the in-memory + * burden of the address table). + */ + int localHashBits; + + /** + * The #of keys stored in the bucket. The keys are stored in a dense array. + * For a given {@link #size}, the only indices of the array which have any + * data are [0:{@link #size}-1]. + */ + int size; + + /** + * The user data for the bucket. + * + * @todo IRaba keys plus IRaba vals, but the encoded representation must + * support out of line keys/values. That means that the IRaba will + * have to have access to the store or ITuple will have to have + * indirection support. + */ + final int[] data; + + /** + * Human friendly representation. + */ + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(super.toString()); + sb.append("{localHashBits=" + localHashBits); + sb.append(",size=" + size); + sb.append(",values={"); + for (int i = 0; i < size; i++) { + if (i > 0) + sb.append(','); + sb.append(Integer.toString(data[i])); + } + sb.append("}}"); + return sb.toString(); + } + + /** + * Create a new mutable bucket. + * + * @param htbl + * @param localHashBits + * @param bucketSize + */ + public HashBucket(final ExtensibleHashMap htbl, + final int localHashBits, final int bucketSize) { + + super(htbl, true/* dirty */); + + if (localHashBits < 0 || localHashBits > 32) + throw new IllegalArgumentException(); + + if (bucketSize <= 0) + throw new IllegalArgumentException(); + + this.localHashBits = localHashBits; + + this.data = new int[bucketSize]; + + // one more bucket. + htbl.nbuckets++; + + } + + /** + * Return <code>true</code> if the bucket contains the key. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>true</code> if the key was found in the bucket. + * + * @todo passing in the hash code here makes sense when the bucket stores + * the hash values, e.g., if we always do that or if we have an out of + * bucket reference to a raw record because the tuple did not fit in + * the bucket. + */ + public boolean contains(final int h, final int key) { + + for (int i = 0; i < size; i++) { + + if (data[i] == key) + return true; + + } + + return false; + + } + + /** + * Type safe enumeration reports on the various outcomes when attempting to + * insert a tuple into a page. + * + * @todo The problem with this enumeration (or with using a return code per + * the following javadoc) is that page splits are going to be deferred + * until the page is evicted unless there is an aspect of the split + * function which decides based on the #of tuples on the page. 
If a + * the split function reports that the page is over capacity when it + * is evicted, then we need to decide whether to split the page, chain + * an overflow page, or use a larger capacity page. + * <p> + * What we should do is scan the page if an insert would fail (or if + * the serialization of the page would fail) and determine what local + * depth we would need to successfully split the page (e.g., no more + * than 70% of the items would be in any prefix at a given depth). + * That can be used to guide the decision to use overflow pages or + * expand the directory. + * <p> + * What are some fast techniques for counting the #of bits which we + * need to make the necessary distinctions in the bucket? Should we + * build a trie over the hash codes? + */ + private static enum InsertEnum { + /** + * The tuple was inserted successfully into this page. + * + * @todo This could be reported as ZERO (0), which is an indication that + * NO expansions where required to insert the tuple into the page. + */ + OK, + /** + * The insert failed because the page is full. Further, the tuple has + * the same key value as all other tuples on the page. Therefore, either + * the insert must be directed into an overflow page or the page size + * must be allowed to increase. + * + * @todo This could be reported as {@link Integer#MAX_VALUE}, which is + * an indication that infinite expansions will not make it + * possible to insert the key into this page (e.g., an overflow + * page is required). [Alternatively, this could report the + * necessary page size if we allow the page size to expand.] + */ + KEYS_ARE_IDENTICAL, + /** + * The insert failed because the page is full. Further, the hash + * associated with the tuple is the same as the hash for all other keys + * on the page. In this case, the insert operation will eventually + * succeed if the address space is expanded (one or more times). + * + * @todo This could be reported as the #of bits which are in common for + * the keys in this page. That could be used to determine how many + * expansions would be required before the key could be inserted. + * [If KEYS_ARE_IDENTICAL is handled by reporting the necessary + * page size, then this could report the #of hash bits which are + * identical using a negative integer (flipping the sign).] + */ + HASH_IS_IDENTICAL; + } + + /** + * Insert the key into the bucket (duplicates are allowed). It is an error + * if the bucket is full. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>false</code> iff the bucket must be split. + * + * @todo The caller needs to be careful that [h] is the full hash code for + * the key. Normally this is not a problem, but we sometimes wind up + * with masked off hash codes, especially during splits and merges, + * and those must not be passed in here. + */ + public void insert(final int h, final int key) { + + if (size == data.length) { + + /* + * The bucket must be split, potentially recursively. + * + * Note: Splits need to be triggered based on information which is + * only available to the bucket when it considers the insert of a + * specific tuple, including whether the tuple is promoted to a raw + * record reference, whether the bucket has deleted tuples which can + * be compacted, etc. + * + * @todo I need to figure out where the control logic goes to manage + * the split. If the bucket handles splits, then we need to pass in + * the table reference. + */ + + // split the bucket and insert the record (recursive?) 
+ split(key, this); + + /* + * Insert the key into the expanded hash table (this will insert + * into either the old or the new bucket, depending on the hash code + * for the key). + * + * FIXME There are a variety of special conditions which need to be + * handled by insert(), especially all keys have the same value or + * the same int32 hash code or the tuple is too large for the + * bucket. Those conditions all need to be handled before requested + * a split. Since insert() has to handle all of this, it is also + * responsible for re-attempting the key insertion after the split. + * + * The next step is to handle cases where splitting the bucket once + * does not result in a bucket with sufficient space for the new + * key. There are actually two cases here: (1) the hash codes of the + * keys are distinct, so if we double the address space enough times + * the insert will succeed; (2) the hash codes of the keys are + * identical, so no amount of expansion of the address space will + * permit the insert to succeed and an overflow page must be used. + * For (1) we can also chose to use an overflow page in order to + * prevent run away expansion of the address space. + * + * This class needs to be converted to use persistence and to use an + * IRaba for keys/values. For the sake of the unit tests, it needs + * to be parameterized for the overflow versus expand decision and + * the IRaba for the keys needs to be defined such that we have a + * guaranteed split when there are three integer keys (or a split + * function could be used to make this decision based on more + * general criteria). [Could also use a pure- append binary raba w/ + * compacting if the raba is full and there are deleted tuples.] + */ + if (log.isDebugEnabled()) + log.debug("retrying insert: key=" + key); + + /* + * @todo This can recurse until the address space reaches the + * maximum possible address space and then throw an exception. The + * code should be modified to use a decision function for growing + * the page, chaining an overflow page, or splitting the page (when + * it would cause the address space to be doubled). + */ + htbl.insert(key); + +// { +// // the hash value of the key. +// final int h = htbl.hash(key); +// // the address of the bucket for that hash code. +// final int addr = htbl.getRoot().addrOf(h); +//... [truncated message content] |
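[Editorial note, not part of the commit] The classes added in this revision are still skeletal (the ISimpleBTree methods are stubs and the checkpoint/metadata records are not yet wired in), so the following is only an illustrative, in-memory sketch of the extensible hashing scheme the javadoc above describes: a directory with 2^globalDepth entries, a local depth per bucket, lookup by masking off the low globalDepth bits of the hash, directory doubling, and bucket splits. All names here (ExtensibleHashSketch, Bucket, splitBucket, etc.) are hypothetical and are not the committed ExtensibleHashMap/HashDirectory/HashBucket design; it uses an identity hash for int keys, has no overflow pages, and ignores persistence.

import java.util.ArrayList;
import java.util.List;

/** Minimal, transient sketch of extensible hashing for int keys (assumed names, not the committed API). */
public class ExtensibleHashSketch {

    /** A data page with a local depth and a fixed key capacity. */
    private static class Bucket {
        int localDepth;
        final List<Integer> keys = new ArrayList<Integer>();
        Bucket(final int localDepth) { this.localDepth = localDepth; }
    }

    private final int bucketCapacity;
    private int globalDepth = 0;                       // directory has 2^globalDepth slots.
    private Bucket[] directory = new Bucket[] { null };

    public ExtensibleHashSketch(final int bucketCapacity) {
        this.bucketCapacity = bucketCapacity;
        directory[0] = new Bucket(0);                  // single initial page, localDepth == 0.
    }

    /** Identity hash for int keys, mirroring the assumption in the commit's test harness. */
    private int hash(final int key) { return key; }

    /** Mask off all but the low [nbits] of the hash (nbits in [0:31] for this sketch). */
    private static int maskOff(final int h, final int nbits) {
        return h & ((1 << nbits) - 1);
    }

    public boolean contains(final int key) {
        return directory[maskOff(hash(key), globalDepth)].keys.contains(key);
    }

    public void insert(final int key) {
        final Bucket b = directory[maskOff(hash(key), globalDepth)];
        if (b.keys.size() < bucketCapacity) {
            b.keys.add(key);
            return;
        }
        // Page is full: double the directory if it has only one slot for this page, then split.
        if (b.localDepth == globalDepth) {
            doubleDirectory();
        }
        splitBucket(b);
        // Retry. NOTE: if all keys share the same hash code this never terminates; a real index
        // needs the overflow-page / page-growth decision function discussed in the commit javadoc.
        insert(key);
    }

    /** Double the address space: each old slot is copied into two slots that alias the same page. */
    private void doubleDirectory() {
        final Bucket[] d = new Bucket[directory.length * 2];
        for (int i = 0; i < directory.length; i++) {
            d[i] = directory[i];
            d[i + directory.length] = directory[i];
        }
        directory = d;
        globalDepth++;
    }

    /** Split a full page, redistributing its keys on one additional (low-order) hash bit. */
    private void splitBucket(final Bucket b) {
        final int bit = b.localDepth;                  // the bit that now distinguishes the two pages.
        final Bucket zero = new Bucket(b.localDepth + 1);
        final Bucket one = new Bucket(b.localDepth + 1);
        for (final int key : b.keys) {
            (((hash(key) >>> bit) & 1) == 0 ? zero : one).keys.add(key);
        }
        // Re-point every directory slot that referenced the old page.
        for (int i = 0; i < directory.length; i++) {
            if (directory[i] == b) {
                directory[i] = (((i >>> bit) & 1) == 0) ? zero : one;
            }
        }
    }

    public static void main(final String[] args) {
        final ExtensibleHashSketch map = new ExtensibleHashSketch(2 /* bucketCapacity */);
        for (final int k : new int[] { 1, 2, 3, 4, 5, 6 }) map.insert(k);
        System.out.println(map.contains(4)); // true
        System.out.println(map.contains(9)); // false
    }
}

With bucketCapacity = 2 the six inserts above force the directory to double twice (globalDepth 0 -> 2), which is the dynamic the commit's getSlotsForPage() table and split()/insert() retry logic are working toward; the sketch simply omits the persistence (IRawStore addresses, Reference links, checkpoint records) that the committed classes are adding.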