From: <tho...@us...> - 2010-12-03 18:48:11
Revision: 3991
          http://bigdata.svn.sourceforge.net/bigdata/?rev=3991&view=rev
Author:   thompsonbry
Date:     2010-12-03 18:48:02 +0000 (Fri, 03 Dec 2010)

Log Message:
-----------
Modified the Checkpoint record to support HTree as well as BTree. This change introduces a new version for the checkpoint record and is backwards compatible.

Modified the DefaultLeafCoder to support hash buckets with the optional inclusion of 32-bit hash codes into the record. This does not change the binary layout of the leaf when hash values are not included and is therefore backwards compatible.

Added unit tests for the DefaultLeafCoder when used to store the data for an HTree bucket.

Moved the HTree classes out of test. They are not ready for use, but the modification to support the hash bucket data page requires that the various interfaces be declared in the src/java rather than the src/test code paths.

Modified Paths:
--------------
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractNodeOrLeafDataRecordTestCase.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/MockLeafData.java

Added Paths:
-----------
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashFunction.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IDirectoryData.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestExtensibleHashing.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java

Removed Paths:
-------------
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/ExtensibleHashMap.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableCheckpoint.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashDirectory.java
    branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashFunction.java
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/TestExtensibleHashing.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java 2010-12-01 21:43:35 UTC (rev 3990) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/Checkpoint.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -34,17 +34,59 @@ // persistent and immutable. private long addrMetadata; - private long addrRoot; - private int height; - private int nnodes; - private int nleaves; - private int nentries; + private long addrRoot; // of root node/leaf for BTree; rootDir for HTree. + private int height; // height for BTree; globalDepth for HTree. + private int nnodes; // #of directories for HTree + private int nleaves; // #of buckets for HTree. + private int nentries; // #of tuples in the index. private long counter; - /** Note: added in {@link #VERSION1} and presumed 0L in earlier versions. */ private long addrBloomFilter; - /** + /** + * Added in {@link #VERSION1}. This is a short field allowing for 65536 + * different possible index types. + */ + private IndexTypeEnum indexType; + + /** + * Type safe enumeration of index types. + */ + public static enum IndexTypeEnum { + + /** BTree. */ + BTree((short)0), + + /** Extendable hash tree. */ + HTree((short)1); + + private IndexTypeEnum(final short code) { + + this.code = code; + + } + + private final short code; + + public short getCode() { + + return code; + + } + + static public IndexTypeEnum valueOf(final short code) { + switch (code) { + case 0: + return BTree; + case 1: + return HTree; + default: + throw new IllegalArgumentException("code=" + code); + } + } + } + + /** * The address used to read this {@link Checkpoint} record from the * store. * <p> @@ -99,20 +141,45 @@ return addrBloomFilter; } - - /** - * The height of the tree - ZERO(0) means just a root leaf. Values - * greater than zero give the #of levels of abstract nodes. There is - * always one layer of leaves which is not included in this value. - */ + + /** + * The height of a B+Tree. ZERO(0) means just a root leaf. Values greater + * than zero give the #of levels of abstract nodes. There is always one + * layer of leaves which is not included in this value. + * + * @return The global depth and ZERO (0) unless the checkpoint record is for + * an {@link IndexTypeEnum#BTree} + */ public final int getHeight() { - return height; + switch (indexType) { + case BTree: + return height; + default: + throw new UnsupportedOperationException(); + } } + /** + * The global depth of the root directory (HTree only). + * + * @return The global depth and ZERO (0) unless the checkpoint record is for + * an {@link IndexTypeEnum#HTree} + */ + public final int getGlobalDepth() { + + switch (indexType) { + case HTree: + return height; + default: + throw new UnsupportedOperationException(); + } + + } + /** - * The #of non-leaf nodes. + * The #of non-leaf nodes (B+Tree) or directories (HTree). */ public final int getNodeCount() { @@ -121,7 +188,7 @@ } /** - * The #of leaves. + * The #of leaves (B+Tree) or hash buckets (HTree). */ public final int getLeafCount() { @@ -130,7 +197,7 @@ } /** - * The #of index entries. + * The #of index entries (aka tuple count). 
*/ public final int getEntryCount() { @@ -155,7 +222,10 @@ public final String toString() { return "Checkpoint" + // - "{height=" + height + // + "{indexType=" + indexType + // + (indexType == IndexTypeEnum.BTree ? ",height=" + height + : (indexType == IndexTypeEnum.HTree ? ",globalDepth=" + + height : "")) + ",nnodes=" + nnodes + // ",nleaves=" + nleaves + // ",nentries=" + nentries + // @@ -195,7 +265,9 @@ 0, // nnodes 0, // nleaves 0, // nentries - 0L // counter + 0L, // counter + IndexTypeEnum.BTree // indexType + ); } @@ -223,7 +295,8 @@ 0, // nnodes 0, // nleaves 0, // nentries - oldCheckpoint.counter + oldCheckpoint.counter,// + IndexTypeEnum.BTree// ); } @@ -276,15 +349,19 @@ btree.nnodes,// btree.nleaves,// btree.nentries,// - btree.counter.get()// + btree.counter.get(),// + IndexTypeEnum.BTree// ); } - private Checkpoint(final long addrMetadata, final long addrRoot, - final long addrBloomFilter, final int height, final int nnodes, - final int nleaves, final int nentries, final long counter) { + private Checkpoint(final long addrMetadata, final long addrRoot, + final long addrBloomFilter, final int height, final int nnodes, + final int nleaves, final int nentries, final long counter, + final IndexTypeEnum indexType) { + assert indexType != null; + /* * Note: The constraint on [addrMetadata] is relaxed in order to permit * a transient BTree (no backing store). @@ -313,6 +390,8 @@ this.counter = counter; + this.indexType = indexType; + } /** @@ -327,11 +406,17 @@ * {@link Checkpoint} record. */ private static transient final int VERSION0 = 0x0; + + /** + * Adds the {@link #indexType} field and the {@link #globalDepth} field, + * which is present only for {@link IndexTypeEnum#HTree}. + */ + private static transient final int VERSION1 = 0x1; /** * The current version. */ - private static transient final int VERSION = VERSION0; + private static transient final int VERSION = VERSION1; /** * Write the {@link Checkpoint} record on the store, setting @@ -386,8 +471,13 @@ final int version = in.readInt(); - if (version != VERSION0) - throw new IOException("Unknown version: " + version); + switch (version) { + case VERSION0: + case VERSION1: + break; + default: + throw new IOException("Unknown version: " + version); + } this.addrMetadata = in.readLong(); @@ -405,7 +495,19 @@ this.counter = in.readLong(); - in.readLong(); // unused. + switch (version) { + case VERSION0: + in.readLong(); // unused + indexType = IndexTypeEnum.BTree; + break; + case VERSION1: + this.indexType = IndexTypeEnum.valueOf(in.readShort()); + in.readShort();// ignored. + in.readInt();// ignored. + break; + default: + throw new AssertionError(); + } in.readLong(); // unused. @@ -431,10 +533,20 @@ out.writeLong(counter); - out.writeLong(0L/*unused*/); + /* + * 8 bytes follow. + */ - out.writeLong(0L/*unused*/); - - } + out.writeShort(indexType.getCode()); + out.writeShort(0/* unused */); + out.writeInt(0/* unused */); + /* + * 8 bytes follow. 
+ */ + + out.writeLong(0L/* unused */); + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java 2010-12-01 21:43:35 UTC (rev 3990) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/AbstractReadOnlyNodeData.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -129,6 +129,14 @@ */ protected static final short DELTA_VERSION_TIMESTAMPS = 1 << 2; + /** + * Bit flag indicating that the int32 hash of the key should be stored in + * the leaf data record. The function used to compute hash code will be + * known to the owning data structure. This is primarily intended for use + * with hash trees. + */ + protected static final short FLAG_HASH_KEYS = 1 << 3; + /** * The size of the field in the data record which encodes whether the data * record represents a B+Tree {@link #NODE}, a {@link #LEAF}, or a Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-01 21:43:35 UTC (rev 3990) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -43,6 +43,7 @@ import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.ICodedRaba; import com.bigdata.btree.raba.codec.IRabaCoder; +import com.bigdata.htree.data.IBucketData; import com.bigdata.io.AbstractFixedByteArrayBuffer; import com.bigdata.io.DataOutputBuffer; @@ -53,7 +54,7 @@ * @version $Id$ */ public class DefaultLeafCoder implements IAbstractNodeDataCoder<ILeafData>, - Externalizable { + Externalizable { /** * @@ -189,12 +190,16 @@ short flags = 0; final boolean hasDeleteMarkers = leaf.hasDeleteMarkers(); final boolean hasVersionTimestamps = leaf.hasVersionTimestamps(); + final boolean hasHashKeys = leaf instanceof IBucketData; // @todo add hasHashKeys() method? if (hasDeleteMarkers) { flags |= AbstractReadOnlyNodeData.FLAG_DELETE_MARKERS; } if (hasVersionTimestamps) { flags |= AbstractReadOnlyNodeData.FLAG_VERSION_TIMESTAMPS; } + if(hasHashKeys) { + flags |= AbstractReadOnlyNodeData.FLAG_HASH_KEYS; + } buf.putShort(flags); @@ -341,6 +346,88 @@ } + // hash codes of the keys (MSB prefix plus LSB coded). +// final int O_hashKeys; + if (hasHashKeys) { + + // The bit length of the hash values. + final int hashBitLength = 32;//((IBucketData)leaf).getHashBitLength(); + + // The bit length of the shared MSB prefix. + final int lengthMSB = ((IBucketData)leaf).getLengthMSB(); + + // The bit length of the LSB which differ for each hash value. + final int lengthLSB = hashBitLength - lengthMSB; + +// buf.putShort((short) hashBitLength); + + buf.putShort((short) lengthMSB); + +// O_hashKeys = buf.pos(); + + if (nkeys > 0) { + + final int byteLength = BytesUtil + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + + final byte[] a = new byte[byteLength]; + + final OutputBitStream obs = new OutputBitStream(a); + + try { + + // The hash of the first key. + int h = ((IBucketData) leaf).getHash(0/* index */); + + // Drop off the LSB bits, leaving the MSB bits in the LSB position. + h = h >>> lengthLSB; + +// // Reverse bits to since obs writes the LSB of the int. +// h = Integer.reverse(h); + + // The MSB prefix. 
+ obs.writeInt(h, lengthMSB/* MSB bits */); + + // The LSB of the hash of each key. + for (int i = 0; i < nkeys; i++) { + + // The hash of this key. + h = ((IBucketData)leaf).getHash(i); + + // Drop off the MSB bits. + h = h >>> lengthMSB; + +// // Reverse bits since obs writes the LSB of the int. +// h = Integer.reverse(h); + + // The LSB. + obs.writeInt(h, lengthLSB); + + } + + // copy onto the buffer. + buf.put(a); + + } catch (IOException e) { + throw new RuntimeException(e); + // Note: close is not necessary if flushed and backed by + // byte[]. + // } finally { + // try { + // obs.close(); + // } catch (IOException e) { + // log.error(e); + // } + } + + } + +// } else { +// +// O_hashKeys = -1; + + } + // Slice containing the coded leaf. final AbstractFixedByteArrayBuffer slice = buf.slice(// O_origin, buf.pos() - O_origin); @@ -373,7 +460,7 @@ * @version $Id$ */ static private class ReadOnlyLeafData extends AbstractReadOnlyNodeData<ILeafData> - implements ILeafData { + implements ILeafData, IBucketData { /** The backing buffer. */ private final AbstractFixedByteArrayBuffer b; @@ -407,6 +494,25 @@ */ private final int versionTimestampBits; + /** + * Offset of the int32 hash values in the buffer encoding hash value of + * the tuple keys -or- <code>-1</code> if the leaf does not report those + * data. + */ + private final int O_hashKeys; + + /** + * The #of bits used to code the hash keys -or- ZERO (0) if they are not + * present. (The length of the MSB hash prefix is 32-lengthLSB.) + */ + private final int lengthLSB; + + /** + * The MSB hash prefix shared by all hash codes on this page -or- ZERO + * (0) if hash codes are not present in the page. + */ + private final int hashMSB; + public final AbstractFixedByteArrayBuffer data() { return b; @@ -469,6 +575,7 @@ pos += SIZEOF_FLAGS; final boolean hasVersionTimestamps = ((flags & FLAG_VERSION_TIMESTAMPS) != 0); final boolean hasDeleteMarkers = ((flags & FLAG_DELETE_MARKERS) != 0); + final boolean hasHashKeys = ((flags & FLAG_HASH_KEYS) != 0); this.nkeys = buf.getInt(pos); pos += SIZEOF_NKEYS; @@ -523,6 +630,49 @@ } + if(hasHashKeys) { + + final int lengthMSB = buf.getShort(pos); + pos += 2; + + lengthLSB = 32 /* hashBitLength */- lengthMSB; + + /* + * The byte offset to the start of the bit coded hash keys. The + * first bit coded value is the MSB prefix. You need to skip + * over that when indexing into the LSB array. + */ + O_hashKeys = pos; + + final int byteLength = BytesUtil + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + + if (nkeys > 0) { + + final InputBitStream ibs = buf.slice(pos, byteLength) + .getInputBitStream(); + + try { + hashMSB = ibs.readInt(lengthMSB); + } catch (IOException ex) { + // Note: should not be thrown. + throw new RuntimeException(ex); + } + + } else { + + hashMSB = 0; + + } + + } else { + + O_hashKeys = -1; + lengthLSB = 0; + hashMSB = 0; + + } + // save reference to buffer this.b = buf; @@ -584,6 +734,7 @@ pos += SIZEOF_FLAGS; final boolean hasVersionTimestamps = ((flags & FLAG_VERSION_TIMESTAMPS) != 0); final boolean hasDeleteMarkers = ((flags & FLAG_DELETE_MARKERS) != 0); + final boolean hasHashKeys = ((flags & FLAG_HASH_KEYS) != 0); this.nkeys = buf.getInt(pos); pos += SIZEOF_NKEYS; @@ -638,6 +789,49 @@ } + if(hasHashKeys) { + + final int lengthMSB = buf.getShort(pos); + pos += 2; + + lengthLSB = 32 /* hashBitLength */- lengthMSB; + + /* + * The byte offset to the start of the bit coded hash keys. The + * first bit coded value is the MSB prefix. 
You need to skip + * over that when indexing into the LSB array. + */ + O_hashKeys = pos; + + final int byteLength = BytesUtil + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + + if (nkeys > 0) { + + final InputBitStream ibs = buf.slice(pos, byteLength) + .getInputBitStream(); + + try { + hashMSB = ibs.readInt(lengthMSB); + } catch (IOException ex) { + // Note: should not be thrown. + throw new RuntimeException(ex); + } + + } else { + + hashMSB = 0; + + } + + } else { + + O_hashKeys = -1; + lengthLSB = 0; + hashMSB = 0; + + } + // save reference to buffer this.b = buf; @@ -709,6 +903,12 @@ } + final public boolean hasHashKeys() { + + return (flags & FLAG_HASH_KEYS) != 0; + + } + public long getMinimumVersionTimestamp() { if (!hasVersionTimestamps()) @@ -770,7 +970,55 @@ return b.getBit((O_deleteMarkers << 3) + index); } + + final public int getLengthMSB() { + + + if (!hasHashKeys()) + throw new UnsupportedOperationException(); + + final int lengthMSB = 32/* hashBitLength */- lengthLSB; + + return lengthMSB; + + } + final public int getHash(final int index) { + + if (index < 0 || index >= nkeys) + throw new IllegalArgumentException(); + + if (!hasHashKeys()) + throw new UnsupportedOperationException(); + + final int lengthMSB = 32/* hashBitLength */- lengthLSB; + + final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB + + nkeys * lengthMSB/* nbits */); + + final InputBitStream ibs = b.slice(O_hashKeys, byteLength) + .getInputBitStream(); + + try { + + final long position = lengthMSB + index * lengthLSB; + + ibs.position(position); + + int h = ibs.readInt(lengthLSB); + + h |= hashMSB; + + return h; + + } catch(IOException ex) { + + throw new RuntimeException(ex); + + } + + } + final public IRaba getKeys() { return keys; @@ -942,6 +1190,26 @@ } + if (leaf instanceof IBucketData) { + + final IBucketData d = (IBucketData)leaf; + + sb.append(",\nhashCodes={lengthMSB=" + d.getLengthMSB() + + ",tuples=["); + + for (int i = 0; i < nkeys; i++) { + + if (i > 0) + sb.append(", "); + + sb.append(d.getHash(i)); + + } + + sb.append("]"); + + } + return sb; } Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/AbstractHashPage.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/AbstractHashPage.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,100 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htree; + +import java.lang.ref.Reference; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.PO; + +/** + * Abstract class for both directory and data pages for a hash index. + */ +abstract public class AbstractHashPage <T extends AbstractHashPage +/* + * DO-NOT-USE-GENERIC-HERE. The compiler will fail under Linux (JDK 1.6.0_14, + * _16). + */ +> extends PO //implements IAbstractNode, IAbstractNodeData +{ + + private final static transient Logger log = Logger + .getLogger(AbstractHashPage.class); + + /** + * Transient back reference to the index to which this directory belongs. + */ + protected transient HashTree htbl; + + /** + * <p> + * A {@link Reference} to this page. This is created when the page is + * created and effectively provides a canonical {@link Reference} object for + * any given page. + * </p> + * + * @todo Do we need back references for recursive directories? + */ + transient protected final Reference<? extends AbstractHashPage<T>> self; + + /** + * Disallowed. + */ + private AbstractHashPage() { + + throw new UnsupportedOperationException(); + + } + + protected AbstractHashPage(final HashTree htbl, final boolean dirty) { + + if(htbl == null) + throw new IllegalArgumentException(); + + this.htbl = htbl; + + // reference to self: reused to link parents and children. + this.self = htbl.newRef(this); + + if (!dirty) { + + /* + * Nodes default to being dirty, so we explicitly mark this as + * clean. This is ONLY done for the de-serialization constructors. + */ + + setDirty(false); + + } + +// @todo Add to the hard reference queue. +// btree.touch(this); + + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HTableMetadata.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HTableMetadata.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,106 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htree; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.UUID; + +/** + * Configuration options. + * + * @todo Reconcile with IndexMetadata. 
+ */ +public class HTableMetadata implements Externalizable { + + /** + * The unique identifier for the index. + */ + private UUID uuid; + + /** + * Function used to generate hash values from keys. + */ + private HashFunction hashFunction; + + private Object directoryCoder; + + private Object bucketCoder; + + /** + * Function decides whether to split a page, link an overflow page, or + * expand the size of a page. + */ + // private SplitFunction splitFunction; + + /** + * De-serialization constructor. + */ + public HTableMetadata() { + + } + + /** + * Anonymous hash index. + * + * @param uuid + * The unique index identifier. + */ + public HTableMetadata(final UUID uuid) { + + this(null/* name */, uuid); + + } + + /** + * Named hash index + * + * @param name + * The index name. + * @param uuid + * The unique index identifier. + */ + public HTableMetadata(final String name, final UUID uuid) { + + } + + @Override + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashBucket.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,689 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 29, 2010 + */ +package com.bigdata.htree; + +import java.util.Iterator; + +import org.apache.log4j.Logger; + +import com.bigdata.btree.IOverflowHandler; +import com.bigdata.btree.IndexSegment; +import com.bigdata.btree.data.IAbstractNodeDataCoder; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.IRaba; +import com.bigdata.htree.data.IBucketData; +import com.bigdata.htree.data.IDirectoryData; +import com.bigdata.io.AbstractFixedByteArrayBuffer; +import com.bigdata.rawstore.IRawStore; + +/** + * A (very) simple hash bucket. The bucket holds N int32 keys. + * + * @todo The hash of the key should be part of the ITuple interface so it can be + * passed along based on the application level encoding of the key. + * + * @todo Support out-of-line representations of the key and/or value for a tuple + * when they are large. The definition of "large" can be a configuration + * value for the index metadata. 
For example, 1/4 of the target page size + * or (1k assuming a target page size of 4k). It should also be possible + * to specify that the value is always out of line (this corresponds to + * the common practice in a relational database of indexing into a + * persistent heap rather than the perfect indices with their inline data + * which we use for RDF statements). + * <p> + * The easiest way to do this is to treat the key and value separately and + * write them as raw records onto the backing store if they exceed the + * configured threshold. For the B+Tree, we can not readily move the key + * out of line since we need it for search, but it is easy to do this for + * the HTree. (For now, I suggest that we live with the constraint that + * the key can not be moved out of line for the B+Tree.) For both index + * structures, it is easy to move the value out of line. The tuple + * metadata will stay inline regardless. + * <p> + * In order to resolve out of line keys and/or values the + * {@link ILeafData} will need access to the {@link IRawStore} reference. + * This may require an API change to {@link IRaba} and/or + * {@link IAbstractNodeDataCoder} (the latter also needs to be modified to + * work with {@link IDirectoryData} records) in order to made the + * {@link IRawStore} reference available when the record is serialized + * and/or deserialized. + * <p> + * When the tuple is deleted, the raw record reference for its key and/or + * value must also be deleted. + * <p> + * During a bulk index build, the raw record must be copied to the target + * index store, e.g., an {@link IndexSegment} using an + * {@link IOverflowHandler}. + */ +public class HashBucket extends AbstractHashPage<HashBucket>// +// implements IBucketData// +{ + + private final transient static Logger log = Logger + .getLogger(HashBucket.class); + + /** + * The #of hash code bits which are in use by this {@link HashBucket}. + * <p> + * Note: There are <code>2^(globalBits-localBits)</code> dictionary entries + * which address a given page. Initially, globalBits := 1 and localBits := + * 0. For these values, we have <code>2^(1-0) == 2</code> references to the + * initial page of the hash table. + * + * @todo If we need to examine this when we change the size of the address + * space then it makes more sense to have this as local metadata in + * the address table than as local data in the bucket (the latter + * would require us to visit each bucket when expanding the address + * space). This only needs to be 4 bits to express values in [0:31]. + * + * @todo When overflow buckets are chained together, does each bucket have + * {@link #localHashBits}? If they do, then we need to make sure that + * all buckets in the chain are updated. If {@link #localHashBits} is + * only marked on the first bucket in the chain then we need to + * correctly ignore it on overflow buckets. + * + * @todo adjusting this dirties the bucket (unless the #of local bits its + * stored in the address table entry, but that increases the in-memory + * burden of the address table). + */ + private int localHashBits; + + /** + * The #of keys stored in the bucket. The keys are stored in a dense array. + * For a given {@link #size}, the only indices of the array which have any + * data are [0:{@link #size}-1]. + */ + int size; + + /** + * The user data for the bucket. + * + * @todo IRaba keys plus IRaba vals. 
+ */ + final int[] data; + + protected void setLocalHashBits(final int localHashBits) { + + this.localHashBits = localHashBits; + + } + + public int getLocalHashBits() { + return localHashBits; + } + + /** + * Human friendly representation. + */ + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(super.toString()); + sb.append("{localHashBits=" + getLocalHashBits()); + sb.append(",size=" + size); + sb.append(",values={"); + for (int i = 0; i < size; i++) { + if (i > 0) + sb.append(','); + sb.append(Integer.toString(data[i])); + } + sb.append("}}"); + return sb.toString(); + } + + /** + * Create a new mutable bucket. + * + * @param htbl + * @param localHashBits + * @param bucketSize + */ + public HashBucket(final HashTree htbl, + final int localHashBits, final int bucketSize) { + + super(htbl, true/* dirty */); + + if (localHashBits < 0 || localHashBits > 32) + throw new IllegalArgumentException(); + + if (bucketSize <= 0) + throw new IllegalArgumentException(); + + this.localHashBits = localHashBits; + + this.data = new int[bucketSize]; + + // one more bucket. + htbl.nbuckets++; + + } + + /** + * Return <code>true</code> if the bucket contains the key. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>true</code> if the key was found in the bucket. + * + * @todo passing in the hash code here makes sense when the bucket stores + * the hash values, e.g., if we always do that or if we have an out of + * bucket reference to a raw record because the tuple did not fit in + * the bucket. + */ + public boolean contains(final int h, final int key) { + + for (int i = 0; i < size; i++) { + + if (data[i] == key) + return true; + + } + + return false; + + } + + /** + * Type safe enumeration reports on the various outcomes when attempting to + * insert a tuple into a page. + * + * @todo The problem with this enumeration (or with using a return code per + * the following javadoc) is that page splits are going to be deferred + * until the page is evicted unless there is an aspect of the split + * function which decides based on the #of tuples on the page. If a + * the split function reports that the page is over capacity when it + * is evicted, then we need to decide whether to split the page, chain + * an overflow page, or use a larger capacity page. + * <p> + * What we should do is scan the page if an insert would fail (or if + * the serialization of the page would fail) and determine what local + * depth we would need to successfully split the page (e.g., no more + * than 70% of the items would be in any prefix at a given depth). + * That can be used to guide the decision to use overflow pages or + * expand the directory. + * <p> + * What are some fast techniques for counting the #of bits which we + * need to make the necessary distinctions in the bucket? Should we + * build a trie over the hash codes? + */ + private static enum InsertEnum { + /** + * The tuple was inserted successfully into this page. + * + * @todo This could be reported as ZERO (0), which is an indication that + * NO expansions where required to insert the tuple into the page. + */ + OK, + /** + * The insert failed because the page is full. Further, the tuple has + * the same key value as all other tuples on the page. Therefore, either + * the insert must be directed into an overflow page or the page size + * must be allowed to increase. 
+ * + * @todo This could be reported as {@link Integer#MAX_VALUE}, which is + * an indication that infinite expansions will not make it + * possible to insert the key into this page (e.g., an overflow + * page is required). [Alternatively, this could report the + * necessary page size if we allow the page size to expand.] + */ + KEYS_ARE_IDENTICAL, + /** + * The insert failed because the page is full. Further, the hash + * associated with the tuple is the same as the hash for all other keys + * on the page. In this case, the insert operation will eventually + * succeed if the address space is expanded (one or more times). + * + * @todo This could be reported as the #of bits which are in common for + * the keys in this page. That could be used to determine how many + * expansions would be required before the key could be inserted. + * [If KEYS_ARE_IDENTICAL is handled by reporting the necessary + * page size, then this could report the #of hash bits which are + * identical using a negative integer (flipping the sign).] + */ + HASH_IS_IDENTICAL; + } + + /** + * Insert the key into the bucket (duplicates are allowed). It is an error + * if the bucket is full. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @return <code>false</code> iff the bucket must be split. + * + * @todo The caller needs to be careful that [h] is the full hash code for + * the key. Normally this is not a problem, but we sometimes wind up + * with masked off hash codes, especially during splits and merges, + * and those must not be passed in here. + */ + public void insert(final int h, final int key) { + + if (size == data.length) { + + /* + * The bucket must be split, potentially recursively. + * + * Note: Splits need to be triggered based on information which is + * only available to the bucket when it considers the insert of a + * specific tuple, including whether the tuple is promoted to a raw + * record reference, whether the bucket has deleted tuples which can + * be compacted, etc. + * + * @todo I need to figure out where the control logic goes to manage + * the split. If the bucket handles splits, then we need to pass in + * the table reference. + */ + + // split the bucket and insert the record (recursive?) + split(key, this); + + /* + * Insert the key into the expanded hash table (this will insert + * into either the old or the new bucket, depending on the hash code + * for the key). + * + * FIXME There are a variety of special conditions which need to be + * handled by insert(), especially all keys have the same value or + * the same int32 hash code or the tuple is too large for the + * bucket. Those conditions all need to be handled before requested + * a split. Since insert() has to handle all of this, it is also + * responsible for re-attempting the key insertion after the split. + * + * The next step is to handle cases where splitting the bucket once + * does not result in a bucket with sufficient space for the new + * key. There are actually two cases here: (1) the hash codes of the + * keys are distinct, so if we double the address space enough times + * the insert will succeed; (2) the hash codes of the keys are + * identical, so no amount of expansion of the address space will + * permit the insert to succeed and an overflow page must be used. + * For (1) we can also chose to use an overflow page in order to + * prevent run away expansion of the address space. + * + * This class needs to be converted to use persistence and to use an + * IRaba for keys/values. 
For the sake of the unit tests, it needs + * to be parameterized for the overflow versus expand decision and + * the IRaba for the keys needs to be defined such that we have a + * guaranteed split when there are three integer keys (or a split + * function could be used to make this decision based on more + * general criteria). [Could also use a pure- append binary raba w/ + * compacting if the raba is full and there are deleted tuples.] + */ + if (log.isDebugEnabled()) + log.debug("retrying insert: key=" + key); + + /* + * @todo This can recurse until the address space reaches the + * maximum possible address space and then throw an exception. The + * code should be modified to use a decision function for growing + * the page, chaining an overflow page, or splitting the page (when + * it would cause the address space to be doubled). + */ + htbl.insert(key); + +// { +// // the hash value of the key. +// final int h = htbl.hash(key); +// // the address of the bucket for that hash code. +// final int addr = htbl.getRoot().addrOf(h); +// // the bucket for that address. +// final SimpleBucket btmp = htbl.getBucketAtStoreAddr(addr); +// if (btmp.insert(h, key)) { +// // insert was successful. +// return; +// } +// /* +// */ +// +// log +// .fatal("Split of bucket did not map space available for new key: key=" +// + key + ", table=" + htbl.dump()); +// +// throw new UnsupportedOperationException(); +// +// } + + return; + + } + + data[size++] = key; + + // one more entry in the index. + htbl.nentries++; + + } + + /** + * Delete a tuple having the specified key. If there is more than one such + * tuple, then a random tuple having the key is deleted. + * + * @param h + * The hash code of the key. + * @param key + * The key. + * + * @todo return the delete tuple. + */ + public boolean delete(final int h, final int key) { + + for (int i = 0; i < size; i++) { + + if (data[i] == key) { + + // #of tuples remaining beyond this point. + final int length = size - i - 1; + + if (length > 0) { + + // Keep the array dense by copying down by one. + System.arraycopy(data, i + 1/* srcPos */, data/* dest */, + i/* destPos */, length); + + } + + size--; + + // one less entry in the index. + htbl.nentries--; + + return true; + + } + + } + + return false; + + } + + /** + * The #of entries in the bucket. + */ + public int getEntryCount() { + + return size; + + } + + /** + * Visit the entries in any order. + */ + public Iterator<Integer/* key */> getEntries() { + + return new EntryIterator(); + + } + + /** + * Visits the entries in the page. + */ + private class EntryIterator implements Iterator<Integer> { + + private int current = 0; + + private EntryIterator() { + + } + + @Override + public boolean hasNext() { + return current < size; + } + + @Override + public Integer next() { + return data[current++]; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + @Override + public void delete() throws IllegalStateException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + + /** + * Split the bucket, adjusting the address map iff necessary. How this + * proceeds depends on whether the hash #of bits used in the bucket is equal + * to the #of bits used to index into the bucket address table. There are + * two cases: + * <p> + * Case 1: If {@link #globalHashBits} EQ the + * {@link HashBucket#localHashBits}, then the bucket address table is out + * of space and needs to be resized. 
+ * <p> + * Case 2: If {@link #globalHashBits} is GT + * {@link HashBucket#localHashBits}, then there will be at least two + * entries in the bucket address table which point to the same bucket. One + * of those entries is relabeled. The record is then inserted based on the + * new #of hash bits to be considered. If it still does not fit, then either + * handle by case (1) or case (2) as appropriate. + * <p> + * Note that records which are in themselves larger than the bucket size + * must eventually be handled by: (A) using an overflow record; (B) allowing + * the bucket to become larger than the target page size (using a larger + * allocation slot or becoming a blob); or (C) recording the tuple as a raw + * record and maintaining only the full hash code of the tuple and its raw + * record address in the bucket (this would allow us to automatically + * promote long literals out of the hash bucket and a similar approach might + * be used for a B+Tree leaf, except that a long key will still cause a + * problem [also, this implies that deleting a bucket or leaf on the + * unisolated index of the RWStore might require a scan of the IRaba to + * identify blob references which must also be deleted, so it makes sense to + * track those as part of the bucket/leaf metadata). + * + * @param h + * The key which triggered the split. + * @param bold + * The bucket lacking sufficient room for the key which triggered + * the split. + * + * @todo caller will need an exclusive lock if this is to be thread safe. + * + * @todo Overflow buckets (or oversize buckets) are required when all hash + * bits considered by the local bucket are the same, when all keys in + * the local bucket are the same, and when the record to be inserted + * is larger than the bucket. In order to handle these cases we may + * need to more closely integrate the insert/split logic since + * detecting some of these cases requires transparency into the + * bucket. + * + * FIXME The caller could decide to switch to a larger page size or + * chain overflow pages together in order to increase storage + * utilization or handle buckets having large populations of identical + * keys (or keys with the same int32 hash code). [This decision must + * be made before we decide to split.] + * + * FIXME The caller should handle the promotion of large tuples to raw + * records when they are inserted, so we do not need to handle that + * here either. + */ + private void split(final int key, final HashBucket bold) { + + final int globalHashBits = htbl.getGlobalHashBits(); + + if (log.isDebugEnabled()) + log.debug("globalBits=" + globalHashBits + ",localHashBits=" + + bold.getLocalHashBits() + ",key=" + key); + + if (globalHashBits < bold.getLocalHashBits()) { + // This condition should never arise. + throw new AssertionError(); + } + + if (globalHashBits == bold.getLocalHashBits()) { + /* + * The address table is out of space and needs to be resized. + */ + htbl.getRoot().doubleAddressSpaceAndSplitBucket(key, bold); + // fall through + } + + if (globalHashBits > bold.getLocalHashBits()) { + /* + * Split the bucket. + */ + htbl.getRoot().splitBucket(key, bold); + // fall through. 
+ } + + } + + /* + * IBucketData + */ + + public int getHash(int index) { + // TODO Auto-generated method stub + return 0; + } + + public int getLengthMSB() { + // TODO Auto-generated method stub + return 0; + } + + /* + * IAbstractNodeData + */ + + public boolean hasVersionTimestamps() { + // TODO Auto-generated method stub + return false; + } + + public AbstractFixedByteArrayBuffer data() { + // TODO Auto-generated method stub + return null; + } + + public int getKeyCount() { + // TODO Auto-generated method stub + return 0; + } + + public IRaba getKeys() { + // TODO Auto-generated method stub + return null; + } + + public long getMaximumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + public long getMinimumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + public int getSpannedTupleCount() { + // TODO Auto-generated method stub + return 0; + } + + public boolean isCoded() { + // TODO Auto-generated method stub + return false; + } + + final public boolean isLeaf() { + + return true; + + } + + /** + * The result depends on the implementation. The {@link HashBucket} will be + * mutable when it is first created and is made immutable when it is + * persisted. If there is a mutation operation, the backing + * {@link IBucketData} is automatically converted into a mutable instance. + */ + final public boolean isReadOnly() { + +// return data.isReadOnly(); + // TODO Auto-generated method stub + return false; + + } + + /* + * ILeafData + */ + + public boolean getDeleteMarker(int index) { + // TODO Auto-generated method stub + return false; + } + + public long getNextAddr() { + // TODO Auto-generated method stub + return 0; + } + + public long getPriorAddr() { + // TODO Auto-generated method stub + return 0; + } + + public int getValueCount() { + // TODO Auto-generated method stub + return 0; + } + + public IRaba getValues() { + // TODO Auto-generated method stub + return null; + } + + public long getVersionTimestamp(int index) { + // TODO Auto-generated method stub + return 0; + } + + public boolean hasDeleteMarkers() { + // TODO Auto-generated method stub + return false; + } + + public boolean isDoubleLinked() { + // TODO Auto-generated method stub + return false; + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java (from rev 3990, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htbl/HashDirectory.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 18:48:02 UTC (rev 3991) @@ -0,0 +1,989 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 1, 2010 + */ +package com.bigdata.htree; + +import java.lang.ref.Reference; +import java.util.Formatter; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; + +import org.apache.log4j.Logger; + +import com.bigdata.rawstore.IRawStore; +import com.bigdata.util.concurrent.Memoizer; + +/** + * A simple (flat) directory for an extensible hashing. + */ +public class HashDirectory extends AbstractHashPage<HashDirectory> { + + private final transient static Logger log = Logger + .getLogger(HashDirectory.class); + +/* FIXME We need a data record (interface and implementations) for the directory + * page. The data record for a bucket is more similar to a B+Tree leaf than is + * the data record for a directory to a B+Tree node. + */ +// /** +// * The data record. {@link MutableNodeData} is used for all mutation +// * operations. {@link ReadOnlyNodeData} is used when the {@link Node} is +// * made persistent. A read-only data record is automatically converted into +// * a {@link MutableNodeData} record when a mutation operation is requested. +// * <p> +// * Note: This is package private in order to expose it to {@link Leaf}. +// * +// * @todo consider volatile and private for {@link Node#data} and +// * {@link Leaf#data} with accessors and settors at package private +// * where necessary. +// */ +// INodeData data; + + /** + * The #of hash code bits which are in use by the {@link #addressMap}. Each + * hash bucket also as a local #of hash bits. Given <code>i</code> is the + * #of global hash bits and <code>j</code> is the number of hash bits in + * some bucket, there will be <code>2^(i-j)</code> addresses which point to + * the same bucket. + */ + private int globalHashBits; + + /** + * The maximum number of directories entries which are permitted. + * + * @todo This can be ignored for a hash tree (recursive directories). In + * that case we are concerned with when to split the directory because + * the page is full rather than an absolute maximum on the address + * space size. + */ + private final int maximumCapacity; + + /** + * The address map. You index into this map using {@link #globalHashBits} + * out of the hash code for a probe key. The values are storage addresses + * for the backing {@link IRawStore}. The address will be {@link #NULL} if + * the corresponding child is dirty, in which case {@link #childRefs} will + * always have a {@link Reference} to the dirty child. This pattern is used + * in combination with either strong references or weak references and a + * ring buffer to manage the incremental eviction of dirty pages. + * + * @todo make this into a private IDirectoryData record. + * <p> + * It seems likely that we want to also record the local depth for + * each child in the IDataDirectory record and a flag indicating + * whether the child is a bucket or a directory page. + */ + private long[] addressMap; + + /** + * <p> + * Weak references to child pages (may be directories or buckets). The + * capacity of this array depends on the #of global bits for the directory. + * </p> + * <p> + * Note: This should not be marked as volatile. 
Volatile does not make the + * elements of the array volatile, only the array reference itself. The + * field would be final except that we clear the reference when stealing the + * array or deleting the node. + * </p> + * + * @todo document why package private (AbstractBTree.loadChild uses this but + * maybe that method could be moved to Node). + */ + private transient/* volatile */Reference<AbstractHashPage<?>>[] childRefs; + + public String toString() { + + return super.toString(); + + } + + /** + * Dumps the buckets in the directory along with metadata about the + * directory. + * + * @param sb + * Where to write the dump. + */ + protected void dump(final StringBuilder sb) { + + // used to remember the visited pages by their addresses (when non-NULL) + final Set<Long/* addrs */> visitedAddrs = new LinkedHashSet<Long>(); + + // used to remember the visited pages when they are transient. + final Map<AbstractHashPage/* children */, Integer/* label */> visitedChildren = new LinkedHashMap<AbstractHashPage, Integer>(); + + // used to format the address table. + final Formatter f = new Formatter(sb); + + // scan through the address table. + for (int index = 0; index < addressMap.length; index++) { + + boolean visited = false; + + long addr = addressMap[index]; + + if (addr != NULL && !visitedAddrs.add(addr)) { + + visited = true; + + } + + HashBucket b = (HashBucket) (childRefs[index]).get(); + + if (b != null && visitedChildren.containsKey(b)) { + + visited = true; + + } else { + + visitedChildren.put(b, index); + + } + + if(b == null) { + + // materialize the bucket. + b = getBucketFromEntryIndex(index); + + addr = b.getIdentity(); + + } + + /* + * The label will be either the storage address followed by "P" (for + * Persistent) or the index of the directory entry followed by "T" + * (for Transient). + */ + final String label = addr == 0L ? (visitedChildren.get(b) + "T") + : (addr + "P"); + + f.format("\n%2d [%" + globalHashBits + "s] => (%8s)", index, + Integer.toBinaryString(HashTree.maskOff(index, + globalHashBits)), label); + + if (!visited) { + + /* + * Show the bucket details the first time we visit it. + */ + + // The #of local hash bits for the target page. + final int localHashBits = b.getLocalHashBits(); + + // The #of entries in this directory for that target page. + final int nrefs = HashTree.pow2(globalHashBits + - localHashBits); + + sb.append(" [k=" + b.getLocalHashBits() + ", n=" + nrefs + + "] {"); + + final Iterator<Integer> eitr = b.getEntries(); + + boolean first = true; + + while(eitr.hasNext()) { + + if (!first) + sb.append(", "); + + sb.append(eitr.next()/*.getObject()*/); + + first = false; + + } + + sb.append("}"); + + } + + } + + sb.append('\n'); + + } + + /** + * Create a new mutable directory page. + * + * @param htbl + * @param initialCapacity + * The initial capacity is the #of buckets which may be stored in + * the hash table before it must be resized. It is expressed in + * buckets and not tuples because there is not (in general) a + * fixed relationship between the size of a bucket and the #of + * tuples which can be stored in that bucket. This will be + * rounded up to the nearest power of two. + * @param maximumCapacity + * @param bucketSize + * + * @todo both maximumCapacity and bucketSize will go away. The maximum + * capacity will be replaced by a decision function for splitting the + * directory page. The bucketSize will be replaced by a decision + * function for splitting, overflowing, or growing the bucket page. 
+ */ + @SuppressWarnings("unchecked") + protected HashDirectory(final HashTree htbl, + final int initialCapacity, final int maximumCapacity, + final int bucketSize) { + + super(htbl, true /* dirty */); + + if (initialCapacity <= 0) + throw new IllegalArgumentException(); + + if (maximumCapacity < initialCapacity) + throw new IllegalArgumentException(); + + this.maximumCapacity = maximumCapacity; + + /* + * Setup the hash table given the ini... [truncated message content] |
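
For readers skimming the DefaultLeafCoder portion of this diff: the optional hash-code storage relies on the fact that all tuples addressed by a given hash bucket page share the same most-significant hash bits, so the record stores that MSB prefix once and then only the low-order (32 - lengthMSB) bits of each key's hash. The following is a minimal, self-contained sketch of that layout; a plain bit array stands in for the OutputBitStream/InputBitStream coding against the record buffer, and the class and method names are illustrative only, not part of the bigdata API.

    /**
     * Minimal sketch of the FLAG_HASH_KEYS coding scheme: one shared MSB
     * prefix followed by per-key LSB suffixes.  Not the real coder.
     */
    public class HashPrefixCodingSketch {

        /** Encode: [lengthMSB shared prefix bits][nkeys * lengthLSB suffix bits]. */
        static boolean[] encode(final int[] hashes, final int lengthMSB) {
            final int lengthLSB = 32 - lengthMSB;
            final boolean[] bits = new boolean[lengthMSB + hashes.length * lengthLSB];
            int pos = 0;
            // The shared MSB prefix (right-aligned), written once.
            final int prefix = hashes.length == 0 ? 0 : hashes[0] >>> lengthLSB;
            for (int b = lengthMSB - 1; b >= 0; b--)
                bits[pos++] = ((prefix >>> b) & 1) != 0;
            // The low-order bits of each key's hash, written MSB-first.
            for (int h : hashes)
                for (int b = lengthLSB - 1; b >= 0; b--)
                    bits[pos++] = ((h >>> b) & 1) != 0;
            return bits;
        }

        /** Decode the int32 hash code of the key at the given index. */
        static int getHash(final boolean[] bits, final int lengthMSB, final int index) {
            final int lengthLSB = 32 - lengthMSB;
            int prefix = 0;
            for (int b = 0; b < lengthMSB; b++)
                prefix = (prefix << 1) | (bits[b] ? 1 : 0);
            int suffix = 0;
            final int off = lengthMSB + index * lengthLSB;
            for (int b = 0; b < lengthLSB; b++)
                suffix = (suffix << 1) | (bits[off + b] ? 1 : 0);
            // Reassemble the full hash from the shared prefix and this key's suffix.
            return (prefix << lengthLSB) | suffix;
        }

        public static void main(final String[] args) {
            // Three hashes sharing a 16-bit MSB prefix (0xABCD).
            final int[] hashes = { 0xABCD0001, 0xABCD0002, 0xABCDFFFF };
            final boolean[] coded = encode(hashes, 16 /* lengthMSB */);
            for (int i = 0; i < hashes.length; i++)
                assert getHash(coded, 16, i) == hashes[i];
            System.out.println("coded bits = " + coded.length
                    + " vs uncompressed = " + hashes.length * 32);
        }
    }

With 3 keys and a 16-bit shared prefix the coded form needs 16 + 3*16 = 64 bits instead of 96, which is the space saving the commit is after; the real coder additionally has to deal with byte alignment (BytesUtil.bitFlagByteLength) and with leaves that carry no hash codes at all.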
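The HashBucket.split() and HashDirectory javadoc above walks through the classic extensible hashing protocol: double the address table when a full page's local depth equals the directory's global depth, otherwise split the page in place and relabel the directory entries that pointed at it. As a point of reference, here is a toy in-memory version of that control flow. It deliberately glosses over everything the commit comments flag as open issues (overflow pages for identical hash codes, page size decisions, persistence), uses the int key as its own hash, and all names are illustrative rather than the actual bigdata API.

    import java.util.ArrayList;
    import java.util.List;

    /** Toy extensible hash table: directory doubling vs. in-place bucket split. */
    class ToyExtensibleHash {

        static class Bucket {
            int localDepth;                       // #of hash bits this page "owns"
            final List<Integer> keys = new ArrayList<Integer>();
            Bucket(final int localDepth) { this.localDepth = localDepth; }
        }

        private final int bucketCapacity;
        private int globalDepth = 1;              // #of hash bits used by the directory
        private Bucket[] dir;

        ToyExtensibleHash(final int bucketCapacity) {
            this.bucketCapacity = bucketCapacity;
            final Bucket b = new Bucket(0);
            dir = new Bucket[] { b, b };          // 2^(1-0) == 2 references to the initial page
        }

        /** Index into the directory using the low globalDepth bits of the hash. */
        private int addrOf(final int h) { return h & ((1 << globalDepth) - 1); }

        void insert(final int key) {
            final int h = key;                    // stand-in for the real hash function
            final Bucket b = dir[addrOf(h)];
            if (b.keys.size() < bucketCapacity) {
                b.keys.add(key);
                return;
            }
            if (b.localDepth == globalDepth) {
                // Case 1: the address table is out of space -- double it so that
                // two directory entries address each existing page.
                final Bucket[] newDir = new Bucket[dir.length * 2];
                System.arraycopy(dir, 0, newDir, 0, dir.length);
                System.arraycopy(dir, 0, newDir, dir.length, dir.length);
                dir = newDir;
                globalDepth++;
            }
            // Case 2: at least two directory entries now point at the full page.
            splitBucket(b);
            insert(key);                          // retry; may split again for skewed hashes
        }

        private void splitBucket(final Bucket old) {
            final int bit = old.localDepth;       // the newly distinguishing hash bit
            old.localDepth++;
            final Bucket sibling = new Bucket(old.localDepth);
            // Relabel the directory entries whose distinguishing bit is 1.
            for (int i = 0; i < dir.length; i++)
                if (dir[i] == old && ((i >>> bit) & 1) == 1)
                    dir[i] = sibling;
            // Redistribute the old page's tuples between the two pages.
            final List<Integer> keep = new ArrayList<Integer>();
            for (final int k : old.keys)
                (((k >>> bit) & 1) == 1 ? sibling.keys : keep).add(k);
            old.keys.clear();
            old.keys.addAll(keep);
        }
    }

Note that this toy never terminates if every key carries the same hash code, which is exactly the degenerate case the committed javadoc proposes to handle with overflow pages or larger page sizes rather than by further doubling of the address space.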