From: <tho...@us...> - 2010-12-03 22:26:48
|
Revision: 3993 http://bigdata.svn.sourceforge.net/bigdata/?rev=3993&view=rev Author: thompsonbry Date: 2010-12-03 22:26:40 +0000 (Fri, 03 Dec 2010) Log Message: ----------- Continued work on the HTree, especially on the interface, persistent data record, and mutable data record for the hash bucket. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -35,6 +35,8 @@ import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; +import java.util.Iterator; +import java.util.NoSuchElementException; import org.apache.log4j.Logger; @@ -368,7 +370,7 @@ if (nkeys > 0) { final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); final byte[] a = new byte[byteLength]; @@ -443,22 +445,45 @@ return encodeLive(leaf, buf).data(); } - - /** - * A read-only view of the data for a B+Tree leaf based on a compact record - * format. While some fields are cached, for the most part the various data - * fields, including the keys and values, are accessed in place in the data - * record in order to minimize the memory footprint of the leaf. The keys and - * values are coded using a caller specified {@link IRabaCoder}. The specific - * coding scheme is specified by the {@link IndexMetadata} for the B+Tree - * instance and is not stored within the leaf data record. - * <p> - * Note: The leading byte of the record format codes for a leaf, a double-linked - * leaf or a node in a manner which is compatible with {@link ReadOnlyNodeData}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ + + /** + * A read-only view of the data for a B+Tree leaf based on a compact record + * format. While some fields are cached, for the most part the various data + * fields, including the keys and values, are accessed in place in the data + * record in order to minimize the memory footprint of the leaf. 
The keys + * and values are coded using a caller specified {@link IRabaCoder}. The + * specific coding scheme is specified by the {@link IndexMetadata} for the + * B+Tree instance and is not stored within the leaf data record. The use of + * prefix coding for keys is a good general choice, but should not be used + * in combination with a hash tree unless an order preserving hashing + * function is being used. + * <p> + * Note: The leading byte of the record format codes for a leaf, a + * double-linked leaf or a node in a manner which is compatible with + * {@link ReadOnlyNodeData}. + * <p> + * The {@link DefaultLeafCoder} automatically maintains hash values for keys + * for an {@link IBucketData} record. The hash values of the keys in the + * bucket will have a shared prefix (the MSB hash prefix) which corresponds + * to the globalDepth of the path through the hash tree leading to this + * bucket less the localDepth of this bucket. It is therefore possible to + * store only the LSB bits of the hash values in the page and reconstruct + * the hash values using the MSB bits from the path through the hash tree. + * In order to be able to reconstruct the full hash code key based solely on + * local information, the MSB bits can be written out once and the LSB bits + * can be written out once per tuple. Testing the hash value of a key may + * then be done considering only the LSB bits of the hash value. This + * storage scheme also has the advantage that the hash value is not + * restricted to an int32 and is therefore compatible with the use of + * cryptographic hash functions. (If hash values are stored in a B+Tree leaf + * they will not share this prefix property and can not be compressed in + * this manner). 
+ * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: DefaultLeafCoder.java 3991 2010-12-03 18:48:02Z thompsonbry + * $ + */ static private class ReadOnlyLeafData extends AbstractReadOnlyNodeData<ILeafData> implements ILeafData, IBucketData { @@ -645,7 +670,7 @@ O_hashKeys = pos; final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); if (nkeys > 0) { @@ -804,7 +829,7 @@ O_hashKeys = pos; final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); if (nkeys > 0) { @@ -994,7 +1019,7 @@ final int lengthMSB = 32/* hashBitLength */- lengthLSB; final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB - + nkeys * lengthMSB/* nbits */); + + (nkeys * lengthLSB)/* nbits */); final InputBitStream ibs = b.slice(O_hashKeys, byteLength) .getInputBitStream(); @@ -1018,8 +1043,102 @@ } } - - final public IRaba getKeys() { + + public Iterator<Integer> hashIterator(final int h) { + + return new HashMatchIterator(h); + + } + + /** + * Visits the index of each bucket entry having a matching hash code. + * + * @todo a trie over the hash entries would provide much faster search. 
+ */ + private class HashMatchIterator implements Iterator<Integer> { + + private final int h; + private final int lengthMSB; + private final InputBitStream ibs; + private int currentIndex = 0; + private Integer nextResult = null; + + private HashMatchIterator(final int h) { + + this.h = h; + + lengthMSB = 32/* hashBitLength */- lengthLSB; + + final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB + + (nkeys * lengthLSB)/* nbits */); + + ibs = b.slice(O_hashKeys, byteLength) + .getInputBitStream(); + + } + + public boolean hasNext() { + + final int n = getKeyCount(); + + while (nextResult == null && currentIndex < n) { + + final int index = currentIndex++; + + int h1; + try { + + // We do not need to re-position the ibs. +// final long position = lengthMSB + currentIndex +// * lengthLSB; +// ibs.position(position); + + h1 = ibs.readInt(lengthLSB); + + h1 |= hashMSB; + + } catch (IOException ex) { + + throw new RuntimeException(ex); + + } + + if (h1 == h) { + + nextResult = Integer.valueOf(index); + + break; + + } + + } + + return nextResult != null; + + } + + public Integer next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + final Integer tmp = nextResult; + + nextResult = null; + + return tmp; + + } + + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } + + final public IRaba getKeys() { return keys; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -32,6 +32,7 @@ import com.bigdata.btree.IOverflowHandler; import com.bigdata.btree.IndexSegment; +import com.bigdata.btree.data.DefaultLeafCoder; import com.bigdata.btree.data.IAbstractNodeDataCoder; import 
com.bigdata.btree.data.ILeafData; import com.bigdata.btree.raba.IRaba; @@ -46,6 +47,11 @@ * @todo The hash of the key should be part of the ITuple interface so it can be * passed along based on the application level encoding of the key. * + * @todo Consider organizing the hash values of the keys in the page using a + * trie for faster lookup. This could be done when they are serialized (in + * which case this decision disappears into the {@link DefaultLeafCoder}) + * or dynamically. + * * @todo Support out-of-line representations of the key and/or value for a tuple * when they are large. The definition of "large" can be a configuration * value for the index metadata. For example, 1/4 of the target page size @@ -124,8 +130,19 @@ * * @todo IRaba keys plus IRaba vals. */ - final int[] data; + final int[] entries; + /** + * The data record. A mutable is used for all mutation operations. The data + * record is replaced by a read-only record used when the hash bucket is + * made persistent. A read-only data record is automatically converted into + * a mutable record when a mutation operation is requested. + * <p> + * Note: This is package private in order to expose it to + * {@link HashDirectory}. + */ + private IBucketData data; + protected void setLocalHashBits(final int localHashBits) { this.localHashBits = localHashBits; @@ -133,7 +150,9 @@ } public int getLocalHashBits() { + return localHashBits; + } /** @@ -148,7 +167,7 @@ for (int i = 0; i < size; i++) { if (i > 0) sb.append(','); - sb.append(Integer.toString(data[i])); + sb.append(Integer.toString(entries[i])); } sb.append("}}"); return sb.toString(); @@ -174,7 +193,7 @@ this.localHashBits = localHashBits; - this.data = new int[bucketSize]; + this.entries = new int[bucketSize]; // one more bucket. 
htbl.nbuckets++; @@ -200,7 +219,7 @@ for (int i = 0; i < size; i++) { - if (data[i] == key) + if (entries[i] == key) return true; } @@ -287,7 +306,7 @@ */ public void insert(final int h, final int key) { - if (size == data.length) { + if (size == entries.length) { /* * The bucket must be split, potentially recursively. @@ -375,7 +394,7 @@ } - data[size++] = key; + entries[size++] = key; // one more entry in the index. htbl.nentries++; @@ -397,7 +416,7 @@ for (int i = 0; i < size; i++) { - if (data[i] == key) { + if (entries[i] == key) { // #of tuples remaining beyond this point. final int length = size - i - 1; @@ -405,7 +424,7 @@ if (length > 0) { // Keep the array dense by copying down by one. - System.arraycopy(data, i + 1/* srcPos */, data/* dest */, + System.arraycopy(entries, i + 1/* srcPos */, entries/* dest */, i/* destPos */, length); } @@ -461,7 +480,7 @@ @Override public Integer next() { - return data[current++]; + return entries[current++]; } @Override @@ -568,14 +587,12 @@ * IBucketData */ - public int getHash(int index) { - // TODO Auto-generated method stub - return 0; + public int getHash(final int index) { + return data.getHash(index); } public int getLengthMSB() { - // TODO Auto-generated method stub - return 0; + return data.getLengthMSB(); } /* @@ -583,49 +600,39 @@ */ public boolean hasVersionTimestamps() { - // TODO Auto-generated method stub - return false; + return data.hasVersionTimestamps(); } public AbstractFixedByteArrayBuffer data() { - // TODO Auto-generated method stub - return null; + return data.data(); } public int getKeyCount() { - // TODO Auto-generated method stub - return 0; + return data.getKeyCount(); } public IRaba getKeys() { - // TODO Auto-generated method stub - return null; + return data.getKeys(); } public long getMaximumVersionTimestamp() { - // TODO Auto-generated method stub - return 0; + return data.getMaximumVersionTimestamp(); } public long getMinimumVersionTimestamp() { - // TODO Auto-generated method stub - 
return 0; + return data.getMinimumVersionTimestamp(); } public int getSpannedTupleCount() { - // TODO Auto-generated method stub - return 0; + return data.getKeyCount(); } public boolean isCoded() { - // TODO Auto-generated method stub - return false; + return data.isCoded(); } final public boolean isLeaf() { - return true; - } /** @@ -635,55 +642,43 @@ * {@link IBucketData} is automatically converted into a mutable instance. */ final public boolean isReadOnly() { - -// return data.isReadOnly(); - // TODO Auto-generated method stub - return false; - + return data.isReadOnly(); } /* * ILeafData */ - public boolean getDeleteMarker(int index) { - // TODO Auto-generated method stub - return false; + public boolean getDeleteMarker(final int index) { + return data.getDeleteMarker(index); } public long getNextAddr() { - // TODO Auto-generated method stub - return 0; + return data.getNextAddr(); } public long getPriorAddr() { - // TODO Auto-generated method stub - return 0; + return data.getPriorAddr(); } public int getValueCount() { - // TODO Auto-generated method stub - return 0; + return data.getValueCount(); } public IRaba getValues() { - // TODO Auto-generated method stub - return null; + return data.getValues(); } - public long getVersionTimestamp(int index) { - // TODO Auto-generated method stub - return 0; + public long getVersionTimestamp(final int index) { + return data.getVersionTimestamp(index); } public boolean hasDeleteMarkers() { - // TODO Auto-generated method stub - return false; + return data.hasDeleteMarkers(); } public boolean isDoubleLinked() { - // TODO Auto-generated method stub - return false; + return data.isDoubleLinked(); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 22:25:13 UTC (rev 3992) +++ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -775,7 +775,7 @@ // Adjust the #of local bits to be considered. bold.setLocalHashBits(bold.getLocalHashBits() + 1); // The new bucket. - bnew = new HashBucket(htbl, bold.getLocalHashBits(), bold.data.length/* bucketSize */); + bnew = new HashBucket(htbl, bold.getLocalHashBits(), bold.entries.length/* bucketSize */); // // The address for the new bucket. // final int addrBNew = htbl.buckets.size(); // Add to the chain of buckets. @@ -862,7 +862,7 @@ { // the new bucket. bnew = new HashBucket(htbl, bold.getLocalHashBits() + 1, - bold.data.length/* bucketSize */); + bold.entries.length/* bucketSize */); // // Add to the chain of buckets. // htbl.buckets.add(bnew); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -528,8 +528,7 @@ * have access to the store or ITuple will have to have indirection * support. */ - public HashTree(final int initialCapacity, - final int bucketSize) { + public HashTree(final int initialCapacity, final int bucketSize) { // @todo pass in the store reference per AbstractBTree. 
this.store = new SimpleMemoryRawStore(); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,32 @@ +package com.bigdata.htree; + +import com.bigdata.btree.ITuple; + +/** + * Extended interface to support hash buckets. + * + * @author tho...@us... + * + * @param <E> + * + * @todo The reason for having this on ITuple is to make it practical for the + * hash code to be defined in terms of application specific data types + * rather than the unsigned byte[] key (but the latter could of course be + * decoded by the hash function before computing the hash of the + * application data type, except for things like Unicode keys). + * <p> + * This should probably be lifted onto {@link ITuple} and + * {@link #getKeyHash()} should be declared to throw an + * {@link UnsupportedOperationException} if the hash code of the key is + * not being stored. + */ +public interface IHashTuple<E> extends ITuple<E> { + +// int getHashBitLength(); + + /** + * The int32 hash value of the key. + */ + int getKeyHash(); + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,617 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 25, 2009 + */ + +package com.bigdata.htree; + +import java.util.Iterator; + +import cern.colt.map.OpenIntIntHashMap; + +import com.bigdata.btree.ITuple; +import com.bigdata.btree.MutableLeafData; +import com.bigdata.btree.raba.IRaba; +import com.bigdata.htree.data.IBucketData; +import com.bigdata.io.AbstractFixedByteArrayBuffer; +import com.bigdata.io.ByteArrayBuffer; +import com.bigdata.io.IDataRecord; +import com.bigdata.rawstore.Bytes; + +/** + * Implementation maintains Java objects corresponding to the persistent data + * and defines methods for a variety of mutations on the {@link IBucketData} + * record which operate by direct manipulation of the Java objects. + * <p> + * Note: package private fields are used so that they may be directly accessed + * by the {@link HashBucket} class. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: MutableLeafData.java 2265 2009-10-26 12:51:06Z thompsonbry $ + * + * @todo Consider mutable implementation based on a compacting record ala GOM. + * This is especially attractive for the hash tree. The implementation + * would have to be wholly different from the {@link MutableLeafData} + * class. 
Instead of managing the {@link IRaba} for the keys and values + * separately, each {@link ITuple} would be appended into a byte[] (or + * {@link IDataRecord}). There would be a budget for that backing buffer + * which is the maximum in-memory size of the bucket. An index would + * provide random access into the buffer for only those entries which are + * "live" and a counter is maintain of the #of entries in the buffer which + * are no longer in use. When the buffer capacity is reached, the buffer + * is compacted by copying all of the entries accessible from the index + * into a new buffer and the old buffer is released. + * <p> + * Records which are too large for the buffer should be moved out of line. + * <p> + * This can be used in combination with a dynamically maintained trie for + * fast hash lookup, or we could just scan the entries. + * <p> + * Lexicon key search can scan the entries using the index. Scanning can + * have a side-effect in which the position of the entry offsets in the + * index is swapped if the keys are out of order. This would give us + * MonetDB style "cracking". The keys would have to be put into full order + * no later than when the record is made persistent. + * <p> + * Even though mutation is not thread safe, compacting the data record + * must not cause the assignment of indices to tuples to change when the + * caller is iterating through the entries by index. + * + * @todo When the record is serialized, do we need to allow a decision function + * to examine the record and decide whether it must be split? Since we do + * not have a fixed target for the page size, but only a budget, and since + * compression of keys, values, metadata, and the encoded record can all + * be applied, it seems that the decision function should be in terms of + * the buffer budget and a maximum #of entries (e.g., B+Tree branching + * factor or an equivalent hash bucket threshold). 
+ */ +public class MutableBucketData implements IBucketData { + + private IDataRecord buf; + + private /*@todo final*/ OpenIntIntHashMap index; + + /** + * Constructor used when converting a persistent data record into a mutable + * one. + * + * @param data + */ + public MutableBucketData(final IBucketData data) { + + } + + /** + * + * @param bufferSize + * The initial size of the backing byte[]. + * @param branchingFactor + * The maximum #of tuples which may be stored in the data record. + * + * @todo The buffer must be permitted grow until it is sufficient to encode + * approximately one page worth of tuples. + * <p> + * is typically on the order of the size of a page, e.g., 4k. Since + * the data record will be encoded and possible compressed before it + * is written onto the store, this can be larger than the target. + * <p> + * In order to avoid problems where the objects are much smaller than + * expected, we should allow the backing buffer to grow or we should + * fit a model which estimates the size of the resulting page based on + * the size of the buffer and then grow the buffer until we can + * satisfy the target page size. 
+ */ + public MutableBucketData(final int bufferSize, final int branchingFactor) { + + final int initialBufferSize = Math + .min(Bytes.kilobyte32 * 4, bufferSize); + + buf = new ByteArrayBuffer(initialBufferSize); + + index = new OpenIntIntHashMap(branchingFactor/* initialCapacity */); + + } + + @Override + public int getHash(int index) { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getKeyCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getLengthMSB() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public Iterator<Integer> hashIterator(int h) { + // TODO Auto-generated method stub + return null; + } + + @Override + public boolean getDeleteMarker(int index) { + // TODO Auto-generated method stub + return false; + } + + @Override + public long getNextAddr() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long getPriorAddr() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getValueCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public IRaba getValues() { + // TODO Auto-generated method stub + return null; + } + + @Override + public long getVersionTimestamp(int index) { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean hasDeleteMarkers() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean hasVersionTimestamps() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isDoubleLinked() { + // TODO Auto-generated method stub + return false; + } + + @Override + public AbstractFixedByteArrayBuffer data() { + // TODO Auto-generated method stub + return null; + } + + @Override + public IRaba getKeys() { + // TODO Auto-generated method stub + return null; + } + + @Override + public long getMaximumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + @Override + 
public long getMinimumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getSpannedTupleCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean isCoded() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isLeaf() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isReadOnly() { + // TODO Auto-generated method stub + return false; + } + +// /** +// * The keys for the entries in the bucket. Unlike a B+Tree, the keys are NOT +// * maintained in a sorted order. Search proceeds by scanning for matching +// * hash codes and filtering for matching keys. +// */ +// final MutableKeyBuffer keys; +// +// /** +// * The values for the entries in the bucket. There is one value per key. The +// * value MAY be null. +// */ +// final MutableValueBuffer vals; +// +// /** +// * The deletion markers IFF isolation is supported by the {@link HTree}. +// */ +// final boolean[] deleteMarkers; +// +// /** +// * The version timestamps IFF isolation is supported by the {@link HTree}. +// */ +// final long[] versionTimestamps; +// +// /** +// * The minimum version timestamp. +// * +// * @todo these fields add 16 bytes to each {@link MutableBucketData} object +// * even when we do not use them. It would be better to use a subclass +// * or tack them onto the end of the {@link #versionTimestamps} array. +// */ +// long minimumVersionTimestamp; +// long maximumVersionTimestamp; +// +// /** +// * Create an empty data record with internal arrays dimensioned for the +// * specified branching factor. +// * +// * @param branchingFactor +// * The maximum #of entries in the hash bucket before it will +// * overflow or be split. 
Since the goal is to manage the size +// * of the bucket on the disk and since we do not known the size +// * of the bucket's data record until it is being evicted, this +// * value places an upper bound after which the bucket will be +// * @param hasVersionTimestamps +// * <code>true</code> iff version timestamps will be maintained. +// * @param hasDeleteMarkers +// * <code>true</code> iff delete markers will be maintained. +// */ +// public MutableBucketData(final int branchingFactor, +// final boolean hasVersionTimestamps, final boolean hasDeleteMarkers) { +// +// keys = new MutableKeyBuffer(branchingFactor + 1); +// +// vals = new MutableValueBuffer(branchingFactor + 1); +// +// versionTimestamps = (hasVersionTimestamps ? new long[branchingFactor + 1] +// : null); +// +// // init per API specification. +// minimumVersionTimestamp = Long.MAX_VALUE; +// maximumVersionTimestamp = Long.MIN_VALUE; +// +// deleteMarkers = (hasDeleteMarkers ? new boolean[branchingFactor + 1] +// : null); +// +// } +// +// /** +// * Copy ctor. +// * +// * @param branchingFactor +// * The branching factor for the owning B+Tree. +// * @param src +// * The source leaf. +// */ +// public MutableBucketData(final int branchingFactor, final ILeafData src) { +// +// keys = new MutableKeyBuffer(branchingFactor + 1, src.getKeys()); +// +// vals = new MutableValueBuffer(branchingFactor + 1, src.getValues()); +// +// versionTimestamps = (src.hasVersionTimestamps() ? new long[branchingFactor + 1] +// : null); +// +// deleteMarkers = (src.hasDeleteMarkers() ? 
new boolean[branchingFactor + 1] +// : null); +// +// final int nkeys = keys.size(); +// +// if (versionTimestamps != null) { +// +// for (int i = 0; i < nkeys; i++) { +// +// versionTimestamps[i] = src.getVersionTimestamp(i); +// +// } +// +// minimumVersionTimestamp = src.getMinimumVersionTimestamp(); +// +// maximumVersionTimestamp = src.getMaximumVersionTimestamp(); +// +// } else { +// +// minimumVersionTimestamp = Long.MAX_VALUE; +// +// maximumVersionTimestamp = Long.MIN_VALUE; +// +// +// } +// +// if (deleteMarkers != null) { +// +// for (int i = 0; i < nkeys; i++) { +// +// deleteMarkers[i] = src.getDeleteMarker(i); +// +// } +// +// } +// +// } +// +// /** +// * Ctor based on just "data" -- used by unit tests. +// * +// * @param keys +// * A representation of the defined keys in the leaf. +// * @param values +// * An array containing the values found in the leaf. +// * @param versionTimestamps +// * An array of the version timestamps (iff the version metadata +// * is being maintained). +// * @param deleteMarkers +// * An array of the delete markers (iff the version metadata is +// * being maintained). +// */ +// public MutableBucketData(final MutableKeyBuffer keys, +// final MutableValueBuffer values, final long[] versionTimestamps, +// final boolean[] deleteMarkers) { +// +// assert keys != null; +// assert values != null; +// assert keys.capacity() == values.capacity(); +// if (versionTimestamps != null) { +// assert versionTimestamps.length == keys.capacity(); +// } +// if (deleteMarkers != null) { +// assert deleteMarkers.length == keys.capacity(); +// } +// +// this.keys = keys; +// this.vals = values; +// this.versionTimestamps = versionTimestamps; +// this.deleteMarkers = deleteMarkers; +// +// if (versionTimestamps != null) +// recalcMinMaxVersionTimestamp(); +// +// } +// +// /** +// * Range check a tuple index. +// * +// * @param index +// * The index of a tuple in [0:nkeys]. 
+// * @return <code>true</code> +// * +// * @throws IndexOutOfBoundsException +// * if the index is not in the legal range. +// */ +// final protected boolean rangeCheckTupleIndex(final int index) { +// +// if (index < 0 || index > getKeys().size()) +// throw new IndexOutOfBoundsException(); +// +// return true; +// +// } +// +// /** +// * No - this is a mutable data record. +// */ +// final public boolean isReadOnly() { +// +// return false; +// +// } +// +// /** +// * No. +// */ +// final public boolean isCoded() { +// +// return false; +// +// } +// +// final public AbstractFixedByteArrayBuffer data() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// public final long getVersionTimestamp(final int index) { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// assert rangeCheckTupleIndex(index); +// +// return versionTimestamps[index]; +// +// } +// +// final public long getMinimumVersionTimestamp() { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// return minimumVersionTimestamp; +// +// } +// +// final public long getMaximumVersionTimestamp() { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// return maximumVersionTimestamp; +// +// } +// +// public final boolean getDeleteMarker(final int index) { +// +// if (deleteMarkers == null) +// throw new UnsupportedOperationException(); +// +// assert rangeCheckTupleIndex(index); +// +// return deleteMarkers[index]; +// +// } +// +// final public IRaba getValues() { +// +// return vals; +// +// } +// +// final public IRaba getKeys() { +// +// return keys; +// +// } +// +// /** +// * Always returns <code>true</code>. +// */ +// final public boolean isLeaf() { +// +// return true; +// +// } +// +// /** +// * For a leaf the #of entries is always the #of keys. 
+// */ +// final public int getSpannedTupleCount() { +// +// return getKeys().size(); +// +// } +// +// final public int getValueCount() { +// +// return vals.size(); +// +// } +// +// final public boolean hasDeleteMarkers() { +// +// return deleteMarkers != null; +// +// } +// +// final public boolean hasVersionTimestamps() { +// +// return versionTimestamps != null; +// +// } +// +// final public int getKeyCount() { +// +// return keys.size(); +// +// } +// +// /** +// * No - this class does not support double-linked leaves (only the +// * {@link IndexSegment} actually uses double-linked leaves). +// */ +// final public boolean isDoubleLinked() { +// +// return false; +// +// } +// +// final public long getNextAddr() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// final public long getPriorAddr() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// /** +// * Recalculate the min/max version timestamp on the leaf. The caller is +// * responsible for propagating the new min/max to the ancestors of the leaf. +// * +// * @throws UnsupportedOperationException +// * if the leaf is not maintaining per-tuple version timestamps. +// */ +// void recalcMinMaxVersionTimestamp() { +// +// // must be maintaining version timestamps. 
+// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// final int nkeys = keys.nkeys; +// +// long min = Long.MAX_VALUE; +// long max = Long.MIN_VALUE; +// +// for (int i = 0; i < nkeys; i++) { +// +// final long t = versionTimestamps[i]; +// +// if (t < min) +// min = t; +// +// if (t > max) +// max = t; +// +// } +// +// minimumVersionTimestamp = min; +// maximumVersionTimestamp = max; +// +// } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -26,40 +26,23 @@ */ package com.bigdata.htree.data; -import com.bigdata.btree.IOverflowHandler; -import com.bigdata.btree.IndexSegment; -import com.bigdata.btree.data.IAbstractNodeDataCoder; +import java.util.Iterator; + import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; -import com.bigdata.rawstore.IRawStore; /** - * Interface for the data record of a hash bucket. + * Interface for the data record of a hash bucket. The hash bucket extends the + * B+Tree leaf data record interface. A hash bucket page may be shared by + * multiple directory entries (this is one of the principle tenants of + * extendible hashing). However, the bucket is just a bucket to each such + * directory entry. There is no sense of offset addressing into the shared + * bucket. * <p> - * The hash bucket extends the B+Tree leaf node page. However, hash buckets must - * have the HASH_KEYS flag enabled and SHOULD NOT use prefix compression unless - * (a) an order preserving hash function is used; and (b) the tuples are in key - * order within the page. 
- * <p> - * The hash values of the keys in the bucket will have a shared prefix (when - * using an MSB hash prefix) which corresponds to the globalDepth of the path - * through the hash tree leading to this bucket less the localDepth of this - * bucket. It is therefore possible (in principle) to store only the LSB bits of - * the hash values in the page and reconstruct the hash values using the MSB - * bits from the path through the hash tree. In order to be able to reconstruct - * the full hash code key based solely on local information, the MSB bits can be - * written out once and the LSB bits can be written out once per tuple. Testing - * the hash value of a key may then be done considering only the LSB bits of the - * hash value. This storage scheme also has the advantage that the hash value is - * not restricted to an int32 and is therefore compatible with the use of - * cryptographic hash functions. (If hash values are stored in a B+Tree leaf - * they will not shared this prefix property and can not be compressed in this - * manner). - * <p> * The {@link ILeafData#getPriorAddr()} and {@link ILeafData#getNextAddr()} * fields of the {@link ILeafData} record are reserved by the hash tree to * encode the search order for range queries when used in combination with an * order preserving hash function. + * <p> * * @author tho...@us... */ @@ -72,10 +55,11 @@ // */ // int getLocalDepth(); -// /** -// * The total bit length of the hash values of the keys. -// */ -// int getHashBitLength(); + /** + * Return the #of entries in the hash bucket (all keys, not just the + * distinct keys). + */ + int getKeyCount(); /** * The length (in bits) of the MSB prefix shared by the hash values of the @@ -91,6 +75,29 @@ * @return The hash value of that key. */ int getHash(int index); + + /** + * Return an {@link Iterator} which visits the index of each entry in the + * hash bucket having the given hash code. + * + * @param h + * The hash code. 
+ * + * @todo Note: There is a design tradeoff between autoboxing of the + * <code>int</code> index and allowing the {@link IBucketData} class + * to encapsulate the iterator pattern together with any setup which + * can be done once per scan for a given hash code. For example, using + * a BitInputStream. The iterator allows us to amortize the cost of + * that setup, but we pay for the autoboxing of the index values. + * However, autobox primitives tend to be quite cheap as they are + * rapidly reclaimed by GC. + * <p> + * It is possible to implement an extension interface which returns + * the [int] index without autoboxing. If this method signature is + * modified to return that interface then the implementation can avoid + * autoboxing. + */ + Iterator<Integer> hashIterator(int h); // /** // * The storage address of the last overflow page in the overflow chain. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -32,6 +32,8 @@ import java.io.IOException; import java.util.Arrays; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.Random; import java.util.TreeMap; @@ -53,6 +55,7 @@ import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.RandomKeysGenerator; import com.bigdata.cache.HardReferenceQueue; +import com.bigdata.htree.data.IBucketData; import com.bigdata.io.SerializerUtil; import com.bigdata.rawstore.Bytes; import com.bigdata.rawstore.IRawStore; @@ -479,6 +482,12 @@ } + if(n1 instanceof IBucketData) { + + assertSameHashCodes((IBucketData) n1, (IBucketData) n2); + + } + assertSameRaba(n1.getValues(), 
n2.getValues()); } @@ -668,6 +677,68 @@ } + /** + * Verifies details for the {@link IBucketData} interface. + * + * @param b1 + * A hash bucket. + * @param b2 + * Another hash bucket. + */ + static public void assertSameHashCodes(final IBucketData b1, final IBucketData b2) { + + // The key and value counts must be the same. + final int n = b1.getKeyCount(); + assertEquals("keyCount", n, b2.getKeyCount()); + assertEquals("valueCount", n, b1.getValueCount()); + assertEquals("valueCount", n, b2.getValueCount()); + + assertEquals("lengthMSB", b1.getLengthMSB(), b2.getLengthMSB()); + + /* + * Verify that the same hash codes are reported at each index position. + */ + for (int i = 0; i < n; i++) { + + final int h1 = b1.getHash(i); + + final int h2 = b2.getHash(i); + + if (h1 != h2) { + + assertEquals("getHash(" + i + ")", h1, h2); + + } + + } + + /* + * Now verify that the same hash matches are reported for each + * visited hash code. + */ + for (int i = 0; i < n; i++) { + + final int h1 = b1.getHash(i); + + final List<Integer> indices = new LinkedList<Integer>(); + + final Iterator<Integer> eitr = b1.hashIterator(h1); + + while (eitr.hasNext()) { + + indices.add(eitr.next()); + + } + + final Integer[] hashCodes = indices.toArray(new Integer[indices + .size()]); + + assertSameIterator("hashCodes", hashCodes, b2.hashIterator(h1)); + + } + + } + /** * Special purpose helper used to vet {@link Node#childAddr}. * @@ -677,7 +748,7 @@ * @param node * The node. */ - public void assertChildKeys(final long[] childAddr, final Node node) { + static public void assertChildKeys(final long[] childAddr, final Node node) { final int nChildAddr = childAddr.length; @@ -720,7 +791,7 @@ * @param node * The node. */ - public void assertKeys(final byte[][] keys, final AbstractNode<?> node) { + static public void assertKeys(final byte[][] keys, final AbstractNode<?> node) { // // verify the capacity of the keys[] on the node. 
// assertEquals("keys[] capacity", (node.maxKeys + 1) * stride, @@ -763,7 +834,7 @@ * @param node * The node. */ - public void assertEntryCounts(final int[] expected, final INodeData node) { + static public void assertEntryCounts(final int[] expected, final INodeData node) { final int len = expected.length; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -27,8 +27,6 @@ package com.bigdata.btree.data; - -import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.ReadOnlyKeysRaba; import com.bigdata.btree.raba.ReadOnlyValuesRaba; import com.bigdata.io.DataOutputBuffer; Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,68 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.htree; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Aggregates test suites into increasing dependency order. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestAll extends TestCase { + + /** + * + */ + public TestAll() { + } + + /** + * @param arg0 + */ + public TestAll(String arg0) { + super(arg0); + } + + /** + * Returns a test that will run each of the implementation specific test + * suites in turn. + */ + public static Test suite() + { + + final TestSuite suite = new TestSuite("HTree"); + + suite.addTest(com.bigdata.htree.data.TestAll.suite()); + + suite.addTestSuite(TestExtensibleHashing.class); + + return suite; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,49 @@ +package com.bigdata.htree.data; + +import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.IRaba; + +/** + * Abstract class for tests of {@link IBucketData} implementations. 
+ */ +abstract public class AbstractHashBucketDataRecordTestCase extends + AbstractLeafDataRecordTestCase { + + public AbstractHashBucketDataRecordTestCase() { + + super(); + + } + + public AbstractHashBucketDataRecordTestCase(String name) { + + super(name); + + } + + protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, + final boolean[] deleteMarkers, final long[] versionTimestamps) { + + /* + * Note: This computes the MSB prefix and the hash codes using the + * standard Java semantics for the hash of a byte[]. In practice, the + * hash value is normally computed from the key using an application + * specified hash function. + */ + final int lengthMSB = 0; + + final int[] hashCodes = new int[keys.size()]; + + for (int i = 0; i < hashCodes.length; i++) { + + hashCodes[i] = keys.get(i).hashCode(); + + } + + return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, + lengthMSB, hashCodes); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -1,5 +1,10 @@ package com.bigdata.htree.data; +import it.unimi.dsi.fastutil.Hash; + +import java.util.Iterator; +import java.util.NoSuchElementException; + import com.bigdata.btree.data.MockLeafData; import com.bigdata.btree.raba.IRaba; @@ -72,4 +77,57 @@ } + public Iterator<Integer> hashIterator(final int h) { + + return new HashMatchIterator(h); + + } + + /** + * Visits the index of each bucket entry having a matching hash code. 
+ */ + private class HashMatchIterator implements Iterator<Integer> { + + private final int h; + private int currentIndex = 0; + private Integer nextResult = null; + + private HashMatchIterator(final int h) { + this.h = h; + } + + public boolean hasNext() { + final int n = getKeyCount(); + while (nextResult == null && currentIndex < n) { + final int index = currentIndex++; + final int h1 = getHash(index); + if (h1 == h) { + nextResult = Integer.valueOf(index); + break; + } + } + return nextResult != null; + } + + public Integer next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + final Integer tmp = nextResult; + + nextResult = null; + + return tmp; + + } + + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -67,6 +67,8 @@ /* * Test w/ all key and value coders suitable for leaves. * + * @todo test the mutable bucket data record + * * @todo test w/ linked-leaf (order preserving hash functions). * * @todo test w/ out-of-line tuples (when too large for the page). 
Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java (from rev 3991, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,68 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 5, 2009 + */ + +package com.bigdata.htree.data; + +import com.bigdata.btree.data.DefaultLeafCoder; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder; + +/** + * Test suite for the HTree {@link ILeafData} records (accessing coded data in + * place). 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman extends + AbstractHashBucketDataRecordTestCase { + + /** + * + */ + public TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman() { + } + + /** + * @param name + */ + public TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman(String name) { + super(name); + } + + protected void setUp() throws Exception { + + super.setUp(); + + coder = new DefaultLeafCoder(// + CanonicalHuffmanRabaCoder.INSTANCE,// keys + CanonicalHuffmanRabaCoder.INSTANCE // vals + ); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -29,8 +29,6 @@ import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; import com.bigdata.btree.data.DefaultLeafCoder; -import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.SimpleRabaCoder; /** @@ -65,28 +63,4 @@ } - protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, - final boolean[] deleteMarkers, final long[] versionTimestamps) { - - /* - * Note: This computes the MSB prefix and the hash codes using the - * standard Java semantics for the hash of a byte[]. In practice, the - * hash value is normally computed from the key using an application - * specified hash function. 
- */ - final int lengthMSB = 0; - - final int[] hashCodes = new int[keys.size()]; - - for (int i = 0; i < hashCodes.length; i++) { - - hashCodes[i] = keys.get(i).hashCode(); - - } - - return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, - lengthMSB, hashCodes); - - } - } Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -1,93 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 5, 2009 - */ - -package com.bigdata.htree.data; - -import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; -import com.bigdata.btree.data.DefaultLeafCoder; -import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; -import com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder; - -/** - * Test suite for the HTree {@link ILeafData} records (accessing coded data in - * place). - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman extends AbstractLeafDataRecordTestCase { - - /** - * - */ - public TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman() { - } - - /** - * @param name - */ - public TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman(String name) { - super(name); - } - - protected void setUp() throws Exception { - - super.setUp(); - - coder = new DefaultLeafCoder(// - CanonicalHuffmanRabaCoder.INSTANCE,// keys - CanonicalHuffmanRabaCoder.INSTANCE // vals - ); - - } - - protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, - final boolean[] deleteMarkers, final long[] versionTimestamps) { - - /* - * Note: This computes the MSB prefix and the hash codes using the - * standard Java semantics for the hash of a byte[]. In practice, the - * hash value is normally computed from the key using an application - * specified hash function. 
- */ - final int lengthMSB = 0; - - final int[] hashCodes = new int[keys.size()]; - - for (int i = 0; i < hashCodes.length; i++) { - - hashCodes[i] = keys.get(i).hashCode(); - - } - - return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, - lengthMSB, hashCodes); - - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |