From: <tho...@us...> - 2010-12-03 22:26:48
|
Revision: 3993 http://bigdata.svn.sourceforge.net/bigdata/?rev=3993&view=rev Author: thompsonbry Date: 2010-12-03 22:26:40 +0000 (Fri, 03 Dec 2010) Log Message: ----------- Continued work on the HTree, especially on the interface, persistent data record, and mutable data record for the hash bucket. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/data/DefaultLeafCoder.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -35,6 +35,8 @@ import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; +import java.util.Iterator; +import java.util.NoSuchElementException; import org.apache.log4j.Logger; @@ -368,7 +370,7 @@ if (nkeys > 0) { final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); final byte[] a = new byte[byteLength]; @@ -443,22 +445,45 @@ return encodeLive(leaf, buf).data(); } - - /** - * A read-only view of the data for a B+Tree leaf based on a compact record - * format. While some fields are cached, for the most part the various data - * fields, including the keys and values, are accessed in place in the data - * record in order to minimize the memory footprint of the leaf. The keys and - * values are coded using a caller specified {@link IRabaCoder}. The specific - * coding scheme is specified by the {@link IndexMetadata} for the B+Tree - * instance and is not stored within the leaf data record. - * <p> - * Note: The leading byte of the record format codes for a leaf, a double-linked - * leaf or a node in a manner which is compatible with {@link ReadOnlyNodeData}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ + + /** + * A read-only view of the data for a B+Tree leaf based on a compact record + * format. While some fields are cached, for the most part the various data + * fields, including the keys and values, are accessed in place in the data + * record in order to minimize the memory footprint of the leaf. 
The keys + * and values are coded using a caller specified {@link IRabaCoder}. The + * specific coding scheme is specified by the {@link IndexMetadata} for the + * B+Tree instance and is not stored within the leaf data record. The use of + * prefix coding for keys is a good general choice, but should not be used + * in combination with a hash tree unless an order preserving hashing + * function is being used. + * <p> + * Note: The leading byte of the record format codes for a leaf, a + * double-linked leaf or a node in a manner which is compatible with + * {@link ReadOnlyNodeData}. + * <p> + * The {@link DefaultLeafCoder} automatically maintains hash values for keys + * for an {@link IBucketData} record. The hash values of the keys in the + * bucket will have a shared prefix (the MSB hash prefix) which corresponds + * to the globalDepth of the path through the hash tree leading to this + * bucket less the localDepth of this bucket. It is therefore possible to + * store only the LSB bits of the hash values in the page and reconstruct + * the hash values using the MSB bits from the path through the hash tree. + * In order to be able to reconstruct the full hash code key based solely on + * local information, the MSB bits can be written out once and the LSB bits + * can be written out once per tuple. Testing the hash value of a key may + * then be done considering only the LSB bits of the hash value. This + * storage scheme also has the advantage that the hash value is not + * restricted to an int32 and is therefore compatible with the use of + * cryptographic hash functions. (If hash values are stored in a B+Tree leaf + * they will not share this prefix property and can not be compressed in + * this manner). 
+ * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + * @version $Id: DefaultLeafCoder.java 3991 2010-12-03 18:48:02Z thompsonbry + * $ + */ static private class ReadOnlyLeafData extends AbstractReadOnlyNodeData<ILeafData> implements ILeafData, IBucketData { @@ -645,7 +670,7 @@ O_hashKeys = pos; final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); if (nkeys > 0) { @@ -804,7 +829,7 @@ O_hashKeys = pos; final int byteLength = BytesUtil - .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB)) * 8/* nbits */); + .bitFlagByteLength((lengthMSB + (nkeys * lengthLSB))/* nbits */); if (nkeys > 0) { @@ -994,7 +1019,7 @@ final int lengthMSB = 32/* hashBitLength */- lengthLSB; final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB - + nkeys * lengthMSB/* nbits */); + + (nkeys * lengthLSB)/* nbits */); final InputBitStream ibs = b.slice(O_hashKeys, byteLength) .getInputBitStream(); @@ -1018,8 +1043,102 @@ } } - - final public IRaba getKeys() { + + public Iterator<Integer> hashIterator(final int h) { + + return new HashMatchIterator(h); + + } + + /** + * Visits the index of each bucket entry having a matching hash code. + * + * @todo a trie over the hash entries would provide much faster search. 
+ */ + private class HashMatchIterator implements Iterator<Integer> { + + private final int h; + private final int lengthMSB; + private final InputBitStream ibs; + private int currentIndex = 0; + private Integer nextResult = null; + + private HashMatchIterator(final int h) { + + this.h = h; + + lengthMSB = 32/* hashBitLength */- lengthLSB; + + final int byteLength = BytesUtil.bitFlagByteLength(lengthMSB + + (nkeys * lengthLSB)/* nbits */); + + ibs = b.slice(O_hashKeys, byteLength) + .getInputBitStream(); + + } + + public boolean hasNext() { + + final int n = getKeyCount(); + + while (nextResult == null && currentIndex < n) { + + final int index = currentIndex++; + + int h1; + try { + + // We do not need to re-position the ibs. +// final long position = lengthMSB + currentIndex +// * lengthLSB; +// ibs.position(position); + + h1 = ibs.readInt(lengthLSB); + + h1 |= hashMSB; + + } catch (IOException ex) { + + throw new RuntimeException(ex); + + } + + if (h1 == h) { + + nextResult = Integer.valueOf(index); + + break; + + } + + } + + return nextResult != null; + + } + + public Integer next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + final Integer tmp = nextResult; + + nextResult = null; + + return tmp; + + } + + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } + + final public IRaba getKeys() { return keys; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashBucket.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -32,6 +32,7 @@ import com.bigdata.btree.IOverflowHandler; import com.bigdata.btree.IndexSegment; +import com.bigdata.btree.data.DefaultLeafCoder; import com.bigdata.btree.data.IAbstractNodeDataCoder; import 
com.bigdata.btree.data.ILeafData; import com.bigdata.btree.raba.IRaba; @@ -46,6 +47,11 @@ * @todo The hash of the key should be part of the ITuple interface so it can be * passed along based on the application level encoding of the key. * + * @todo Consider organizing the hash values of the keys in the page using a + * trie for faster lookup. This could be done when they are serialized (in + * which case this decision disappears into the {@link DefaultLeafCoder}) + * or dynamically. + * * @todo Support out-of-line representations of the key and/or value for a tuple * when they are large. The definition of "large" can be a configuration * value for the index metadata. For example, 1/4 of the target page size @@ -124,8 +130,19 @@ * * @todo IRaba keys plus IRaba vals. */ - final int[] data; + final int[] entries; + /** + * The data record. A mutable is used for all mutation operations. The data + * record is replaced by a read-only record used when the hash bucket is + * made persistent. A read-only data record is automatically converted into + * a mutable record when a mutation operation is requested. + * <p> + * Note: This is package private in order to expose it to + * {@link HashDirectory}. + */ + private IBucketData data; + protected void setLocalHashBits(final int localHashBits) { this.localHashBits = localHashBits; @@ -133,7 +150,9 @@ } public int getLocalHashBits() { + return localHashBits; + } /** @@ -148,7 +167,7 @@ for (int i = 0; i < size; i++) { if (i > 0) sb.append(','); - sb.append(Integer.toString(data[i])); + sb.append(Integer.toString(entries[i])); } sb.append("}}"); return sb.toString(); @@ -174,7 +193,7 @@ this.localHashBits = localHashBits; - this.data = new int[bucketSize]; + this.entries = new int[bucketSize]; // one more bucket. 
htbl.nbuckets++; @@ -200,7 +219,7 @@ for (int i = 0; i < size; i++) { - if (data[i] == key) + if (entries[i] == key) return true; } @@ -287,7 +306,7 @@ */ public void insert(final int h, final int key) { - if (size == data.length) { + if (size == entries.length) { /* * The bucket must be split, potentially recursively. @@ -375,7 +394,7 @@ } - data[size++] = key; + entries[size++] = key; // one more entry in the index. htbl.nentries++; @@ -397,7 +416,7 @@ for (int i = 0; i < size; i++) { - if (data[i] == key) { + if (entries[i] == key) { // #of tuples remaining beyond this point. final int length = size - i - 1; @@ -405,7 +424,7 @@ if (length > 0) { // Keep the array dense by copying down by one. - System.arraycopy(data, i + 1/* srcPos */, data/* dest */, + System.arraycopy(entries, i + 1/* srcPos */, entries/* dest */, i/* destPos */, length); } @@ -461,7 +480,7 @@ @Override public Integer next() { - return data[current++]; + return entries[current++]; } @Override @@ -568,14 +587,12 @@ * IBucketData */ - public int getHash(int index) { - // TODO Auto-generated method stub - return 0; + public int getHash(final int index) { + return data.getHash(index); } public int getLengthMSB() { - // TODO Auto-generated method stub - return 0; + return data.getLengthMSB(); } /* @@ -583,49 +600,39 @@ */ public boolean hasVersionTimestamps() { - // TODO Auto-generated method stub - return false; + return data.hasVersionTimestamps(); } public AbstractFixedByteArrayBuffer data() { - // TODO Auto-generated method stub - return null; + return data.data(); } public int getKeyCount() { - // TODO Auto-generated method stub - return 0; + return data.getKeyCount(); } public IRaba getKeys() { - // TODO Auto-generated method stub - return null; + return data.getKeys(); } public long getMaximumVersionTimestamp() { - // TODO Auto-generated method stub - return 0; + return data.getMaximumVersionTimestamp(); } public long getMinimumVersionTimestamp() { - // TODO Auto-generated method stub - 
return 0; + return data.getMinimumVersionTimestamp(); } public int getSpannedTupleCount() { - // TODO Auto-generated method stub - return 0; + return data.getKeyCount(); } public boolean isCoded() { - // TODO Auto-generated method stub - return false; + return data.isCoded(); } final public boolean isLeaf() { - return true; - } /** @@ -635,55 +642,43 @@ * {@link IBucketData} is automatically converted into a mutable instance. */ final public boolean isReadOnly() { - -// return data.isReadOnly(); - // TODO Auto-generated method stub - return false; - + return data.isReadOnly(); } /* * ILeafData */ - public boolean getDeleteMarker(int index) { - // TODO Auto-generated method stub - return false; + public boolean getDeleteMarker(final int index) { + return data.getDeleteMarker(index); } public long getNextAddr() { - // TODO Auto-generated method stub - return 0; + return data.getNextAddr(); } public long getPriorAddr() { - // TODO Auto-generated method stub - return 0; + return data.getPriorAddr(); } public int getValueCount() { - // TODO Auto-generated method stub - return 0; + return data.getValueCount(); } public IRaba getValues() { - // TODO Auto-generated method stub - return null; + return data.getValues(); } - public long getVersionTimestamp(int index) { - // TODO Auto-generated method stub - return 0; + public long getVersionTimestamp(final int index) { + return data.getVersionTimestamp(index); } public boolean hasDeleteMarkers() { - // TODO Auto-generated method stub - return false; + return data.hasDeleteMarkers(); } public boolean isDoubleLinked() { - // TODO Auto-generated method stub - return false; + return data.isDoubleLinked(); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 22:25:13 UTC (rev 3992) +++ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashDirectory.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -775,7 +775,7 @@ // Adjust the #of local bits to be considered. bold.setLocalHashBits(bold.getLocalHashBits() + 1); // The new bucket. - bnew = new HashBucket(htbl, bold.getLocalHashBits(), bold.data.length/* bucketSize */); + bnew = new HashBucket(htbl, bold.getLocalHashBits(), bold.entries.length/* bucketSize */); // // The address for the new bucket. // final int addrBNew = htbl.buckets.size(); // Add to the chain of buckets. @@ -862,7 +862,7 @@ { // the new bucket. bnew = new HashBucket(htbl, bold.getLocalHashBits() + 1, - bold.data.length/* bucketSize */); + bold.entries.length/* bucketSize */); // // Add to the chain of buckets. // htbl.buckets.add(bnew); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/HashTree.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -528,8 +528,7 @@ * have access to the store or ITuple will have to have indirection * support. */ - public HashTree(final int initialCapacity, - final int bucketSize) { + public HashTree(final int initialCapacity, final int bucketSize) { // @todo pass in the store reference per AbstractBTree. 
this.store = new SimpleMemoryRawStore(); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/IHashTuple.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,32 @@ +package com.bigdata.htree; + +import com.bigdata.btree.ITuple; + +/** + * Extended interface to support hash buckets. + * + * @author tho...@us... + * + * @param <E> + * + * @todo The reason for having this on ITuple is to make it practical for the + * hash code to be defined in terms of application specific data types + * rather than the unsigned byte[] key (but the latter could of course be + * decoded by the hash function before computing the hash of the + * application data type, except for things like Unicode keys). + * <p> + * This should probably be lifted onto {@link ITuple} and + * {@link #getKeyHash()} should be declared to throw an + * {@link UnsupportedOperationException} if the hash code of the key is + * not being stored. + */ +public interface IHashTuple<E> extends ITuple<E> { + +// int getHashBitLength(); + + /** + * The int32 hash value of the key. + */ + int getKeyHash(); + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/MutableBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,617 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 25, 2009 + */ + +package com.bigdata.htree; + +import java.util.Iterator; + +import cern.colt.map.OpenIntIntHashMap; + +import com.bigdata.btree.ITuple; +import com.bigdata.btree.MutableLeafData; +import com.bigdata.btree.raba.IRaba; +import com.bigdata.htree.data.IBucketData; +import com.bigdata.io.AbstractFixedByteArrayBuffer; +import com.bigdata.io.ByteArrayBuffer; +import com.bigdata.io.IDataRecord; +import com.bigdata.rawstore.Bytes; + +/** + * Implementation maintains Java objects corresponding to the persistent data + * and defines methods for a variety of mutations on the {@link IBucketData} + * record which operate by direct manipulation of the Java objects. + * <p> + * Note: package private fields are used so that they may be directly accessed + * by the {@link HashBucket} class. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: MutableLeafData.java 2265 2009-10-26 12:51:06Z thompsonbry $ + * + * @todo Consider mutable implementation based on a compacting record ala GOM. + * This is especially attractive for the hash tree. The implementation + * would have to be wholly different from the {@link MutableLeafData} + * class. 
Instead of managing the {@link IRaba} for the keys and values + * separately, each {@link ITuple} would be appended into a byte[] (or + * {@link IDataRecord}). There would be a budget for that backing buffer + * which is the maximum in-memory size of the bucket. An index would + * provide random access into the buffer for only those entries which are + * "live" and a counter is maintain of the #of entries in the buffer which + * are no longer in use. When the buffer capacity is reached, the buffer + * is compacted by copying all of the entries accessible from the index + * into a new buffer and the old buffer is released. + * <p> + * Records which are too large for the buffer should be moved out of line. + * <p> + * This can be used in combination with a dynamically maintained trie for + * fast hash lookup, or we could just scan the entries. + * <p> + * Lexicon key search can scan the entries using the index. Scanning can + * have a side-effect in which the position of the entry offsets in the + * index is swapped if the keys are out of order. This would give us + * MonetDB style "cracking". The keys would have to be put into full order + * no later than when the record is made persistent. + * <p> + * Even though mutation is not thread safe, compacting the data record + * must not cause the assignment of indices to tuples to change when the + * caller is iterating through the entries by index. + * + * @todo When the record is serialized, do we need to allow a decision function + * to examine the record and decide whether it must be split? Since we do + * not have a fixed target for the page size, but only a budget, and since + * compression of keys, values, metadata, and the encoded record can all + * be applied, it seems that the decision function should be in terms of + * the buffer budget and a maximum #of entries (e.g., B+Tree branching + * factor or an equivalent hash bucket threshold). 
+ */ +public class MutableBucketData implements IBucketData { + + private IDataRecord buf; + + private /*@todo final*/ OpenIntIntHashMap index; + + /** + * Constructor used when converting a persistent data record into a mutable + * one. + * + * @param data + */ + public MutableBucketData(final IBucketData data) { + + } + + /** + * + * @param bufferSize + * The initial size of the backing byte[]. + * @param branchingFactor + * The maximum #of tuples which may be stored in the data record. + * + * @todo The buffer must be permitted grow until it is sufficient to encode + * approximately one page worth of tuples. + * <p> + * is typically on the order of the size of a page, e.g., 4k. Since + * the data record will be encoded and possible compressed before it + * is written onto the store, this can be larger than the target. + * <p> + * In order to avoid problems where the objects are much smaller than + * expected, we should allow the backing buffer to grow or we should + * fit a model which estimates the size of the resulting page based on + * the size of the buffer and then grow the buffer until we can + * satisfy the target page size. 
+ */ + public MutableBucketData(final int bufferSize, final int branchingFactor) { + + final int initialBufferSize = Math + .min(Bytes.kilobyte32 * 4, bufferSize); + + buf = new ByteArrayBuffer(initialBufferSize); + + index = new OpenIntIntHashMap(branchingFactor/* initialCapacity */); + + } + + @Override + public int getHash(int index) { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getKeyCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getLengthMSB() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public Iterator<Integer> hashIterator(int h) { + // TODO Auto-generated method stub + return null; + } + + @Override + public boolean getDeleteMarker(int index) { + // TODO Auto-generated method stub + return false; + } + + @Override + public long getNextAddr() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public long getPriorAddr() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getValueCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public IRaba getValues() { + // TODO Auto-generated method stub + return null; + } + + @Override + public long getVersionTimestamp(int index) { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean hasDeleteMarkers() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean hasVersionTimestamps() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isDoubleLinked() { + // TODO Auto-generated method stub + return false; + } + + @Override + public AbstractFixedByteArrayBuffer data() { + // TODO Auto-generated method stub + return null; + } + + @Override + public IRaba getKeys() { + // TODO Auto-generated method stub + return null; + } + + @Override + public long getMaximumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + @Override + 
public long getMinimumVersionTimestamp() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public int getSpannedTupleCount() { + // TODO Auto-generated method stub + return 0; + } + + @Override + public boolean isCoded() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isLeaf() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isReadOnly() { + // TODO Auto-generated method stub + return false; + } + +// /** +// * The keys for the entries in the bucket. Unlike a B+Tree, the keys are NOT +// * maintained in a sorted order. Search proceeds by scanning for matching +// * hash codes and filtering for matching keys. +// */ +// final MutableKeyBuffer keys; +// +// /** +// * The values for the entries in the bucket. There is one value per key. The +// * value MAY be null. +// */ +// final MutableValueBuffer vals; +// +// /** +// * The deletion markers IFF isolation is supported by the {@link HTree}. +// */ +// final boolean[] deleteMarkers; +// +// /** +// * The version timestamps IFF isolation is supported by the {@link HTree}. +// */ +// final long[] versionTimestamps; +// +// /** +// * The minimum version timestamp. +// * +// * @todo these fields add 16 bytes to each {@link MutableBucketData} object +// * even when we do not use them. It would be better to use a subclass +// * or tack them onto the end of the {@link #versionTimestamps} array. +// */ +// long minimumVersionTimestamp; +// long maximumVersionTimestamp; +// +// /** +// * Create an empty data record with internal arrays dimensioned for the +// * specified branching factor. +// * +// * @param branchingFactor +// * The maximum #of entries in the hash bucket before it will +// * overflow or be split. 
Since the goal is to manage the size +// * of the bucket on the disk and since we do not known the size +// * of the bucket's data record until it is being evicted, this +// * value places an upper bound after which the bucket will be +// * @param hasVersionTimestamps +// * <code>true</code> iff version timestamps will be maintained. +// * @param hasDeleteMarkers +// * <code>true</code> iff delete markers will be maintained. +// */ +// public MutableBucketData(final int branchingFactor, +// final boolean hasVersionTimestamps, final boolean hasDeleteMarkers) { +// +// keys = new MutableKeyBuffer(branchingFactor + 1); +// +// vals = new MutableValueBuffer(branchingFactor + 1); +// +// versionTimestamps = (hasVersionTimestamps ? new long[branchingFactor + 1] +// : null); +// +// // init per API specification. +// minimumVersionTimestamp = Long.MAX_VALUE; +// maximumVersionTimestamp = Long.MIN_VALUE; +// +// deleteMarkers = (hasDeleteMarkers ? new boolean[branchingFactor + 1] +// : null); +// +// } +// +// /** +// * Copy ctor. +// * +// * @param branchingFactor +// * The branching factor for the owning B+Tree. +// * @param src +// * The source leaf. +// */ +// public MutableBucketData(final int branchingFactor, final ILeafData src) { +// +// keys = new MutableKeyBuffer(branchingFactor + 1, src.getKeys()); +// +// vals = new MutableValueBuffer(branchingFactor + 1, src.getValues()); +// +// versionTimestamps = (src.hasVersionTimestamps() ? new long[branchingFactor + 1] +// : null); +// +// deleteMarkers = (src.hasDeleteMarkers() ? 
new boolean[branchingFactor + 1] +// : null); +// +// final int nkeys = keys.size(); +// +// if (versionTimestamps != null) { +// +// for (int i = 0; i < nkeys; i++) { +// +// versionTimestamps[i] = src.getVersionTimestamp(i); +// +// } +// +// minimumVersionTimestamp = src.getMinimumVersionTimestamp(); +// +// maximumVersionTimestamp = src.getMaximumVersionTimestamp(); +// +// } else { +// +// minimumVersionTimestamp = Long.MAX_VALUE; +// +// maximumVersionTimestamp = Long.MIN_VALUE; +// +// +// } +// +// if (deleteMarkers != null) { +// +// for (int i = 0; i < nkeys; i++) { +// +// deleteMarkers[i] = src.getDeleteMarker(i); +// +// } +// +// } +// +// } +// +// /** +// * Ctor based on just "data" -- used by unit tests. +// * +// * @param keys +// * A representation of the defined keys in the leaf. +// * @param values +// * An array containing the values found in the leaf. +// * @param versionTimestamps +// * An array of the version timestamps (iff the version metadata +// * is being maintained). +// * @param deleteMarkers +// * An array of the delete markers (iff the version metadata is +// * being maintained). +// */ +// public MutableBucketData(final MutableKeyBuffer keys, +// final MutableValueBuffer values, final long[] versionTimestamps, +// final boolean[] deleteMarkers) { +// +// assert keys != null; +// assert values != null; +// assert keys.capacity() == values.capacity(); +// if (versionTimestamps != null) { +// assert versionTimestamps.length == keys.capacity(); +// } +// if (deleteMarkers != null) { +// assert deleteMarkers.length == keys.capacity(); +// } +// +// this.keys = keys; +// this.vals = values; +// this.versionTimestamps = versionTimestamps; +// this.deleteMarkers = deleteMarkers; +// +// if (versionTimestamps != null) +// recalcMinMaxVersionTimestamp(); +// +// } +// +// /** +// * Range check a tuple index. +// * +// * @param index +// * The index of a tuple in [0:nkeys]. 
+// * @return <code>true</code> +// * +// * @throws IndexOutOfBoundsException +// * if the index is not in the legal range. +// */ +// final protected boolean rangeCheckTupleIndex(final int index) { +// +// if (index < 0 || index > getKeys().size()) +// throw new IndexOutOfBoundsException(); +// +// return true; +// +// } +// +// /** +// * No - this is a mutable data record. +// */ +// final public boolean isReadOnly() { +// +// return false; +// +// } +// +// /** +// * No. +// */ +// final public boolean isCoded() { +// +// return false; +// +// } +// +// final public AbstractFixedByteArrayBuffer data() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// public final long getVersionTimestamp(final int index) { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// assert rangeCheckTupleIndex(index); +// +// return versionTimestamps[index]; +// +// } +// +// final public long getMinimumVersionTimestamp() { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// return minimumVersionTimestamp; +// +// } +// +// final public long getMaximumVersionTimestamp() { +// +// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// return maximumVersionTimestamp; +// +// } +// +// public final boolean getDeleteMarker(final int index) { +// +// if (deleteMarkers == null) +// throw new UnsupportedOperationException(); +// +// assert rangeCheckTupleIndex(index); +// +// return deleteMarkers[index]; +// +// } +// +// final public IRaba getValues() { +// +// return vals; +// +// } +// +// final public IRaba getKeys() { +// +// return keys; +// +// } +// +// /** +// * Always returns <code>true</code>. +// */ +// final public boolean isLeaf() { +// +// return true; +// +// } +// +// /** +// * For a leaf the #of entries is always the #of keys. 
+// */ +// final public int getSpannedTupleCount() { +// +// return getKeys().size(); +// +// } +// +// final public int getValueCount() { +// +// return vals.size(); +// +// } +// +// final public boolean hasDeleteMarkers() { +// +// return deleteMarkers != null; +// +// } +// +// final public boolean hasVersionTimestamps() { +// +// return versionTimestamps != null; +// +// } +// +// final public int getKeyCount() { +// +// return keys.size(); +// +// } +// +// /** +// * No - this class does not support double-linked leaves (only the +// * {@link IndexSegment} actually uses double-linked leaves). +// */ +// final public boolean isDoubleLinked() { +// +// return false; +// +// } +// +// final public long getNextAddr() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// final public long getPriorAddr() { +// +// throw new UnsupportedOperationException(); +// +// } +// +// /** +// * Recalculate the min/max version timestamp on the leaf. The caller is +// * responsible for propagating the new min/max to the ancestors of the leaf. +// * +// * @throws UnsupportedOperationException +// * if the leaf is not maintaining per-tuple version timestamps. +// */ +// void recalcMinMaxVersionTimestamp() { +// +// // must be maintaining version timestamps. 
+// if (versionTimestamps == null) +// throw new UnsupportedOperationException(); +// +// final int nkeys = keys.nkeys; +// +// long min = Long.MAX_VALUE; +// long max = Long.MIN_VALUE; +// +// for (int i = 0; i < nkeys; i++) { +// +// final long t = versionTimestamps[i]; +// +// if (t < min) +// min = t; +// +// if (t > max) +// max = t; +// +// } +// +// minimumVersionTimestamp = min; +// maximumVersionTimestamp = max; +// +// } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/htree/data/IBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -26,40 +26,23 @@ */ package com.bigdata.htree.data; -import com.bigdata.btree.IOverflowHandler; -import com.bigdata.btree.IndexSegment; -import com.bigdata.btree.data.IAbstractNodeDataCoder; +import java.util.Iterator; + import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; -import com.bigdata.rawstore.IRawStore; /** - * Interface for the data record of a hash bucket. + * Interface for the data record of a hash bucket. The hash bucket extends the + * B+Tree leaf data record interface. A hash bucket page may be shared by + * multiple directory entries (this is one of the principle tenants of + * extendible hashing). However, the bucket is just a bucket to each such + * directory entry. There is no sense of offset addressing into the shared + * bucket. * <p> - * The hash bucket extends the B+Tree leaf node page. However, hash buckets must - * have the HASH_KEYS flag enabled and SHOULD NOT use prefix compression unless - * (a) an order preserving hash function is used; and (b) the tuples are in key - * order within the page. 
- * <p> - * The hash values of the keys in the bucket will have a shared prefix (when - * using an MSB hash prefix) which corresponds to the globalDepth of the path - * through the hash tree leading to this bucket less the localDepth of this - * bucket. It is therefore possible (in principle) to store only the LSB bits of - * the hash values in the page and reconstruct the hash values using the MSB - * bits from the path through the hash tree. In order to be able to reconstruct - * the full hash code key based solely on local information, the MSB bits can be - * written out once and the LSB bits can be written out once per tuple. Testing - * the hash value of a key may then be done considering only the LSB bits of the - * hash value. This storage scheme also has the advantage that the hash value is - * not restricted to an int32 and is therefore compatible with the use of - * cryptographic hash functions. (If hash values are stored in a B+Tree leaf - * they will not shared this prefix property and can not be compressed in this - * manner). - * <p> * The {@link ILeafData#getPriorAddr()} and {@link ILeafData#getNextAddr()} * fields of the {@link ILeafData} record are reserved by the hash tree to * encode the search order for range queries when used in combination with an * order preserving hash function. + * <p> * * @author tho...@us... */ @@ -72,10 +55,11 @@ // */ // int getLocalDepth(); -// /** -// * The total bit length of the hash values of the keys. -// */ -// int getHashBitLength(); + /** + * Return the #of entries in the hash bucket (all keys, not just the + * distinct keys). + */ + int getKeyCount(); /** * The length (in bits) of the MSB prefix shared by the hash values of the @@ -91,6 +75,29 @@ * @return The hash value of that key. */ int getHash(int index); + + /** + * Return an {@link Iterator} which visits the index of each entry in the + * hash bucket having the given hash code. + * + * @param h + * The hash code. 
+ * + * @todo Note: There is a design tradeoff between autoboxing of the + * <code>int</code> index and allowing the {@link IBucketData} class + * to encapsulate the iterator pattern together with any setup which + * can be done once per scan for a given hash code. For example, using + * a BitInputStream. The iterator allows us to amortize the cost of + * that setup, but we pay for the autoboxing of the index values. + * However, autobox primitives tend to be quite cheap as they are + * rapidly reclaimed by GC. + * <p> + * It is possible to implement an extension interface which returns + * the [int] index without autoboxing. If this method signature is + * modified to return that interface then the implementation can avoid + * autoboxing. + */ + Iterator<Integer> hashIterator(int h); // /** // * The storage address of the last overflow page in the overflow chain. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/AbstractBTreeTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -32,6 +32,8 @@ import java.io.IOException; import java.util.Arrays; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.Random; import java.util.TreeMap; @@ -53,6 +55,7 @@ import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.RandomKeysGenerator; import com.bigdata.cache.HardReferenceQueue; +import com.bigdata.htree.data.IBucketData; import com.bigdata.io.SerializerUtil; import com.bigdata.rawstore.Bytes; import com.bigdata.rawstore.IRawStore; @@ -479,6 +482,12 @@ } + if(n1 instanceof IBucketData) { + + assertSameHashCodes((IBucketData) n1, (IBucketData) n2); + + } + assertSameRaba(n1.getValues(), 
n2.getValues()); } @@ -668,6 +677,68 @@ } + /** + * Verifies details for the {@link IBucketData} interface. + * + * @param b1 + * A hash bucket. + * @param b2 + * Another hash bucket. + */ + static public void assertSameHashCodes(final IBucketData b1, final IBucketData b2) { + + // The key and value counts must be the same. + final int n = b1.getKeyCount(); + assertEquals("keyCount", n, b2.getKeyCount()); + assertEquals("valueCount", n, b1.getValueCount()); + assertEquals("valueCount", n, b2.getValueCount()); + + assertEquals("lengthMSB", b1.getLengthMSB(), b2.getLengthMSB()); + + /* + * Verify that the same hash codes are reported at each index position. + */ + for (int i = 0; i < n; i++) { + + final int h1 = b1.getHash(i); + + final int h2 = b2.getHash(i); + + if (h1 != h2) { + + assertEquals("getHash(" + i + ")", h1, h2); + + } + + } + + /* + * Now verify that the same hash matches are reported for each + * visited hash code. + */ + for (int i = 0; i < n; i++) { + + final int h1 = b1.getHash(i); + + final List<Integer> indices = new LinkedList<Integer>(); + + final Iterator<Integer> eitr = b1.hashIterator(h1); + + while (eitr.hasNext()) { + + indices.add(eitr.next()); + + } + + final Integer[] hashCodes = indices.toArray(new Integer[indices + .size()]); + + assertSameIterator("hashCodes", hashCodes, b2.hashIterator(h1)); + + } + + } + /** * Special purpose helper used to vet {@link Node#childAddr}. * @@ -677,7 +748,7 @@ * @param node * The node. */ - public void assertChildKeys(final long[] childAddr, final Node node) { + static public void assertChildKeys(final long[] childAddr, final Node node) { final int nChildAddr = childAddr.length; @@ -720,7 +791,7 @@ * @param node * The node. */ - public void assertKeys(final byte[][] keys, final AbstractNode<?> node) { + static public void assertKeys(final byte[][] keys, final AbstractNode<?> node) { // // verify the capacity of the keys[] on the node. 
// assertEquals("keys[] capacity", (node.maxKeys + 1) * stride, @@ -763,7 +834,7 @@ * @param node * The node. */ - public void assertEntryCounts(final int[] expected, final INodeData node) { + static public void assertEntryCounts(final int[] expected, final INodeData node) { final int len = expected.length; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/btree/data/AbstractLeafDataRecordTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -27,8 +27,6 @@ package com.bigdata.btree.data; - -import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.ReadOnlyKeysRaba; import com.bigdata.btree.raba.ReadOnlyValuesRaba; import com.bigdata.io.DataOutputBuffer; Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/TestAll.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,68 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.htree; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Aggregates test suites into increasing dependency order. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestAll extends TestCase { + + /** + * + */ + public TestAll() { + } + + /** + * @param arg0 + */ + public TestAll(String arg0) { + super(arg0); + } + + /** + * Returns a test that will run each of the implementation specific test + * suites in turn. + */ + public static Test suite() + { + + final TestSuite suite = new TestSuite("HTree"); + + suite.addTest(com.bigdata.htree.data.TestAll.suite()); + + suite.addTestSuite(TestExtensibleHashing.class); + + return suite; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/AbstractHashBucketDataRecordTestCase.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,49 @@ +package com.bigdata.htree.data; + +import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.IRaba; + +/** + * Abstract class for tests of {@link IBucketData} implementations. 
+ */ +abstract public class AbstractHashBucketDataRecordTestCase extends + AbstractLeafDataRecordTestCase { + + public AbstractHashBucketDataRecordTestCase() { + + super(); + + } + + public AbstractHashBucketDataRecordTestCase(String name) { + + super(name); + + } + + protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, + final boolean[] deleteMarkers, final long[] versionTimestamps) { + + /* + * Note: This computes the MSB prefix and the hash codes using the + * standard Java semantics for the hash of a byte[]. In practice, the + * hash value is normally computed from the key using an application + * specified hash function. + */ + final int lengthMSB = 0; + + final int[] hashCodes = new int[keys.size()]; + + for (int i = 0; i < hashCodes.length; i++) { + + hashCodes[i] = keys.get(i).hashCode(); + + } + + return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, + lengthMSB, hashCodes); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/MockBucketData.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -1,5 +1,10 @@ package com.bigdata.htree.data; +import it.unimi.dsi.fastutil.Hash; + +import java.util.Iterator; +import java.util.NoSuchElementException; + import com.bigdata.btree.data.MockLeafData; import com.bigdata.btree.raba.IRaba; @@ -72,4 +77,57 @@ } + public Iterator<Integer> hashIterator(final int h) { + + return new HashMatchIterator(h); + + } + + /** + * Visits the index of each bucket entry having a matching hash code. 
+ */ + private class HashMatchIterator implements Iterator<Integer> { + + private final int h; + private int currentIndex = 0; + private Integer nextResult = null; + + private HashMatchIterator(final int h) { + this.h = h; + } + + public boolean hasNext() { + final int n = getKeyCount(); + while (nextResult == null && currentIndex < n) { + final int index = currentIndex++; + final int h1 = getHash(index); + if (h1 == h) { + nextResult = Integer.valueOf(index); + break; + } + } + return nextResult != null; + } + + public Integer next() { + + if (!hasNext()) + throw new NoSuchElementException(); + + final Integer tmp = nextResult; + + nextResult = null; + + return tmp; + + } + + public void remove() { + + throw new UnsupportedOperationException(); + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestAll.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -67,6 +67,8 @@ /* * Test w/ all key and value coders suitable for leaves. * + * @todo test the mutable bucket data record + * * @todo test w/ linked-leaf (order preserving hash functions). * * @todo test w/ out-of-line tuples (when too large for the page). 
Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java (from rev 3991, branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -0,0 +1,68 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 5, 2009 + */ + +package com.bigdata.htree.data; + +import com.bigdata.btree.data.DefaultLeafCoder; +import com.bigdata.btree.data.ILeafData; +import com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder; + +/** + * Test suite for the HTree {@link ILeafData} records (accessing coded data in + * place). 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman extends + AbstractHashBucketDataRecordTestCase { + + /** + * + */ + public TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman() { + } + + /** + * @param name + */ + public TestBucketDataRecord_CanonicalHuffman_CanonicalHuffman(String name) { + super(name); + } + + protected void setUp() throws Exception { + + super.setUp(); + + coder = new DefaultLeafCoder(// + CanonicalHuffmanRabaCoder.INSTANCE,// keys + CanonicalHuffmanRabaCoder.INSTANCE // vals + ); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestBucketDataRecord_Simple_Simple.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -29,8 +29,6 @@ import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; import com.bigdata.btree.data.DefaultLeafCoder; -import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.codec.SimpleRabaCoder; /** @@ -65,28 +63,4 @@ } - protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, - final boolean[] deleteMarkers, final long[] versionTimestamps) { - - /* - * Note: This computes the MSB prefix and the hash codes using the - * standard Java semantics for the hash of a byte[]. In practice, the - * hash value is normally computed from the key using an application - * specified hash function. 
- */ - final int lengthMSB = 0; - - final int[] hashCodes = new int[keys.size()]; - - for (int i = 0; i < hashCodes.length; i++) { - - hashCodes[i] = keys.get(i).hashCode(); - - } - - return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, - lengthMSB, hashCodes); - - } - } Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:25:13 UTC (rev 3992) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/htree/data/TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman.java 2010-12-03 22:26:40 UTC (rev 3993) @@ -1,93 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 5, 2009 - */ - -package com.bigdata.htree.data; - -import com.bigdata.btree.data.AbstractLeafDataRecordTestCase; -import com.bigdata.btree.data.DefaultLeafCoder; -import com.bigdata.btree.data.ILeafData; -import com.bigdata.btree.raba.IRaba; -import com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder; - -/** - * Test suite for the HTree {@link ILeafData} records (accessing coded data in - * place). - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman extends AbstractLeafDataRecordTestCase { - - /** - * - */ - public TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman() { - } - - /** - * @param name - */ - public TestLeafDataRecord_CanonicalHuffman_CanonicalHuffman(String name) { - super(name); - } - - protected void setUp() throws Exception { - - super.setUp(); - - coder = new DefaultLeafCoder(// - CanonicalHuffmanRabaCoder.INSTANCE,// keys - CanonicalHuffmanRabaCoder.INSTANCE // vals - ); - - } - - protected ILeafData mockLeafFactory(final IRaba keys, final IRaba vals, - final boolean[] deleteMarkers, final long[] versionTimestamps) { - - /* - * Note: This computes the MSB prefix and the hash codes using the - * standard Java semantics for the hash of a byte[]. In practice, the - * hash value is normally computed from the key using an application - * specified hash function. 
- */ - final int lengthMSB = 0; - - final int[] hashCodes = new int[keys.size()]; - - for (int i = 0; i < hashCodes.length; i++) { - - hashCodes[i] = keys.get(i).hashCode(); - - } - - return new MockBucketData(keys, vals, deleteMarkers, versionTimestamps, - lengthMSB, hashCodes); - - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |