From: <tho...@us...> - 2012-04-12 15:27:42
|
Revision: 6279 http://bigdata.svn.sourceforge.net/bigdata/?rev=6279&view=rev Author: thompsonbry Date: 2012-04-12 15:27:30 +0000 (Thu, 12 Apr 2012) Log Message: ----------- Code changes to support packing TIDs on the Journal. This does not yet support packing longs on a cluster since we also would need to address the TermIdEncoder. Packed TIDs are disabled by default in IVUtility.PACK_TIDS since they are incompatible with existing data on the disk. @see https://sourceforge.net/apps/trac/bigdata/ticket/548 (Pack TIDs) Modified Paths: -------------- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/ByteArrayBuffer.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/LongPacker.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/io/TestLongPacker.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/TermId.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/Term2IdTupleSerializer.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/TestTermIV.java branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestId2TermTupleSerializer.java Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -39,6 +39,7 
@@ import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleSerializer; +import com.bigdata.io.LongPacker; /** * A class that may be used to form multi-component keys but which does not @@ -61,7 +62,7 @@ * len field to the mark. keys with multiple components could benefit from * allowing multiple marks (the sparse row store is the main use case). */ -public class KeyBuilder implements IKeyBuilder { +public class KeyBuilder implements IKeyBuilder, LongPacker.IByteBuffer { private static final transient Logger log = Logger.getLogger(KeyBuilder.class); @@ -737,6 +738,100 @@ } /** + * Packs a non-negative long value into the minimum #of bytes in which the + * value can be represented and writes those bytes onto the buffer. The + * first byte determines whether or not the long value was packed and, if + * packed, how many bytes were required to represent the packed long value. + * When the high bit of the first byte is a one (1), then the long value + * could not be packed and the long value is found by clearing the high bit + * and interpreting the first byte plus the next seven (7) bytes as a long. + * Otherwise the next three (3) bits are interpreted as an unsigned integer + * giving the #of bytes (nbytes) required to represent the packed long + * value. To recover the long value the high nibble is cleared and the first + * byte together with the next nbytes are interpreted as an unsigned long + * value whose leading zero bytes were not written. + * + * <pre> + * + * [0|1|2|3|4|5|6|7] + * 1 - - - nbytes = 8, clear high bit and interpret this plus the next 7 bytes as a long. + * 0 1 1 1 nbytes = 7, clear high nibble and interpret this plus the next 6 bytes as a long. + * 0 1 1 0 nbytes = 6, clear high nibble and interpret this plus the next 5 bytes as a long. + * 0 1 0 1 nbytes = 5, clear high nibble and interpret this plus the next 4 bytes as a long. + * 0 1 0 0 nbytes = 4, clear high nibble and interpret this plus the next 3 bytes as a long. 
+ * 0 0 1 1 nbytes = 3, clear high nibble and interpret this plus the next 2 bytes as a long. + * 0 0 1 0 nbytes = 2, clear high nibble and interpret this plus the next byte as a long. + * 0 0 0 1 nbytes = 1, clear high nibble. value is the low nibble. + * + * </pre> + * + * Note: These are decodable (no loss) but negative longs are not allowed. + * <p> + * Note: The order is NOT fully preserved. Any long which is encoded into + * less than 8 bytes has its order preserved. However, a long which is + * encoded into 8 bytes will wind up ordered before any longs which pack + * into fewer bytes. + * + * @param v + * The unsigned long value. + * + * @return This {@link KeyBuilder} (the packed bytes are appended to its buffer). + */ + final public KeyBuilder pack(final long v) { + + LongPacker.packLong(v, pbuf, this); + + return this; + + } + + /** + * Relative <i>put</i> method for writing a byte[] on the buffer. + * + * @param b + * The byte[]. + * @param off + * The offset of the first byte in <i>b</i> to be written on + * the buffer. + * @param len + * The #of bytes in <i>b</i> to be written on the buffer. + */ + public void put(final byte[] b, final int off, final int len) { + + ensureFree(len); + + System.arraycopy(b/* src */, 0/* srcPos */, buf/* dest */, + this.len/* destPos */, len/* length */); + + this.len += len; + + } + + /** + * Private buffer for packing long integers. + */ + final private byte[] pbuf = new byte[8]; + +// /** +// * Unpack a long value from the current buffer position. +// * +// * @param buf +// * The buffer containing the data to be decoded. +// * @param off +// * The offset of the first byte of the value to be decoded. +// * @param limit +// * The exclusive upper bound available for decoding +// * +// * @return The long value. 
+// */ +// static final public long unpackLong(final byte[] buf, final int off, +// final int limit) { +// +// return LongPacker.unpackLong(buf, off, limit); +// +// } + + /** * Return the value that will impose the lexiographic ordering as an * unsigned long integer. * Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/ByteArrayBuffer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/ByteArrayBuffer.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/ByteArrayBuffer.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -70,7 +70,7 @@ * @version $Id$ */ public class ByteArrayBuffer extends OutputStream implements IByteArrayBuffer, - RepositionableStream { + RepositionableStream, LongPacker.IByteBuffer { private static final transient Logger log = Logger .getLogger(ByteArrayBuffer.class); @@ -1012,101 +1012,10 @@ * * @return The #of bytes onto which the unsigned long value was packed. */ - final public int packLong( final long v ) { + final public int packLong(final long v) { + + return LongPacker.packLong(v, pbuf, this); - /* - * You can only pack non-negative long values with this method. - */ - - if (v < 0) { - - throw new IllegalArgumentException("negative value: v=" + v); - - } - - /* - * If the high byte is non-zero then we will write the value as a normal - * long and return nbytes == 8. This case handles large positive long - * values. - */ - if( ( v >> 56 ) != 0 ) { - pbuf[0] = ( (byte)((0xff & (v >> 56))|0x80) ); // note: set the high bit. 
- pbuf[1] = ( (byte)(0xff & (v >> 48)) ); - pbuf[2] = ( (byte)(0xff & (v >> 40)) ); - pbuf[3] = ( (byte)(0xff & (v >> 32)) ); - pbuf[4] = ( (byte)(0xff & (v >> 24)) ); - pbuf[5] = ( (byte)(0xff & (v >> 16)) ); - pbuf[6] = ( (byte)(0xff & (v >> 8)) ); - pbuf[7] = ( (byte)(0xff & v) ); - put(pbuf, 0, 8); - return 8; - } - - // #of nibbles required to represent the long value. - final int nnibbles = getNibbleLength( v ); - - /* - * Is [nnibbles] even? (If it is even then we need to pad out an extra - * zero nibble in the first byte.) - */ - final boolean evenNibbleCount = ( nnibbles == ( ( nnibbles >> 1 ) << 1 ) ); - - // #of bytes required to represent the long value (plus the header nibble). - final int nbytes = ( ( nnibbles +1 ) >> 1 ) + (evenNibbleCount?1:0); - - int nwritten = 0; - - if( evenNibbleCount ) { - - /* - * An even nibble count requires that we pad the low nibble of the - * first byte with zeros. - */ - - // header byte. low nibble is empty. - byte b = (byte) ( nbytes << 4 ); - - pbuf[nwritten++] = b; - - // remaining bytes containing the packed value. - for( int i=(nnibbles-2)<<2; i>=0; i-=8 ) { - - b = (byte) (0xff & (v >> i)); - - pbuf[nwritten++] = b; - - } - - } else { - - /* - * An odd nibble count means that we pack the first nibble of the - * long value into the low nibble of the header byte. In this case - * the first nibble will always be the low nibble of the first - * non-zero byte in the long value (the high nibble of that byte - * must be zero since there is an odd nibble count). 
- */ - - byte highByte = (byte) (0xff & (v >> ((nbytes-1)*8) )); - - byte b = (byte) ( ( nbytes << 4 ) | highByte ); - - pbuf[nwritten++] = b; - - for( int i=(nnibbles-3)<<2; i>=0; i-=8 ) { - - b = (byte) (0xff & (v >> i)); - - pbuf[nwritten++] = b; - - } - - } - - put(pbuf,0,nwritten); - - return nwritten; - } /** @@ -1114,37 +1023,38 @@ */ final private byte[] pbuf = new byte[8]; - /** - * Return the #of non-zero nibbles, counting from the first non-zero nibble - * in the long value. A value of <code>0L</code> is considered to be one - * nibble for our purposes. - * - * @param v - * The long value. - * - * @return The #of nibbles in [1:16]. - */ - static protected final int getNibbleLength( final long v ) - { - - for( int i=56, j=16; i>=0; i-=8, j-=2 ) { - - if( (0xf0 & (v >> i)) != 0 ) return j; - - if( (0x0f & (v >> i)) != 0 ) return j-1; - - } - - if (v != 0) - throw new AssertionError("v=" + v); - - /* - * value is zero, which is considered to be one nibble for our purposes. - */ - - return 1; - - } +// /** +// * Return the #of non-zero nibbles, counting from the first non-zero nibble +// * in the long value. A value of <code>0L</code> is considered to be one +// * nibble for our purposes. +// * +// * @param v +// * The long value. +// * +// * @return The #of nibbles in [1:16]. +// */ +// static protected final int getNibbleLength( final long v ) +// { +// return LongPacker.getNibbleLength(v); +// +//// for( int i=56, j=16; i>=0; i-=8, j-=2 ) { +//// +//// if( (0xf0 & (v >> i)) != 0 ) return j; +//// +//// if( (0x0f & (v >> i)) != 0 ) return j-1; +//// +//// } +//// +//// if (v != 0) +//// throw new AssertionError("v=" + v); +//// +//// /* +//// * value is zero, which is considered to be one nibble for our purposes. +//// */ +//// +//// return 1; +//// +// } /* * Pack unsigned short integer. 
Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/LongPacker.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/LongPacker.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/java/com/bigdata/io/LongPacker.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -39,7 +39,6 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ - public class LongPacker { @@ -242,6 +241,157 @@ } /** + * Narrow interface to support packing against different buffer classes. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + public interface IByteBuffer { + /** + * Relative <i>put</i> method for writing a byte[] on the buffer. + * + * @param b + * The byte[]. + * @param off + * The offset of the first byte in <i>b</i> to be written on + * the buffer. + * @param len + * The #of bytes in <i>b</i> to be written on the buffer. + */ + void put(final byte[] b, final int off, final int len); + } + + /** + * Packs a non-negative long value into the minimum #of bytes in which the + * value can be represented and writes those bytes onto the buffer. + * The first byte determines whether or not the long value was packed and, + * if packed, how many bytes were required to represent the packed long + * value. When the high bit of the first byte is a one (1), then the long + * value could not be packed and the long value is found by clearing the + * high bit and interpreting the first byte plus the next seven (7) bytes as + * a long. Otherwise the next three (3) bits are interpreted as an unsigned + * integer giving the #of bytes (nbytes) required to represent the packed + * long value. To recover the long value the high nibble is cleared and the + * first byte together with the next nbytes are interpreted as an unsigned + * long value whose leading zero bytes were not written. 
+ * <pre> + * + * [0|1|2|3|4|5|6|7] + * 1 - - - nbytes = 8, clear high bit and interpret this plus the next 7 bytes as a long. + * 0 1 1 1 nbytes = 7, clear high nibble and interpret this plus the next 6 bytes as a long. + * 0 1 1 0 nbytes = 6, clear high nibble and interpret this plus the next 5 bytes as a long. + * 0 1 0 1 nbytes = 5, clear high nibble and interpret this plus the next 4 bytes as a long. + * 0 1 0 0 nbytes = 4, clear high nibble and interpret this plus the next 3 bytes as a long. + * 0 0 1 1 nbytes = 3, clear high nibble and interpret this plus the next 2 bytes as a long. + * 0 0 1 0 nbytes = 2, clear high nibble and interpret this plus the next byte as a long. + * 0 0 0 1 nbytes = 1, clear high nibble. value is the low nibble. + * + * </pre> + * + * @param v The unsigned long value. + * + * @return The #of bytes onto which the unsigned long value was packed. + */ + static final public int packLong(final long v, final byte[] pbuf, + final IByteBuffer buf) { + + /* + * You can only pack non-negative long values with this method. + */ + + if (v < 0) { + + throw new IllegalArgumentException("negative value: v=" + v); + + } + + /* + * If the high byte is non-zero then we will write the value as a normal + * long and return nbytes == 8. This case handles large positive long + * values. + */ + if( ( v >> 56 ) != 0 ) { + pbuf[0] = ( (byte)((0xff & (v >> 56))|0x80) ); // note: set the high bit. + pbuf[1] = ( (byte)(0xff & (v >> 48)) ); + pbuf[2] = ( (byte)(0xff & (v >> 40)) ); + pbuf[3] = ( (byte)(0xff & (v >> 32)) ); + pbuf[4] = ( (byte)(0xff & (v >> 24)) ); + pbuf[5] = ( (byte)(0xff & (v >> 16)) ); + pbuf[6] = ( (byte)(0xff & (v >> 8)) ); + pbuf[7] = ( (byte)(0xff & v) ); + buf.put(pbuf, 0, 8); + return 8; + } + + // #of nibbles required to represent the long value. + final int nnibbles = LongPacker.getNibbleLength( v ); + + /* + * Is [nnibbles] even? (If it is even then we need to pad out an extra + * zero nibble in the first byte.) 
+ */ + final boolean evenNibbleCount = ( nnibbles == ( ( nnibbles >> 1 ) << 1 ) ); + + // #of bytes required to represent the long value (plus the header nibble). + final int nbytes = ( ( nnibbles +1 ) >> 1 ) + (evenNibbleCount?1:0); + + int nwritten = 0; + + if( evenNibbleCount ) { + + /* + * An even nibble count requires that we pad the low nibble of the + * first byte with zeros. + */ + + // header byte. low nibble is empty. + byte b = (byte) ( nbytes << 4 ); + + pbuf[nwritten++] = b; + + // remaining bytes containing the packed value. + for( int i=(nnibbles-2)<<2; i>=0; i-=8 ) { + + b = (byte) (0xff & (v >> i)); + + pbuf[nwritten++] = b; + + } + + } else { + + /* + * An odd nibble count means that we pack the first nibble of the + * long value into the low nibble of the header byte. In this case + * the first nibble will always be the low nibble of the first + * non-zero byte in the long value (the high nibble of that byte + * must be zero since there is an odd nibble count). + */ + + byte highByte = (byte) (0xff & (v >> ((nbytes-1)*8) )); + + byte b = (byte) ( ( nbytes << 4 ) | highByte ); + + pbuf[nwritten++] = b; + + for( int i=(nnibbles-3)<<2; i>=0; i-=8 ) { + + b = (byte) (0xff & (v >> i)); + + pbuf[nwritten++] = b; + + } + + } + + buf.put(pbuf,0,nwritten); + + return nwritten; + + } + + /** * Read a byte from an {@link InputStream} ala {@link DataInput#readByte()} * * @param is @@ -369,6 +519,41 @@ } /** + * Unpack a long value from the buffer position. + * + * @param buf + * The buffer + * @param off + * The offset from which the data will be unpacked. + * @return The long value. + */ + static final public long unpackLong(final byte[] buf, int off) { + int b = buf[off++]; + int nbytes; + long l; + if ((b & 0x80) != 0) { + // high bit is set. + nbytes = 8; // use 8 bytes (this one plus the next 7). + l = b & 0x7f; // clear the high bit - the rest of the byte is the + // start value. + } else { + // high bit is clear. 
+ nbytes = b >> 4; // nbytes is the upper nibble. (right shift one + // nibble). + l = b & 0x0f; // starting value is lower nibble (clear the upper + // nibble). + } + for (int i = 1; i < nbytes; i++) { + // Read the next byte. + b = buf[off++]; + // Shift the existing value one byte left and add into the low + // (unsigned) byte. + l = (l << 8) + (0xff & b); + } + return l; + } + + /** * Convenience method unpacks long and throws an exception if the value * exceeds {@link Integer#MAX_VALUE}. * Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -42,6 +42,7 @@ import junit.framework.TestCase2; import com.bigdata.btree.BytesUtil; import com.bigdata.btree.BytesUtil.UnsignedByteArrayComparator; +import com.bigdata.io.LongPacker; /** * Test suite for high level operations that build variable length _unsigned_ @@ -1500,6 +1501,31 @@ } +// /* +// * Packed long integers. +// * +// * These are decodable (no loss) but negative longs are not allowed. +// */ +// public void test_packLong() { +// +// final KeyBuilder keyBuilder = new KeyBuilder(); +// +// /* +// * TODO Do loop, appending into the buffer. Then do decode of each +// * packed value in turn. +// */ +// final long v = 1; +// final int off = keyBuilder.off(); +// keyBuilder.pack(1); +// final int nbytes = LongPacker.getByteLength(v); +// assertEquals("nbytes", off + nbytes, keyBuilder.off()); +// +// final long d = KeyBuilder.unpackLong(keyBuilder.array(), off, off +// + nbytes); +// assertEquals("decodedValue", v, d); +// +// } + /* * BigInteger. 
* @@ -1508,10 +1534,10 @@ public void test_BigInteger_ctor() { - Random r = new Random(); - - for(int i=0; i<10000; i++) { - + final Random r = new Random(); + + for (int i = 0; i < 10000; i++) { + final BigInteger v1 = BigInteger.valueOf(r.nextLong()); // Note: This DOES NOT work. @@ -2568,7 +2594,8 @@ for (BigDecimal i : a) { i = i.stripTrailingZeros(); - System.err.println("i=" + if(log.isInfoEnabled()) + log.info("i=" + i + "\t(scale=" + i.scale() Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/io/TestLongPacker.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/io/TestLongPacker.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata/src/test/com/bigdata/io/TestLongPacker.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -37,7 +37,6 @@ import junit.framework.TestCase; - /** * Test suite for packing and unpacking unsigned long integers using the * {@link DataInputBuffer} and the {@link ByteArrayBuffer}. @@ -79,9 +78,9 @@ throws IOException { - DataInputBuffer dib = new DataInputBuffer(packed); + final DataInputBuffer dib = new DataInputBuffer(packed); - long actual = dib.unpackLong(); + final long actual = dib.unpackLong(); assertEquals( "value", expected, actual ); @@ -208,46 +207,46 @@ { // Note: zero (0) is interpreted as being one nibble for our purposes. 
- assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0x0 ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0x0 ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0x1 ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0x2 ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0x7 ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0x8 ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0xe ) ); - assertEquals( "nibbles", 1, DataOutputBuffer.getNibbleLength( 0xf ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0x1 ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0x2 ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0x7 ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0x8 ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0xe ) ); + assertEquals( "nibbles", 1, LongPacker.getNibbleLength( 0xf ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x10 ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x11 ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x12 ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x17 ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x18 ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x1e ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x1f ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x7f ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0x8f ) ); - assertEquals( "nibbles", 2, DataOutputBuffer.getNibbleLength( 0xff ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x10 ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x11 ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x12 ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x17 ) ); + 
assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x18 ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x1e ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x1f ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x7f ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0x8f ) ); + assertEquals( "nibbles", 2, LongPacker.getNibbleLength( 0xff ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0x100 ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0x101 ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0x121 ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0x1ee ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0x1ff ) ); - assertEquals( "nibbles", 3, DataOutputBuffer.getNibbleLength( 0xfff ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0x100 ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0x101 ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0x121 ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0x1ee ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0x1ff ) ); + assertEquals( "nibbles", 3, LongPacker.getNibbleLength( 0xfff ) ); - assertEquals( "nibbles", 4, DataOutputBuffer.getNibbleLength( 0x1ff0 ) ); - assertEquals( "nibbles", 4, DataOutputBuffer.getNibbleLength( 0x7ff0 ) ); - assertEquals( "nibbles", 4, DataOutputBuffer.getNibbleLength( 0xfff0 ) ); - assertEquals( "nibbles", 4, DataOutputBuffer.getNibbleLength( 0xfff1 ) ); + assertEquals( "nibbles", 4, LongPacker.getNibbleLength( 0x1ff0 ) ); + assertEquals( "nibbles", 4, LongPacker.getNibbleLength( 0x7ff0 ) ); + assertEquals( "nibbles", 4, LongPacker.getNibbleLength( 0xfff0 ) ); + assertEquals( "nibbles", 4, LongPacker.getNibbleLength( 0xfff1 ) ); - assertEquals( "nibbles", 5, DataOutputBuffer.getNibbleLength( 0x12345 ) ); - assertEquals( "nibbles", 5, DataOutputBuffer.getNibbleLength( 
0x54321 ) ); + assertEquals( "nibbles", 5, LongPacker.getNibbleLength( 0x12345 ) ); + assertEquals( "nibbles", 5, LongPacker.getNibbleLength( 0x54321 ) ); - assertEquals( "nibbles", 6, DataOutputBuffer.getNibbleLength( 0x123456 ) ); - assertEquals( "nibbles", 6, DataOutputBuffer.getNibbleLength( 0x654321 ) ); + assertEquals( "nibbles", 6, LongPacker.getNibbleLength( 0x123456 ) ); + assertEquals( "nibbles", 6, LongPacker.getNibbleLength( 0x654321 ) ); - assertEquals( "nibbles", 7, DataOutputBuffer.getNibbleLength( 0x1234567 ) ); - assertEquals( "nibbles", 7, DataOutputBuffer.getNibbleLength( 0x7654321 ) ); + assertEquals( "nibbles", 7, LongPacker.getNibbleLength( 0x1234567 ) ); + assertEquals( "nibbles", 7, LongPacker.getNibbleLength( 0x7654321 ) ); /* * Note: At 8 nibbles we have 32 bits. When the high bit is one, this @@ -255,32 +254,32 @@ * will be interpreted as a negative integer and sign extended to a * negative long. */ - assertEquals( "nibbles", 8, DataOutputBuffer.getNibbleLength( 0x12345678L ) ); - assertEquals( "nibbles", 8, DataOutputBuffer.getNibbleLength( 0x87654321L ) ); + assertEquals( "nibbles", 8, LongPacker.getNibbleLength( 0x12345678L ) ); + assertEquals( "nibbles", 8, LongPacker.getNibbleLength( 0x87654321L ) ); - assertEquals( "nibbles", 9, DataOutputBuffer.getNibbleLength( 0x123456789L ) ); - assertEquals( "nibbles", 9, DataOutputBuffer.getNibbleLength( 0x987654321L ) ); + assertEquals( "nibbles", 9, LongPacker.getNibbleLength( 0x123456789L ) ); + assertEquals( "nibbles", 9, LongPacker.getNibbleLength( 0x987654321L ) ); - assertEquals( "nibbles", 10, DataOutputBuffer.getNibbleLength( 0x123456789aL ) ); - assertEquals( "nibbles", 10, DataOutputBuffer.getNibbleLength( 0xa987654321L ) ); + assertEquals( "nibbles", 10, LongPacker.getNibbleLength( 0x123456789aL ) ); + assertEquals( "nibbles", 10, LongPacker.getNibbleLength( 0xa987654321L ) ); - assertEquals( "nibbles", 11, DataOutputBuffer.getNibbleLength( 0x123456789abL ) ); - assertEquals( 
"nibbles", 11, DataOutputBuffer.getNibbleLength( 0xba987654321L ) ); + assertEquals( "nibbles", 11, LongPacker.getNibbleLength( 0x123456789abL ) ); + assertEquals( "nibbles", 11, LongPacker.getNibbleLength( 0xba987654321L ) ); - assertEquals( "nibbles", 12, DataOutputBuffer.getNibbleLength( 0x123456789abcL ) ); - assertEquals( "nibbles", 12, DataOutputBuffer.getNibbleLength( 0xcba987654321L ) ); + assertEquals( "nibbles", 12, LongPacker.getNibbleLength( 0x123456789abcL ) ); + assertEquals( "nibbles", 12, LongPacker.getNibbleLength( 0xcba987654321L ) ); - assertEquals( "nibbles", 13, DataOutputBuffer.getNibbleLength( 0x123456789abcdL ) ); - assertEquals( "nibbles", 13, DataOutputBuffer.getNibbleLength( 0xdcba987654321L ) ); + assertEquals( "nibbles", 13, LongPacker.getNibbleLength( 0x123456789abcdL ) ); + assertEquals( "nibbles", 13, LongPacker.getNibbleLength( 0xdcba987654321L ) ); - assertEquals( "nibbles", 14, DataOutputBuffer.getNibbleLength( 0x123456789abcdeL ) ); - assertEquals( "nibbles", 14, DataOutputBuffer.getNibbleLength( 0xedcba987654321L ) ); + assertEquals( "nibbles", 14, LongPacker.getNibbleLength( 0x123456789abcdeL ) ); + assertEquals( "nibbles", 14, LongPacker.getNibbleLength( 0xedcba987654321L ) ); - assertEquals( "nibbles", 15, DataOutputBuffer.getNibbleLength( 0x123456789abcdefL ) ); - assertEquals( "nibbles", 15, DataOutputBuffer.getNibbleLength( 0xfedcba987654321L ) ); + assertEquals( "nibbles", 15, LongPacker.getNibbleLength( 0x123456789abcdefL ) ); + assertEquals( "nibbles", 15, LongPacker.getNibbleLength( 0xfedcba987654321L ) ); - assertEquals( "nibbles", 16, DataOutputBuffer.getNibbleLength( 0x1234567890abcdefL ) ); - assertEquals( "nibbles", 16, DataOutputBuffer.getNibbleLength( 0xfedcba0987654321L ) ); + assertEquals( "nibbles", 16, LongPacker.getNibbleLength( 0x1234567890abcdefL ) ); + assertEquals( "nibbles", 16, LongPacker.getNibbleLength( 0xfedcba0987654321L ) ); } Modified: 
branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -43,6 +43,7 @@ import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.io.LongPacker; import com.bigdata.rdf.internal.impl.AbstractIV; import com.bigdata.rdf.internal.impl.AbstractInlineIV; import com.bigdata.rdf.internal.impl.BlobIV; @@ -71,6 +72,7 @@ import com.bigdata.rdf.internal.impl.uri.VocabURIShortIV; import com.bigdata.rdf.lexicon.BlobsIndexHelper; import com.bigdata.rdf.lexicon.ITermIndexCodes; +import com.bigdata.rdf.lexicon.TermIdEncoder; import com.bigdata.rdf.model.BigdataBNode; import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataURI; @@ -91,6 +93,18 @@ // private static final transient Logger log = Logger.getLogger(IVUtility.class); + /** + * When <code>true</code>, we will pack term identifiers using + * {@link LongPacker}. + * <p> + * Note: This option requires that term identifiers are non-negative. That + * is not currently true for the cluster due to the {@link TermIdEncoder}. + * + * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/529"> + * Improve load performance </a> + */ + public static final boolean PACK_TIDS = false; + public static boolean equals(final IV iv1, final IV iv2) { // same IV or both null @@ -386,7 +400,12 @@ */ // decode the term identifier. 
- final long termId = KeyBuilder.decodeLong(key, o); + final long termId; + if(PACK_TIDS) { + termId = LongPacker.unpackLong(key, o); + } else { + termId = KeyBuilder.decodeLong(key, o); + } if (termId == TermId.NULL) { if(nullIsNullRef) { Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/TermId.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/TermId.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/impl/TermId.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -29,9 +29,12 @@ import org.openrdf.model.Value; import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.io.LongPacker; import com.bigdata.rawstore.Bytes; import com.bigdata.rdf.internal.DTE; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.model.BigdataValue; @@ -228,15 +231,23 @@ return (int) (termId ^ (termId >>> 32)); } - + public int byteLength() { - return 1 + Bytes.SIZEOF_LONG; + if (IVUtility.PACK_TIDS) { + + return 1 + LongPacker.getByteLength(termId); + + } else { + + return 1 + Bytes.SIZEOF_LONG; + + } } @Override - public int _compareTo(IV o) { + public int _compareTo(final IV o) { final long termId2 = ((TermId<?>) o).termId; @@ -250,7 +261,17 @@ // First emit the flags byte. keyBuilder.appendSigned(flags()); - keyBuilder.append(getTermId()); + if (IVUtility.PACK_TIDS) { + + // variable length encoding + ((KeyBuilder) keyBuilder).pack(termId); + + } else { + + // fixed length encoding. 
+ keyBuilder.append(termId); + + } return keyBuilder; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -135,7 +135,7 @@ public class LexiconRelation extends AbstractRelation<BigdataValue> implements IDatatypeURIResolver { - final static Logger log = Logger.getLogger(LexiconRelation.class); + private final static Logger log = Logger.getLogger(LexiconRelation.class); private final Set<String> indexNames; Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/Term2IdTupleSerializer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/Term2IdTupleSerializer.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/Term2IdTupleSerializer.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -97,7 +97,10 @@ */ // super(keyBuilderFactory); super(keyBuilderFactory, getDefaultLeafKeysCoder(), - new com.bigdata.btree.raba.codec.FixedLengthValueRabaCoder(9)); + IVUtility.PACK_TIDS // + ? 
new com.bigdata.btree.raba.codec.SimpleRabaCoder()// + : new com.bigdata.btree.raba.codec.FixedLengthValueRabaCoder(9)// + ); } Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/TestTermIV.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/TestTermIV.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/internal/TestTermIV.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -164,16 +164,28 @@ public void test_TermId_URI_Counter_MINUS_ONE() { - doTermIVTest(VTE.URI,-1L); + if (!IVUtility.PACK_TIDS) + doTermIVTest(VTE.URI, -1L); } public void test_TermId_URI_Counter_MIN_VALUE() { - doTermIVTest(VTE.URI, Long.MIN_VALUE); + if (!IVUtility.PACK_TIDS) + doTermIVTest(VTE.URI, Long.MIN_VALUE); } + /* + * Note: This is hitting odd fence posts having to do with equality and + * mock IVs. + */ +// public void test_TermId_URI_Counter_ZERO() { +// +// doTermIVTest(VTE.URI, 0); +// +// } + public void test_TermId_URI_Counter_MAX_VALUE() { doTermIVTest(VTE.URI, Long.MAX_VALUE); @@ -190,7 +202,8 @@ for (VTE vte : VTE.values()) { // 64 bit random term identifier. - final long termId = r.nextLong(); + final long termId = IVUtility.PACK_TIDS ? 
Math.abs(r.nextLong()) + : r.nextLong(); final TermId<?> v = new TermId<BigdataValue>(vte, termId); Modified: branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestId2TermTupleSerializer.java =================================================================== --- branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestId2TermTupleSerializer.java 2012-04-12 11:06:36 UTC (rev 6278) +++ branches/BIGDATA_RELEASE_1_2_0/bigdata-rdf/src/test/com/bigdata/rdf/lexicon/TestId2TermTupleSerializer.java 2012-04-12 15:27:30 UTC (rev 6279) @@ -34,6 +34,7 @@ import junit.framework.TestCase2; import com.bigdata.btree.BytesUtil; import com.bigdata.io.SerializerUtil; +import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.internal.impl.TermId; import com.bigdata.rdf.model.BigdataURI; @@ -67,9 +68,16 @@ final Id2TermTupleSerializer fixture = new Id2TermTupleSerializer( namespace, BigdataValueFactoryImpl.getInstance(namespace)); - final TermId<?> id1 = new TermId<BigdataURI>(VTE.URI, -1); - final TermId<?> id2 = new TermId<BigdataURI>(VTE.URI, 0); - final TermId<?> id3 = new TermId<BigdataURI>(VTE.URI, 1); + final TermId<?> id1, id2, id3; + if (IVUtility.PACK_TIDS) { + id1 = new TermId<BigdataURI>(VTE.URI, 0); + id2 = new TermId<BigdataURI>(VTE.URI, 1); + id3 = new TermId<BigdataURI>(VTE.URI, 2); + } else { + id1 = new TermId<BigdataURI>(VTE.URI, -1); + id2 = new TermId<BigdataURI>(VTE.URI, 0); + id3 = new TermId<BigdataURI>(VTE.URI, 1); + } final byte[] k1 = fixture.id2key(id1); final byte[] k2 = fixture.id2key(id2); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |