From: <tho...@us...> - 2010-07-26 14:24:25
|
Revision: 3288 http://bigdata.svn.sourceforge.net/bigdata/?rev=3288&view=rev Author: thompsonbry Date: 2010-07-26 14:24:18 +0000 (Mon, 26 Jul 2010) Log Message: ----------- Working through xsd:decimal issues with MartynC. Moved much of the byteLength() logic for xsd:integer and xsd:decimal onto KeyBuilder. Modified Paths: -------------- branches/LEXICON_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java branches/LEXICON_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalIV.java branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerIV.java Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-07-26 02:16:07 UTC (rev 3287) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-07-26 14:24:18 UTC (rev 3288) @@ -247,7 +247,7 @@ * {@link #buf buffer} may be grown by this operation but it will not be * truncated. * <p> - * This operation is equivilent to + * This operation is equivalent to * * <pre> * ensureCapacity(this.len + len) @@ -945,28 +945,57 @@ } - // The encoding ot a BigDecimal requires the expression - // of scale and length - // BigDecimal.scale indicates the precision of the number, where - // '3' is three decimal places and '-3' rounded to '000' - // BigDecimal.precision() is the number of unscaled digits - // therefore the precision - scale is an expression of the - // exponent of the normalised number. - // This means that the exponent could be zero or negative so the sign of the - // number cannot be indicated by adding to the exponent. - // Instead an explicit sign byte,'0' or '1' is used. - // The actual BigDecimal serialization uses the String conversions - // supported by BigDecimal, less the '-' sign character if applicable. - // The length of this data is terminated by a zero byte. - // - // The variable encoding of BigNumbers requires this String representation - // and negative representations are further encoded using flipDigits - // for the equivalent of 2s compliment negative representation. - + /** + * Return the #of bytes in the unsigned byte[] representation of the + * {@link BigInteger} value. + * + * @param value + * The {@link BigInteger} value. + * + * @return The byte length of its unsigned byte[] representation. + */ + static public int byteLength(final BigInteger value) { + + return 2/* runLength */+ (value.bitLength() / 8 + 1)/* data */; + + } + + /** + * {@inheritDoc} + * <p> + * Note: Precision is NOT preserved by this encoding. + * <h2>Implementation details</h2> + * The encoding to a BigDecimal requires the expression of scale and length + * {@link BigDecimal#scale()} indicates the precision of the number, where + * '3' is three decimal places and '-3' rounded to '000' + * {@link BigDecimal#precision()} is the number of unscaled digits therefore + * <code>precision - scale</code> is an expression of the exponent of the + * normalized number. This means that the exponent could be zero or negative + * so the sign of the number cannot be indicated by adding to the exponent. + * Instead an explicit sign byte,'0' or '1' is used. The actual + * {@link BigDecimal} serialization uses the {@link String} conversions + * supported by {@link BigDecimal}, less the '-' sign character if + * applicable. The length of this data is terminated by a trailing byte. The + * value of that byte depends on the sign of the original {@link BigDecimal} + * and is used to impose the correct sort order on negative + * {@link BigDecimal} values which differ only in the digits in the decimal + * portion. + *<p> + * The variable encoding of BigNumbers requires this String representation + * and negative representations are further encoded using + * {@link #flipDigits(String)} for the equivalent of 2s compliment negative + * representation. + * + * @todo document the handling of trailing zeros. these are normalized out + * as they reflect a quality of precision which is not preserved by + * the corresponding sort key. + * + * @see #decodeBigDecimal(int, byte[]) + */ public KeyBuilder append(final BigDecimal d) { - int sign = d.signum(); - int precision = d.precision(); - int scale = d.scale(); + final int sign = d.signum(); + final int precision = d.precision(); + final int scale = d.scale(); int exponent = precision - scale; if (sign == -1) { exponent = -exponent; @@ -975,16 +1004,43 @@ append((byte) sign); append(exponent); + // Note: coded as digits String unscaledStr = d.unscaledValue().toString(); if (sign == -1) { unscaledStr = flipDigits(unscaledStr); } appendASCII(unscaledStr); // the unscaled BigInteger representation - append((byte) 0); + // Note: uses unsigned 255 if negative and unsigned 0 if positive. + append(sign == -1 ? (byte) Byte.MAX_VALUE: (byte) 0); return this; } + /** + * Return the #of bytes in the unsigned byte[] representation of the + * {@link BigDecimal} value. + * + * @param value + * The {@link BigDecimal} value. + * + * @return The byte length of its unsigned byte[] representation. + */ + static public int byteLength(final BigDecimal value) { + + // FIXME Make sure to normalize trailing zeros first if necessary. + final int dataLen = value.unscaledValue().toString().length(); + + final int byteLength = + + 1 /* sign */ + + 4 /* exponent */ + + dataLen /* data */ + + 1 /* termination byte */ + ; + + return byteLength; + + } + /* * static helper methods. */ @@ -1420,47 +1476,73 @@ } - private static final byte eos = decodeByte(0); - private static final byte negSign = decodeByte(-1); - - // FIXME decodeBigDecimal(int, byte[]) + /** + * Decodes a {@link BigDecimal} key, returning a byte[] which may be used to + * construct a {@link BigDecimal} having the decoded value. + * + * The number of bytes consumed by the key component is + * <code>2 + runLength</2>. The + * <code>2</code> is a fixed length field coding the signum of the value and + * its runLength. The length of the returned array is the runLength of the + * variable length portion of the value. + * + * This method may be used to scan through a key containing + * {@link BigDecimal} components. + * + * @param offset + * The offset of the start of the {@link BigDecimal} in the key. + * @param key + * The key. + * @return The byte[] to be passed to {@link BigDecimal#BigInteger(byte[])}. + * + * @todo update javadoc + * + * FIXME We need a version which has all the metadata to support scanning + * through a key as well as one that does a simple decode. + */ static public BigDecimal decodeBigDecimal(final int offset, final byte[] key) { int curs = offset; - byte sign = key[curs++]; + final byte sign = key[curs++]; int exponent = decodeInt(key, curs); - boolean neg = sign == negSign; + final boolean neg = sign == negSign; if (neg) { exponent = -exponent; } curs += 4; int len = 0; - for (int i = curs; key[i] != eos; i++) len++; + for (int i = curs; key[i] != (neg ? eos2 : eos); i++) len++; String unscaledStr = decodeASCII(key, curs, len); if (neg) { unscaledStr = flipDigits(unscaledStr); } - BigInteger unscaled = new BigInteger(unscaledStr); + final BigInteger unscaled = new BigInteger(unscaledStr); - int precision = len; - int scale = precision - exponent - (neg ? 1 : 0); + final int precision = len; + final int scale = precision - exponent - (neg ? 1 : 0); - BigDecimal ret = new BigDecimal(unscaled, scale); + final BigDecimal ret = new BigDecimal(unscaled, scale); return ret; // relative scale adjustment } - static final char[] flipMap = {'0', '1', '2', '3', '4', + private static final byte eos = decodeByte(0); + private static final byte eos2 = decodeByte(Byte.MAX_VALUE); + private static final byte negSign = decodeByte(-1); + + private static final char[] flipMap = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; - - /* - * Flip numbers such that 0/9,1/8,2/7,3/6,4/5 + + /** + * Flip numbers such that <code>0/9,1/8,2/7,3/6,4/5</code> - this is the + * equivalent of a two-complement representation for the base 10 character + * digits. */ - static private String flipDigits(String str) { - char[] chrs = str.toCharArray(); + static private String flipDigits(final String str) { + final char[] chrs = str.toCharArray(); for (int i = 0; i < chrs.length; i++) { - int flip = '9' - chrs[i]; + final int flip = '9' - chrs[i]; if (flip >= 0 && flip < 10) { chrs[i] = flipMap[flip]; } Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java 2010-07-26 02:16:07 UTC (rev 3287) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java 2010-07-26 14:24:18 UTC (rev 3288) @@ -1995,8 +1995,52 @@ } } - + /** + * Test with positive and negative {@link BigInteger}s having a common + * prefix with varying digits after the prefix. + */ + public void test_BigInteger_sortOrder() { + + final BigInteger p1 = new BigInteger("15"); + final BigInteger p2 = new BigInteger("151"); + final BigInteger m1 = new BigInteger("-15"); + final BigInteger m2 = new BigInteger("-151"); + + doEncodeDecodeTest(p1); + doEncodeDecodeTest(p2); + doEncodeDecodeTest(m1); + doEncodeDecodeTest(m2); + + doLTTest(p1, p2); // 15 LT 151 + doLTTest(m1, p1); // -15 LT 15 + doLTTest(m2, m1); // -151 LT -15 + + } + + /** + * Test with positive and negative {@link BigDecimal}s having varying + * digits after the decimals. + */ + public void test_BigDecimal_negativeSortOrder() { + + final BigDecimal p1 = new BigDecimal("1.5"); + final BigDecimal p2 = new BigDecimal("1.51"); + final BigDecimal m1 = new BigDecimal("-1.5"); + final BigDecimal m2 = new BigDecimal("-1.51"); + + doEncodeDecodeTest(p1); + doEncodeDecodeTest(p2); + doEncodeDecodeTest(m1); + doEncodeDecodeTest(m2); + + doLTTest(p1, p2); // 1.5 LT 1.51 + doLTTest(m1, p1); // -1.5 LT 1.5 + doLTTest(m2, m1); // -1.51 LT -1.5 + + } + + /** * Stress test with random byte[]s from which we then construct * {@link BigInteger}s. */ @@ -2074,12 +2118,8 @@ /** * Stress test with random byte[]s from which we then construct * {@link BigDecimal}s. - * - * FIXME: At present the comparisons fail proably due to a failure to - * understand the limits of the BigDecimal representations - so this - * test has been deactivated by name prefix. */ - public void badTest_BigDecimal_stress_byteArray_values() { + public void test_BigDecimal_stress_byteArray_values() { final Random r = new Random(); @@ -2211,48 +2251,48 @@ // return key; // // } -// -// /** -// * Normalize the {@link BigDecimal} by setting the scale such that there are -// * no digits before the decimal point. -// * -// * FIXME This fails for "0" and "0.0". The trailing .0 is considered a -// * significant digit and is not being stripped. We need to also strip -// * trailing zeros which are significant. -// * -// * <pre> -// * i=0 (scale=0,prec=1) : 0, scale=0, precision=1, unscaled=0, unscaled_byte[]=[0] -// * i=0.0 (scale=1,prec=1) : 0.0, scale=1, precision=1, unscaled=0, unscaled_byte[]=[0] -// * </pre> -// */ -// private BigDecimal normalizeBigDecimal(final BigDecimal i) { -// -// return i.stripTrailingZeros(); -// -// } -// -// /** -// * Dumps out interesting bits of the {@link BigDecimal} state. -// * -// * @return The dump. -// */ -// private String dumpBigDecimal(final BigDecimal i) { -// -// final BigInteger unscaled = i.unscaledValue(); -// -// final String msg = i.toString() + ", scale=" + i.scale() -// + // -// ", precision=" + i.precision() -// + // -// ", unscaled=" + unscaled -// + // -// ", unscaled_byte[]=" -// + BytesUtil.toString(unscaled.toByteArray())// -// ; -// -// return msg; -// -// } + + /** + * Normalize the {@link BigDecimal} by setting the scale such that there are + * no digits before the decimal point. + * + * FIXME This fails for "0" and "0.0". The trailing .0 is considered a + * significant digit and is not being stripped. We need to also strip + * trailing zeros which are significant. + * + * <pre> + * i=0 (scale=0,prec=1) : 0, scale=0, precision=1, unscaled=0, unscaled_byte[]=[0] + * i=0.0 (scale=1,prec=1) : 0.0, scale=1, precision=1, unscaled=0, unscaled_byte[]=[0] + * </pre> + */ + private BigDecimal normalizeBigDecimal(final BigDecimal i) { + + return i.stripTrailingZeros(); + + } + + /** + * Dumps out interesting bits of the {@link BigDecimal} state. + * + * @return The dump. + */ + private String dumpBigDecimal(final BigDecimal i) { + + final BigInteger unscaled = i.unscaledValue(); + + final String msg = i.toString() + ", scale=" + i.scale() + + // + ", precision=" + i.precision() + + // + ", unscaled=" + unscaled + + // + ", unscaled_byte[]=" + + BytesUtil.toString(unscaled.toByteArray())// + ; + + return msg; + + } // // /** // * Note: must have normalized representation of the BigDecimal to do @@ -2308,101 +2348,101 @@ // // } // -// private enum CompareEnum { -// -// LT(-1), EQ(0), GT(1); -// -// private CompareEnum(final int ret) { -// this.ret = ret; -// } -// -// private int ret; -// -// static public CompareEnum valueOf(final int ret) { -// if(ret<0) return LT; -// if(ret>0) return GT; -// return EQ; -// } -// -// } -// -// protected void doCompareTest(BigDecimal i1, BigDecimal i2, final CompareEnum cmp) { -// -// i1 = normalizeBigDecimal(i1); -// i2 = normalizeBigDecimal(i2); -// -// final byte[] k1 = encodeBigDecimal(i1); -// -// final byte[] k2 = encodeBigDecimal(i2); -// -// final int ret = BytesUtil.compareBytes(k1, k2); -// -// final CompareEnum cmp2 = CompareEnum.valueOf(ret); -// -// if (cmp2 != cmp) { -// -// fail("BigDecimal" + // -// "\ni1=" + dumpBigDecimal(i1) + // -// "\ni2=" + dumpBigDecimal(i2) + // -// "\nk1=" + BytesUtil.toString(k1) + // -// "\nk2=" + BytesUtil.toString(k2) + // -// "\nret=" + cmp2 +", but expected="+cmp// -// ); -// -// } -// -// } -// -// /** -// * Test encode/decode for various values of zero. -// */ -// public void test_BigDecimal0() { -// -// final BigDecimal[] a = new BigDecimal[] { -// new BigDecimal("0"), // scale=0, precision=1 -// new BigDecimal("0."), // scale=0, precision=1 -// new BigDecimal("0.0"), // scale=1, precision=1 -// new BigDecimal("0.00"), // scale=2, precision=1 -// new BigDecimal("00.0"), // scale=1, precision=1 -// new BigDecimal("00.00"),// scale=2, precision=1 -// new BigDecimal(".0"), // scale=1, precision=1 -// // NB: The precision is the #of decimal digits in the unscaled value. -// // NB: scaled := unscaled * (10 ^ -scale) -// new BigDecimal(".010"), // scale=3, precision=2 -// new BigDecimal(".01"), // scale=2, precision=1 -// new BigDecimal(".1"), // scale=1, precision=1 -// new BigDecimal("1."), // scale=0, precision=1 -// new BigDecimal("10."), // scale=0, precision=2 -// new BigDecimal("10.0"), // scale=1, precision=3 -// new BigDecimal("010.0"), // scale=1, precision=3 -// new BigDecimal("0010.00"), // scale=2, precision=4 -// // @todo Test with cases where scale is negative (large powers of 10). -// }; -// -// for (BigDecimal i : a) { -// i = i.stripTrailingZeros(); -// System.err.println("i=" -// + i -// + "\t(scale=" -// + i.scale() -// + ",prec=" -// + i.precision() -// + ") : " -// + dumpBigDecimal(i) -//// i.scaleByPowerOfTen(i.scale()- i.precision())) -// ); -// } -// -// for (BigDecimal i : a) { -// doEncodeDecodeTest(i); -// } -// -// for (int i = 0; i < a.length; i++) { -// for (int j = 0; j < a.length; j++) { -// doCompareTest(a[i], a[j], CompareEnum.EQ); -// } -// } -// -// } + private enum CompareEnum { + + LT(-1), EQ(0), GT(1); + + private CompareEnum(final int ret) { + this.ret = ret; + } + + private int ret; + + static public CompareEnum valueOf(final int ret) { + if(ret<0) return LT; + if(ret>0) return GT; + return EQ; + } + + } + + protected void doCompareTest(BigDecimal i1, BigDecimal i2, final CompareEnum cmp) { + + i1 = normalizeBigDecimal(i1); + i2 = normalizeBigDecimal(i2); + + final byte[] k1 = encodeBigDecimal(i1); + + final byte[] k2 = encodeBigDecimal(i2); + + final int ret = BytesUtil.compareBytes(k1, k2); + + final CompareEnum cmp2 = CompareEnum.valueOf(ret); + + if (cmp2 != cmp) { + + fail("BigDecimal" + // + "\ni1=" + dumpBigDecimal(i1) + // + "\ni2=" + dumpBigDecimal(i2) + // + "\nk1=" + BytesUtil.toString(k1) + // + "\nk2=" + BytesUtil.toString(k2) + // + "\nret=" + cmp2 +", but expected="+cmp// + ); + + } + + } + + /** + * Test encode/decode for various values of zero. + */ + public void test_BigDecimal0() { + + final BigDecimal[] a = new BigDecimal[] { + new BigDecimal("0"), // scale=0, precision=1 + new BigDecimal("0."), // scale=0, precision=1 + new BigDecimal("0.0"), // scale=1, precision=1 + new BigDecimal("0.00"), // scale=2, precision=1 + new BigDecimal("00.0"), // scale=1, precision=1 + new BigDecimal("00.00"),// scale=2, precision=1 + new BigDecimal(".0"), // scale=1, precision=1 + // NB: The precision is the #of decimal digits in the unscaled value. + // NB: scaled := unscaled * (10 ^ -scale) + new BigDecimal(".010"), // scale=3, precision=2 + new BigDecimal(".01"), // scale=2, precision=1 + new BigDecimal(".1"), // scale=1, precision=1 + new BigDecimal("1."), // scale=0, precision=1 + new BigDecimal("10."), // scale=0, precision=2 + new BigDecimal("10.0"), // scale=1, precision=3 + new BigDecimal("010.0"), // scale=1, precision=3 + new BigDecimal("0010.00"), // scale=2, precision=4 + // @todo Test with cases where scale is negative (large powers of 10). + }; + + for (BigDecimal i : a) { + i = i.stripTrailingZeros(); + System.err.println("i=" + + i + + "\t(scale=" + + i.scale() + + ",prec=" + + i.precision() + + ") : " + + dumpBigDecimal(i) +// i.scaleByPowerOfTen(i.scale()- i.precision())) + ); + } + + for (BigDecimal i : a) { + doEncodeDecodeTest(i); + } + + for (int i = 0; i < a.length; i++) { + for (int j = 0; j < a.length; j++) { + doCompareTest(a[i], a[j], CompareEnum.EQ); + } + } + + } } Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalIV.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalIV.java 2010-07-26 02:16:07 UTC (rev 3287) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalIV.java 2010-07-26 14:24:18 UTC (rev 3288) @@ -27,7 +27,7 @@ import java.math.BigDecimal; import java.math.BigInteger; -import com.bigdata.rawstore.Bytes; +import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataValueFactory; @@ -137,23 +137,18 @@ } public int byteLength() { + if (byteLength == 0) { /* * Cache the byteLength if not yet set. */ - int dataLen = value.unscaledValue().toString().length(); - - byteLength = - 1 /* flags */ - + 1 /* sign */ - + 4 /* exponent */ - + dataLen - + 1 /* data and null termination */; + byteLength = 1 /* flags */ + KeyBuilder.byteLength(value); } return byteLength; + } @Override Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerIV.java =================================================================== --- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerIV.java 2010-07-26 02:16:07 UTC (rev 3287) +++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerIV.java 2010-07-26 14:24:18 UTC (rev 3288) @@ -27,6 +27,7 @@ import java.math.BigDecimal; import java.math.BigInteger; +import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.rdf.model.BigdataLiteral; import com.bigdata.rdf.model.BigdataValueFactory; @@ -143,7 +144,7 @@ * Cache the byteLength if not yet set. */ - byteLength = 1 /* prefix */+ 2/* runLength */+ (value.bitLength() / 8 + 1)/* data */; + byteLength = 1 /* prefix */+ KeyBuilder.byteLength(value); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |