|
From: <tho...@us...> - 2010-07-26 14:24:25
|
Revision: 3288
http://bigdata.svn.sourceforge.net/bigdata/?rev=3288&view=rev
Author: thompsonbry
Date: 2010-07-26 14:24:18 +0000 (Mon, 26 Jul 2010)
Log Message:
-----------
Working through xsd:decimal issues with MartynC.
Moved much of the byteLength() logic for xsd:integer and xsd:decimal onto KeyBuilder.
Modified Paths:
--------------
branches/LEXICON_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java
branches/LEXICON_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java
branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalIV.java
branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerIV.java
Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java
===================================================================
--- branches/LEXICON_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-07-26 02:16:07 UTC (rev 3287)
+++ branches/LEXICON_REFACTOR_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-07-26 14:24:18 UTC (rev 3288)
@@ -247,7 +247,7 @@
* {@link #buf buffer} may be grown by this operation but it will not be
* truncated.
* <p>
- * This operation is equivilent to
+ * This operation is equivalent to
*
* <pre>
* ensureCapacity(this.len + len)
@@ -945,28 +945,57 @@
}
- // The encoding ot a BigDecimal requires the expression
- // of scale and length
- // BigDecimal.scale indicates the precision of the number, where
- // '3' is three decimal places and '-3' rounded to '000'
- // BigDecimal.precision() is the number of unscaled digits
- // therefore the precision - scale is an expression of the
- // exponent of the normalised number.
- // This means that the exponent could be zero or negative so the sign of the
- // number cannot be indicated by adding to the exponent.
- // Instead an explicit sign byte,'0' or '1' is used.
- // The actual BigDecimal serialization uses the String conversions
- // supported by BigDecimal, less the '-' sign character if applicable.
- // The length of this data is terminated by a zero byte.
- //
- // The variable encoding of BigNumbers requires this String representation
- // and negative representations are further encoded using flipDigits
- // for the equivalent of 2s compliment negative representation.
-
+ /**
+ * Return the #of bytes in the unsigned byte[] representation of the
+ * {@link BigInteger} value.
+ *
+ * @param value
+ * The {@link BigInteger} value.
+ *
+ * @return The byte length of its unsigned byte[] representation.
+ */
+ static public int byteLength(final BigInteger value) {
+
+ return 2/* runLength */+ (value.bitLength() / 8 + 1)/* data */;
+
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Note: Precision is NOT preserved by this encoding.
+ * <h2>Implementation details</h2>
+ * The encoding to a BigDecimal requires the expression of scale and length
+ * {@link BigDecimal#scale()} indicates the precision of the number, where
+ * '3' is three decimal places and '-3' rounded to '000'
+ * {@link BigDecimal#precision()} is the number of unscaled digits therefore
+ * <code>precision - scale</code> is an expression of the exponent of the
+ * normalized number. This means that the exponent could be zero or negative
+ * so the sign of the number cannot be indicated by adding to the exponent.
+ * Instead an explicit sign byte,'0' or '1' is used. The actual
+ * {@link BigDecimal} serialization uses the {@link String} conversions
+ * supported by {@link BigDecimal}, less the '-' sign character if
+ * applicable. The length of this data is terminated by a trailing byte. The
+ * value of that byte depends on the sign of the original {@link BigDecimal}
+ * and is used to impose the correct sort order on negative
+ * {@link BigDecimal} values which differ only in the digits in the decimal
+ * portion.
+ *<p>
+ * The variable encoding of BigNumbers requires this String representation
+ * and negative representations are further encoded using
+ * {@link #flipDigits(String)} for the equivalent of 2s compliment negative
+ * representation.
+ *
+ * @todo document the handling of trailing zeros. these are normalized out
+ * as they reflect a quality of precision which is not preserved by
+ * the corresponding sort key.
+ *
+ * @see #decodeBigDecimal(int, byte[])
+ */
public KeyBuilder append(final BigDecimal d) {
- int sign = d.signum();
- int precision = d.precision();
- int scale = d.scale();
+ final int sign = d.signum();
+ final int precision = d.precision();
+ final int scale = d.scale();
int exponent = precision - scale;
if (sign == -1) {
exponent = -exponent;
@@ -975,16 +1004,43 @@
append((byte) sign);
append(exponent);
+ // Note: coded as digits
String unscaledStr = d.unscaledValue().toString();
if (sign == -1) {
unscaledStr = flipDigits(unscaledStr);
}
appendASCII(unscaledStr); // the unscaled BigInteger representation
- append((byte) 0);
+ // Note: uses unsigned 255 if negative and unsigned 0 if positive.
+ append(sign == -1 ? (byte) Byte.MAX_VALUE: (byte) 0);
return this;
}
+ /**
+ * Return the #of bytes in the unsigned byte[] representation of the
+ * {@link BigDecimal} value.
+ *
+ * @param value
+ * The {@link BigDecimal} value.
+ *
+ * @return The byte length of its unsigned byte[] representation.
+ */
+ static public int byteLength(final BigDecimal value) {
+
+ // FIXME Make sure to normalize trailing zeros first if necessary.
+ final int dataLen = value.unscaledValue().toString().length();
+
+ final int byteLength =
+ + 1 /* sign */
+ + 4 /* exponent */
+ + dataLen /* data */
+ + 1 /* termination byte */
+ ;
+
+ return byteLength;
+
+ }
+
/*
* static helper methods.
*/
@@ -1420,47 +1476,73 @@
}
- private static final byte eos = decodeByte(0);
- private static final byte negSign = decodeByte(-1);
-
- // FIXME decodeBigDecimal(int, byte[])
+ /**
+ * Decodes a {@link BigDecimal} key, returning a byte[] which may be used to
+ * construct a {@link BigDecimal} having the decoded value.
+ *
+ * The number of bytes consumed by the key component is
+ * <code>2 + runLength</2>. The
+ * <code>2</code> is a fixed length field coding the signum of the value and
+ * its runLength. The length of the returned array is the runLength of the
+ * variable length portion of the value.
+ *
+ * This method may be used to scan through a key containing
+ * {@link BigDecimal} components.
+ *
+ * @param offset
+ * The offset of the start of the {@link BigDecimal} in the key.
+ * @param key
+ * The key.
+ * @return The byte[] to be passed to {@link BigDecimal#BigInteger(byte[])}.
+ *
+ * @todo update javadoc
+ *
+ * FIXME We need a version which has all the metadata to support scanning
+ * through a key as well as one that does a simple decode.
+ */
static public BigDecimal decodeBigDecimal(final int offset, final byte[] key) {
int curs = offset;
- byte sign = key[curs++];
+ final byte sign = key[curs++];
int exponent = decodeInt(key, curs);
- boolean neg = sign == negSign;
+ final boolean neg = sign == negSign;
if (neg) {
exponent = -exponent;
}
curs += 4;
int len = 0;
- for (int i = curs; key[i] != eos; i++) len++;
+ for (int i = curs; key[i] != (neg ? eos2 : eos); i++) len++;
String unscaledStr = decodeASCII(key, curs, len);
if (neg) {
unscaledStr = flipDigits(unscaledStr);
}
- BigInteger unscaled = new BigInteger(unscaledStr);
+ final BigInteger unscaled = new BigInteger(unscaledStr);
- int precision = len;
- int scale = precision - exponent - (neg ? 1 : 0);
+ final int precision = len;
+ final int scale = precision - exponent - (neg ? 1 : 0);
- BigDecimal ret = new BigDecimal(unscaled, scale);
+ final BigDecimal ret = new BigDecimal(unscaled, scale);
return ret; // relative scale adjustment
}
- static final char[] flipMap = {'0', '1', '2', '3', '4',
+ private static final byte eos = decodeByte(0);
+ private static final byte eos2 = decodeByte(Byte.MAX_VALUE);
+ private static final byte negSign = decodeByte(-1);
+
+ private static final char[] flipMap = {'0', '1', '2', '3', '4',
'5', '6', '7', '8', '9'
};
-
- /*
- * Flip numbers such that 0/9,1/8,2/7,3/6,4/5
+
+ /**
+ * Flip numbers such that <code>0/9,1/8,2/7,3/6,4/5</code> - this is the
+ * equivalent of a two-complement representation for the base 10 character
+ * digits.
*/
- static private String flipDigits(String str) {
- char[] chrs = str.toCharArray();
+ static private String flipDigits(final String str) {
+ final char[] chrs = str.toCharArray();
for (int i = 0; i < chrs.length; i++) {
- int flip = '9' - chrs[i];
+ final int flip = '9' - chrs[i];
if (flip >= 0 && flip < 10) {
chrs[i] = flipMap[flip];
}
Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java
===================================================================
--- branches/LEXICON_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java 2010-07-26 02:16:07 UTC (rev 3287)
+++ branches/LEXICON_REFACTOR_BRANCH/bigdata/src/test/com/bigdata/btree/keys/TestKeyBuilder.java 2010-07-26 14:24:18 UTC (rev 3288)
@@ -1995,8 +1995,52 @@
}
}
-
+
/**
+ * Test with positive and negative {@link BigInteger}s having a common
+ * prefix with varying digits after the prefix.
+ */
+ public void test_BigInteger_sortOrder() {
+
+ final BigInteger p1 = new BigInteger("15");
+ final BigInteger p2 = new BigInteger("151");
+ final BigInteger m1 = new BigInteger("-15");
+ final BigInteger m2 = new BigInteger("-151");
+
+ doEncodeDecodeTest(p1);
+ doEncodeDecodeTest(p2);
+ doEncodeDecodeTest(m1);
+ doEncodeDecodeTest(m2);
+
+ doLTTest(p1, p2); // 15 LT 151
+ doLTTest(m1, p1); // -15 LT 15
+ doLTTest(m2, m1); // -151 LT -15
+
+ }
+
+ /**
+ * Test with positive and negative {@link BigDecimal}s having varying
+ * digits after the decimals.
+ */
+ public void test_BigDecimal_negativeSortOrder() {
+
+ final BigDecimal p1 = new BigDecimal("1.5");
+ final BigDecimal p2 = new BigDecimal("1.51");
+ final BigDecimal m1 = new BigDecimal("-1.5");
+ final BigDecimal m2 = new BigDecimal("-1.51");
+
+ doEncodeDecodeTest(p1);
+ doEncodeDecodeTest(p2);
+ doEncodeDecodeTest(m1);
+ doEncodeDecodeTest(m2);
+
+ doLTTest(p1, p2); // 1.5 LT 1.51
+ doLTTest(m1, p1); // -1.5 LT 1.5
+ doLTTest(m2, m1); // -1.51 LT -1.5
+
+ }
+
+ /**
* Stress test with random byte[]s from which we then construct
* {@link BigInteger}s.
*/
@@ -2074,12 +2118,8 @@
/**
* Stress test with random byte[]s from which we then construct
* {@link BigDecimal}s.
- *
- * FIXME: At present the comparisons fail proably due to a failure to
- * understand the limits of the BigDecimal representations - so this
- * test has been deactivated by name prefix.
*/
- public void badTest_BigDecimal_stress_byteArray_values() {
+ public void test_BigDecimal_stress_byteArray_values() {
final Random r = new Random();
@@ -2211,48 +2251,48 @@
// return key;
//
// }
-//
-// /**
-// * Normalize the {@link BigDecimal} by setting the scale such that there are
-// * no digits before the decimal point.
-// *
-// * FIXME This fails for "0" and "0.0". The trailing .0 is considered a
-// * significant digit and is not being stripped. We need to also strip
-// * trailing zeros which are significant.
-// *
-// * <pre>
-// * i=0 (scale=0,prec=1) : 0, scale=0, precision=1, unscaled=0, unscaled_byte[]=[0]
-// * i=0.0 (scale=1,prec=1) : 0.0, scale=1, precision=1, unscaled=0, unscaled_byte[]=[0]
-// * </pre>
-// */
-// private BigDecimal normalizeBigDecimal(final BigDecimal i) {
-//
-// return i.stripTrailingZeros();
-//
-// }
-//
-// /**
-// * Dumps out interesting bits of the {@link BigDecimal} state.
-// *
-// * @return The dump.
-// */
-// private String dumpBigDecimal(final BigDecimal i) {
-//
-// final BigInteger unscaled = i.unscaledValue();
-//
-// final String msg = i.toString() + ", scale=" + i.scale()
-// + //
-// ", precision=" + i.precision()
-// + //
-// ", unscaled=" + unscaled
-// + //
-// ", unscaled_byte[]="
-// + BytesUtil.toString(unscaled.toByteArray())//
-// ;
-//
-// return msg;
-//
-// }
+
+ /**
+ * Normalize the {@link BigDecimal} by setting the scale such that there are
+ * no digits before the decimal point.
+ *
+ * FIXME This fails for "0" and "0.0". The trailing .0 is considered a
+ * significant digit and is not being stripped. We need to also strip
+ * trailing zeros which are significant.
+ *
+ * <pre>
+ * i=0 (scale=0,prec=1) : 0, scale=0, precision=1, unscaled=0, unscaled_byte[]=[0]
+ * i=0.0 (scale=1,prec=1) : 0.0, scale=1, precision=1, unscaled=0, unscaled_byte[]=[0]
+ * </pre>
+ */
+ private BigDecimal normalizeBigDecimal(final BigDecimal i) {
+
+ return i.stripTrailingZeros();
+
+ }
+
+ /**
+ * Dumps out interesting bits of the {@link BigDecimal} state.
+ *
+ * @return The dump.
+ */
+ private String dumpBigDecimal(final BigDecimal i) {
+
+ final BigInteger unscaled = i.unscaledValue();
+
+ final String msg = i.toString() + ", scale=" + i.scale()
+ + //
+ ", precision=" + i.precision()
+ + //
+ ", unscaled=" + unscaled
+ + //
+ ", unscaled_byte[]="
+ + BytesUtil.toString(unscaled.toByteArray())//
+ ;
+
+ return msg;
+
+ }
//
// /**
// * Note: must have normalized representation of the BigDecimal to do
@@ -2308,101 +2348,101 @@
//
// }
//
-// private enum CompareEnum {
-//
-// LT(-1), EQ(0), GT(1);
-//
-// private CompareEnum(final int ret) {
-// this.ret = ret;
-// }
-//
-// private int ret;
-//
-// static public CompareEnum valueOf(final int ret) {
-// if(ret<0) return LT;
-// if(ret>0) return GT;
-// return EQ;
-// }
-//
-// }
-//
-// protected void doCompareTest(BigDecimal i1, BigDecimal i2, final CompareEnum cmp) {
-//
-// i1 = normalizeBigDecimal(i1);
-// i2 = normalizeBigDecimal(i2);
-//
-// final byte[] k1 = encodeBigDecimal(i1);
-//
-// final byte[] k2 = encodeBigDecimal(i2);
-//
-// final int ret = BytesUtil.compareBytes(k1, k2);
-//
-// final CompareEnum cmp2 = CompareEnum.valueOf(ret);
-//
-// if (cmp2 != cmp) {
-//
-// fail("BigDecimal" + //
-// "\ni1=" + dumpBigDecimal(i1) + //
-// "\ni2=" + dumpBigDecimal(i2) + //
-// "\nk1=" + BytesUtil.toString(k1) + //
-// "\nk2=" + BytesUtil.toString(k2) + //
-// "\nret=" + cmp2 +", but expected="+cmp//
-// );
-//
-// }
-//
-// }
-//
-// /**
-// * Test encode/decode for various values of zero.
-// */
-// public void test_BigDecimal0() {
-//
-// final BigDecimal[] a = new BigDecimal[] {
-// new BigDecimal("0"), // scale=0, precision=1
-// new BigDecimal("0."), // scale=0, precision=1
-// new BigDecimal("0.0"), // scale=1, precision=1
-// new BigDecimal("0.00"), // scale=2, precision=1
-// new BigDecimal("00.0"), // scale=1, precision=1
-// new BigDecimal("00.00"),// scale=2, precision=1
-// new BigDecimal(".0"), // scale=1, precision=1
-// // NB: The precision is the #of decimal digits in the unscaled value.
-// // NB: scaled := unscaled * (10 ^ -scale)
-// new BigDecimal(".010"), // scale=3, precision=2
-// new BigDecimal(".01"), // scale=2, precision=1
-// new BigDecimal(".1"), // scale=1, precision=1
-// new BigDecimal("1."), // scale=0, precision=1
-// new BigDecimal("10."), // scale=0, precision=2
-// new BigDecimal("10.0"), // scale=1, precision=3
-// new BigDecimal("010.0"), // scale=1, precision=3
-// new BigDecimal("0010.00"), // scale=2, precision=4
-// // @todo Test with cases where scale is negative (large powers of 10).
-// };
-//
-// for (BigDecimal i : a) {
-// i = i.stripTrailingZeros();
-// System.err.println("i="
-// + i
-// + "\t(scale="
-// + i.scale()
-// + ",prec="
-// + i.precision()
-// + ") : "
-// + dumpBigDecimal(i)
-//// i.scaleByPowerOfTen(i.scale()- i.precision()))
-// );
-// }
-//
-// for (BigDecimal i : a) {
-// doEncodeDecodeTest(i);
-// }
-//
-// for (int i = 0; i < a.length; i++) {
-// for (int j = 0; j < a.length; j++) {
-// doCompareTest(a[i], a[j], CompareEnum.EQ);
-// }
-// }
-//
-// }
+ private enum CompareEnum {
+
+ LT(-1), EQ(0), GT(1);
+
+ private CompareEnum(final int ret) {
+ this.ret = ret;
+ }
+
+ private int ret;
+
+ static public CompareEnum valueOf(final int ret) {
+ if(ret<0) return LT;
+ if(ret>0) return GT;
+ return EQ;
+ }
+
+ }
+
+ protected void doCompareTest(BigDecimal i1, BigDecimal i2, final CompareEnum cmp) {
+
+ i1 = normalizeBigDecimal(i1);
+ i2 = normalizeBigDecimal(i2);
+
+ final byte[] k1 = encodeBigDecimal(i1);
+
+ final byte[] k2 = encodeBigDecimal(i2);
+
+ final int ret = BytesUtil.compareBytes(k1, k2);
+
+ final CompareEnum cmp2 = CompareEnum.valueOf(ret);
+
+ if (cmp2 != cmp) {
+
+ fail("BigDecimal" + //
+ "\ni1=" + dumpBigDecimal(i1) + //
+ "\ni2=" + dumpBigDecimal(i2) + //
+ "\nk1=" + BytesUtil.toString(k1) + //
+ "\nk2=" + BytesUtil.toString(k2) + //
+ "\nret=" + cmp2 +", but expected="+cmp//
+ );
+
+ }
+
+ }
+
+ /**
+ * Test encode/decode for various values of zero.
+ */
+ public void test_BigDecimal0() {
+
+ final BigDecimal[] a = new BigDecimal[] {
+ new BigDecimal("0"), // scale=0, precision=1
+ new BigDecimal("0."), // scale=0, precision=1
+ new BigDecimal("0.0"), // scale=1, precision=1
+ new BigDecimal("0.00"), // scale=2, precision=1
+ new BigDecimal("00.0"), // scale=1, precision=1
+ new BigDecimal("00.00"),// scale=2, precision=1
+ new BigDecimal(".0"), // scale=1, precision=1
+ // NB: The precision is the #of decimal digits in the unscaled value.
+ // NB: scaled := unscaled * (10 ^ -scale)
+ new BigDecimal(".010"), // scale=3, precision=2
+ new BigDecimal(".01"), // scale=2, precision=1
+ new BigDecimal(".1"), // scale=1, precision=1
+ new BigDecimal("1."), // scale=0, precision=1
+ new BigDecimal("10."), // scale=0, precision=2
+ new BigDecimal("10.0"), // scale=1, precision=3
+ new BigDecimal("010.0"), // scale=1, precision=3
+ new BigDecimal("0010.00"), // scale=2, precision=4
+ // @todo Test with cases where scale is negative (large powers of 10).
+ };
+
+ for (BigDecimal i : a) {
+ i = i.stripTrailingZeros();
+ System.err.println("i="
+ + i
+ + "\t(scale="
+ + i.scale()
+ + ",prec="
+ + i.precision()
+ + ") : "
+ + dumpBigDecimal(i)
+// i.scaleByPowerOfTen(i.scale()- i.precision()))
+ );
+ }
+
+ for (BigDecimal i : a) {
+ doEncodeDecodeTest(i);
+ }
+
+ for (int i = 0; i < a.length; i++) {
+ for (int j = 0; j < a.length; j++) {
+ doCompareTest(a[i], a[j], CompareEnum.EQ);
+ }
+ }
+
+ }
}
Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalIV.java
===================================================================
--- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalIV.java 2010-07-26 02:16:07 UTC (rev 3287)
+++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDDecimalIV.java 2010-07-26 14:24:18 UTC (rev 3288)
@@ -27,7 +27,7 @@
import java.math.BigDecimal;
import java.math.BigInteger;
-import com.bigdata.rawstore.Bytes;
+import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.rdf.model.BigdataLiteral;
import com.bigdata.rdf.model.BigdataValueFactory;
@@ -137,23 +137,18 @@
}
public int byteLength() {
+
if (byteLength == 0) {
/*
* Cache the byteLength if not yet set.
*/
- int dataLen = value.unscaledValue().toString().length();
-
- byteLength =
- 1 /* flags */
- + 1 /* sign */
- + 4 /* exponent */
- + dataLen
- + 1 /* data and null termination */;
+ byteLength = 1 /* flags */ + KeyBuilder.byteLength(value);
}
return byteLength;
+
}
@Override
Modified: branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerIV.java
===================================================================
--- branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerIV.java 2010-07-26 02:16:07 UTC (rev 3287)
+++ branches/LEXICON_REFACTOR_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/XSDIntegerIV.java 2010-07-26 14:24:18 UTC (rev 3288)
@@ -27,6 +27,7 @@
import java.math.BigDecimal;
import java.math.BigInteger;
+import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.rdf.model.BigdataLiteral;
import com.bigdata.rdf.model.BigdataValueFactory;
@@ -143,7 +144,7 @@
* Cache the byteLength if not yet set.
*/
- byteLength = 1 /* prefix */+ 2/* runLength */+ (value.bitLength() / 8 + 1)/* data */;
+ byteLength = 1 /* prefix */+ KeyBuilder.byteLength(value);
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|