From: <tho...@us...> - 2013-10-11 18:44:14
|
Revision: 7453 http://bigdata.svn.sourceforge.net/bigdata/?rev=7453&view=rev Author: thompsonbry Date: 2013-10-11 18:44:06 +0000 (Fri, 11 Oct 2013) Log Message: ----------- Bug fix for #736 (MIN/MAX should use ORDER BY semantics). Workaround for #740 (NSPIN) so we can inspect this further. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/IVComparator.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestMAX.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestMIN.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/IVComparator.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/IVComparator.java 2013-10-11 15:49:09 UTC (rev 7452) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/IVComparator.java 2013-10-11 18:44:06 UTC (rev 7453) @@ -51,7 +51,7 @@ /** * A comparator that compares {@link IV}s according the SPARQL value ordering as - * specified in <A + * specified in <a * href="http://www.w3.org/TR/rdf-sparql-query/#modOrderBy">SPARQL Query * Language for RDF</a>. This implementation is based on the openrdf * {@link ValueComparator} but has been modified to work with {@link IV}s. Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java 2013-10-11 15:49:09 UTC (rev 7452) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/relation/accesspath/BlockingBuffer.java 2013-10-11 18:44:06 UTC (rev 7453) @@ -115,19 +115,28 @@ /** * The #of times that we will use {@link BlockingQueue#offer(Object)} or - * {@link Queue#poll()} before converting to the variants of those methods - * which accept a timeout. The timeouts are used to reduce the contention - * for the queue if either the producer or the consumer is lagging. + * before converting to the variant of that method which accept a timeout. + * The timeouts are used to reduce the contention for the queue if either + * the consumer is lagging. */ - private static final int NSPIN = 100; - + private static final int NSPIN_ADD = Integer.valueOf(System.getProperty( + BlockingBuffer.class.getName() + ".NSPIN.ADD", "100")); + /** + * The #of times that we will use {@link Queue#poll()} before converting to + * the variant of that method which accept a timeout. The timeouts are used + * to reduce the contention for the queue if either the producer is lagging. + */ + private static final int NSPIN_READ = Integer.valueOf(System.getProperty( + BlockingBuffer.class.getName() + ".NSPIN.READ", "100")); + + /** * The timeout for offer() or poll() as a function of the #of tries that * have already been made to {@link #add(Object)} or read a chunk. * * @param ntries * The #of tries. - * + * * @return The timeout (milliseconds). */ private static final long getTimeoutMillis(final int ntries) { @@ -1038,7 +1047,7 @@ final boolean added; - if (ntries < NSPIN) { + if (ntries < NSPIN_ADD) { // offer (non-blocking). added = queue.offer(e); @@ -1804,7 +1813,7 @@ * inside of poll(timeout,unit) [@todo This could be fixed by a poison pill.] */ - if (ntries < NSPIN) { + if (ntries < NSPIN_READ) { /* * This is basically a spin lock (it can spin without Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java 2013-10-11 15:49:09 UTC (rev 7452) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MAX.java 2013-10-11 18:44:06 UTC (rev 7453) @@ -25,13 +25,11 @@ import java.util.Map; -import org.openrdf.query.algebra.Compare.CompareOp; -import org.openrdf.query.algebra.evaluation.util.ValueComparator; - import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.aggregate.AggregateBase; +import com.bigdata.bop.solutions.IVComparator; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.constraints.CompareBOp; import com.bigdata.rdf.internal.constraints.INeedsMaterialization; @@ -43,7 +41,7 @@ * <p> * Note: MIN (and MAX) are defined in terms of the ORDER_BY semantics for * SPARQL. Therefore, this must handle comparisons when the value is not an IV, - * e.g., using {@link ValueComparator}. + * e.g., using {@link IVComparator}. * * @author thompsonbry * @@ -58,15 +56,23 @@ */ private static final long serialVersionUID = 1L; - public MAX(MAX op) { + /** + * Provides SPARQL <em>ORDER BY</em> semantics. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/736"> + * MIN() malfunction </a> + */ + private static final transient IVComparator comparator = new IVComparator(); + + public MAX(final MAX op) { super(op); } - public MAX(BOp[] args, Map<String, Object> annotations) { + public MAX(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); } - public MAX(boolean distinct, IValueExpression...expr) { + public MAX(final boolean distinct, final IValueExpression...expr) { super(distinct, expr); } @@ -103,46 +109,44 @@ } private IV doGet(final IBindingSet bindingSet) { + for (int i = 0; i < arity(); i++) { final IValueExpression<IV<?, ?>> expr = (IValueExpression<IV<?, ?>>) get(i); - final IV iv = expr.get(bindingSet); + final IV iv = expr.get(bindingSet); - if (iv != null) { + if (iv != null) { - /* - * Aggregate non-null values. - */ + /* + * Aggregate non-null values. + */ - if (max == null) { + if (max == null) { - max = iv; + max = iv; - } else { + } else { - /** - * FIXME This needs to use the ordering define by ORDER_BY. The - * CompareBOp imposes the ordering defined for the "<" operator - * which is less robust and will throw a type exception if you - * attempt to compare unlike Values. - * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/300#comment:5 - */ - if (CompareBOp.compare(iv, max, CompareOp.GT)) { + // Note: This is SPARQL GT semantics, not ORDER BY. +// if (CompareBOp.compare(iv, max, CompareOp.GT)) { - max = iv; + // SPARQL ORDER_BY semantics + if (comparator.compare(iv, max) > 0) { + max = iv; + + } + } } - } - } return max; } + @Override synchronized public void reset() { max = null; @@ -172,6 +176,7 @@ * * FIXME MikeP: What is the right return value here? */ + @Override public Requirement getRequirement() { return INeedsMaterialization.Requirement.ALWAYS; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java 2013-10-11 15:49:09 UTC (rev 7452) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/aggregate/MIN.java 2013-10-11 18:44:06 UTC (rev 7453) @@ -25,20 +25,14 @@ import java.util.Map; -import org.apache.log4j.Logger; -import org.openrdf.query.algebra.Compare.CompareOp; -import org.openrdf.query.algebra.evaluation.util.ValueComparator; - import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpBase; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.aggregate.AggregateBase; -import com.bigdata.bop.aggregate.IAggregate; +import com.bigdata.bop.solutions.IVComparator; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.constraints.CompareBOp; import com.bigdata.rdf.internal.constraints.INeedsMaterialization; -import com.bigdata.rdf.internal.constraints.INeedsMaterialization.Requirement; /** * Operator reports the minimum observed value over the presented binding sets @@ -47,11 +41,11 @@ * <p> * Note: MIN (and MAX) are defined in terms of the ORDER_BY semantics for * SPARQL. Therefore, this must handle comparisons when the value is not an IV, - * e.g., using {@link ValueComparator}. - * + * e.g., using the {@link IVComparator}. + * * @author thompsonbry - * - * TODO What is reported if there are no non-null observations? + * + * TODO What is reported if there are no non-null observations? */ public class MIN extends AggregateBase<IV> implements INeedsMaterialization{ @@ -62,15 +56,23 @@ */ private static final long serialVersionUID = 1L; - public MIN(MIN op) { + /** + * Provides SPARQL <em>ORDER BY</em> semantics. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/736"> + * MIN() malfunction </a> + */ + private static final transient IVComparator comparator = new IVComparator(); + + public MIN(final MIN op) { super(op); } - public MIN(BOp[] args, Map<String, Object> annotations) { + public MIN(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); } - public MIN(boolean distinct, IValueExpression...expr) { + public MIN(final boolean distinct, final IValueExpression...expr) { super(distinct, expr); } @@ -107,46 +109,44 @@ } private IV doGet(final IBindingSet bindingSet) { - for(int i=0;i<arity();i++){ + for (int i = 0; i < arity(); i++) { + final IValueExpression<IV> expr = (IValueExpression<IV>) get(i); - final IV iv = expr.get(bindingSet); + final IV iv = expr.get(bindingSet); - if (iv != null) { + if (iv != null) { - /* - * Aggregate non-null values. - */ + /* + * Aggregate non-null values. + */ - if (min == null) { + if (min == null) { - min = iv; + min = iv; - } else { + } else { - /** - * FIXME This needs to use the ordering define by ORDER_BY. The - * CompareBOp imposes the ordering defined for the "<" operator - * which is less robust and will throw a type exception if you - * attempt to compare unlike Values. - * - * @see https://sourceforge.net/apps/trac/bigdata/ticket/300#comment:5 - */ - if (CompareBOp.compare(iv, min, CompareOp.LT)) { + // Note: This is SPARQL LT semantics, not ORDER BY. +// if (CompareBOp.compare(iv, min, CompareOp.LT)) { - min = iv; + // SPARQL ORDER_BY semantics + if (comparator.compare(iv, min) < 0) { + min = iv; + + } + } } - } - } return min; } + @Override synchronized public void reset() { min = null; @@ -176,6 +176,7 @@ * * FIXME MikeP: What is the right return value here? */ + @Override public Requirement getRequirement() { return INeedsMaterialization.Requirement.ALWAYS; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2013-10-11 15:49:09 UTC (rev 7452) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/CompareBOp.java 2013-10-11 18:44:06 UTC (rev 7453) @@ -36,14 +36,21 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.NV; +import com.bigdata.bop.solutions.IVComparator; import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.impl.literal.LiteralExtensionIV; import com.bigdata.rdf.model.BigdataValue; -import com.bigdata.rdf.sparql.ast.GlobalAnnotations; /** - * Perform open-world value comparison operations per the SPARQL spec. + * Perform open-world value comparison operations per the SPARQL spec (the LT + * operator). This does NOT implement the broader ordering for ORDER BY. That is + * handled by {@link IVComparator}. + * + * @see <a + * href="http://www.w3.org/TR/2013/REC-sparql11-query-20130321/#op_lt"><</a> + * + * @see IVComparator */ public class CompareBOp extends XSDBooleanIVValueExpression implements INeedsMaterialization { Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestMAX.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestMAX.java 2013-10-11 15:49:09 UTC (rev 7452) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestMAX.java 2013-10-11 18:44:06 UTC (rev 7453) @@ -227,7 +227,19 @@ } - public void test_max_with_errors() { + /** + * MAX is defined in terms of SPARQL <code>ORDER BY</code> rather than + * <code>GT</code>. + * + * @see <a href="http://www.w3.org/TR/rdf-sparql-query/#modOrderBy">SPARQL + * Query Language for RDF</a> + * @see <a + * href="http://www.w3.org/TR/2013/REC-sparql11-query-20130321/#op_lt"><</a> + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/736"> + * MIN() malfunction </a> + */ + public void test_max_uses_ORDER_BY_not_GT() { final BigdataValueFactory f = BigdataValueFactoryImpl.getInstance(getName()); @@ -261,7 +273,7 @@ * ?org ?auth ?book ?lprice * org1 auth1 book1 9 * org1 auth1 book3 5 - * org1 auth2 book3 7 + * org1 auth2 book3 auth2 * org2 auth3 book4 7 * </pre> */ @@ -285,44 +297,50 @@ assertFalse(op.isDistinct()); assertFalse(op.isWildcard()); - try { - op.reset(); - for (IBindingSet bs : data) { - op.get(bs); - } - fail("Expecting: " + SparqlTypeErrorException.class); - } catch (RuntimeException ex) { - if (InnerCause.isInnerCause(ex, SparqlTypeErrorException.class)) { - if (log.isInfoEnabled()) { - log.info("Ignoring expected exception: " + ex); - } - } else { - fail("Expecting: " + SparqlTypeErrorException.class, ex); - } + op.reset(); + for (IBindingSet bs : data) { + op.get(bs); } + assertEquals(price9.get(), op.done()); + +// try { +// op.reset(); +// for (IBindingSet bs : data) { +// op.get(bs); +// } +// fail("Expecting: " + SparqlTypeErrorException.class); +// } catch (RuntimeException ex) { +// if (InnerCause.isInnerCause(ex, SparqlTypeErrorException.class)) { +// if (log.isInfoEnabled()) { +// log.info("Ignoring expected exception: " + ex); +// } +// } else { +// fail("Expecting: " + SparqlTypeErrorException.class, ex); +// } +// } +// +// /* +// * Now verify that the error is sticky. +// */ +// try { +// op.done(); +// fail("Expecting: " + SparqlTypeErrorException.class); +// } catch (RuntimeException ex) { +// if (InnerCause.isInnerCause(ex, SparqlTypeErrorException.class)) { +// if (log.isInfoEnabled()) { +// log.info("Ignoring expected exception: " + ex); +// } +// } else { +// fail("Expecting: " + SparqlTypeErrorException.class, ex); +// } +// } +// +// /* +// * Now verify that reset() clears the error. +// */ +// op.reset(); +// op.done(); - /* - * Now verify that the error is sticky. - */ - try { - op.done(); - fail("Expecting: " + SparqlTypeErrorException.class); - } catch (RuntimeException ex) { - if (InnerCause.isInnerCause(ex, SparqlTypeErrorException.class)) { - if (log.isInfoEnabled()) { - log.info("Ignoring expected exception: " + ex); - } - } else { - fail("Expecting: " + SparqlTypeErrorException.class, ex); - } - } - - /* - * Now verify that reset() clears the error. - */ - op.reset(); - op.done(); - } } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestMIN.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestMIN.java 2013-10-11 15:49:09 UTC (rev 7452) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/bop/rdf/aggregate/TestMIN.java 2013-10-11 18:44:06 UTC (rev 7453) @@ -34,7 +34,6 @@ import com.bigdata.bop.Var; import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.journal.ITx; -import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.internal.XSD; @@ -47,7 +46,6 @@ import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.model.BigdataValueFactoryImpl; import com.bigdata.rdf.sparql.ast.GlobalAnnotations; -import com.bigdata.util.InnerCause; /** * Unit tests for {@link MIN}. @@ -227,7 +225,19 @@ } - public void test_min_with_errors() { + /** + * MIN is defined in terms of SPARQL <code>ORDER BY</code> rather than + * <code>LT</code>. + * + * @see <a href="http://www.w3.org/TR/rdf-sparql-query/#modOrderBy">SPARQL + * Query Language for RDF</a> + * @see <a + * href="http://www.w3.org/TR/2013/REC-sparql11-query-20130321/#op_lt"><</a> + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/736"> + * MIN() malfunction </a> + */ + public void test_min_uses_ORDER_BY_not_LT() { final BigdataValueFactory f = BigdataValueFactoryImpl.getInstance(getName()); @@ -261,7 +271,7 @@ * ?org ?auth ?book ?lprice * org1 auth1 book1 9 * org1 auth1 book3 5 - * org1 auth2 book3 7 + * org1 auth2 book3 auth2 * org2 auth3 book4 7 * </pre> */ @@ -285,44 +295,51 @@ assertFalse(op.isDistinct()); assertFalse(op.isWildcard()); - try { - op.reset(); - for (IBindingSet bs : data) { - op.get(bs); - } - fail("Expecting: " + SparqlTypeErrorException.class); - } catch (RuntimeException ex) { - if (InnerCause.isInnerCause(ex, SparqlTypeErrorException.class)) { - if (log.isInfoEnabled()) { - log.info("Ignoring expected exception: " + ex); - } - } else { - fail("Expecting: " + SparqlTypeErrorException.class, ex); - } + op.reset(); + for (IBindingSet bs : data) { + op.get(bs); } + + assertEquals(auth2.get(), op.done()); - /* - * Now verify that the error is sticky. - */ - try { - op.done(); - fail("Expecting: " + SparqlTypeErrorException.class); - } catch (RuntimeException ex) { - if (InnerCause.isInnerCause(ex, SparqlTypeErrorException.class)) { - if (log.isInfoEnabled()) { - log.info("Ignoring expected exception: " + ex); - } - } else { - fail("Expecting: " + SparqlTypeErrorException.class, ex); - } - } +// try { +// op.reset(); +// for (IBindingSet bs : data) { +// op.get(bs); +// } +// fail("Expecting: " + SparqlTypeErrorException.class); +// } catch (RuntimeException ex) { +// if (InnerCause.isInnerCause(ex, SparqlTypeErrorException.class)) { +// if (log.isInfoEnabled()) { +// log.info("Ignoring expected exception: " + ex); +// } +// } else { +// fail("Expecting: " + SparqlTypeErrorException.class, ex); +// } +// } +// +// /* +// * Now verify that the error is sticky. +// */ +// try { +// op.done(); +// fail("Expecting: " + SparqlTypeErrorException.class); +// } catch (RuntimeException ex) { +// if (InnerCause.isInnerCause(ex, SparqlTypeErrorException.class)) { +// if (log.isInfoEnabled()) { +// log.info("Ignoring expected exception: " + ex); +// } +// } else { +// fail("Expecting: " + SparqlTypeErrorException.class, ex); +// } +// } +// +// /* +// * Now verify that reset() clears the error. +// */ +// op.reset(); +// op.done(); - /* - * Now verify that reset() clears the error. - */ - op.reset(); - op.done(); - } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |