From: <tho...@us...> - 2014-01-15 15:07:59

Revision: 7803
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7803&view=rev
Author:   thompsonbry
Date:     2014-01-15 15:07:52 +0000 (Wed, 15 Jan 2014)

Log Message:
-----------
@Override

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/NamedSubqueryRoot.java

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/NamedSubqueryRoot.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/NamedSubqueryRoot.java	2014-01-15 14:05:24 UTC (rev 7802)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/NamedSubqueryRoot.java	2014-01-15 15:07:52 UTC (rev 7803)
@@ -112,15 +112,17 @@

    }

+   @Override
    public String getName() {

        return (String) getProperty(Annotations.NAMED_SET);

    }

+   @Override
    public void setName(final String name) {

-       if(name == null)
+       if (name == null)
            throw new IllegalArgumentException();

        setProperty(Annotations.NAMED_SET, name);
From: <tho...@us...> - 2014-01-15 14:05:34
Revision: 7802 http://bigdata.svn.sourceforge.net/bigdata/?rev=7802&view=rev Author: thompsonbry Date: 2014-01-15 14:05:24 +0000 (Wed, 15 Jan 2014) Log Message: ----------- final and @Override annotations. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/SubqueryRoot.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTOptimizerList.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/SubqueryRoot.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/SubqueryRoot.java 2014-01-15 13:17:18 UTC (rev 7801) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/SubqueryRoot.java 2014-01-15 14:05:24 UTC (rev 7802) @@ -138,6 +138,7 @@ /** * Returns <code>false</code>. */ + @Override final public boolean isOptional() { return false; @@ -147,12 +148,14 @@ /** * Returns <code>false</code>. */ + @Override final public boolean isMinus() { return false; } - + + @Override final public List<FilterNode> getAttachedJoinFilters() { @SuppressWarnings("unchecked") @@ -168,6 +171,7 @@ } + @Override final public void setAttachedJoinFilters(final List<FilterNode> filters) { setProperty(Annotations.FILTERS, filters); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTOptimizerList.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTOptimizerList.java 2014-01-15 13:17:18 UTC (rev 7801) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/optimizers/ASTOptimizerList.java 2014-01-15 14:05:24 UTC (rev 7802) @@ -55,12 +55,19 @@ */ private static final long serialVersionUID = 1L; - public ASTOptimizerList(Collection<IASTOptimizer> c) { - super(c); + public ASTOptimizerList(final Collection<IASTOptimizer> c) { + + super(c); + } - public ASTOptimizerList(IASTOptimizer ... optimizers) { - this(Arrays.asList(optimizers)); + + public ASTOptimizerList(final IASTOptimizer... optimizers) { + + this(Arrays.asList(optimizers)); + } + + @Override public boolean add(final IASTOptimizer opt) { if(opt == null) @@ -79,6 +86,7 @@ * Note: This makes a deep copy of the AST before applying destructive * modifications. */ + @Override public IQueryNode optimize(final AST2BOpContext context, IQueryNode queryNode, final IBindingSet[] bindingSets) { @@ -95,7 +103,8 @@ queryNode = opt.optimize(context, queryNode, bindingSets); - assert queryNode != null : "Optimized discarded query: " + opt; + if (queryNode == null) + throw new AssertionError("Optimized discarded query: " + opt); if (log.isDebugEnabled()) log.debug("Rewritten AST:\n" + queryNode); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-15 13:17:28
Revision: 7801 http://bigdata.svn.sourceforge.net/bigdata/?rev=7801&view=rev Author: thompsonbry Date: 2014-01-15 13:17:18 +0000 (Wed, 15 Jan 2014) Log Message: ----------- Added an example to start and run the NSS from embedded code. Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata-sails/src/samples/com/bigdata/samples/NSSEmbeddedExample.java Added: branches/BIGDATA_RELEASE_1_3_0/bigdata-sails/src/samples/com/bigdata/samples/NSSEmbeddedExample.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-sails/src/samples/com/bigdata/samples/NSSEmbeddedExample.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-sails/src/samples/com/bigdata/samples/NSSEmbeddedExample.java 2014-01-15 13:17:18 UTC (rev 7801) @@ -0,0 +1,148 @@ +package com.bigdata.samples; + +import java.net.URL; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.log4j.Logger; +import org.eclipse.jetty.server.Server; + +import com.bigdata.journal.IIndexManager; +import com.bigdata.rdf.sail.BigdataSail; +import com.bigdata.rdf.sail.webapp.NanoSparqlServer; +import com.bigdata.util.config.NicUtil; + +/** + * Class demonstrates how to start the {@link NanoSparqlServer} from within + * embedded code. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ +public class NSSEmbeddedExample implements Runnable { + + private static final Logger log = Logger + .getLogger(NSSEmbeddedExample.class); + + private int port; + private final IIndexManager indexManager; + private final Map<String, String> initParams; + + /** + * + * @param port + * The desired port -or- ZERO (0) to use a random open port. + */ + public NSSEmbeddedExample(final int port, final IIndexManager indexManager, + final Map<String, String> initParams) { + + if (indexManager == null) + throw new IllegalArgumentException(); + + if (initParams == null) + throw new IllegalArgumentException(); + + this.port = port; + this.indexManager = indexManager; + this.initParams = initParams; + + } + + @Override + public void run() { + + Server server = null; + try { + + server = NanoSparqlServer.newInstance(port, indexManager, + initParams); + + server.start(); + + final int actualPort = server.getConnectors()[0] + .getLocalPort(); + + String hostAddr = NicUtil.getIpAddress("default.nic", + "default", true/* loopbackOk */); + + if (hostAddr == null) { + + hostAddr = "localhost"; + + } + + final String serviceURL = new URL("http", hostAddr, actualPort, ""/* file */) + .toExternalForm(); + + System.out.println("serviceURL: " + serviceURL); + + // Block and wait. The NSS is running. + server.join(); + + } catch (Throwable t) { + + log.error(t, t); + + } finally { + + if (server != null) { + + try { + + server.stop(); + + } catch (Exception e) { + + log.error(e, e); + + } + + server = null; + + System.out.println("Halted."); + + } + + } + + } + + /** + * Start and run an {@link NanoSparqlServer} instance from embedded code. + * + * @param args + * ignored. + * + * @throws Exception + */ + public static void main(final String[] args) throws Exception { + + final int port = 0; // random port. + + /* + * Create or re-open a durable database instance using default + * configuration properties. There are other constructors that allow you + * to take more control over this process. 
+ */ + final BigdataSail sail = new BigdataSail(); + + sail.initialize(); + + try { + + final IIndexManager indexManager = sail.getDatabase() + .getIndexManager(); + + final Map<String, String> initParams = new LinkedHashMap<String, String>(); + + new Thread(new NSSEmbeddedExample(port, indexManager, initParams)) + .run(); + + } finally { + + sail.shutDown(); + + } + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
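For completeness, a minimal client for an embedded NanoSparqlServer such as the one started above can be written with nothing but the JDK. The sketch below is illustrative only: the class name is hypothetical, and the "/sparql" endpoint path, the Accept header value, and the example query are assumptions, not details taken from the commit.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

/**
 * Hypothetical client sketch: issues a SPARQL SELECT against the serviceURL
 * printed by NSSEmbeddedExample, assuming the endpoint is exposed at "/sparql".
 */
public class NSSClientSketch {

    public static void main(final String[] args) throws Exception {

        // The serviceURL printed by NSSEmbeddedExample, e.g. http://localhost:54321
        final String serviceURL = args[0];

        final String query = "SELECT * WHERE { ?s ?p ?o } LIMIT 10";

        // GET the query, URL-encoded, from the (assumed) /sparql endpoint.
        final URL url = new URL(serviceURL + "/sparql?query="
                + URLEncoder.encode(query, "UTF-8"));

        final HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestProperty("Accept", "application/sparql-results+xml");

        final BufferedReader r = new BufferedReader(new InputStreamReader(
                conn.getInputStream(), "UTF-8"));
        try {
            String line;
            while ((line = r.readLine()) != null)
                System.out.println(line);
        } finally {
            r.close();
            conn.disconnect();
        }

    }

}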
From: <tho...@us...> - 2014-01-15 12:52:26

Revision: 7800
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7800&view=rev
Author:   thompsonbry
Date:     2014-01-15 12:52:20 +0000 (Wed, 15 Jan 2014)

Log Message:
-----------
javadoc fix

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_3_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/NanoSparqlServer.java

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/NanoSparqlServer.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/NanoSparqlServer.java	2014-01-15 12:33:55 UTC (rev 7799)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata-sails/src/java/com/bigdata/rdf/sail/webapp/NanoSparqlServer.java	2014-01-15 12:52:20 UTC (rev 7800)
@@ -132,7 +132,7 @@
 *            <dd>Force a compacting merge of all shards on all data
 *            services in a bigdata federation (this option should only be
 *            used for benchmarking purposes).</dd>
- *            <dt>readLock</dt>
+ *            <dt>-readLock</dt>
 *            <dd>The commit time against which the server will assert a
 *            read lock by holding open a read-only transaction against that
 *            commit point OR <code>-1</code> (MINUS ONE) to assert a read
@@ -141,11 +141,11 @@
 *            will default to read against the most recent commit point on
 *            the database. Regardless, each query will be issued against a
 *            read-only transaction.</dt>
- *            </dl>
- *            <dt>servletContextListenerClass</dt>
+ *            <dt>-servletContextListenerClass</dt>
 *            <dd>The name of a class that extends
 *            {@link BigdataRDFServletContextListener}. This allows you to
 *            hook the {@link ServletContextListener} events.</dd>
+ *            </dl>
 *            </p>
 */
// *            <dt>bufferCapacity [#bytes]</dt>
From: <tho...@us...> - 2014-01-15 12:34:04
Revision: 7799 http://bigdata.svn.sourceforge.net/bigdata/?rev=7799&view=rev Author: thompsonbry Date: 2014-01-15 12:33:55 +0000 (Wed, 15 Jan 2014) Log Message: ----------- javadoc related to query hints. Modified the javadoc ant task to generate documentation for package private as well as public and protected. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/AtOnceHint.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/ChunkSizeHint.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/PipelineMaxParallelHint.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunFirstHint.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunLastHint.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunOnceHint.java branches/BIGDATA_RELEASE_1_3_0/build.xml Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2014-01-14 16:59:02 UTC (rev 7798) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2014-01-15 12:33:55 UTC (rev 7799) @@ -341,32 +341,37 @@ * * @see https://sourceforge.net/apps/trac/bigdata/ticket/283 */ - String QUERYID = "queryId";//QueryHints.class.getName() + ".queryId"; + String QUERYID = "queryId"; /** * This query hint may be applied to any {@link IJoinNode} and marks a * particular join to be run first among in a particular group. Only one * "run first" join is permitted in a given group. This query hint is not - * permitted on optional joins. + * permitted on optional joins. This hint must be used with + * {@link QueryHintScope#Prior}. */ - String RUN_FIRST = "runFirst";//QueryHints.class.getName() + ".runFirst"; + String RUN_FIRST = "runFirst"; /** * This query hint may be applied to any {@link IJoinNode} and marks a * particular join to be run last among in a particular group. Only one - * "run last" join is permitted in a given group. + * "run last" join is permitted in a given group. This hint must be used + * with {@link QueryHintScope#Prior}. */ - String RUN_LAST = "runLast";//QueryHints.class.getName() + ".runLast"; + String RUN_LAST = "runLast"; /** * Query hint indicating whether or not a Sub-Select should be transformed * into a <em>named subquery</em>, lifting its evaluation out of the main - * body of the query and replacing the subquery with an INCLUDE. This is - * similar to {@link #AT_ONCE 'at-once'} evaluation, but creates a different - * query plan by lifting out a named subquery. It is also only supported for - * a Sub-Select. The {@link #AT_ONCE} query hint can be applied to other - * things as well. + * body of the query and replacing the subquery with an INCLUDE. This hint + * must be used with {@link QueryHintScope#SubQuery}. * <p> + * This is similar to {@link #AT_ONCE 'atOnce'} evaluation, but creates a + * different query plan by lifting out a named subquery. The + * {@link #RUN_ONCE} query hint is only supported for + * {@link QueryHintScope#SubQuery} while {@link #AT_ONCE} query hint can be + * applied to other things as well. 
+ * <p> * When <code>true</code>, the subquery will be lifted out. When * <code>false</code>, the subquery will not be lifted unless other * semantics require that it be lifted out regardless. @@ -381,20 +386,23 @@ * * @see #AT_ONCE */ - String RUN_ONCE = "runOnce";//QueryHints.class.getName() + ".runOnce"; + String RUN_ONCE = "runOnce"; /** * Query hint indicating whether or not a JOIN (including SERVICE, - * SUB-SELECT, etc) should be run as an "at-once" operator. All solutions - * for an "at-once" operator are materialized before the operator is - * evaluated. It is then evaluated against those materialized solutions - * exactly once. + * SUB-SELECT, etc) should be run as an "atOnce" operator. All solutions for + * an "atOnce" operator are materialized before the operator is evaluated. + * It is then evaluated against those materialized solutions exactly once. * <p> - * Note: "At-once" evaluation is a general property of the query engine. - * This query hint does not change the structure of the query plan, but - * simply serves as a directive to the query engine that it should buffer - * all source solutions before running the operator. This is more general + * Note: "atOnce" evaluation is a general property of the query engine. This + * query hint does not change the structure of the query plan, but simply + * serves as a directive to the query engine that it should buffer all + * source solutions before running the operator. This is more general * purpose than the {@link #RUN_ONCE} query hint. + * <p> + * This query hint is allowed in any scope. The hint is transferred as an + * annotation onto all query plan operators generated from the annotated + * scope. * * @see #RUN_ONCE * @@ -409,11 +417,14 @@ String AT_ONCE = "atOnce"; /** - * The target chunk (aka vector size) for the operator. + * Sets the target chunk size (aka vector size) for the output buffer of the operator. * <p> - * Note: The vectored query engine will buffer multiple chunks for an - * operator before the producer(s) (the operator(s) feeding into the - * annotated operator) must block. + * This query hint does not change the structure of the query plan, but + * simply serves as a directive to the query engine that it should allocate + * an output buffer for the operator that will emit chunks of the indicated + * target capacity. This query hint is allowed in any scope, but is + * generally used to effect the behavior of a join group, a subquery, or the + * entire query. * * @see BufferAnnotations#CHUNK_CAPACITY */ @@ -421,6 +432,14 @@ /** * The maximum parallelism for the operator within the query. + * <p> + * Note: "maxParallel" evaluation is a general property of the query engine. + * This query hint does not change the structure of the query plan, but + * simply serves as a directive to the query engine that it should not allow + * more than the indicated number of parallel instances of the operator to + * execute concurrently. This query hint is allowed in any scope. The hint is + * transferred as an annotation onto all query plan operators generated from + * the annotated scope. * * @see PipelineOp.Annotations#MAX_PARALLEL */ @@ -528,7 +547,7 @@ * elements (maximum) should be read from its access path. This * effectively limits the input into the join. * - * @see {@link Annotations#CUTOFF_LIMIT}. 
+ * @see Annotations#CUTOFF_LIMIT */ String CUTOFF_LIMIT = "cutoffLimit"; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/AtOnceHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/AtOnceHint.java 2014-01-14 16:59:02 UTC (rev 7798) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/AtOnceHint.java 2014-01-15 12:33:55 UTC (rev 7799) @@ -35,14 +35,16 @@ import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; /** - * Query hint marks the operator as requiring "at-once" evaluation. All - * solutions will be buffered by the {@link QueryEngine} before the operator is - * evaluated. When it is evaluated, it will receive all solutions in a single - * invocation of that operator. However, the solutions MAY appear in multiple - * chunks since the {@link QueryEngine} does not guarantee that the chunk will - * be merged before the operator is invoked. + * Query hint marks the operator as requiring "atOnce" evaluation. All solutions + * will be buffered by the {@link QueryEngine} before the operator is evaluated. + * When it is evaluated, it will receive all solutions in a single invocation of + * that operator. However, the solutions MAY appear in multiple chunks since the + * {@link QueryEngine} does not guarantee that the chunk will be merged before + * the operator is invoked. This query hint is allowed in any scope. The hint is + * transferred as an annotation onto all query plan operators generated from the + * annotated scope. * <p> - * Note: The "at-once" hint is basically turned into <code>NOT(PIPELINED)</code>. + * Note: The "atOnce" hint is basically turned into <code>NOT(PIPELINED)</code>. * * @see PipelineOp.Annotations#PIPELINED */ Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/ChunkSizeHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/ChunkSizeHint.java 2014-01-14 16:59:02 UTC (rev 7798) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/ChunkSizeHint.java 2014-01-15 12:33:55 UTC (rev 7799) @@ -28,17 +28,24 @@ package com.bigdata.rdf.sparql.ast.hints; import com.bigdata.bop.BufferAnnotations; -import com.bigdata.bop.IBindingSet; import com.bigdata.rdf.sparql.ast.QueryHints; /** * This is identical to the {@link BufferChunkCapacityHint}, but it is accessed * through the well known name {@link QueryHints#CHUNK_SIZE}. * <p> - * Sets the capacity of the {@link IBindingSet}[]s used to accumulate a chunk of - * {@link IBindingSet}s (default - * {@value BufferAnnotations#DEFAULT_CHUNK_CAPACITY}). Partial chunks may be - * automatically combined into full chunks. + * Sets the capacity of the output buffer that used to accumulate chunks of + * solutions (default {@value BufferAnnotations#DEFAULT_CHUNK_CAPACITY}). + * Partial chunks may be automatically combined into full chunks. + * <p> + * Note: The "chunkSize" is a general property of the query engine. This query + * hint does not change the structure of the query plan, but simply serves as a + * directive to the query engine that it should allocate an output buffer for + * the operator that will emit chunks of the indicated target capacity. 
This + * query hint is allowed in any scope, but is generally used to effect the + * behavior of a join group, a subquery, or the entire query. The hint is + * transferred as an annotation onto all query plan operators generated from the + * annotated scope. * * @see BufferAnnotations#CHUNK_CAPACITY */ Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/PipelineMaxParallelHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/PipelineMaxParallelHint.java 2014-01-14 16:59:02 UTC (rev 7798) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/PipelineMaxParallelHint.java 2014-01-15 12:33:55 UTC (rev 7799) @@ -36,6 +36,14 @@ /** * Sets the maximum #of operator evaluation tasks which can execute * concurrently. + * <p> + * Note: "maxParallel" is a general property of the query engine. This query + * hint does not change the structure of the query plan, but simply serves as a + * directive to the query engine that it should not allow more than the + * indicated number of parallel instances of the operator to execute + * concurrently. This query hint is allowed in any scope. The hint is + * transferred as an annotation onto all query plan operators generated from the + * annotated scope. * * @see PipelineOp.Annotations#MAX_PARALLEL */ Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunFirstHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunFirstHint.java 2014-01-14 16:59:02 UTC (rev 7798) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunFirstHint.java 2014-01-15 12:33:55 UTC (rev 7799) @@ -35,7 +35,8 @@ import com.bigdata.rdf.sparql.ast.optimizers.ASTStaticJoinOptimizer; /** - * Query hint to run a join first in a join group. + * Query hint to run a join first in a join group. This hint must be used + * with {@link QueryHintScope#Prior}. * <p> * Note: This sets an AST annotation which is interpreted by the * {@link ASTRunFirstRunLastOptimizer} and {@link ASTStaticJoinOptimizer}. Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunLastHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunLastHint.java 2014-01-14 16:59:02 UTC (rev 7798) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunLastHint.java 2014-01-15 12:33:55 UTC (rev 7799) @@ -35,7 +35,8 @@ import com.bigdata.rdf.sparql.ast.optimizers.ASTStaticJoinOptimizer; /** - * Query hint to run a join last in a join group. + * Query hint to run a join last in a join group. This hint must be used with + * {@link QueryHintScope#Prior}. * <p> * Note: This sets an AST annotation which is interpreted by the * {@link ASTRunFirstRunLastOptimizer} and {@link ASTStaticJoinOptimizer}. 
Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunOnceHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunOnceHint.java 2014-01-14 16:59:02 UTC (rev 7798) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RunOnceHint.java 2014-01-15 12:33:55 UTC (rev 7799) @@ -40,6 +40,7 @@ * and replacing the subquery with an INCLUDE. When <code>true</code>, the * subquery will be lifted out. When <code>false</code>, the subquery will not * be lifted unless other semantics require that it be lifted out regardless. + * This hint must be used with {@link QueryHintScope#SubQuery}. * <p> * For example, the following may be used to lift out the sub-select in which it * appears into a {@link NamedSubqueryRoot}. The lifted expression will be Modified: branches/BIGDATA_RELEASE_1_3_0/build.xml =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/build.xml 2014-01-14 16:59:02 UTC (rev 7798) +++ branches/BIGDATA_RELEASE_1_3_0/build.xml 2014-01-15 12:33:55 UTC (rev 7799) @@ -385,6 +385,10 @@ overview="${bigdata.dir}/overview.html" windowtitle="bigdata® v${build.ver}" classpathref="build.classpath" + package="true" + protected="true" + public="true" + private="false" > <arg value="-J-Xmx1000m" /> <arg value="-quiet" /> @@ -401,7 +405,7 @@ <doctitle> <![CDATA[<h1>bigdata® v${build.ver}</h1>]]></doctitle> <bottom> - <![CDATA[<i>Copyright © 2006-2012 SYSTAP, LLC. All Rights Reserved.</i>]]></bottom> + <![CDATA[<i>Copyright © 2006-2014 SYSTAP, LLC. All Rights Reserved.</i>]]></bottom> <tag name="todo" scope="all" description="TODO:" /> <tag name="issue" scope="all" description="ISSUE:" /> <!--tag name="FIXME" scope="all" description="FIXME:"/--> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
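The javadoc changes above spell out which scope each hint requires: runFirst and runLast need QueryHintScope#Prior, runOnce needs QueryHintScope#SubQuery, while atOnce, chunkSize and maxParallel are allowed in any scope. The sketch below shows where such hints typically sit inside a query string. The hint: namespace URI and the hint:Query / hint:Prior magic subjects are assumptions inferred from the scope names in the javadoc, so verify them against the QueryHints documentation rather than treating them as part of this commit.

/**
 * Hypothetical sketch of a SPARQL query string carrying query hints.
 */
public class QueryHintSketch {

    // The hint: namespace URI and the hint:Query / hint:Prior subjects are
    // assumed for illustration, not taken from the commit.
    static final String QUERY =
          "PREFIX hint: <http://www.bigdata.com/queryHints#>\n"
        + "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"
        + "SELECT ?product ?label WHERE {\n"
        + "  # Query scope: vector solutions in chunks of ~1000.\n"
        + "  hint:Query hint:chunkSize \"1000\" .\n"
        + "  ?product rdfs:label ?label .\n"
        + "  # Prior scope: run the join immediately above first in its group.\n"
        + "  hint:Prior hint:runFirst \"true\" .\n"
        + "}";

    public static void main(final String[] args) {

        System.out.println(QUERY);

    }

}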
From: <tho...@us...> - 2014-01-14 16:59:09

Revision: 7798
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7798&view=rev
Author:   thompsonbry
Date:     2014-01-14 16:59:02 +0000 (Tue, 14 Jan 2014)

Log Message:
-----------
wrong version # in build.properties....

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_3_0/build.properties

Modified: branches/BIGDATA_RELEASE_1_3_0/build.properties
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/build.properties	2014-01-14 16:51:46 UTC (rev 7797)
+++ branches/BIGDATA_RELEASE_1_3_0/build.properties	2014-01-14 16:59:02 UTC (rev 7798)
@@ -65,7 +65,7 @@
log4j.version=1.2.17
fastutil.version=5.1.5
dsiutils.version=1.0.6-020610
-lgplutils.version=1.0.7-040114
+lgplutils.version=1.0.7-140114
ganglia-version=1.0.1
gas-version=0.1.0
From: <tho...@us...> - 2014-01-14 16:51:52

Revision: 7797
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7797&view=rev
Author:   thompsonbry
Date:     2014-01-14 16:51:46 +0000 (Tue, 14 Jan 2014)

Log Message:
-----------
Commit of version of new lgpl-utils jar compiled with 1.6 compatibility. Modified lgpl-utils to build for 1.6 compatibility. See #764 (Stochastic results in analytic query mode).

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties
    branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml

Added Paths:
-----------
    branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar

Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar
===================================================================
(Binary files differ)

Property changes on: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties	2014-01-14 16:50:34 UTC (rev 7796)
+++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties	2014-01-14 16:51:46 UTC (rev 7797)
@@ -20,8 +20,8 @@
# debuglevel=lines,vars,source (or any combination thereof).
javac.debuglevel=lines,vars,source
javac.verbose=off
-#javac.target=1.6
-#javac.source=1.6
+javac.target=1.6
+javac.source=1.6
javac.encoding=Cp1252

# where to find the unimi dependencies.

Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml	2014-01-14 16:50:34 UTC (rev 7796)
+++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml	2014-01-14 16:51:46 UTC (rev 7797)
@@ -85,6 +85,8 @@
	<javac destdir="${build.dir}/classes" classpathref="build.classpath"
	       debug="${javac.debug}" debuglevel="${javac.debuglevel}" verbose="${javac.verbose}"
	       encoding="${javac.encoding}"
+	       source="${javac.source}"
+	       target="${javac.target}"
	>
	<!-- note: must also specify -bootclasspath and -extdirs when cross-compiling -->
	<!-- target="${javac.target}" source="${javac.source}" -->
From: <tho...@us...> - 2014-01-14 16:50:40

Revision: 7796
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7796&view=rev
Author:   thompsonbry
Date:     2014-01-14 16:50:34 +0000 (Tue, 14 Jan 2014)

Log Message:
-----------
Commit of version of new lgpl-utils jar compiled with 1.6 compatibility.

Removed Paths:
-------------
    branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar

Deleted: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar
===================================================================
(Binary files differ)
From: <tho...@us...> - 2014-01-14 16:06:29
Revision: 7795 http://bigdata.svn.sourceforge.net/bigdata/?rev=7795&view=rev Author: thompsonbry Date: 2014-01-14 16:06:20 +0000 (Tue, 14 Jan 2014) Log Message: ----------- Commit provides partial fix for #763. Martyn has modified the CustomByteArrayFrontCodedList to support search against a front-coded list with duplicate keys. However, the tests developed for the HTree show a problem recovering all keys when duplicates are inserted. I have extended the tests that he developed to cover more dups and a variety of valued for the #of address bits in the HTree. The dups test is now integrated into CI and will show 2 new test failures. I have generated a new lgpl-utils dependency, published it to our maven repo, and updated the top-level build.properties, .classpath, and pom.xml files. mvn deploy:deploy-file \ -DgroupId=com.bigdata \ -DartifactId=lgpl-utils \ -Dversion=1.0.7-011414 \ -Dpackaging=jar \ -DrepositoryId=bigdata.releases \ -Durl=scpexe://www.systap.com/srv/www/htdocs/systap.com/maven/releases/ \ -Dfile=bigdata/lib/lgpl-utils-1.0.7-011414.jar Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/.classpath branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java branches/BIGDATA_RELEASE_1_3_0/build.properties branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java branches/BIGDATA_RELEASE_1_3_0/pom.xml Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java Removed Paths: ------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.6-020610.jar Modified: branches/BIGDATA_RELEASE_1_3_0/.classpath =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/.classpath 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/.classpath 2014-01-14 16:06:20 UTC (rev 7795) @@ -32,7 +32,7 @@ <classpathentry kind="src" path="bigdata-gas/src/java"/> <classpathentry kind="src" path="bigdata-gas/src/test"/> <classpathentry exported="true" kind="lib" path="bigdata/lib/dsi-utils-1.0.6-020610.jar"/> - <classpathentry exported="true" kind="lib" path="bigdata/lib/lgpl-utils-1.0.6-020610.jar"/> + <classpathentry exported="true" kind="lib" path="bigdata/lib/lgpl-utils-1.0.7-140114.jar"/> <classpathentry kind="lib" path="bigdata-jini/lib/apache/zookeeper-3.3.3.jar"/> <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-continuation-7.2.2.v20101205.jar"/> <classpathentry exported="true" kind="lib" path="bigdata/lib/jetty/jetty-http-7.2.2.v20101205.jar"/> Deleted: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.6-020610.jar =================================================================== (Binary files differ) Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar =================================================================== (Binary files differ) Property changes on: branches/BIGDATA_RELEASE_1_3_0/bigdata/lib/lgpl-utils-1.0.7-140114.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java =================================================================== --- 
branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestAll_HTree.java 2014-01-14 16:06:20 UTC (rev 7795) @@ -92,6 +92,8 @@ suite.addTestSuite(TestReopen.class); // test of storing null values under a key with persistence. suite.addTestSuite(TestNullValues.class); + // test duplicate keys (with index checkpoint). + suite.addTestSuite(TestDuplicates.class); /* * test of transient HTree's (no backing store). Added: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/htree/TestDuplicates.java 2014-01-14 16:06:20 UTC (rev 7795) @@ -0,0 +1,297 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.htree; + +import java.io.IOException; + +import com.bigdata.btree.ITupleIterator; +import com.bigdata.rawstore.IRawStore; +import com.bigdata.rawstore.SimpleMemoryRawStore; + +/** + * Test {@link HTree} with duplicate Keys. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/763" > + * Stochastic Results With Analytic Query Mode </a> + * + * @author Martyn Cutcher + */ +public class TestDuplicates extends AbstractHTreeTestCase { + + /** + * + */ + public TestDuplicates() { + + } + + /** + * @param name + */ + public TestDuplicates(String name) { + super(name); + } + +// private static final boolean bufferNodes = true; + + /** + * Tests the ability to store values against duplicate + * keys. + * + * @throws IOException + * @throws Exception + */ + public void test_duplicateKeys() throws IOException, Exception { + + final IRawStore store = new SimpleMemoryRawStore(); + + try { + + HTree htree = getHTree(store, 3/* addressBits */, + false/* rawRecords */, true/* persistent */); + + final byte[] k1 = new byte[] { 1 }; + final byte[] v2 = new byte[] { 2 }; + final byte[] v3 = new byte[] { 3 }; + + assertNull(htree.lookupFirst(k1)); + assertFalse(htree.contains(k1)); + + assertNull(htree.insert(k1, v2)); + + assertEquals(htree.lookupFirst(k1), v2); + assertTrue(htree.contains(k1)); + + assertNull(htree.insert(k1, v3)); + + // test before checkpoint. + assertEquals(2, getCount(htree, k1)); + + final long addrCheckpoint1 = htree.writeCheckpoint(); + + htree = HTree.load(store, addrCheckpoint1, true/* readOnly */); + + assertEquals(htree.lookupFirst(k1), v3); + assertTrue(htree.contains(k1)); + + // test after checkpoint. 
+ assertEquals(2, getCount(htree, k1)); + + } finally { + + store.destroy(); + + } + + } + + public void test_duplicateKeyRangeScans_3bits_500dups() { + + doTest(3, 500); + + } + + public void test_duplicateKeyRangeScans_4bits_500dups() { + + doTest(4, 500); + + } + + public void test_duplicateKeyRangeScans_10bits_500dups() { + + doTest(10, 500); + + } + + public void test_duplicateKeyRangeScans_3bits_5000dups() { + + doTest(3, 5000); + + } + + public void test_duplicateKeyRangeScans_4bits_5000dups() { + + doTest(4, 5000); + + } + + public void test_duplicateKeyRangeScans_10bits_5000dups() { + + doTest(10, 5000); + + } + + /** + * Test helper for a test based on duplicate keys. The test is designed to + * verify that the keys are stored correctly and that a scan of the records + * having that key returns all entries stored under that key. + * + * @param addressBits + * The #of address bits. + * @param ndups + * The #of duplicate entries for a given key. + * + * FIXME If the addressBits is 10 then this test can fail for + * specific dups values - eg 500 or 2000. This appears to be an + * independent failure at the {@link HTree} layer rather than in + * the key buffer search layer. + */ + private void doTest(final int addressBits, final int ndups) { + + final IRawStore store = new SimpleMemoryRawStore(); + // final IRawStore store = getRWStore(); + + try { + HTree htree = getHTree(store, addressBits, false/* rawRecords */, + true/* persistent */); + + final byte[] k1 = new byte[] { 1, 2, 3, 4 }; + final byte[] k2 = new byte[] { 2, 3, 4 }; + final byte[] k3 = new byte[] { 3, 4 }; + final byte[] v1 = new byte[] { 1 }; + final byte[] v2 = new byte[] { 2 }; + final byte[] v3 = new byte[] { 3 }; + + for (int i = 0; i < ndups; i++) { + assertNull(htree.insert(k1, v1)); + assertNull(htree.insert(k2, v2)); + assertNull(htree.insert(k3, v3)); + } + + assertTrue(htree.contains(k1)); // 2000 dups fails here with 10 bit depth + assertTrue(htree.contains(k2)); + assertTrue(htree.contains(k3)); + + // Check first with MutableBuckets + assertEquals(ndups, getCount(htree, k1)); + assertEquals(ndups, getCount(htree, k2)); + assertEquals(ndups, getCount(htree, k3)); + + final long addrCheckpoint1 = htree.writeCheckpoint(); + + htree = HTree.load(store, addrCheckpoint1, true/* readOnly */); + + assertTrue(htree.contains(k1)); + assertTrue(htree.contains(k2)); + assertTrue(htree.contains(k3)); + + // Test after checkpoint. + assertEquals(ndups, getCount(htree, k1)); + assertEquals(ndups, getCount(htree, k2)); + assertEquals(ndups, getCount(htree, k3));// 500 dups fails here with 10bit depth + + } finally { + + store.destroy(); + + } + + } + +// private IRawStore getRWStore() { +// +// final Properties properties = getProperties(); +// +// properties.setProperty(Options.CREATE_TEMP_FILE, "true"); +// +// properties.setProperty(Options.DELETE_ON_EXIT, "true"); +// +// properties.setProperty(Options.BUFFER_MODE, BufferMode.DiskRW.toString()); +// +// properties.setProperty(Options.WRITE_CACHE_ENABLED, "" + true); +// +// return new Journal(properties);//.getBufferStrategy(); +// +// } + + /** + * Return the #of entries having the specified key. + * + * @param htree + * The index. + * @param k1 + * The key. + * @return + */ + private int getCount(final HTree htree, final byte[] k1) { + final ITupleIterator<?> iter = htree.lookupAll(k1); + int count = 0; + while (iter.hasNext()) { + iter.next(); + count++; + } + + return count; + } + + /** + * Simple test where the keys and the values are dups. 
+ * + * @throws IOException + * @throws Exception + */ + public void test_duplicateKeyValues() throws IOException, Exception { + + final IRawStore store = new SimpleMemoryRawStore(); + + try { + + HTree htree = getHTree(store, 3/* addressBits */, + false/* rawRecord */, true/* persistent */); + + final byte[] k1 = new byte[] { 1 }; + final byte[] v2 = new byte[] { 2 }; + + assertNull(htree.lookupFirst(k1)); + assertFalse(htree.contains(k1)); + + assertNull(htree.insert(k1, v2)); + + assertEquals(htree.lookupFirst(k1), v2); + assertTrue(htree.contains(k1)); + + assertNull(htree.insert(k1, v2)); + + // before checkpoint. + assertEquals(2, getCount(htree, k1)); + + final long addrCheckpoint1 = htree.writeCheckpoint(); + + htree = HTree.load(store, addrCheckpoint1, true/* readOnly */); + + assertEquals(htree.lookupFirst(k1), v2); + assertTrue(htree.contains(k1)); + + // after checkpoint. + assertEquals(2, getCount(htree, k1)); + + } finally { + + store.destroy(); + + } + + } + +} Modified: branches/BIGDATA_RELEASE_1_3_0/build.properties =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/build.properties 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/build.properties 2014-01-14 16:06:20 UTC (rev 7795) @@ -65,7 +65,7 @@ log4j.version=1.2.17 fastutil.version=5.1.5 dsiutils.version=1.0.6-020610 -lgplutils.version=1.0.6-020610 +lgplutils.version=1.0.7-040114 ganglia-version=1.0.1 gas-version=0.1.0 Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.properties 2014-01-14 16:06:20 UTC (rev 7795) @@ -39,7 +39,7 @@ release.dir=ant-release # The build version. -build.ver=1.0.6 +build.ver=1.0.7 # Set true to do a snapshot build. This changes the value of ${version} to # include the date. Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/build.xml 2014-01-14 16:06:20 UTC (rev 7795) @@ -124,7 +124,7 @@ <bottom> <![CDATA[ <i> -Portions copyright © 2006-2009 SYSTAP, LLC. All Rights Reserved.<br> +Portions copyright © 2006-2014 SYSTAP, LLC. All Rights Reserved.<br> Portions copyright © 2005-2009 Sebastiano Vigna </i>]]></bottom> <tag name="todo" scope="all" description="TODO:" /> Modified: branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/lgpl-utils/src/java/it/unimi/dsi/fastutil/bytes/custom/CustomByteArrayFrontCodedList.java 2014-01-14 16:06:20 UTC (rev 7795) @@ -1373,7 +1373,7 @@ // final int base = 0; - final BackingBuffer bb = this.bb; +// final BackingBuffer bb = this.bb; /* * We will test each entry having an index that is an even multiple of @@ -1396,23 +1396,8 @@ /* * Compare the probe with the full length byte[] at index [mid]. */ - final int tmp; - { + final int tmp = comparePos(mid, key); - // The index into the backing buffer of index [mid]. 
- int pos = p[mid]; - - // The #of bytes in the full length byte[] at index [mid]. - final int blen = bb.readInt(pos); - - // Skip the #of bytes required to code that length. - pos += count(blen); - - // Compare key vs actual (in buffer). - tmp = compareBytes(key, 0, key.length, bb, pos, blen); - - } - if (tmp > 0) { // Actual GT probe, restrict lower bound and try again. @@ -1425,10 +1410,19 @@ } else { - // Found: return offset. + // duplicate check to see if previous is also a match + if (mid > 0 && comparePos(mid - 1, key) == 0) { - return offset; + // in which case set it as the highest + high = mid - 1; + } else { + + // Found: return offset. + return offset; + + } + } } @@ -1440,7 +1434,35 @@ return -(offset + 1); } + + /** + * Compares the caller's key to a full length key at a specific offset + * in the {@link BackingBuffer}. + * + * @param index + * The index into the full length keys. + * @param key + * The probe key. + * + * @return A value which indicates whether the key at that offset into the + * backing buffer is LT, GT, or EQ to the caller's key. + */ + private int comparePos(final int index, final byte[] key) { + // The index into the backing buffer of index [index]. + int pos = p[index]; + + // The #of bytes in the full length byte[] at index [index]. + final int blen = bb.readInt(pos); + + // Skip the #of bytes required to code that length. + pos += count(blen); + + // Compare key vs actual (in buffer). + return compareBytes(key, 0, key.length, bb, pos, blen); + + } + /** * Compare up to <i>len</i> bytes in <i>a</i> interpreted as unsigned bytes * against the bytes in the {@link BackingBuffer} starting at offset Modified: branches/BIGDATA_RELEASE_1_3_0/pom.xml =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/pom.xml 2014-01-14 13:57:27 UTC (rev 7794) +++ branches/BIGDATA_RELEASE_1_3_0/pom.xml 2014-01-14 16:06:20 UTC (rev 7795) @@ -94,7 +94,7 @@ <log4j.version>1.2.17</log4j.version> <fastutil.version>5.1.5</fastutil.version> <dsiutils.version>1.0.6-020610</dsiutils.version> - <lgplutils.version>1.0.6-020610</lgplutils.version> + <lgplutils.version>1.0.7-140114</lgplutils.version> <bigdata.ganglia.version>1.0.1</bigdata.ganglia.version> </properties> <!-- TODO Can we declare the versions of the dependencies here as @@ -387,11 +387,11 @@ mvn deploy:deploy-file \ -DgroupId=com.bigdata \ -DartifactId=lgpl-utils \ - -Dversion=1.0.6-020610 \ + -Dversion=1.0.7-140114 \ -Dpackaging=jar \ -DrepositoryId=bigdata.releases \ -Durl=scpexe://www.systap.com/srv/www/htdocs/systap.com/maven/releases/ \ - -Dfile=bigdata/lib/lgpl-utils-1.0.6-020610.jar + -Dfile=bigdata/lib/lgpl-utils-1.0.7-140114.jar --> <groupId>com.bigdata</groupId> <artifactId>lgpl-utils</artifactId> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
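The CustomByteArrayFrontCodedList change in the commit above makes the binary search duplicate-aware: when the probe matches, the search checks whether the previous entry also matches and, if so, keeps lowering the upper bound so that the offset of the first duplicate is returned. The standalone sketch below illustrates that leftmost-match idea over a plain int[]. It is an illustration of the technique, not the patched code; the class and method names are made up.

import java.util.Arrays;

/**
 * Illustration of a binary search that returns the FIRST index of a key in a
 * sorted array containing duplicates (the behavior the patch adds for
 * front-coded keys). Not the patched class itself.
 */
public class LeftmostSearchSketch {

    /** Returns the first index holding key, or -(insertionPoint + 1) if absent. */
    static int searchFirst(final int[] a, final int key) {
        int low = 0, high = a.length - 1;
        while (low <= high) {
            final int mid = (low + high) >>> 1;
            if (a[mid] < key) {
                low = mid + 1;            // probed entry LT key: raise lower bound
            } else if (a[mid] > key) {
                high = mid - 1;           // probed entry GT key: lower upper bound
            } else if (mid > 0 && a[mid - 1] == key) {
                high = mid - 1;           // previous entry is also a match: keep searching down
            } else {
                return mid;               // found the leftmost duplicate
            }
        }
        return -(low + 1);                // not found: encode the insertion point
    }

    public static void main(final String[] args) {
        final int[] a = { 1, 2, 2, 2, 3, 4 };
        System.out.println(searchFirst(a, 2));                              // prints 1
        System.out.println(Arrays.toString(a) + " -> " + searchFirst(a, 5)); // prints -7
    }

}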
From: <tho...@us...> - 2014-01-14 13:57:34
Revision: 7794 http://bigdata.svn.sourceforge.net/bigdata/?rev=7794&view=rev Author: thompsonbry Date: 2014-01-14 13:57:27 +0000 (Tue, 14 Jan 2014) Log Message: ----------- removed redundant logging for cardinality underflow. javadoc edits. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 13:34:22 UTC (rev 7793) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 13:57:27 UTC (rev 7794) @@ -545,23 +545,12 @@ * Show information about the paths and the paths that are * experiencing cardinality underflow. */ - + log.warn("Cardinality estimate underflow - resampling: round=" + round + ", npaths=" + paths.length + ", nunderflow=" + nunderflow + ", limit=" + limit + "\n" - + showTable(paths)); + + showTable(paths, null/* pruned */, edgeSamples)); - for(Path p : paths) { - final EdgeSample edgeSample; - synchronized(edgeSamples) { - edgeSample = edgeSamples.get(new PathIds(p)); - } - if (edgeSample.isUnderflow()) { - log.warn("Underflow on path::\n" - + showPath(p, edgeSamples)); - } - } - } if (nunderflow > 0) { @@ -1474,6 +1463,11 @@ * * TODO Only sample vertices with an index. * + * TODO If any required join has a vertex with a proven exact + * cardinality of zero, then there are no solutions for the join + * group. Throw a {@link NoSolutionsException} and have the + * caller handle it? + * * TODO Consider other cases where we can avoid sampling a vertex * or an initial edge. * <p> @@ -1486,21 +1480,16 @@ * not share a variable directly and hence will materialize the * full cross product before filtering which is *really* * expensive. - * - * FIXME We need attach any access path filters that are required - * for named graphs or scale-out for the RTO to function in those - * environments. We DO NOT need to attach SPARQL FILTERs here - - * those get applied when we evaluate the cutoff joins from one - * vertex to another. */ - private void sampleAllVertices(final QueryEngine queryEngine, final int limit) { + private void sampleAllVertices(final QueryEngine queryEngine, + final int limit) { final Map<Vertex, AtomicInteger> vertexLimit = new LinkedHashMap<Vertex, AtomicInteger>(); - + for (Vertex v : V) { - vertexLimit.put(v,new AtomicInteger(limit)); - + vertexLimit.put(v, new AtomicInteger(limit)); + } sampleVertices(queryEngine, vertexLimit); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-14 13:34:28
Revision: 7793 http://bigdata.svn.sourceforge.net/bigdata/?rev=7793&view=rev Author: thompsonbry Date: 2014-01-14 13:34:22 +0000 (Tue, 14 Jan 2014) Log Message: ----------- Added logic to select the join path having the minimum sumEstCard if there is cardinality underflow for some of the join paths. This addresses a corner case where the RTO has multiple possible paths. If all paths have underflow, it accepts the first path. Note: This does not eliminate paths for the same sets of vertices where some (or all) paths have underflow during the expansion rounds. Therefore it might do too much work on such queries. Note: We should, perhaps, throw out a NoSolutionsException instead when all paths have underflow. See #64 (RTO). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 12:59:09 UTC (rev 7792) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 13:34:22 UTC (rev 7793) @@ -498,7 +498,8 @@ final int nvertices = V.length; - int round = 1; + int round = 1; // #of rounds. + int nunderflow = 0; // #of paths with card. underflow (no solutions). while (paths.length > 0 && round < nvertices - 1) { @@ -528,8 +529,6 @@ * limit, e.g., by specifying a MAX_LIMIT annotation on the * JoinGraph operator. */ - int nunderflow = 0; - for (int i = 0; i < 3; i++) { nunderflow = resamplePaths(queryEngine, limit, round, paths, @@ -558,7 +557,7 @@ edgeSample = edgeSamples.get(new PathIds(p)); } if (edgeSample.isUnderflow()) { - log.warn("Underflow on path::" + log.warn("Underflow on path::\n" + showPath(p, edgeSamples)); } } @@ -584,24 +583,84 @@ throw new NoSolutionsException(); } + + /* + * In general, there should be one winner. However, if there are paths + * with cardinality estimate underflow (that is, paths that do not have + * any solutions when sampling the path) then there can be multiple + * solutions. + * + * When this occurs we have a choice. We can either the path with the + * minimum cost (min sumEstCard) or we can take a path that does not + * have a cardinality estimate underflow because we actually have an + * estimate for that path. In principle, the path with the minimum + * estimated cost should be the better choice, but we do not have any + * information about the order of the joins beyond the point where the + * cardinality underflow begins on that path. If we take a path that + * does not have a cardinality estimate underflow, then at least we know + * that the join order has been optimized for the entire path. + */ + + final Path selectedPath; - // Should be one winner. if (paths.length != 1) { - throw new AssertionError("Expected one path but have " - + paths.length + " paths."); + log.warn("Multiple paths exist: npaths=" + paths.length + + ", nunderflow=" + nunderflow + "\n" + + showTable(paths, null/* pruned */, edgeSamples)); + + Path t = null; + for (Path p : paths) { + + if (p.edgeSample.isUnderflow()) { + + /* + * Skip paths with cardinality estimate underflow. They are + * not fully tested in the data since no solutions have made + * it through all of the joins. + */ + + continue; + + } + + if (t == null || p.sumEstCard < t.sumEstCard) { + + // Accept path with the least cost. 
+ t = p; + + } + + } + + if (t == null) { + + /* Arbitrary choice if all paths underflow. + * + * TODO Or throw out NoSolutionsException? + */ + t = paths[0]; + + } + + selectedPath = t; + + } else { + + selectedPath = paths[0]; + } if (log.isInfoEnabled()) { log.info("\n*** Selected join path: " - + Arrays.toString(paths[0].getVertexIds()) + "\n" - + showPath(paths[0], edgeSamples)); + + Arrays.toString(selectedPath.getVertexIds()) + "\n" + + showPath(selectedPath, edgeSamples)); } - return paths[0]; + return selectedPath; } @@ -1941,9 +2000,9 @@ } /** - * Comma delimited table showing the estimated join hit ratio, the estimated - * cardinality, and the set of vertices for each of the specified join - * paths. + * Return a comma delimited table showing the estimated join hit ratio, the + * estimated cardinality, and the set of vertices for each of the specified + * join paths. * * @param a * A set of paths (typically those before pruning). @@ -1954,8 +2013,35 @@ * * @return A table with that data. */ - static public String showTable(final Path[] a,final Path[] pruned) { - final StringBuilder sb = new StringBuilder(); + static public String showTable(final Path[] a, final Path[] pruned) { + + return showTable(a, pruned, null/* edgeSamples */); + + } + + /** + * Return a comma delimited table showing the estimated join hit ratio, the + * estimated cardinality, and the set of vertices for each of the specified + * join paths. + * + * @param a + * A set of paths (typically those before pruning). + * @param pruned + * The set of paths after pruning (those which were retained) + * (optional). When given, the paths which were pruned are marked + * in the table. + * @param edgeSamples + * When non-<code>null</code>, the details will be shown (using + * {@link #showPath(Path, Map)}) for each path that is + * experiencing cardinality estimate underflow (no solutions in + * the data). + * + * @return A table with that data. + */ + static public String showTable(final Path[] a, final Path[] pruned, + final Map<PathIds, EdgeSample> edgeSamples) { + final StringBuilder sb = new StringBuilder(128 * a.length); + final List<Path> underflowPaths = new LinkedList<Path>(); final Formatter f = new Formatter(sb); f.format("%-4s %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = %10s %10s%1s : %10s %10s %10s %10s", "path",// @@ -2030,8 +2116,28 @@ // sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() // + ")"); sb.append("\n"); + if(x.edgeSample.isUnderflow()) { + underflowPaths.add(x); + } } + + if (edgeSamples != null && !underflowPaths.isEmpty()) { + + // Show paths with cardinality estimate underflow. + sb.append("\nPaths with cardinality estimate underflow::\n"); + + for (Path p : underflowPaths) { + + sb.append(showPath(p, edgeSamples)); + + sb.append("----\n"); + + } + + } + return sb.toString(); + } /** @@ -2039,14 +2145,17 @@ * join hit ratio for each step in the path. * * @param p - * The join path. + * The join path (required). * @param edgeSamples - * A map containing the samples utilized by the {@link Path}. + * A map containing the samples utilized by the {@link Path} + * (required). 
*/ static public String showPath(final Path x, final Map<PathIds, EdgeSample> edgeSamples) { if (x == null) throw new IllegalArgumentException(); + if (edgeSamples == null) + throw new IllegalArgumentException(); final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
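The winner-selection rule described in r7793 can be read on its own, apart from the JGraph plumbing: skip any path whose cutoff sample underflowed, take the cheapest remaining path by cumulative estimated cardinality (sumEstCard), and fall back to the first path if every candidate underflowed. The sketch below is a simplified illustration of that rule; the Candidate type is a hypothetical stand-in for the Path/EdgeSample pair and this is not the Bigdata implementation itself.

// Simplified illustration of the r7793 selection rule (not the JGraph code).
final class PathChooser {

    static final class Candidate {
        final long sumEstCard;   // cumulative estimated cardinality of the path
        final boolean underflow; // true iff the cutoff sample produced no solutions
        Candidate(final long sumEstCard, final boolean underflow) {
            this.sumEstCard = sumEstCard;
            this.underflow = underflow;
        }
    }

    static Candidate choose(final Candidate[] paths) {
        if (paths == null || paths.length == 0)
            throw new IllegalArgumentException("no paths");
        Candidate best = null;
        for (Candidate p : paths) {
            if (p.underflow)
                continue; // skip paths that were never fully tested in the data
            if (best == null || p.sumEstCard < best.sumEstCard)
                best = p; // accept the path with the least estimated cost
        }
        // Arbitrary choice when all paths underflow; r7793 notes that throwing
        // NoSolutionsException here is a possible alternative.
        return best != null ? best : paths[0];
    }
}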
From: <tho...@us...> - 2014-01-14 12:59:16
Revision: 7792 http://bigdata.svn.sourceforge.net/bigdata/?rev=7792&view=rev Author: thompsonbry Date: 2014-01-14 12:59:09 +0000 (Tue, 14 Jan 2014) Log Message: ----------- Fixing an NPE in RTO logging. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/PathIds.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 12:55:58 UTC (rev 7791) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 12:59:09 UTC (rev 7792) @@ -555,7 +555,7 @@ for(Path p : paths) { final EdgeSample edgeSample; synchronized(edgeSamples) { - edgeSample = edgeSamples.get(p.getVertexIds()); + edgeSample = edgeSamples.get(new PathIds(p)); } if (edgeSample.isUnderflow()) { log.warn("Underflow on path::" Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/PathIds.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/PathIds.java 2014-01-14 12:55:58 UTC (rev 7791) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/PathIds.java 2014-01-14 12:59:09 UTC (rev 7792) @@ -50,6 +50,25 @@ } + /** + * Convenience constructor. + * + * @param p + * A path. + */ + public PathIds(final Path p) { + + this(p.getVertexIds()); + + } + + /** + * Core constructor. + * + * @param ids + * The ordered set of vertex identifiers for some join path + * segment. + */ public PathIds(final int[] ids) { if (ids == null) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
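The NPE fixed in r7792 is an instance of a general Java pitfall: int[] inherits identity-based hashCode/equals, so a map keyed by PathIds can never be hit with a raw vertex-id array and the lookup silently returns null. The demo below is a generic illustration of that pitfall and of the value-equality wrapper pattern; the Ids class is a hypothetical stand-in, not the actual PathIds implementation.

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class ArrayKeyDemo {

    // Minimal value-equality wrapper, analogous in spirit to PathIds.
    static final class Ids {
        private final int[] ids;
        Ids(final int[] ids) { this.ids = ids.clone(); }
        @Override public int hashCode() { return Arrays.hashCode(ids); }
        @Override public boolean equals(final Object o) {
            return o instanceof Ids && Arrays.equals(ids, ((Ids) o).ids);
        }
    }

    public static void main(final String[] args) {
        final Map<Ids, String> samples = new HashMap<Ids, String>();
        samples.put(new Ids(new int[] { 1, 2, 3 }), "sample");

        // Succeeds: Ids compares the wrapped arrays by value.
        System.out.println(samples.get(new Ids(new int[] { 1, 2, 3 }))); // sample

        // Misses: arrays use identity hashCode/equals, so an equal-but-distinct
        // key finds nothing, and dereferencing the null result is the NPE.
        final Map<int[], String> byArray = new HashMap<int[], String>();
        byArray.put(new int[] { 1, 2, 3 }, "sample");
        System.out.println(byArray.get(new int[] { 1, 2, 3 })); // null
    }
}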
From: <tho...@us...> - 2014-01-14 12:56:04
Revision: 7791 http://bigdata.svn.sourceforge.net/bigdata/?rev=7791&view=rev Author: thompsonbry Date: 2014-01-14 12:55:58 +0000 (Tue, 14 Jan 2014) Log Message: ----------- Added isUnderflow() method. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java 2014-01-14 12:55:11 UTC (rev 7790) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java 2014-01-14 12:55:58 UTC (rev 7791) @@ -100,6 +100,20 @@ } /** + * Return <code>true</code> iff this sample has cardinality underflow (the + * sample is empty). Cardinality underflow occurs when the sampling process + * was unable to find any solutions. Underflow is typically addressed by + * increasing the sample size, but sometimes underflow indicates that an + * access path (if it has filters) or a join may not have any solutions in + * the data. + */ + public boolean isUnderflow() { + + return estimateEnum == EstimateEnum.Underflow; + + } + + /** * Sample. */ private final AtomicReference<IBindingSet[]> sampleRef = new AtomicReference<IBindingSet[]>(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-14 12:55:21
Revision: 7790 http://bigdata.svn.sourceforge.net/bigdata/?rev=7790&view=rev Author: thompsonbry Date: 2014-01-14 12:55:11 +0000 (Tue, 14 Jan 2014) Log Message: ----------- More information on cardinality underflow case. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 12:45:59 UTC (rev 7789) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 12:55:11 UTC (rev 7790) @@ -542,11 +542,27 @@ } + /* + * Show information about the paths and the paths that are + * experiencing cardinality underflow. + */ + log.warn("Cardinality estimate underflow - resampling: round=" + round + ", npaths=" + paths.length + ", nunderflow=" + nunderflow + ", limit=" + limit + "\n" + showTable(paths)); + for(Path p : paths) { + final EdgeSample edgeSample; + synchronized(edgeSamples) { + edgeSample = edgeSamples.get(p.getVertexIds()); + } + if (edgeSample.isUnderflow()) { + log.warn("Underflow on path::" + + showPath(p, edgeSamples)); + } + } + } if (nunderflow > 0) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-14 12:46:06
Revision: 7789 http://bigdata.svn.sourceforge.net/bigdata/?rev=7789&view=rev Author: thompsonbry Date: 2014-01-14 12:45:59 +0000 (Tue, 14 Jan 2014) Log Message: ----------- javadoc on NoSolutionsException. Showing the paths table if there is cardinality underflow to analyze query41.rq behavior with the RTO. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/NoSolutionsException.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/NoSolutionsException.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/NoSolutionsException.java 2014-01-14 00:31:41 UTC (rev 7788) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/NoSolutionsException.java 2014-01-14 12:45:59 UTC (rev 7789) @@ -1,8 +1,33 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ package com.bigdata.bop.joinGraph; /** - * Exception thrown when the join graph does not have any solutions in the - * data (running the query does not produce any results). + * Exception thrown when the join graph does not have any solutions in the data + * (running the cutoff joins to explore the join graph does not produce any + * solutions such that we can not complete a path through the join graph without + * a zero cardinality). */ public class NoSolutionsException extends RuntimeException { Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 00:31:41 UTC (rev 7788) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 12:45:59 UTC (rev 7789) @@ -544,7 +544,8 @@ log.warn("Cardinality estimate underflow - resampling: round=" + round + ", npaths=" + paths.length + ", nunderflow=" - + nunderflow + ", limit=" + limit); + + nunderflow + ", limit=" + limit + "\n" + + showTable(paths)); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-14 00:31:51
Revision: 7788 http://bigdata.svn.sourceforge.net/bigdata/?rev=7788&view=rev Author: thompsonbry Date: 2014-01-14 00:31:41 +0000 (Tue, 14 Jan 2014) Log Message: ----------- I missed some tests where the SliceOp constructor was invoked in the last commit. This should bring the build back to normal. See #64 (RTO) See #798 (Solution order not always preserved) Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java 2014-01-13 21:55:30 UTC (rev 7787) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/controller/TestSubqueryOp.java 2014-01-14 00:31:41 UTC (rev 7788) @@ -633,6 +633,7 @@ new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false)// })); final PipelineOp query = sliceOp; @@ -900,6 +901,7 @@ new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false)// })); final PipelineOp query = sliceOp; @@ -1163,6 +1165,7 @@ new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false)// })); final PipelineOp query = sliceOp; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java 2014-01-13 21:55:30 UTC (rev 7787) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java 2014-01-14 00:31:41 UTC (rev 7788) @@ -309,7 +309,8 @@ new NV(Union.Annotations.BOP_ID, sliceId),// new NV(Union.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// - new NV(PipelineOp.Annotations.SHARED_STATE,true)// + new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false)// )); final BOp query = sliceOp; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2014-01-13 21:55:30 UTC (rev 7787) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2014-01-14 00:31:41 UTC (rev 7788) @@ -337,6 +337,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// new NV(FederatedQueryEngine.Annotations.CHUNK_HANDLER, FederationChunkHandler.TEST_INSTANCE),// })// @@ -455,6 +456,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// 
+ new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// new NV(FederatedQueryEngine.Annotations.CHUNK_HANDLER, FederationChunkHandler.TEST_INSTANCE),// })// @@ -624,6 +626,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// new NV(FederatedQueryEngine.Annotations.CHUNK_HANDLER, FederationChunkHandler.TEST_INSTANCE),// })// @@ -770,6 +773,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// new NV(FederatedQueryEngine.Annotations.CHUNK_HANDLER, FederationChunkHandler.TEST_INSTANCE),// })// @@ -916,6 +920,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// new NV(FederatedQueryEngine.Annotations.CHUNK_HANDLER, FederationChunkHandler.TEST_INSTANCE),// })); @@ -1125,6 +1130,7 @@ new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// new NV(FederatedQueryEngine.Annotations.CHUNK_HANDLER, FederationChunkHandler.TEST_INSTANCE),// })); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-13 21:55:41
Revision: 7787 http://bigdata.svn.sourceforge.net/bigdata/?rev=7787&view=rev Author: thompsonbry Date: 2014-01-13 21:55:30 +0000 (Mon, 13 Jan 2014) Log Message: ----------- The root cause of the OutOfOrderEvaluationException was the ChunkedRunningQuery.HandleChunkBuffer class. That class was allowing reordering of chunks in order to output full chunks immediately and gather smaller chunks together until they can be combined as a single full chunk. The class has been rewritten and now has two distinct behaviors. If reordering is allowed, then the old behavior is preserved (except that it can output chunks of up to 150% of the target chunk size). If reordering is disallowed, then it will still combine chunks as much as possible, but not if that would violate an order preserving guarantee. The HandleChunkBuffer class breaks the semantics for ORDER BY and SLICE, both of which should preserve order. It also breaks the semantics for ORDER_BY + DISTINCT. This problem would only appear in cases where the size of the output chunks was such that a (sufficiently) full chunk would be output ahead of smaller chunks in an internal buffer. I have added a new PipelineOp annotation named REORDER_SOLUTIONS. This defaults to true, which is the historical throughput-oriented behavior. The MemorySortOp and SliceOp constructors now check for and require REORDER_SOLUTIONS := false. AST2BOpUtility has been modified to turn off REORDER_SOLUTIONS for the ORDER_BY_DISTINCT case (see #563 (ORDER BY + DISTINCT)). The AST2BOpRTO integration now runs clean when failOutOfOrderEvaluation := true. PipelineOp - addded REORDER_SOLUTIONS annotation. Defaults to true (the historical throughput oriented behavior). MemorySortOp - requires REORDER_SOLUTIONS:=false. SliceOp - requires REORDER_SOLUTIONS:=false. Reordered solutions are no longer observed during cutoff join evaluation. The failure to disable the REORDER_SOLUTIONS annotation is now detected if checking of cutoff query plans is enabled. AST2BOpUtility - now adds the REORDER_SOLUTIONS:=false annotation as necessary for SLICE, ORDER_BY and DISTINCT (when paired with ORDER_BY). 
See #64 (RTO) See #798 (Solution order not always preserved) Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_SortOp.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpJoins.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -93,6 +93,20 @@ boolean DEFAULT_SHARED_STATE = false; + /** + * When <code>true</code>, the {@link QueryEngine} MAY reorder the + * solutions as they flow through the query plan (this is done as a + * throughput optimization). When <code>false</code>, the + * {@link QueryEngine} MUST NOT reorder solutions. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/798" > + * Solution order not always preserved. </a> + */ + String REORDER_SOLUTIONS = PipelineOp.class.getName() + + ".reorderSolutions"; + + boolean DEFAULT_REORDER_SOLUTIONS = true; + /** * This option may be used to place an optional limit on the #of * concurrent tasks which may run for the same (bopId,shardId) for a @@ -295,6 +309,16 @@ } /** + * @see Annotations#REORDER_SOLUTIONS + */ + final public boolean isReorderSolutions() { + + return getProperty(Annotations.REORDER_SOLUTIONS, + Annotations.DEFAULT_REORDER_SOLUTIONS); + + } + + /** * The maximum amount of memory which may be used to buffered inputs for * this operator on the native heap. When ZERO (0), the inputs will be * buffered on the JVM heap. When {@link Long#MAX_VALUE}, an essentially Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -1292,42 +1292,15 @@ * target that sink. 
*/ private IBlockingBuffer<IBindingSet[]> newBuffer(final PipelineOp op, - final int sinkId, -// final SinkTransitionMetadata sinkTransitionMetadata, - final AtomicInteger sinkMessagesOut, final BOpStats stats) { + final int sinkId,// + final AtomicInteger sinkMessagesOut, // + final BOpStats stats// + ) { -// final MultiplexBlockingBuffer<IBindingSet[]> factory = inputBufferMap == null ? null -// : inputBufferMap.get(sinkId); -// -// if (factory != null) { -// -// return factory.newInstance(); -// -// } + return new HandleChunkBuffer(ChunkedRunningQuery.this, bopId, + partitionId, sinkId, op.getChunkCapacity(), + op.isReorderSolutions(), sinkMessagesOut, stats); -// return new HandleChunkBuffer(sinkId, sinkMessagesOut, op -// .newBuffer(stats)); - - /* - * FIXME The buffer allocated here is useless unless we play games - * in HandleChunkBuffer to combine chunks or run a thread which - * drains chunks from all operator tasks (but the task can not - * complete until it is fully drained). - */ -// final IBlockingBuffer<IBindingSet[]> b = new BlockingBuffer<IBindingSet[]>( -// op.getChunkOfChunksCapacity(), op.getChunkCapacity(), op -// .getChunkTimeout(), -// BufferAnnotations.chunkTimeoutUnit); - - return -// new SinkTransitionBuffer( - new HandleChunkBuffer( - ChunkedRunningQuery.this, bopId, partitionId, sinkId, op - .getChunkCapacity(), sinkMessagesOut, stats) -// , -// sinkTransitionMetadata) - ; - } /** @@ -1409,8 +1382,6 @@ public NoCloseBuffer(final UUID queryId, final BOp bop, final int bopId, final int partitionId, final IBlockingBuffer<E> delegate) { -// public NoCloseBuffer(final IBlockingBuffer<E> delegate) { - super(delegate); this.queryId = queryId; @@ -1429,39 +1400,23 @@ } } - // public void add(E e) { -// log.error(Arrays.toString((Object[])e)); -// super.add(e); -// } -// -// public void reset() { -// log.error(""); -// super.reset(); -// } -// -// public long flush() { -// log.error(""); -// return super.flush(); -// } - @Override public void close() { - // NOP -// log.error(""); + // NOP - This makes sure that the query buffer is not closed. } - } + } // class NoCloseBuffer /** - * Class traps {@link #add(IBindingSet[])} to handle the IBindingSet[] - * chunks as they are generated by the running operator task, invoking - * {@link ChunkedRunningQuery#handleOutputChunk(BOp, int, IBlockingBuffer)} for - * each generated chunk to synchronously emit {@link IChunkMessage}s. + * Class traps {@link #add(IBindingSet[])} to handle the {@link IBindingSet} + * [] chunks as they are generated by the running operator task, invoking + * {@link ChunkedRunningQuery#handleOutputChunk(BOp, int, IBlockingBuffer)} + * for each generated chunk to synchronously emit {@link IChunkMessage}s. * <p> * This use of this class significantly increases the parallelism and - * throughput of selective queries. If output chunks are not "handled" - * until the {@link ChunkTask} is complete then the total latency of - * selective queries is increased dramatically. + * throughput of selective queries. If output chunks are not "handled" until + * the {@link ChunkTask} is complete then the total latency of selective + * queries is increased dramatically. */ static private class HandleChunkBuffer implements IBlockingBuffer<IBindingSet[]> { @@ -1474,28 +1429,39 @@ private final int sinkId; +// /** +// * The desired chunk size. +// */ +// private final int chunkCapacity; + + /** The minimum desired chunk size (50% of the {@link #chunkCapacity}). 
*/ + private final int minChunkSize; + + /** The maximum desired chunk size (150% of the {@link #chunkCapacity}) */ + private final int maxChunkSize; /** - * The target chunk size. When ZERO (0) chunks are output immediately as - * they are received (the internal buffer is not used). + * When <code>true</code>, the buffer MAY reorder solutions. When + * <code>false</code>, it MUST NOT. */ - private final int chunkCapacity; + private final boolean reorderSolutions; -// private final SinkTransitionMetadata sinkTransitionMetadata; - private final AtomicInteger sinkMessagesOut; private final BOpStats stats; private volatile boolean open = true; -// /** -// * An internal buffer which is used if chunkCapacity != ZERO. -// */ -// private IBindingSet[] chunk = null; + /** + * A list of small chunks that will be combined into a single chunk. The + * solutions in this list are always evicted by {@link #flush()}. + */ private List<IBindingSet[]> smallChunks = null; /** - * The #of elements in the internal {@link #chunk} buffer. + * The #of elements in the {@link #smallChunks} buffer. Each element is + * an {@link IBindingSet}, so this is the number of solutions that have + * not yet been flushed through because we have not yet made up a single + * decent sized chunk. */ private int chunkSize = 0; @@ -1505,22 +1471,28 @@ * @param bopId * @param sinkId * @param chunkCapacity + * The target capacity for each chunk. + * @param reorderSolutions + * When <code>true</code>, the buffer MAY reorder solutions. + * When <code>false</code>, it MUST NOT. * @param sinkMessagesOut * @param stats */ public HandleChunkBuffer(final ChunkedRunningQuery q, final int bopId, final int partitionId, final int sinkId, final int chunkCapacity, -// final SinkTransitionMetadata sinkTransitionMetadata, + final boolean reorderSolutions, final AtomicInteger sinkMessagesOut, final BOpStats stats) { this.q = q; this.bopId = bopId; this.partitionId = partitionId; this.sinkId = sinkId; - this.chunkCapacity = chunkCapacity; -// this.sinkTransitionMetadata = sinkTransitionMetadata; +// this.chunkCapacity = chunkCapacity; + this.reorderSolutions = reorderSolutions; this.sinkMessagesOut = sinkMessagesOut; this.stats = stats; + this.minChunkSize = (chunkCapacity >> 1); // 50% + this.maxChunkSize = chunkCapacity + (chunkCapacity >> 1); // 150% } /** @@ -1531,7 +1503,11 @@ * <p> * Note: This must be synchronized in case the caller is multi-threaded * since it has a possible side effect on the internal buffer. + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/798" > + * Solution order not always preserved. </a> */ + @Override public void add(final IBindingSet[] e) { if(!open) @@ -1542,75 +1518,138 @@ partitionId, (IBindingSet[]) e); } -// for (IBindingSet bset : e) { -// sinkTransitionMetadata.handleBindingSet(bset); -// } + if (false) { + + /* + * Note: Do this INSTEAD if you want to complete disable both + * reordering and chunk combination. This should ONLY be used + * for debugging. Chunk combination is an important throughput + * enhancer. + */ + + // outputChunk(e); + + } else { + + if (reorderSolutions) { + + // Solutions MAY be reordered. + addReorderAllowed(e); + + } else { + + // Solutions MUST NOT be reordered. + addReorderNotAllowed(e); + + } + + } -// if (chunkCapacity != 0 && e.length < (chunkCapacity >> 1)) { -// /* -// * The caller's array is significantly smaller than the target -// * chunk size. Append the caller's array to the internal buffer -// * and return immediately. 
The internal buffer will be copied -// * through either in a subsequent add() or in flush(). -// */ -// synchronized (this) { -// -// if (chunk == null) -// chunk = new IBindingSet[chunkCapacity]; -// -// if (chunkSize + e.length > chunkCapacity) { -// -// // flush the buffer first. -// outputBufferedChunk(); -// -// } -// -// // copy the chunk into the buffer. -// System.arraycopy(e/* src */, 0/* srcPos */, -// chunk/* dest */, chunkSize/* destPos */, -// e.length/* length */); -// -// chunkSize += e.length; -// -// return; -// -// } -// -// } + } // add() - if (chunkCapacity != 0 && e.length < (chunkCapacity >> 1)) { + /** + * We are allowed to reorder the solutions. + * <p> + * This will reorder solutions by outputting the current chunk + * immediately if it is GTE 50% of the target chunkCapacity. This is + * also a non-blocking code path (no lock is taken in this method). + * <p> + * Otherwise, the chunk is added {@link #smallChunks} list. If the #of + * solutions on the {@link #smallChunks} reaches a threshold, then the + * {@link #smallChunks} list is converted into a single chunk an + * evicted. + */ + private void addReorderAllowed(final IBindingSet[] e) { + + if (e.length < minChunkSize) { + /* * The caller's array is significantly smaller than the target * chunk size. Append the caller's array to the internal list * and return immediately. The buffered chunks will be copied * through either in a subsequent add() or in flush(). */ + synchronized (this) { - if (chunkSize + e.length > chunkCapacity) { + if (chunkSize + e.length > maxChunkSize) { // flush the buffer first. outputBufferedChunk(); } - + if (smallChunks == null) smallChunks = new LinkedList<IBindingSet[]>(); + // Add to the buffer. smallChunks.add(e); - chunkSize += e.length; return; } + } // output the caller's chunk immediately. outputChunk(e); } + + /** + * We are not allowed to reorder the solutions. + * <p> + * This always outputs solutions in the same order that they are added. + * In order to avoid pushing through small chunks, it allows the output + * chunk to be over the target capacity (by 50%). + * + * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/798" > + * Solution order not always preserved. </a> + */ + private void addReorderNotAllowed(final IBindingSet[] e) { + synchronized (this) { + + if (chunkSize + e.length > maxChunkSize) { + + /* + * The combined chunk would be too large for the buffer. + */ + + // Flush the buffer. + outputBufferedChunk(); + + if (e.length > minChunkSize) { + + /* + * The internal buffer is empty. The chunk is big + * enough. Sent it through immediately. + */ + + outputChunk(e); + + return; + + } + + } + + /* + * Add the chunk to the internal buffer. + */ + + if (smallChunks == null) + smallChunks = new LinkedList<IBindingSet[]>(); + + // Add to the buffer. + smallChunks.add(e); + chunkSize += e.length; + + } // synchronized(this) + + } + /** * Output a chunk, updating the counters. * @@ -1630,12 +1669,6 @@ sinkMessagesOut.addAndGet(messagesOut); -// try { -// q.outstandingMessageSemaphore.acquire(); -// } catch (InterruptedException e1) { -// throw new RuntimeException(e1); -// } - } /** @@ -1643,15 +1676,6 @@ */ synchronized // Note: has side-effect on internal buffer. private void outputBufferedChunk() { -// if (chunk == null || chunkSize == 0) -// return; -// if (chunkSize != chunk.length) { -// // truncate the array. 
-// chunk = Arrays.copyOf(chunk, chunkSize); -// } -// outputChunk(chunk); -// chunkSize = 0; -// chunk = null; if (smallChunks == null || chunkSize == 0) { return; } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -18,6 +18,7 @@ import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.solutions.SliceOp.Annotations; import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -102,6 +103,11 @@ + "=" + isLastPassRequested()); } + // ORDER_BY must preserve order. + if (isReorderSolutions()) + throw new UnsupportedOperationException( + Annotations.REORDER_SOLUTIONS + "=" + isReorderSolutions()); + // required parameter. getValueComparator(); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -150,7 +150,12 @@ if (!isSharedState()) throw new UnsupportedOperationException(Annotations.SHARED_STATE + "=" + isSharedState()); - + + // SLICE must preserve order. + if (isReorderSolutions()) + throw new UnsupportedOperationException( + Annotations.REORDER_SOLUTIONS + "=" + isReorderSolutions()); + } /** Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -489,6 +489,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// // new NV( // QueryEngineTestAnnotations.COMBINE_RECEIVED_CHUNKS, // false),// @@ -784,6 +785,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// // // Require the chunked running query impl. 
// new NV(QueryEngine.Annotations.RUNNING_QUERY_CLASS, // ChunkedRunningQuery.class.getName()),// @@ -950,6 +952,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })// ); @@ -1102,6 +1105,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })// ); @@ -1580,6 +1584,7 @@ new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); final PipelineOp query = sliceOp; @@ -1952,6 +1957,7 @@ new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); final PipelineOp query = sliceOp; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -200,6 +200,7 @@ new NV(SliceOp.Annotations.LIMIT, limit),// new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); final UUID queryId = UUID.randomUUID(); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_SortOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_SortOp.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_SortOp.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -72,6 +72,7 @@ */ public class TestQueryEngine_SortOp extends TestCase2 { + @Override public Properties getProperties() { final Properties p = new Properties(super.getProperties()); @@ -240,6 +241,7 @@ new NV(MemorySortOp.Annotations.MAX_PARALLEL, 1),// // new NV(MemorySortOp.Annotations.SHARED_STATE, true),// new NV(MemorySortOp.Annotations.LAST_PASS, true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); final UUID queryId = UUID.randomUUID(); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -42,6 +42,7 @@ import com.bigdata.bop.IQueryContext; import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; import com.bigdata.bop.Var; import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.bop.engine.AbstractQueryEngineTestCase; @@ -132,6 +133,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new 
NV(MemorySortOp.Annotations.MAX_PARALLEL, 1),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// // new NV(MemorySortOp.Annotations.SHARED_STATE, true),// new NV(MemorySortOp.Annotations.LAST_PASS, true),// })); @@ -244,6 +246,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(MemorySortOp.Annotations.MAX_PARALLEL, 1),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// // new NV(MemorySortOp.Annotations.SHARED_STATE, true),// new NV(MemorySortOp.Annotations.LAST_PASS, true),// })); @@ -360,6 +363,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(MemorySortOp.Annotations.MAX_PARALLEL, 1),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// // new NV(MemorySortOp.Annotations.SHARED_STATE, true),// new NV(MemorySortOp.Annotations.LAST_PASS, true),// })); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -177,6 +177,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); assertEquals("offset", offset, query.getOffset()); @@ -288,6 +289,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); assertEquals("offset", offset, query.getOffset()); @@ -367,6 +369,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); assertEquals("offset", offset, query.getOffset()); @@ -442,6 +445,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); assertEquals("offset", offset, query.getOffset()); @@ -530,6 +534,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); assertEquals("offset", 0L, query.getOffset()); @@ -591,6 +596,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); assertEquals("offset", -1L, query.getOffset()); @@ -632,6 +638,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); assertEquals("offset", 1L, query.getOffset()); @@ -709,6 +716,7 @@ new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(PipelineOp.Annotations.SHARED_STATE,true),// + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,false),// })); final SliceStats stats = query.newStats(); Modified: 
branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -521,6 +521,11 @@ * "no reordering" guarantee. */ + // disable reordering of solutions for cutoff joins. + final boolean reorderSolutions = (cutoffLimit != null) ? false + : PipelineJoin.Annotations.DEFAULT_REORDER_SOLUTIONS; + + // disable operator parallelism for cutoff joins. final int maxParallel = cutoffLimit != null ? 1 : PipelineOp.Annotations.DEFAULT_MAX_PARALLEL; @@ -530,6 +535,7 @@ new NV(ChunkedMaterializationOp.Annotations.TIMESTAMP, timestamp), // new NV(ChunkedMaterializationOp.Annotations.MATERIALIZE_INLINE_IVS, materializeInlineIvs), // new NV(PipelineOp.Annotations.SHARED_STATE, !ctx.isCluster()),// live stats, but not on the cluster. + new NV(PipelineOp.Annotations.REORDER_SOLUTIONS,reorderSolutions),// new NV(PipelineOp.Annotations.MAX_PARALLEL,maxParallel),// new NV(BOp.Annotations.BOP_ID, ctx.nextId())// ), queryHints, ctx); @@ -729,6 +735,8 @@ new ConditionalRoutingOp(leftOrEmpty(left),// new NV(BOp.Annotations.BOP_ID, ctx.nextId()),// new NV(PipelineOp.Annotations.MAX_PARALLEL, 1),// + // disallow reordering of solutions by the query engine. + new NV(PipelineJoin.Annotations.REORDER_SOLUTIONS, Boolean.FALSE),// new NV(ConditionalRoutingOp.Annotations.CONDITION, c)// ), queryHints, ctx); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpJoins.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpJoins.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpJoins.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -1050,6 +1050,9 @@ .get(Annotations.SIMPLE_JOIN)).booleanValue() && !AST2BOpRTO.runAllJoinsAsComplexJoins; + // disallow reordering of solutions by the query engine. + map.put(PipelineJoin.Annotations.REORDER_SOLUTIONS, Boolean.FALSE); + // disallow parallel evaluation of tasks map.put(PipelineOp.Annotations.MAX_PARALLEL, Integer.valueOf(1)); @@ -1060,7 +1063,7 @@ // disable access path coalescing map.put(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS, Boolean.FALSE); - + /* * Disable access path reordering. 
* Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -61,7 +61,6 @@ import com.bigdata.bop.Var; import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.ap.SampleIndex.SampleType; -import com.bigdata.bop.bset.ConditionalRoutingOp; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.join.JoinAnnotations; @@ -77,7 +76,6 @@ import com.bigdata.bop.joinGraph.rto.VertexSample; import com.bigdata.bop.rdf.join.ChunkedMaterializationOp; import com.bigdata.bop.rdf.join.DataSetJoin; -import com.bigdata.bop.solutions.MemorySortOp; import com.bigdata.bop.solutions.SliceOp; import com.bigdata.journal.IIndexManager; import com.bigdata.rdf.internal.IV; @@ -219,22 +217,23 @@ * estimated cardinality of the join since we can not compute the join hit * ratio without knowing the #of solutions in required to produce a given * #of solutions out. + * <p> + * There are several possible root causes for out of order evaluation. One + * is reordering of access paths in the pipeline join. Another is reordering + * of solutions when they are output from an operator. Another is an + * operator which uses the altSink and thus does not always route solutions + * along a single path. * - * FIXME Make this <code>true</code>. There is a known problem where a - * {@link ConditionalRoutingOp} can cause out of order evaluation if some - * solutions flow along the default sink and some along the alt sink. I - * think that the fix for this is to make the materialization step - * non-conditional when performing cutoff evaluation of the join. I need to - * run this past MikeP, so this allows out-of-order evaluation for the - * moment. See BSBM Q5 for a query that currently fails if out of order - * evaluation is disallowed. + * @see #checkQueryPlans */ - static final private boolean failOutOfOrderEvaluation = false; + static final private boolean failOutOfOrderEvaluation = true; /** * When <code>true</code>, the generated query plans for cutoff evaluation * will be checked to verify that the query plans do not permit reordering * of solutions. + * + * @see #failOutOfOrderEvaluation */ static final private boolean checkQueryPlans = false; // Note: Make [false] in committed code! @@ -904,7 +903,8 @@ BOpEvaluationContext.CONTROLLER),// new NV(SliceOp.Annotations.PIPELINED, true),// new NV(SliceOp.Annotations.MAX_PARALLEL, 1),// - new NV(MemorySortOp.Annotations.SHARED_STATE, true)// + new NV(SliceOp.Annotations.REORDER_SOLUTIONS,false),// + new NV(SliceOp.Annotations.SHARED_STATE, true)// ), rtoJoinGroup, ctx); } @@ -975,6 +975,18 @@ } + + if (tmp.isReorderSolutions()) { + + // reordering of solutions is disallowed. 
+ throw new RuntimeException("RTO " + + PipelineOp.Annotations.REORDER_SOLUTIONS + + ": expected=false, actual=" + + tmp.isReorderSolutions() + ", op=" + + tmp.toShortString()); + + } + if (tmp instanceof PipelineJoin) { final PipelineJoin<?> t = (PipelineJoin<?>) tmp; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2014-01-13 19:58:29 UTC (rev 7786) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2014-01-13 21:55:30 UTC (rev 7787) @@ -534,7 +534,7 @@ new NV(ProjectionOp.Annotations.SELECT, projectedVars)// ), queryBase, ctx); - if(materializeProjection) { + if (materializeProjection) { /* * Note: Materialization done from within the query plan needs @@ -3620,7 +3620,7 @@ if (projection.isWildcard()) throw new AssertionError("Wildcard projection was not rewritten."); - + final IVariable<?>[] vars = projection.getProjectionVars(); final PipelineOp op; @@ -3643,6 +3643,7 @@ * BY + DISTINCT) */ anns.add(new NV(PipelineOp.Annotations.MAX_PARALLEL, 1)); + anns.add(new NV(SliceOp.Annotations.REORDER_SOLUTIONS, false)); } op = new JVMDistinctBindingSetsOp(leftOrEmpty(left),// anns.toArray(new NV[anns.size()])// @@ -3959,6 +3960,7 @@ BOpEvaluationContext.CONTROLLER),// new NV(MemorySortOp.Annotations.PIPELINED, true),// new NV(MemorySortOp.Annotations.MAX_PARALLEL, 1),// + new NV(MemorySortOp.Annotations.REORDER_SOLUTIONS, false),// // new NV(MemorySortOp.Annotations.SHARED_STATE, // true),// new NV(MemorySortOp.Annotations.LAST_PASS, true),// @@ -3985,7 +3987,8 @@ BOpEvaluationContext.CONTROLLER),// new NV(SliceOp.Annotations.PIPELINED, true),// new NV(SliceOp.Annotations.MAX_PARALLEL, 1),// - new NV(MemorySortOp.Annotations.SHARED_STATE, true)// + new NV(SliceOp.Annotations.REORDER_SOLUTIONS,false),// + new NV(SliceOp.Annotations.SHARED_STATE, true)// ), queryBase, ctx); return left; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
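Stripped of the query-engine plumbing, the two add() code paths introduced in r7787 amount to a small buffering policy: with reordering allowed, any chunk at or above roughly 50% of the target capacity bypasses the buffer immediately (and may therefore overtake smaller buffered chunks); with reordering disallowed, every chunk passes through the buffer in arrival order, flushing first whenever the combined size would exceed 150% of the target. The sketch below models only that policy; the Consumer sink and Object[] chunk representation are simplifying assumptions, and this is not the ChunkedRunningQuery code.

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.function.Consumer;

// Simplified model of the r7787 chunk-combining policy (not the real class).
final class ChunkCombiner {

    private final int minChunkSize;         // 50% of the target chunk capacity
    private final int maxChunkSize;         // 150% of the target chunk capacity
    private final boolean reorderSolutions; // may large chunks overtake buffered ones?
    private final Consumer<Object[]> sink;  // hypothetical stand-in for outputChunk()
    private final List<Object[]> smallChunks = new LinkedList<Object[]>();
    private int buffered = 0;

    ChunkCombiner(final int chunkCapacity, final boolean reorderSolutions,
            final Consumer<Object[]> sink) {
        this.minChunkSize = chunkCapacity >> 1;
        this.maxChunkSize = chunkCapacity + (chunkCapacity >> 1);
        this.reorderSolutions = reorderSolutions;
        this.sink = sink;
    }

    synchronized void add(final Object[] chunk) {
        if (reorderSolutions && chunk.length >= minChunkSize) {
            // Throughput mode: a decent sized chunk goes out at once, possibly
            // ahead of smaller chunks still sitting in the buffer.
            sink.accept(chunk);
            return;
        }
        if (buffered + chunk.length > maxChunkSize) {
            flush(); // evict the buffer before it grows past 150% of target
            if (!reorderSolutions && chunk.length >= minChunkSize) {
                // Order preserved: the buffer is now empty, so this chunk can
                // be emitted immediately without jumping ahead of anything.
                sink.accept(chunk);
                return;
            }
        }
        smallChunks.add(chunk);
        buffered += chunk.length;
    }

    synchronized void flush() {
        if (buffered == 0)
            return;
        final List<Object> combined = new ArrayList<Object>(buffered);
        for (Object[] c : smallChunks)
            for (Object s : c)
                combined.add(s);
        smallChunks.clear();
        buffered = 0;
        sink.accept(combined.toArray());
    }
}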
From: <tho...@us...> - 2014-01-13 19:58:36
Revision: 7786 http://bigdata.svn.sourceforge.net/bigdata/?rev=7786&view=rev Author: thompsonbry Date: 2014-01-13 19:58:29 +0000 (Mon, 13 Jan 2014) Log Message: ----------- Modified JGraph by removing an unused public constructor. Added an escape hatch if one or more join paths continues to have a cardinality underflow. This issue (cardinality underflow along some paths) doubtless needs to be examined further. This is to address an endless loop observed for one of the govtrac queries. Added a test where there are no solutions. The RTO should be jumping out as soon as it recognizes that the join graph can not produce a solution. Instead, it is solving the join graph, which is pointless (this is noted, but not fixed). See #64. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/BSBM-Q1.rq branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_BSBM.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/BSBM-Q1-noSolutions.srx Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-13 16:32:25 UTC (rev 7785) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-13 19:58:29 UTC (rev 7786) @@ -350,68 +350,52 @@ } - /** - * Find a good join path in the data given the join graph. The join path is - * not guaranteed to be the best join path (the search performed by the - * runtime optimizer is not exhaustive) but it should always be a "good" - * join path and may often be the "best" join path. - * - * @param queryEngine - * The query engine. - * @param limit - * The limit for sampling a vertex and the initial limit for - * cutoff join evaluation. - * @param nedges - * The edges in the join graph are sorted in order of increasing - * cardinality and up to <i>nedges</i> of the edges having the - * lowest cardinality are used to form the initial set of join - * paths. For each edge selected to form a join path, the - * starting vertex will be the vertex of that edge having the - * lower cardinality. - * @param sampleType - * Type safe enumeration indicating the algorithm which will be - * used to sample the initial vertices. - * - * @return The join path identified by the runtime query optimizer as the - * best path given the join graph and the data. - * - * @throws NoSolutionsException - * If there are no solutions for the join graph in the data (the - * query does not have any results). - * @throws IllegalArgumentException - * if <i>queryEngine</i> is <code>null</code>. - * @throws IllegalArgumentException - * if <i>limit</i> is non-positive. - * @throws IllegalArgumentException - * if <i>nedges</i> is non-positive. - * @throws Exception - */ - public Path runtimeOptimizer(final QueryEngine queryEngine, - final int limit, final int nedges) throws NoSolutionsException, - Exception { +// /** +// * Find a good join path in the data given the join graph. 
The join path is +// * not guaranteed to be the best join path (the search performed by the +// * runtime optimizer is not exhaustive) but it should always be a "good" +// * join path and may often be the "best" join path. +// * +// * @param queryEngine +// * The query engine. +// * @param limit +// * The limit for sampling a vertex and the initial limit for +// * cutoff join evaluation. +// * @param nedges +// * The edges in the join graph are sorted in order of increasing +// * cardinality and up to <i>nedges</i> of the edges having the +// * lowest cardinality are used to form the initial set of join +// * paths. For each edge selected to form a join path, the +// * starting vertex will be the vertex of that edge having the +// * lower cardinality. +// * @param sampleType +// * Type safe enumeration indicating the algorithm which will be +// * used to sample the initial vertices. +// * +// * @return The join path identified by the runtime query optimizer as the +// * best path given the join graph and the data. +// * +// * @throws NoSolutionsException +// * If there are no solutions for the join graph in the data (the +// * query does not have any results). +// * @throws IllegalArgumentException +// * if <i>queryEngine</i> is <code>null</code>. +// * @throws IllegalArgumentException +// * if <i>limit</i> is non-positive. +// * @throws IllegalArgumentException +// * if <i>nedges</i> is non-positive. +// * @throws Exception +// */ +// public Path runtimeOptimizer(final QueryEngine queryEngine, +// final int limit, final int nedges) throws NoSolutionsException, +// Exception { +// +// final Map<PathIds, EdgeSample> edgeSamples = new LinkedHashMap<PathIds, EdgeSample>(); +// +// return runtimeOptimizer(queryEngine, limit, nedges, edgeSamples); +// +// } - /* - * This map is used to associate join path segments (expressed as an - * ordered array of bopIds) with edge sample to avoid redundant effort. - * - * FIXME RTO: HEAP MANAGMENT : This map holds references to the cutoff - * join samples. To ensure that the map has the minimum heap footprint, - * it must be scanned each time we prune the set of active paths and any - * entry which is not a prefix of an active path should be removed. - * - * TODO RTO: MEMORY MANAGER : When an entry is cleared from this map, - * the corresponding allocation in the memory manager (if any) must be - * released. The life cycle of the map needs to be bracketed by a - * try/finally in order to ensure that all allocations associated with - * the map are released no later than when we leave the lexicon scope of - * that clause. - */ - final Map<PathIds, EdgeSample> edgeSamples = new LinkedHashMap<PathIds, EdgeSample>(); - - return runtimeOptimizer(queryEngine, limit, nedges, edgeSamples); - - } - /** * Find a good join path in the data given the join graph. The join path is * not guaranteed to be the best join path (the search performed by the @@ -420,19 +404,6 @@ * * @param queryEngine * The query engine. - * @param limit - * The limit for sampling a vertex and the initial limit for - * cutoff join evaluation. - * @param nedges - * The edges in the join graph are sorted in order of increasing - * cardinality and up to <i>nedges</i> of the edges having the - * lowest cardinality are used to form the initial set of join - * paths. For each edge selected to form a join path, the - * starting vertex will be the vertex of that edge having the - * lower cardinality. 
- * @param sampleType - * Type safe enumeration indicating the algorithm which will be - * used to sample the initial vertices. * @param edgeSamples * A map that will be populated with the samples associated with * each non-pruned join path. This map is used to associate join @@ -463,18 +434,49 @@ * TODO We need to automatically increase the depth of search * for queries where we have cardinality estimation underflows * or punt to another method to decide the join order. + * + * TODO RTO: HEAP MANAGMENT : The edgeSamples map holds + * references to the cutoff join samples. To ensure that the map + * has the minimum heap footprint, it must be scanned each time + * we prune the set of active paths and any entry which is not a + * prefix of an active path should be removed. + * + * TODO RTO: MEMORY MANAGER : When an entry is cleared from this + * map, the corresponding allocation in the memory manager (if + * any) must be released. The life cycle of the map needs to be + * bracketed by a try/finally in order to ensure that all + * allocations associated with the map are released no later + * than when we leave the lexicon scope of that clause. */ - public Path runtimeOptimizer(final QueryEngine queryEngine, - final int limit, final int nedges, - final Map<PathIds, EdgeSample> edgeSamples) - throws Exception, NoSolutionsException { + public Path runtimeOptimizer(// + final QueryEngine queryEngine,// + final Map<PathIds, EdgeSample> edgeSamples// + ) throws Exception, NoSolutionsException { if (queryEngine == null) throw new IllegalArgumentException(); + + /* + * The limit for sampling a vertex and the initial limit for cutoff join + * evaluation. + */ + final int limit = joinGraph.getLimit(); + if (limit <= 0) throw new IllegalArgumentException(); + + /* + * The edges in the join graph are sorted in order of increasing + * cardinality and up to <i>nedges</i> of the edges having the lowest + * cardinality are used to form the initial set of join paths. For each + * edge selected to form a join path, the starting vertex will be the + * vertex of that edge having the lower cardinality. + */ + final int nedges = joinGraph.getNEdges(); + if (nedges <= 0) throw new IllegalArgumentException(); + if (edgeSamples == null) throw new IllegalArgumentException(); @@ -500,32 +502,58 @@ while (paths.length > 0 && round < nvertices - 1) { - /* - * Resample the paths. - * - * Note: Since the vertex samples are random, it is possible for the - * #of paths with cardinality estimate underflow to jump up and down - * due to the sample which is making its way through each path in - * each round. - * - * TODO The RTO needs an escape hatch here. FOr example, if the sum - * of the expected IOs for some path(s) strongly dominates all other - * paths sharing the same vertices, then we should prune those paths - * even if there is a cardinality estimate underflow in those paths. - * This will allow us to focus our efforts on those paths having - * less IO cost while we seek cardinality estimates which do not - * underflow. - */ - int nunderflow; + /* + * Resample the paths. + * + * Note: Since the vertex samples are random, it is possible for the + * #of paths with cardinality estimate underflow to jump up and down + * due to the sample which is making its way through each path in + * each round. + * + * Note: The RTO needs an escape hatch here. Otherwise, it is + * possible for it to spin in a loop while resampling. 
+ * + * TODO For example, if the sum of the expected IOs for some path(s) + * strongly dominates all other paths sharing the same vertices, + * then we should prune those paths even if there is a cardinality + * estimate underflow in those paths. This will allow us to focus + * our efforts on those paths having less IO cost while we seek + * cardinality estimates which do not underflow. + * + * TODO We should be examining the actual sampling limit that is + * currently in place on each vertex and for each path. This is + * available by inspection of the VertexSamples and EdgeSamples, but + * it is not passed back out of the resamplePaths() method as a + * side-effect. We should limit how much we are willing to raise the + * limit, e.g., by specifying a MAX_LIMIT annotation on the + * JoinGraph operator. + */ + int nunderflow = 0; - while ((nunderflow = resamplePaths(queryEngine, limit, round, - paths, edgeSamples)) > 0) { - - log.warn("resampling in round=" + round + " : " + nunderflow - + " paths have cardinality estimate underflow."); - - } + for (int i = 0; i < 3; i++) { + nunderflow = resamplePaths(queryEngine, limit, round, paths, + edgeSamples); + + if (nunderflow == 0) { + + // No paths have cardinality estimate underflow. + break; + + } + + log.warn("Cardinality estimate underflow - resampling: round=" + + round + ", npaths=" + paths.length + ", nunderflow=" + + nunderflow + ", limit=" + limit); + + } + + if (nunderflow > 0) { + + log.warn("Continuing: some paths have cardinality underflow!"); + + } + /* * Extend the paths by one vertex. */ @@ -542,8 +570,10 @@ // Should be one winner. if (paths.length != 1) { + throw new AssertionError("Expected one path but have " + paths.length + " paths."); + } if (log.isInfoEnabled()) { @@ -770,8 +800,8 @@ * * @throws Exception */ - protected int resamplePaths(final QueryEngine queryEngine, int limitIn, - final int round, final Path[] a, + protected int resamplePaths(final QueryEngine queryEngine, + final int limitIn, final int round, final Path[] a, final Map<PathIds, EdgeSample> edgeSamples) throws Exception { if (queryEngine == null) Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2014-01-13 16:32:25 UTC (rev 7785) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2014-01-13 19:58:29 UTC (rev 7786) @@ -468,10 +468,8 @@ final Map<PathIds, EdgeSample> edgeSamples = new LinkedHashMap<PathIds, EdgeSample>(); // Find the best join path. - final Path path = g - .runtimeOptimizer(context.getRunningQuery() - .getQueryEngine(), getLimit(), getNEdges(), - edgeSamples); + final Path path = g.runtimeOptimizer(context.getRunningQuery() + .getQueryEngine(), edgeSamples); /* * Release samples. 
Added: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/BSBM-Q1-noSolutions.srx =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/BSBM-Q1-noSolutions.srx (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/BSBM-Q1-noSolutions.srx 2014-01-13 19:58:29 UTC (rev 7786) @@ -0,0 +1,10 @@ +<?xml version="1.0"?> +<sparql xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:xs="http://www.w3.org/2001/XMLSchema#" xmlns="http://www.w3.org/2005/sparql-results#"> + <head> + <variable name="product" /> + <variable name="label" /> + </head> + <results> + </results> +</sparql> Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/BSBM-Q1.rq =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/BSBM-Q1.rq 2014-01-13 16:32:25 UTC (rev 7785) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/BSBM-Q1.rq 2014-01-13 19:58:29 UTC (rev 7786) @@ -5,7 +5,6 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> -#SELECT (COUNT(DISTINCT *) as ?count) SELECT DISTINCT ?product ?label WHERE { Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_BSBM.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_BSBM.java 2014-01-13 16:32:25 UTC (rev 7785) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/test/com/bigdata/rdf/sparql/ast/eval/rto/TestRTO_BSBM.java 2014-01-13 19:58:29 UTC (rev 7786) @@ -31,6 +31,7 @@ import junit.framework.AssertionFailedError; +import com.bigdata.bop.joinGraph.NoSolutionsException; import com.bigdata.rdf.axioms.NoAxioms; import com.bigdata.rdf.sail.BigdataSail; @@ -116,6 +117,30 @@ } /** + * Test of BSBM Q1 against an empty data set. There are no solutions in the + * data. + */ + public void test_BSBM_Q1_noSolutions() throws Exception { + + final TestHelper helper = new TestHelper(// + "rto/BSBM-Q1", // testURI, + "rto/BSBM-Q1.rq",// queryFileURL + new String[]{},// data files. + "rto/BSBM-Q1-noSolutions.srx"// resultFileURL + ); + + /* + * TODO In fact, the RTO should not be running for a group of required + * joins in which some vertex has a zero cardinality or when any join + * can provably produce ZERO results when fed solutions from a fully + * materialized vertex. + */ + + assertSameJoinOrder(new int[] { 2, 1, 3, 4, 5 }, helper); + + } + + /** * BSBM Q1 against pc100. */ public void test_BSBM_Q1_pc100() throws Exception { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
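With this refactoring, runtimeOptimizer() reads the sampling limit and the number of starting edges from the JoinGraph annotations (joinGraph.getLimit(), joinGraph.getNEdges()), so callers pass only the query engine and the edgeSamples map. A minimal sketch of the new calling pattern, consistent with the JoinGraph diff above; releaseSamples() is a hypothetical placeholder for whatever cleanup the MEMORY MANAGER TODO ultimately requires:

    // Populated as a side-effect with the cutoff-join samples for each
    // non-pruned join path.
    final Map<PathIds, EdgeSample> edgeSamples =
            new LinkedHashMap<PathIds, EdgeSample>();

    try {

        // limit and nedges are taken from the JoinGraph annotations inside
        // this call rather than being passed in by the caller.
        final Path path = g.runtimeOptimizer(
                context.getRunningQuery().getQueryEngine(), edgeSamples);

        // ... compile and run the selected join path ...

    } finally {

        // Hypothetical cleanup hook (see the MEMORY MANAGER TODO above).
        releaseSamples(edgeSamples);

    }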
From: <tho...@us...> - 2014-01-13 16:32:32
Revision: 7785 http://bigdata.svn.sourceforge.net/bigdata/?rev=7785&view=rev Author: thompsonbry Date: 2014-01-13 16:32:25 +0000 (Mon, 13 Jan 2014) Log Message: ----------- javadoc Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2014-01-13 16:29:03 UTC (rev 7784) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2014-01-13 16:32:25 UTC (rev 7785) @@ -458,7 +458,8 @@ * {@link JoinGraph.Annotations#JOIN_GROUP} into the ordering * specified in the {@link Path} and make sure that * convertJoinGroup() did not attempt to recursively reapply the - * RTO. + * RTO. This will get rid of one of the few remaining uses of + * {@link PartitionedJoinGroup}. */ public static PipelineOp compileJoinGraph(final QueryEngine queryEngine, final JoinGraph joinGraph, final Path path) { @@ -502,10 +503,6 @@ /* * Figure out which constraints are attached to which predicates. - * - * TODO Can we reconcile this code with the filter assignment code in - * AST2BOpFilter? If so, then we can get rid of the - * PartitionedJoinGroup. */ final IConstraint[][] constraintAttachmentArray = PartitionedJoinGroup .getJoinGraphConstraints(predicates, constraints, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-13 16:29:10
Revision: 7784 http://bigdata.svn.sourceforge.net/bigdata/?rev=7784&view=rev Author: thompsonbry Date: 2014-01-13 16:29:03 +0000 (Mon, 13 Jan 2014) Log Message: ----------- inline doc. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2014-01-13 16:27:44 UTC (rev 7783) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2014-01-13 16:29:03 UTC (rev 7784) @@ -236,7 +236,7 @@ * will be checked to verify that the query plans do not permit reordering * of solutions. */ - static final private boolean checkQueryPlans = false; + static final private boolean checkQueryPlans = false; // Note: Make [false] in committed code! /** * Inspect the remainder of the join group. If we can isolate a join graph This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-13 16:27:53
Revision: 7783 http://bigdata.svn.sourceforge.net/bigdata/?rev=7783&view=rev Author: thompsonbry Date: 2014-01-13 16:27:44 +0000 (Mon, 13 Jan 2014) Log Message: ----------- Bug fix for non-conditional materialization of SOMETIMES or ALWAYS variables when evaluating constraints for cutoff joins with the RTO. The RTO now relies on non-conditional chunked materialization. The RTO can still fail due to reordering of solutions if the check for reordering is enabled. I am not sure what is the root cause for this. At this point, it is possible that it is the query engine rather than the query plan. See #64 (RTO). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpJoins.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java 2014-01-13 15:54:15 UTC (rev 7782) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpFilters.java 2014-01-13 16:27:44 UTC (rev 7783) @@ -207,7 +207,7 @@ PipelineOp left,// final int rightId, // final IValueExpression<IV> ve,// - final Collection<IVariable<IV>> vars, // + final Set<IVariable<IV>> vars, // final Properties queryHints, final AST2BOpContext ctx) { @@ -269,17 +269,17 @@ * @param ctx * The evaluation context. * - * @return The final bop added to the pipeline by this method + * @return The final bop added to the pipeline by this method. If there are + * no variables that require materialization, then this just returns + * <i>left</i>. * * @see TryBeforeMaterializationConstraint - * - * TODO make [vars] a Set. */ @SuppressWarnings("rawtypes") protected static PipelineOp addMaterializationSteps2(// PipelineOp left,// final int rightId, // - final Collection<IVariable<IV>> vars,// + final Set<IVariable<IV>> vars,// final Properties queryHints, // final AST2BOpContext ctx) { @@ -288,32 +288,38 @@ if (nvars == 0) return left; - final long timestamp = ctx.getLexiconReadTimestamp(); + if (nvars >= 1) { - final String ns = ctx.getLexiconNamespace(); - - if (nvars >= 1) { /* - * Use a pipeline operator which uses the chunked materialization - * pattern for solution sets. This is similar to the pattern which - * is used when the IVs in the final solutions are materialized as - * RDF Values. + * Materializes multiple variables at once. * - * TODO We should drop the more complicated materialization pipeline - * logic unless a performance advantage can be demonstrated either - * on a Journal or a cluster. + * Note: This code path does not reorder the solutions (no + * conditional routing). */ - return (PipelineOp) applyQueryHints(new ChunkedMaterializationOp(leftOrEmpty(left), - new NV(ChunkedMaterializationOp.Annotations.VARS, vars.toArray(new IVariable[nvars])),// - new NV(ChunkedMaterializationOp.Annotations.RELATION_NAME, new String[] { ns }), // - new NV(ChunkedMaterializationOp.Annotations.TIMESTAMP, timestamp), // - new NV(PipelineOp.Annotations.SHARED_STATE, !ctx.isCluster()),// live stats, but not on the cluster. 
- new NV(BOp.Annotations.BOP_ID, ctx.nextId())// - ), queryHints, ctx); -// vars.toArray(new IVariable[nvars]), ns, timestamp) -// .setProperty(BOp.Annotations.BOP_ID, ctx.nextId()); + + return addChunkedMaterializationStep( + left, + vars, + ChunkedMaterializationOp.Annotations.DEFAULT_MATERIALIZE_INLINE_IVS, + null, // cutoffLimit + queryHints, ctx); + } + /* + * Materialize a single variable. + * + * Note: This code path can reorder the solutions (it uses conditional + * routing). + * + * TODO We should drop the more complicated materialization pipeline + * logic unless a performance advantage can be demonstrated either on a + * Journal or a cluster. + */ + final long timestamp = ctx.getLexiconReadTimestamp(); + + final String ns = ctx.getLexiconNamespace(); + final Iterator<IVariable<IV>> it = vars.iterator(); int firstId = ctx.nextId(); @@ -462,6 +468,74 @@ } + /** + * Use a pipeline operator which uses the chunked materialization pattern + * for solution sets. This is similar to the pattern which is used when the + * IVs in the final solutions are materialized as RDF Values. + * <p> + * Note: The RTO uses this method since it does not use conditional routing + * and does not reorder the solutions. + * + * @param left + * The left (upstream) operator that immediately proceeds the + * materialization steps. + * @param vars + * The terms to materialize. + * @param materializeInlineIvs + * When <code>true</code>, inline IVs are also materialized. + * @param queryHints + * The query hints from the dominating AST node. + * @param ctx + * The evaluation context. + * + * @return The final bop added to the pipeline by this method. If there are + * no variables that require materialization, then this just returns + * <i>left</i>. + * + * @see ChunkedMaterializationOp + */ + protected static PipelineOp addChunkedMaterializationStep(// + PipelineOp left,// + final Set<IVariable<IV>> vars,// + final boolean materializeInlineIvs,// + final Long cutoffLimit,// + final Properties queryHints,// + final AST2BOpContext ctx// + ) { + + final int nvars = vars.size(); + + if (nvars == 0) + return left; + + final long timestamp = ctx.getLexiconReadTimestamp(); + + final String ns = ctx.getLexiconNamespace(); + + /* + * If we are doing cutoff join evaluation, then limit the parallelism of + * this operator to prevent reordering of solutions. + * + * TODO If query hints are allowed to override MAX_PARALLEL and this is + * being invoked for cutoff join evaluation, then that will break the + * "no reordering" guarantee. + */ + + final int maxParallel = cutoffLimit != null ? 1 + : PipelineOp.Annotations.DEFAULT_MAX_PARALLEL; + + return (PipelineOp) applyQueryHints(new ChunkedMaterializationOp(leftOrEmpty(left), + new NV(ChunkedMaterializationOp.Annotations.VARS, vars.toArray(new IVariable[nvars])),// + new NV(ChunkedMaterializationOp.Annotations.RELATION_NAME, new String[] { ns }), // + new NV(ChunkedMaterializationOp.Annotations.TIMESTAMP, timestamp), // + new NV(ChunkedMaterializationOp.Annotations.MATERIALIZE_INLINE_IVS, materializeInlineIvs), // + new NV(PipelineOp.Annotations.SHARED_STATE, !ctx.isCluster()),// live stats, but not on the cluster. + new NV(PipelineOp.Annotations.MAX_PARALLEL,maxParallel),// + new NV(BOp.Annotations.BOP_ID, ctx.nextId())// + ), queryHints, ctx); + + } + // /** // * Wrapper for handling the {@link AST2BOpContext} / {@link BOpContextBase} // * API mismatch. 
@@ -483,7 +557,7 @@ // ctx.queryEngine), queryHints); // // } - + /** * For each filter which requires materialization steps, add the * materializations steps to the pipeline and then add the filter to the @@ -491,7 +565,10 @@ * * @param left * @param doneSet - * The set of variables already known to be materialized. + * The set of variables already known to be materialized. This is + * populated as a side-effect with any variables that will be + * materialized by the materialization steps added by this + * method. * @param needsMaterialization * A map of constraints and their variable materialization * requirements. @@ -499,6 +576,16 @@ * Query hints from the dominating AST node. * @param ctx * The evaluation context. + * + * TODO This treats each filter in turn rather than handling all + * variable materializations for all filters at once. Is this + * deliberate? If so, maybe we should pay attention to the order + * of the filters. I.e., those constraints should be ordered + * based on an expectation that they can reduce the total work by + * first eliminating solutions with less materialization effort + * (run constraints without materialization requirements before + * those with materialization requirements, run constraints that + * are more selective before others, etc.). */ @SuppressWarnings("rawtypes") protected static PipelineOp addMaterializationSteps3(// @@ -509,72 +596,148 @@ final AST2BOpContext ctx// ) { - if (!needsMaterialization.isEmpty()) { + if (needsMaterialization.isEmpty()) { + // Nothing to do. + return left; + } - final Set<IVariable<?>> alreadyMaterialized = doneSet; + final Set<IVariable<?>> alreadyMaterialized = doneSet; - for (Map.Entry<IConstraint, Set<IVariable<IV>>> e : - needsMaterialization.entrySet()) { + for (Map.Entry<IConstraint, Set<IVariable<IV>>> e : + needsMaterialization.entrySet()) { - final IConstraint c = e.getKey(); + // The constraint. + final IConstraint c = e.getKey(); + + // The set of variables associated with that constraint. + final Set<IVariable<IV>> terms = e.getValue(); + + // remove any terms already materialized + terms.removeAll(alreadyMaterialized); + + if (c instanceof INeedsMaterialization + && ((INeedsMaterialization) c).getRequirement() == Requirement.ALWAYS) { + + // add any new terms to the list of already materialized + alreadyMaterialized.addAll(terms); - final Set<IVariable<IV>> terms = e.getValue(); + } - // remove any terms already materialized - terms.removeAll(alreadyMaterialized); + final int condId = ctx.nextId(); - if (c instanceof INeedsMaterialization - && ((INeedsMaterialization) c).getRequirement() == Requirement.ALWAYS) { + // we might have already materialized everything we need + if (!terms.isEmpty()) { - // add any new terms to the list of already materialized - alreadyMaterialized.addAll(terms); - - } + // Add materialization steps for remaining variables. - final int condId = ctx.nextId(); + @SuppressWarnings("unchecked") + final IValueExpression<IV> ve = (IValueExpression) c.get(0); - // we might have already materialized everything we need - if (!terms.isEmpty()) { + left = addMaterializationSteps1(// + left, // + condId, // right + ve, // value expression + terms,// varsToMaterialize, + queryHints,// + ctx); + + } - // Add materialization steps for remaining variables. 
+ left = applyQueryHints(// + new ConditionalRoutingOp(leftOrEmpty(left),// + new NV(BOp.Annotations.BOP_ID, condId),// + new NV(ConditionalRoutingOp.Annotations.CONDITION,c)// + ), queryHints, ctx); - @SuppressWarnings("unchecked") - final IValueExpression<IV> ve = (IValueExpression) c.get(0); + } - left = addMaterializationSteps1(// - left, // - condId, // right - ve, // value expression - terms,// varsToMaterialize, - queryHints,// - ctx); - -// left = addMaterializationSteps(// -// ctx,// -// left,// -// condId,// rightId -// c, // eval c.get(0) -// terms, // varsToMaterialize -// // idFactory, -// queryHints// -// ); + return left; - } + } - left = applyQueryHints(// - new ConditionalRoutingOp(leftOrEmpty(left),// - new NV(BOp.Annotations.BOP_ID, condId),// - new NV(ConditionalRoutingOp.Annotations.CONDITION,c)// - ), queryHints, ctx); + /** + * The RTO requires that we do not reorder solutions. This means that it + * must use an un-conditional approach to variable materialization for + * constraints with SOMETIMES materialization requirements. This has two + * practical impacts: + * <p> + * 1. We can not attach a filter with SOMETIMES requirements to a JOIN and + * wrap it with a {@link TryBeforeMaterializationConstraint} since this + * requires a {@link ConditionalRoutingOp} with an altSink and that will + * reorder solutions. + * <p> + * 2. We can not use a pattern which involves an {@link InlineMaterializeOp} + * followed by a {@link ConditionalRoutingOp} with an altSink followed by a + * {@link PipelineJoin} against the lexicon. This also reorders the + * solutions (primarily because of the {@link ConditionalRoutingOp} since we + * can force the {@link PipelineJoin} to not reorder solutions). + * <p> + * The code below uses the {@link ChunkedMaterializationOp}. This does not + * reorder the solutions. It can also materialize inline IVs giving us a + * single operator that prepare the solutions for filter evaluation. + */ + @SuppressWarnings("rawtypes") + protected static PipelineOp addNonConditionalMaterializationSteps(// + PipelineOp left,// + final Set<IVariable<?>> doneSet,// + final Map<IConstraint, Set<IVariable<IV>>> needsMaterialization, + final Long cutoffLimit,// + final Properties queryHints,// + final AST2BOpContext ctx// + ) { + + if (needsMaterialization.isEmpty()) { + + // No filters. + return left; + + } - } + // Collect variables that require materialization. + final Set<IVariable<IV>> matvars = new LinkedHashSet<IVariable<IV>>(); + for (Map.Entry<IConstraint, Set<IVariable<IV>>> e : needsMaterialization + .entrySet()) { + + matvars.addAll(e.getValue()); + } + if (!matvars.isEmpty()) { + + // Materialize those variables. + left = addChunkedMaterializationStep(left, matvars, + true/* materializeInlineIVs */, cutoffLimit, queryHints, + ctx); + + // Add them to the doneSet. + doneSet.addAll(matvars); + + } + + // Attach all constraints. + for(IConstraint c : needsMaterialization.keySet()) { + + /* + * Note: While this is using a ConditionalRoutingOp, it is NOT + * use the altSink. All solutions flow through the default sink. + * This use does not cause the solutions to be reordered. + * Parallel evaluation is also disabled. 
+ */ + + left = applyQueryHints(// + new ConditionalRoutingOp(leftOrEmpty(left),// + new NV(BOp.Annotations.BOP_ID, ctx.nextId()),// + new NV(PipelineOp.Annotations.MAX_PARALLEL, 1),// + new NV(ConditionalRoutingOp.Annotations.CONDITION, c)// + ), queryHints, ctx); + + } + return left; } - + /** * Partition the constraints for a join into those which can (or might) be * able to run attached to that join and those which must (or might) need to @@ -601,10 +764,70 @@ * join -or- <code>null</code> iff there are no constraints that can * be attached to the join. */ + @SuppressWarnings("rawtypes") static protected IConstraint[] getJoinConstraints( final Collection<IConstraint> constraints, final Map<IConstraint, Set<IVariable<IV>>> needsMaterialization) { + return getJoinConstraints2(constraints, needsMaterialization, true/* conditionalRouting */); + + } + + /** + * Partition the constraints for a join into those which can (or might) be + * able to run attached to that join and those which must (or might) need to + * materialize some variables before they can be evaluated. Constraints + * which might be able to run attached to a join actually wind up both + * attached to the join (in the return value) where they are wrapped by a + * {@link TryBeforeMaterializationConstraint} which will ignore errors + * caused by unmaterialized values and in the <i>needsMaterialization</i> + * map. This allows such constraints to run attached to the join iff that is + * possible and otherwise to be evaluated as soon as the materialization + * pipeline can satisify their materialization requirements. + * <p> + * Note: The RTO requires that solutions are not reordered during cutoff + * JOIN evaluation in order to obtain accurate estimates of the cardinality + * of a join based on a known number of solutions in producing a known + * number of solutions out. Conditional materialization can cause solutions + * to be reordered since some solutions may pass the constraint attached to + * the join and be routed along one path in the query plan while other + * solutions may pass the join but fail the constraint due to a + * materialization requirement and are routed along another path. Thus it is + * necessary to disable conditional routing for cutoff JOIN evaluation. + * + * @param constraints + * The constraints (if any) for the join (optional). This + * collection is NOT modified. + * @param needsMaterialization + * A map providing for each constraint the set of variables which + * either might or must be materialized before that constraint + * can be evaluated. This map is populated as a side-effect. It + * will be empty iff there are no constraints that might or must + * require variable materialization. + * @param conditionalRouting + * When <code>true</code>, constraints that + * {@link Requirement#SOMETIMES} are able to run attached are + * wrapped by a {@link TryBeforeMaterializationConstraint} and + * appear both attached to the JOIN and will also run after the + * JOIN using a {@link ConditionalRoutingOp} to route solutions + * that fail the attached contraint through a materialization + * pipeline and then into a 2nd copy of the constraint once the + * variable(s) are known to be materialized.<br/> + * When <code>false</code>, only constraints that + * {@link Requirement#NEVER} require materialization will be + * attached to the JOIN. All other constraints will use + * non-conditional materialization. 
+ * + * @return Constraints which can (or might) be able to run attached to that + * join -or- <code>null</code> iff there are no constraints that can + * be attached to the join. + */ + @SuppressWarnings("rawtypes") + static protected IConstraint[] getJoinConstraints2( + final Collection<IConstraint> constraints, + final Map<IConstraint, Set<IVariable<IV>>> needsMaterialization, + final boolean conditionalRouting) { + if (constraints == null || constraints.isEmpty()) { // No constraints for this join. @@ -630,7 +853,6 @@ * the join and run it as a ConditionalRoutingOp later. */ - @SuppressWarnings("rawtypes") final Set<IVariable<IV>> terms = new LinkedHashSet<IVariable<IV>>(); final Requirement req = StaticAnalysis.gatherVarsToMaterialize(c, @@ -640,7 +862,7 @@ it.remove(); - if (req == Requirement.SOMETIMES) { + if (req == Requirement.SOMETIMES && conditionalRouting) { tryBeforeMaterialization.add(c); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpJoins.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpJoins.java 2014-01-13 15:54:15 UTC (rev 7782) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpJoins.java 2014-01-13 16:27:44 UTC (rev 7783) @@ -140,9 +140,20 @@ final Map<IConstraint, Set<IVariable<IV>>> needsMaterialization = new LinkedHashMap<IConstraint, Set<IVariable<IV>>>(); - // Add constraints to the join for that predicate. - anns.add(new NV(JoinAnnotations.CONSTRAINTS, getJoinConstraints( - constraints, needsMaterialization))); + /* + * Add constraints to the join for that predicate. + * + * Note: If we are performing cutoff evaluation of a JOIN + * [cutoffLimit!=null], then this disables the conditional routing logic + * for constraints with SOMETIMES materialization requirements. This is + * necessary in order to preserve the order of evaluation. Conditional + * routing of solutions causes them to be reordered and that breaks the + * ability to accurately estimate the cardinality of the JOIN using + * cutoff evaluation. + */ + anns.add(new NV(JoinAnnotations.CONSTRAINTS, + getJoinConstraints2(constraints, needsMaterialization, + cutoffLimit == null/* conditionalRouting */))); // true iff there are no constraints that require materialization. anns.add(new NV(Annotations.SIMPLE_JOIN, needsMaterialization.isEmpty())); @@ -198,15 +209,39 @@ } + if (needsMaterialization.isEmpty()) { + + // No filters. + return left; + + } + /* - * For each filter which requires materialization steps, add the - * materializations steps to the pipeline and then add the filter to the - * pipeline. + * Add operators to materialization variables (as necessary) and + * evaluate filters. */ + if (cutoffLimit != null) { - left = addMaterializationSteps3(left, doneSet, needsMaterialization, - queryHints, ctx); + left = addNonConditionalMaterializationSteps(left, doneSet, + needsMaterialization, cutoffLimit, queryHints, ctx); + } else { + + /* + * For each filter which requires materialization steps, add the + * materializations steps to the pipeline and then add the + * filter to the pipeline. + * + * Note: This is the old code path. This code path not support + * cutoff evaluation of joins because it can reorder the + * solutions. 
+ */ + + left = addMaterializationSteps3(left, doneSet, + needsMaterialization, queryHints, ctx); + + } + return left; } @@ -1002,7 +1037,7 @@ QueryHints.DEFAULT_HASH_JOIN); if (cutoffLimit != null) { - + /* * Cutoff join (RTO). */ @@ -1026,6 +1061,22 @@ map.put(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS, Boolean.FALSE); + /* + * Disable access path reordering. + * + * Note: Reordering must be disabled for complex joins since we will + * correlate the input solutions and output solutions using a row + * identifier. If the solutions are reordered as they flow through + * the pipeline, then it will break this correlation and we will no + * longer have accurate information about the #of input solutions + * required to produce a given number of output solutions. [Simple + * joins might not have this requirement since the PipelineJoin is + * internally doing the accounting for the #of solutions in and out + * of the join.] + */ + map.put(PipelineJoin.Annotations.REORDER_ACCESS_PATHS, + Boolean.FALSE); + if (simpleJoin) { // // disable access path coalescing @@ -1092,20 +1143,6 @@ */ map.put(PipelineJoin.Annotations.SHARED_STATE, Boolean.TRUE);// - /* - * Disable access path reordering. - * - * Note: Reordering must be disabled for complex joins since we - * will correlate the input solutions and output solutions using - * a row identifier. If the solutions are reordered as they flow - * through the pipeline, then it will break this correlation and - * we will no longer have accurate information about the #of - * input solutions required to produce a given number of output - * solutions. - */ - map.put(PipelineJoin.Annotations.REORDER_ACCESS_PATHS, - Boolean.FALSE); - } } // cutoffJoin Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2014-01-13 15:54:15 UTC (rev 7782) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2014-01-13 16:27:44 UTC (rev 7783) @@ -232,6 +232,13 @@ static final private boolean failOutOfOrderEvaluation = false; /** + * When <code>true</code>, the generated query plans for cutoff evaluation + * will be checked to verify that the query plans do not permit reordering + * of solutions. + */ + static final private boolean checkQueryPlans = false; + + /** * Inspect the remainder of the join group. If we can isolate a join graph * and filters, then we will push them down into an RTO JoinGroup. Since the * joins have already been ordered by the static optimizer, we can accept @@ -445,6 +452,13 @@ * based on deep sampling of the join graph. * * @return The query plan to fully execute that join graph. + * + * FIXME RTO: Modify this to use AST2BOpUtility#convertJoinGroup(). + * We would have to reorder the joins in the + * {@link JoinGraph.Annotations#JOIN_GROUP} into the ordering + * specified in the {@link Path} and make sure that + * convertJoinGroup() did not attempt to recursively reapply the + * RTO. 
*/ public static PipelineOp compileJoinGraph(final QueryEngine queryEngine, final JoinGraph joinGraph, final Path path) { @@ -464,7 +478,8 @@ final IConstraint[] constraints = joinGraph.getConstraints(); - final Set<IVariable<?>> doneSet = joinGraph.getDoneSet(); + final Set<IVariable<?>> doneSet = new LinkedHashSet<IVariable<?>>( + joinGraph.getDoneSet()); /* * The AST JoinGroupNode for the joins and filters that we are running @@ -673,6 +688,7 @@ * * @return The result of sampling that edge. */ + //synchronized// FIXME REMOVE synchronized. This forces single threading for debugging purposes when chasing out-of-order evaluation exceptions. static public EdgeSample cutoffJoin(// final QueryEngine queryEngine,// final JoinGraph joinGraph,// @@ -701,38 +717,55 @@ if (sourceSample.getSample() == null) throw new IllegalArgumentException(); - /* - * Generate the query plan for cutoff evaluation of that JOIN. The - * complexity of the query plan depends on whether or not there are - * FILTERs "attached" to the join that have variable materialization - * requirements. - */ - final PipelineOp query = getCutoffJoinQuery(queryEngine, joinGraph, - limit, predicates, constraints, pathIsComplete, sourceSample); + PipelineOp query = null; + try { - if (!runAllJoinsAsComplexJoins && (query instanceof PipelineJoin) - && query.arity() == 0) { - /* - * Simple JOIN. - * - * Old logic for query plan generation. This is deprecated and will - * disappear soon. We will still generate simple query plans and - * execute them in the simple manner, but all of the code will go - * through AST2BOPJoins#join(). + * Generate the query plan for cutoff evaluation of that JOIN. The + * complexity of the query plan depends on whether or not there are + * FILTERs "attached" to the join that have variable materialization + * requirements. */ - return runSimpleJoin(queryEngine, sourceSample, limit, - (PipelineJoin<?>) query); + query = getCutoffJoinQuery(queryEngine, joinGraph, + limit, predicates, constraints, pathIsComplete, sourceSample); - } else { + if (!runAllJoinsAsComplexJoins && (query instanceof PipelineJoin) + && query.arity() == 0) { + /* + * Simple JOIN. + * + * Old logic for query execution. Relies on the ability of + * the PipelineJoin + */ + return runSimpleJoin(queryEngine, sourceSample, limit, + (PipelineJoin<?>) query); + + } else { + + /* + * Complex JOIN involving variable materialization, conditional + * routing operators, filters, and a SLICE to limit the output. + */ + + return runComplexJoin(queryEngine, sourceSample, limit, query); + + } + } catch (Throwable ex) { + /* - * Complex JOIN involving variable materialization, conditional - * routing operators, filters, and a SLICE to limit the output. + * Add some more information. This gives us the specific predicate. + * However, it does not tell us the constraints that were attached + * to that predicate. That information is only available when we are + * compiling the query plan. At this point, the constraints have + * been turned into query plan operators. */ - - return runComplexJoin(queryEngine, sourceSample, limit, query); + throw new RuntimeException("cause=" + ex + "\npred=" + + BOpUtility.toString(pred) + "\nconstraints=" + + Arrays.toString(constraints) + (query==null?"":"\nquery=" + + BOpUtility.toString(query)), ex); + } } @@ -856,7 +889,7 @@ * [left] is now the last operator in the query plan. 
*/ - if (!(left instanceof PipelineJoin) && left.arity() == 0) { + if (!((left instanceof PipelineJoin) && left.arity() == 0)) { /* * The query plan contains multiple operators. @@ -883,12 +916,109 @@ log.debug("RTO cutoff join query::\n" + BOpUtility.toString(left) + "\npred::" + pred); - return left; + if (checkQueryPlans) { + + checkQueryPlan(left); + + } + + return left; } + + /** + * Debug code checks the query plan for patterns that could cause + * {@link OutOfOrderEvaluationException}s. + * <p> + * <ul> + * <li>This looks for query plans that use the alternate sink. Operators + * that route some solutions to the default sink and some solutions to the + * alternate sink can cause out of order evaluation. Out of order evaluation + * is not compatible with cutoff JOIN evaluation.</li> + * <li>This looks for operators that do not restrict + * {@link PipelineOp.Annotations#MAX_PARALLEL} to ONE (1).</li> + * </ul> + * + * TODO We probably need an interface to determine whether an operator (as + * configured) guarantee in order evaluation. Some operators can not be + * configured for in order evaluation. Others, including a hash join using a + * linked hash map for the buckets, can preserve order of the source + * solutions in their output. This would also be useful for creating query + * plans that are order preserving when an index order corresponds to the + * desired output order. + * + * TODO Extend this to check the RTO plan for ordered evaluation and then + * use this in the test suite. The plan must be provable oredered. We could + * also use this for order preserving query plans for other purposes. + */ + private static void checkQueryPlan(final PipelineOp left) { + final Iterator<PipelineOp> itr = BOpUtility.visitAll(left, + PipelineOp.class); + + while (itr.hasNext()) { + + final PipelineOp tmp = itr.next(); + + if (tmp.getProperty(PipelineOp.Annotations.ALT_SINK_REF) != null) { + + // alternative sink is disallowed. + throw new RuntimeException("Query plan uses altSink: op=" + + tmp.toShortString()); + + } + + if (tmp.getMaxParallel() != 1) { + + // parallel execution of an operator is disallowed. + throw new RuntimeException("RTO " + + PipelineOp.Annotations.MAX_PARALLEL + + ": expected=1, actual=" + tmp.getMaxParallel() + + ", op=" + tmp.toShortString()); + + } + + if (tmp instanceof PipelineJoin) { + + final PipelineJoin<?> t = (PipelineJoin<?>) tmp; + + final int maxParallelChunks = t.getMaxParallelChunks(); + + if (maxParallelChunks != 0) { + + throw new RuntimeException("PipelineJoin: " + + PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS + + "=" + maxParallelChunks + + " but must be ZERO (0):: op=" + t.toShortString()); + + } + + final boolean coalesceDuplicateAccessPaths = t + .getProperty( + PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS, + PipelineJoin.Annotations.DEFAULT_COALESCE_DUPLICATE_ACCESS_PATHS); + + if (coalesceDuplicateAccessPaths) { + + throw new RuntimeException( + "PipelineJoin: " + + PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS + + "=" + coalesceDuplicateAccessPaths + + " but must be false:: op=" + + t.toShortString()); + + } + + } + + } + + } + /** - * Run a simple cutoff join on the {@link QueryEngine}. + * Run a simple cutoff join on the {@link QueryEngine}. This method relies + * on the ability to control the {@link PipelineJoin} such that it does not + * reorder the solutions. * * @param queryEngine * The {@link QueryEngine}. 
@@ -1050,7 +1180,7 @@ if (log.isInfoEnabled()) log.info("limit=" + limit + ", sourceSample=" + sourceSample + ", query=" + query.toShortString()); - + // Anonymous variable used for the injected column. final IVariable<?> rtoVar = Var.var(); @@ -1102,9 +1232,9 @@ int lastRowId = 0; while (itr.hasNext()) { bset = itr.next(); -//System.err.println(bset.toString()); final int rowid = ((Integer) bset.get(rtoVar).get()) .intValue(); +//log.warn("rowId="+rowid+",lastRowId="+lastRowId+",bset="+bset); if (rowid < lastRowId && failOutOfOrderEvaluation) { /* * Out of order evaluation makes it impossible to @@ -1113,8 +1243,7 @@ * without knowing the #of solutions in required to * produce a given #of solutions out. */ - throw new OutOfOrderEvaluationException( - BOpUtility.toString(query)); + throw new OutOfOrderEvaluationException(); } lastRowId = rowid; bset.clear(rtoVar); // drop injected variable. Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2014-01-13 15:54:15 UTC (rev 7782) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpUtility.java 2014-01-13 16:27:44 UTC (rev 7783) @@ -1,7 +1,6 @@ package com.bigdata.rdf.sparql.ast.eval; import java.util.Arrays; -import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -997,7 +996,7 @@ // Add the materialization step. left = addMaterializationSteps2(left, rightId, - (Collection) vars, queryHints, ctx); + (Set<IVariable<IV>>) (Set) vars, queryHints, ctx); // These variables have now been materialized. doneSet.addAll(vars); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-13 15:54:22
Revision: 7782 http://bigdata.svn.sourceforge.net/bigdata/?rev=7782&view=rev Author: thompsonbry Date: 2014-01-13 15:54:15 +0000 (Mon, 13 Jan 2014) Log Message: ----------- EBVBOp - cleanup imports, @Overrides. SPARQLConstraint - @Overrides. TryBeforeMaterializationConstraint - @Override Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SPARQLConstraint.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/TryBeforeMaterializationConstraint.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java 2014-01-13 15:36:38 UTC (rev 7781) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/EBVBOp.java 2014-01-13 15:54:15 UTC (rev 7782) @@ -33,13 +33,10 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; -import com.bigdata.bop.NV; import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; -import com.bigdata.rdf.internal.NotMaterializedException; import com.bigdata.rdf.internal.impl.literal.XSDBooleanIV; -import com.bigdata.rdf.model.BigdataValue; -import com.bigdata.rdf.sparql.ast.GlobalAnnotations; +import com.bigdata.rdf.sparql.ast.FilterNode; /** * Calculates the "effective boolean value" of an IValueExpression. See the @@ -53,6 +50,7 @@ */ private static final long serialVersionUID = -5701967329003122236L; + @SuppressWarnings("rawtypes") public EBVBOp(final IValueExpression<? extends IV> x) { this(new BOp[] { x }, BOp.NOANNS); @@ -75,7 +73,9 @@ * Constructor required for {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}. */ public EBVBOp(final EBVBOp op) { + super(op); + } /** @@ -111,39 +111,41 @@ * as a typed literal with a datatype of xsd:boolean and a lexical value of * "false". */ + @Override + @SuppressWarnings("rawtypes") public boolean accept(final IBindingSet bs) { - final IV iv = getAndCheckBound(0, bs); - - if (iv instanceof XSDBooleanIV) { - - return ((XSDBooleanIV) iv).booleanValue(); - - } - - final Value val = super.asValue(iv); - - try { - - return QueryEvaluationUtil.getEffectiveBooleanValue(val); - - } catch (ValueExprEvaluationException ex) { - - throw new SparqlTypeErrorException(); - - } - + final IV iv = getAndCheckBound(0, bs); + + if (iv instanceof XSDBooleanIV) { + + return ((XSDBooleanIV) iv).booleanValue(); + + } + + final Value val = super.asValue(iv); + + try { + + return QueryEvaluationUtil.getEffectiveBooleanValue(val); + + } catch (ValueExprEvaluationException ex) { + + throw new SparqlTypeErrorException(); + + } + } - + /** - * The EBVBOp only needs materialization if its internal value expression - * does not evaluate to an XSDBooleanIV. + * The {@link EBVBOp} only needs materialization if its internal value + * expression does not evaluate to an {@link XSDBooleanIV}. 
*/ + @Override public Requirement getRequirement() { - - return INeedsMaterialization.Requirement.SOMETIMES; - + + return INeedsMaterialization.Requirement.SOMETIMES; + } - } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SPARQLConstraint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SPARQLConstraint.java 2014-01-13 15:36:38 UTC (rev 7781) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/SPARQLConstraint.java 2014-01-13 15:54:15 UTC (rev 7782) @@ -34,12 +34,14 @@ import com.bigdata.rdf.error.SparqlTypeErrorException; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.impl.literal.XSDBooleanIV; +import com.bigdata.rdf.sparql.ast.FilterNode; import com.bigdata.util.InnerCause; /** * BOpConstraint that wraps a {@link EBVBOp}, which itself computes the - * effective boolean value of an IValueExpression. + * effective boolean value of an {@link IValueExpression}. */ +@SuppressWarnings("rawtypes") public class SPARQLConstraint<X extends XSDBooleanIV> extends com.bigdata.bop.constraint.Constraint<X> { @@ -68,7 +70,6 @@ * The value expression will be automatically wrapped inside an * {@link EBVBOp} if it does not itself evaluate to a boolean. */ - @SuppressWarnings("rawtypes") public SPARQLConstraint(final IValueExpression<? extends IV> x) { this(new BOp[] { wrap(x) }, null/*annocations*/); @@ -96,7 +97,7 @@ } - @SuppressWarnings({ "rawtypes", "unchecked" }) + @SuppressWarnings("unchecked") @Override public IValueExpression<? extends XSDBooleanIV> get(final int i) { @@ -114,7 +115,6 @@ try { // evaluate the EBV operator - @SuppressWarnings("rawtypes") final XSDBooleanIV iv = get(0).get(bs); return iv.booleanValue(); @@ -138,4 +138,16 @@ } + /** + * Overridden to provide a little bit of information about the attached + * constraint. + */ + @Override + public String toShortString() { + + return super.toShortString() + "{condition=" + + getValueExpression().toShortString() + "}"; + + } + } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/TryBeforeMaterializationConstraint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/TryBeforeMaterializationConstraint.java 2014-01-13 15:36:38 UTC (rev 7781) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/TryBeforeMaterializationConstraint.java 2014-01-13 15:54:15 UTC (rev 7782) @@ -89,40 +89,42 @@ * step in the pipeline to ensure that the solution gets routed around * the materialization steps. See {@link IsMaterializedBOp}. 
*/ + @Override public boolean accept(final IBindingSet bs) { - final IConstraint c = (IConstraint) get(0); - - try { - - if (log.isDebugEnabled()) { - log.debug("about to attempt evaluation prior to materialization"); - } - - final boolean accept = c.accept(bs); - - if (log.isDebugEnabled()) { - log.debug("successfully evaluated constraint without materialization"); - } - - return accept; - - } catch (Throwable t) { + final IConstraint c = (IConstraint) get(0); - if (InnerCause.isInnerCause(t, NotMaterializedException.class)) { - - if (log.isDebugEnabled()) { - log.debug("could not evaluate constraint without materialization"); - } - - // let the solution through for now, it will get tested again - // on the other side of the materialization pipeline - return true; - - } else throw new RuntimeException(t); - - } - + try { + + if (log.isDebugEnabled()) { + log.debug("about to attempt evaluation prior to materialization"); + } + + final boolean accept = c.accept(bs); + + if (log.isDebugEnabled()) { + log.debug("successfully evaluated constraint without materialization"); + } + + return accept; + + } catch (Throwable t) { + + if (InnerCause.isInnerCause(t, NotMaterializedException.class)) { + + if (log.isDebugEnabled()) { + log.debug("could not evaluate constraint without materialization"); + } + + // let the solution through for now, it will get tested again + // on the other side of the materialization pipeline + return true; + + } else + throw new RuntimeException(t); + + } + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
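The cleaned-up EBVBOp only short-circuits when the inner expression already yields an XSDBooleanIV; otherwise it materializes the value and defers to Sesame's QueryEvaluationUtil. For reference, a standalone sketch of the SPARQL 1.1 effective boolean value rules being applied (illustrative only; the bigdata code delegates rather than re-implementing them):

    /** Minimal EBV per SPARQL 1.1, sec. 17.2.2 (illustrative sketch). */
    static boolean effectiveBooleanValue(final Object value) {
        if (value instanceof Boolean) {
            // xsd:boolean literal: the value itself.
            return (Boolean) value;
        }
        if (value instanceof String) {
            // Plain literal or xsd:string: true iff non-empty.
            return !((String) value).isEmpty();
        }
        if (value instanceof Number) {
            // Numeric literal: false for NaN and zero, true otherwise.
            final double d = ((Number) value).doubleValue();
            return !Double.isNaN(d) && d != 0d;
        }
        // Anything else is a type error (SparqlTypeErrorException in the diff).
        throw new IllegalArgumentException("SPARQL type error");
    }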
From: <tho...@us...> - 2014-01-13 15:36:45
Revision: 7781 http://bigdata.svn.sourceforge.net/bigdata/?rev=7781&view=rev Author: thompsonbry Date: 2014-01-13 15:36:38 +0000 (Mon, 13 Jan 2014) Log Message: ----------- removed java7 dependency. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java 2014-01-13 14:58:53 UTC (rev 7780) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java 2014-01-13 15:36:38 UTC (rev 7781) @@ -102,12 +102,12 @@ */ @Override public int compareTo(final BSBundle o) { - - int ret = -Integer.compare(bopId, o.bopId); + int ret = (bopId < o.bopId) ? 1 : ((bopId == o.bopId) ? 0 : -1); + if (ret == 0) { - ret = -Integer.compare(shardId, o.shardId); + ret = (shardId < o.shardId) ? 1 : ((shardId == o.shardId) ? 0 : -1); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
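The change above open-codes Integer.compare(int, int), which first appeared in Java 7, so the branch stays buildable on Java 6. The reversed form -Integer.compare(a, b) is equivalent to (a < b) ? 1 : ((a == b) ? 0 : -1); a quick illustrative spot-check (the check itself needs Java 7+ in order to reference Integer.compare):

    public class CompareEquivalenceCheck {

        // Java 6 compatible replacement for -Integer.compare(a, b).
        static int reverseCompare(final int a, final int b) {
            return (a < b) ? 1 : ((a == b) ? 0 : -1);
        }

        public static void main(String[] args) {
            for (int a = -2; a <= 2; a++) {
                for (int b = -2; b <= 2; b++) {
                    if (reverseCompare(a, b) != -Integer.compare(a, b)) {
                        throw new AssertionError(a + "," + b);
                    }
                }
            }
            System.out.println("reverseCompare matches -Integer.compare");
        }
    }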
From: <tho...@us...> - 2014-01-13 14:59:00
Revision: 7780 http://bigdata.svn.sourceforge.net/bigdata/?rev=7780&view=rev Author: thompsonbry Date: 2014-01-13 14:58:53 +0000 (Mon, 13 Jan 2014) Log Message: ----------- Added option (set via static final boolean) to use an ordered map over the (bopId,shardId) pairs to associate them with the operator input queues rather than a hash map. Both maps are concurrent. This option is currently set to false which is equivalent to the historical behavior. There is a possibility that setting this to true would cause solutions to have less dwell time on the JVM heap since it would bias the QueryEngine to run chunks associated with operators having bopId values that are smaller first. I also modified BSBundle so that it implements Comparable. It's default order should order BSBundles with lower bopIds first. This is the other half of how we can create this front bias over the input queues for the query plan. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java 2014-01-13 14:32:13 UTC (rev 7779) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/BSBundle.java 2014-01-13 14:58:53 UTC (rev 7780) @@ -27,6 +27,8 @@ package com.bigdata.bop.engine; +import java.util.Comparator; + /** * An immutable class capturing the evaluation context of an operator against a * shard. @@ -34,12 +36,13 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -public class BSBundle { +public class BSBundle implements Comparable<BSBundle> { public final int bopId; public final int shardId; + @Override public String toString() { return super.toString() + "{bopId=" + bopId + ",shardId=" + shardId @@ -55,15 +58,14 @@ } - /** - * {@inheritDoc} - */ + @Override public int hashCode() { return (bopId * 31) + shardId; } + @Override public boolean equals(final Object o) { if (this == o) @@ -78,4 +80,39 @@ } + /** + * {@inheritDoc} + * <p> + * This orders the {@link BSBundle}s by reverse {@link #bopId} and by + * {@link #shardId} if the {@link #bopId} is the same. This order imposes a + * bias to draw entries with higher {@link #bopId}s from an ordered + * collection. + * <p> + * Note: Query plans are assigned bopIds from 0 through N where higher + * bopIds are assigned to operators that occur later in the query plan. This + * is not a strict rule, but it is a strong bias. Given that bias and an + * ordered map, this {@link Comparator} will tend to draw from operators + * that are further along in the query plan. This emphasizes getting results + * through the pipeline quickly. Whether or not this {@link Comparator} has + * any effect depends on the {@link ChunkedRunningQuery#consumeChunk()} + * method and the backing map over the operator queues. If a hash map is + * used, then the {@link Comparator} is ignored. If a skip list map is used, + * then the {@link Comparator} will influence the manner in which the + * operator queues are drained. 
+ */ + @Override + public int compareTo(final BSBundle o) { + + int ret = -Integer.compare(bopId, o.bopId); + + if (ret == 0) { + + ret = -Integer.compare(shardId, o.shardId); + + } + + return ret; + + } + } Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2014-01-13 14:32:13 UTC (rev 7779) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2014-01-13 14:58:53 UTC (rev 7780) @@ -37,13 +37,12 @@ import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.atomic.AtomicInteger; -import junit.framework.AssertionFailedError; - import org.apache.log4j.Logger; import com.bigdata.bop.BOp; @@ -112,20 +111,35 @@ * are removed from the map. * <p> * The map is guarded by the {@link #lock}. + * <p> + * Note: Using a hash map here means that {@link #consumeChunks()} will draw + * from operator queues more or less at random. Using an ordered map will + * impose a bias, depending on the natural ordering of the map keys. * - * FIXME Either this and/or {@link #operatorFutures} must be a weak value - * map in order to ensure that entries are eventually cleared in scale-out - * where the #of entries can potentially be very large since they are per - * (bopId,shardId). While these maps were initially declared as - * {@link ConcurrentHashMap} instances, if we remove entries once the - * map/queue entry is empty, this appears to open a concurrency hole which - * does not exist if we leave entries with empty map/queue values in the - * map. Changing to a weak value map should provide the necessary pruning of - * unused entries without opening up this concurrency hole. + * @see BSBundle#compareTo(BSBundle) + * + * FIXME Either this and/or {@link #operatorFutures} must be a weak + * value map in order to ensure that entries are eventually cleared in + * scale-out where the #of entries can potentially be very large since + * they are per (bopId,shardId). While these maps were initially + * declared as {@link ConcurrentHashMap} instances, if we remove + * entries once the map/queue entry is empty, this appears to open a + * concurrency hole which does not exist if we leave entries with empty + * map/queue values in the map. Changing to a weak value map should + * provide the necessary pruning of unused entries without opening up + * this concurrency hole. */ private final ConcurrentMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>> operatorQueues; /** + * Set to <code>true</code> to make {@link #operatorQueues} and ordered map. + * When <code>true</code>, {@link #consumeChunk()} will have an ordered bias + * in how it schedules work. [The historical behavior is present when this + * is <code>false</code>.] 
+ */ + private static final boolean orderedOperatorQueueMap = false; + + /** * FIXME It appears that this is Ok based on a single unit test known to * fail when {@link #removeMapOperatorQueueEntries} is <code>true</code>, * but I expect that a similar concurrency problem could also exist for the @@ -204,8 +218,16 @@ this.operatorFutures = new ConcurrentHashMap<BSBundle, ConcurrentHashMap<ChunkFutureTask, ChunkFutureTask>>(); - this.operatorQueues = new ConcurrentHashMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>>(); + if (orderedOperatorQueueMap) { + this.operatorQueues = new ConcurrentSkipListMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>>(); + + } else { + + this.operatorQueues = new ConcurrentHashMap<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>>(); + + } + } /** @@ -250,12 +272,14 @@ if (queue == null) { /* - * If the target is a pipelined operator, then we impose a limit - * on the #of messages which may be buffered for that operator. - * If the operator is NOT pipelined, e.g., ORDER_BY, then we use - * an unbounded queue. + * There is no input queue for this operator, so we create one + * now while we are holding the lock. If the target is a + * pipelined operator, then we impose a limit on the #of + * messages which may be buffered for that operator. If the + * operator is NOT pipelined, e.g., ORDER_BY, then we use an + * unbounded queue. * - * TODO Unit/stress tests with capacity set to 1. + * TODO Unit/stress tests with capacity set to 1. */ // The target operator for this message. @@ -265,12 +289,24 @@ PipelineOp.Annotations.PIPELINE_QUEUE_CAPACITY, PipelineOp.Annotations.DEFAULT_PIPELINE_QUEUE_CAPACITY) : Integer.MAX_VALUE; - + + // Create a new queue using [lock]. queue = new com.bigdata.jsr166.LinkedBlockingDeque<IChunkMessage<IBindingSet>>(// capacity, lock); - operatorQueues.put(bundle, queue); + // Add to the collection of operator input queues. + if (operatorQueues.put(bundle, queue) != null) { + + /* + * There must not be an entry for this operator. We checked + * for this above. Nobody else should be adding entries into + * the [operatorQueues] map. + */ + + throw new AssertionError(bundle.toString()); + + } } @@ -396,7 +432,7 @@ if (nrunning == 0) { // No tasks running for this operator. if(removeMapOperatorFutureEntries) - if(map!=operatorFutures.remove(bundle)) throw new AssertionError(); + if (map != operatorFutures.remove(bundle)) throw new AssertionError(); } } if (nrunning >= maxParallel) { @@ -526,14 +562,14 @@ if (queue.isEmpty()) { // No work, so remove work queue for (bopId,partitionId). if(removeMapOperatorQueueEntries) - if(queue!=operatorQueues.remove(bundle)) throw new AssertionError(); + if (queue != operatorQueues.remove(bundle)) throw new AssertionError(); return false; } /* * true iff operator requires at once evaluation and all solutions * are now available for that operator. */ - boolean atOnceReady = false; + boolean atOnceReady = false; if (!pipelined) { if (!isAtOnceReady(bundle.bopId)) { /* This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-13 14:32:19
Revision: 7779 http://bigdata.svn.sourceforge.net/bigdata/?rev=7779&view=rev Author: thompsonbry Date: 2014-01-13 14:32:13 +0000 (Mon, 13 Jan 2014) Log Message: ----------- javadoc on ChunkedMaterializationOp. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java 2014-01-13 14:31:38 UTC (rev 7778) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/bop/rdf/join/ChunkedMaterializationOp.java 2014-01-13 14:32:13 UTC (rev 7779) @@ -47,6 +47,7 @@ import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.engine.BOpStats; import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.IVCache; import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.store.BigdataBindingSetResolverator; @@ -270,7 +271,8 @@ * {@link BigdataValue}s. * * @param required - * The variable(s) to be materialized. + * The variable(s) to be materialized or <code>null</code> to + * materialize all variable bindings. * @param lex * The lexicon reference. * @param chunk @@ -279,7 +281,7 @@ static void resolveChunk(final IVariable<?>[] required, final LexiconRelation lex,// final IBindingSet[] chunk,// - final boolean materializeInlineIVs + final boolean materializeInlineIVs// ) { if (log.isInfoEnabled()) @@ -310,6 +312,7 @@ if (required == null) { + // Materialize all variable bindings. @SuppressWarnings("rawtypes") final Iterator<Map.Entry<IVariable, IConstant>> itr = bindingSet .iterator(); @@ -329,15 +332,16 @@ } if (iv.needsMaterialization() || materializeInlineIVs) { - - ids.add(iv); - + + ids.add(iv); + } } } else { + // Materialize the specified variable bindings. for (IVariable<?> v : required) { final IConstant<?> c = bindingSet.get(v); @@ -356,9 +360,9 @@ } if (iv.needsMaterialization() || materializeInlineIVs) { - - ids.add(iv); - + + ids.add(iv); + } } @@ -382,7 +386,7 @@ */ for (IBindingSet e : chunk) { - getBindingSet(e, required, terms); + getBindingSet(required, e, terms); } @@ -392,17 +396,24 @@ * Resolve the term identifiers in the {@link IBindingSet} using the map * populated when we fetched the current chunk. * + * @param required + * The variables to be resolved -or- <code>null</code> if all + * variables should have been resolved. * @param bindingSet - * A solution whose {@link Long}s will be interpreted as term - * identifiers and resolved to the corresponding - * {@link BigdataValue}s. + * A solution whose {@link IV}s will be resolved to the + * corresponding {@link BigdataValue}s in the caller's + * <code>terms</code> map. The {@link IVCache} associations are + * set as a side-effect. + * @param terms + * A map from {@link IV}s to {@link BigdataValue}s. * * @throws IllegalStateException * if the {@link IBindingSet} was not materialized with the * {@link IBindingSet}. 
*/ - static private void getBindingSet(final IBindingSet bindingSet, + static private void getBindingSet(// final IVariable<?>[] required, + final IBindingSet bindingSet, final Map<IV<?, ?>, BigdataValue> terms) { if (bindingSet == null) @@ -411,7 +422,7 @@ if (terms == null) throw new IllegalArgumentException(); - if(required != null) { + if (required != null) { /* * Only the specified variables. @@ -420,10 +431,10 @@ for (IVariable<?> var : required) { @SuppressWarnings("unchecked") - final IConstant<IV<?,?>> c = bindingSet.get(var); + final IConstant<IV<?, ?>> c = bindingSet.get(var); if (c == null) { - + // Variable is not bound in this solution. continue; } @@ -435,7 +446,7 @@ continue; } - + final BigdataValue value = terms.get(iv); if (value == null && iv.needsMaterialization()) { @@ -471,7 +482,7 @@ final Object boundValue = entry.getValue().get(); - if (!(boundValue instanceof IV<?, ?>)) { + if (!(boundValue instanceof IV)) { continue; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
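Revision 7779 above is mostly javadoc, but the resolveChunk()/getBindingSet() pair it documents follows a collect-resolve-apply pattern that is easy to miss in the diff: scan the whole chunk for IVs that need materialization, resolve them against the lexicon in one batched lookup, then walk the chunk again and cache the resolved values on the bindings. The sketch below uses hypothetical simplified stand-ins (Iv, Lexicon, and a plain Map per solution) rather than the actual IV, LexiconRelation, and IBindingSet APIs, and it omits the required-variables filter and inline-IV handling for brevity.

import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Minimal sketch (hypothetical types, not the Bigdata API) of the chunked
 * materialization pattern: collect the IVs needing values across a whole
 * chunk, resolve them in one batched lookup, then cache the values back
 * onto the bindings.
 */
public class ChunkedMaterializationSketch {

    /** Stand-in for an IV: an internal identifier that may need a value. */
    static final class Iv {
        final long id;
        String cachedValue; // stand-in for the IVCache association
        Iv(final long id) { this.id = id; }
        boolean needsMaterialization() { return cachedValue == null; }
        @Override public int hashCode() { return Long.hashCode(id); }
        @Override public boolean equals(final Object o) {
            return o instanceof Iv && ((Iv) o).id == id;
        }
    }

    /** Stand-in for the lexicon: a single batched id-to-value lookup. */
    interface Lexicon {
        Map<Iv, String> getValues(Set<Iv> ivs);
    }

    /**
     * Two passes over the chunk (each solution modeled as a variable-name
     * to Iv map) with one lexicon lookup in between.
     */
    static void resolveChunk(final List<Map<String, Iv>> chunk, final Lexicon lex) {

        // Pass 1: gather every IV in the chunk that still needs a value.
        final Set<Iv> ids = new HashSet<Iv>();
        for (Map<String, Iv> bindingSet : chunk) {
            for (Iv iv : bindingSet.values()) {
                if (iv.needsMaterialization()) {
                    ids.add(iv);
                }
            }
        }

        // One batched resolution against the lexicon for the whole chunk.
        final Map<Iv, String> terms = lex.getValues(ids);

        // Pass 2: set the resolved value on each binding (the side-effect
        // the javadoc describes as setting the IVCache association).
        for (Map<String, Iv> bindingSet : chunk) {
            for (Iv iv : bindingSet.values()) {
                final String value = terms.get(iv);
                if (value == null && iv.needsMaterialization()) {
                    // Mirrors the documented IllegalStateException: every
                    // IV that needs a value must have been resolved.
                    throw new IllegalStateException("Not materialized: " + iv.id);
                }
                if (value != null) {
                    iv.cachedValue = value;
                }
            }
        }
    }
}

As the javadoc notes, the terms map is populated once when the chunk is fetched and then reused for every solution in it, so the lexicon is consulted once per chunk rather than once per solution.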