From: <tho...@us...> - 2013-12-30 12:02:43
|
Revision: 7694 http://bigdata.svn.sourceforge.net/bigdata/?rev=7694&view=rev Author: thompsonbry Date: 2013-12-30 12:02:34 +0000 (Mon, 30 Dec 2013) Log Message: ----------- Added "DENSE" as a SampleType for SampleIndex class to support the RTO. DENSE simply takes N keys from the head of the key range for the access path. This significantly reduces the IO latency associated with the either random or uniform sampling since we will typically touch only one or two leaves while random or uniform sampling could easily touch all leaves spanned by the key range. Of course, this head sampling introduces a bias. Added RTO query hints for nedges, limit, and sample type. Added log @ INFO for the RTO overhead and the execution time for the optimized query. See #64 (RTO). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java Added Paths: ----------- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTOLimitQueryHint.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTONEdgesQueryHint.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTOSampleTypeQueryHint.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2013-12-24 13:23:21 UTC (rev 7693) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2013-12-30 12:02:34 UTC (rev 7694) @@ -106,7 +106,13 @@ /** * Sample offsets are computed randomly. */ - RANDOM; + RANDOM, + /** + * The samples will be dense and may bave a front bias. This mode + * emphasizes the locality of the samples on the index pages and + * minimizes the IO associated with sampling. + */ + DENSE; } /** @@ -323,6 +329,9 @@ seed(), limit, accessPath.getFromKey(), accessPath .getToKey()); break; + case DENSE: + advancer = new DenseSampleAdvancer<E>(); + break; default: throw new UnsupportedOperationException("SampleType=" + sampleType); @@ -339,6 +348,23 @@ } /** + * Dense samples in key order (simple index scan). + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @param <E> + */ + private static class DenseSampleAdvancer<E> extends Advancer<E> { + + private static final long serialVersionUID = 1L; + + @Override + protected void advance(final ITuple<E> tuple) { + // NOP + } + + } + + /** * An advancer pattern which is designed to take evenly distributed samples * from an index. The caller specifies the #of tuples to be sampled. This * class estimates the range count of the access path and then computes the Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2013-12-24 13:23:21 UTC (rev 7693) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2013-12-30 12:02:34 UTC (rev 7694) @@ -30,7 +30,10 @@ import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; +import java.util.concurrent.TimeUnit; +import org.apache.log4j.Logger; + import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpIdFactory; @@ -107,11 +110,16 @@ int DEFAULT_LIMIT = 100; - /** - * The <i>nedges</i> edges of the join graph having the lowest - * cardinality will be used to generate the initial join paths (default - * {@value #DEFAULT_NEDGES}). This must be a positive integer. - */ + /** + * The <i>nedges</i> edges of the join graph having the lowest + * cardinality will be used to generate the initial join paths (default + * {@value #DEFAULT_NEDGES}). This must be a positive integer. The edges + * in the join graph are sorted in order of increasing cardinality and + * up to <i>nedges</i> of those edges having the lowest cardinality are + * used to form the initial set of join paths. For each edge selected to + * form a join path, the starting vertex will be the vertex of that edge + * having the lower cardinality. + */ String NEDGES = JoinGraph.class.getName() + ".nedges"; int DEFAULT_NEDGES = 2; @@ -292,7 +300,9 @@ */ @Override public Void call() throws Exception { - + + final long begin = System.nanoTime(); + // Create the join graph. final JGraph g = new JGraph(getVertices(), getConstraints(), sampleType); @@ -301,6 +311,10 @@ final Path p = g.runtimeOptimizer(context.getRunningQuery() .getQueryEngine(), limit, nedges); + final long mark = System.nanoTime(); + + final long elapsed_queryOptimizer = mark - begin; + // Factory avoids reuse of bopIds assigned to the predicates. final BOpIdFactory idFactory = new BOpIdFactory(); @@ -313,11 +327,20 @@ // Run the query, blocking until it is done. JoinGraph.runSubquery(context, queryOp); + final long elapsed_queryExecution = System.nanoTime() - mark; + + if (log.isInfoEnabled()) + log.info("RTO: queryOptimizer=" + + TimeUnit.NANOSECONDS.toMillis(elapsed_queryOptimizer) + + ", queryExecution=" + + TimeUnit.NANOSECONDS.toMillis(elapsed_queryExecution)); + return null; } } // class JoinGraphTask + private static final transient Logger log = Logger.getLogger(JGraph.class); /** * Execute the selected join path. Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2013-12-24 13:23:21 UTC (rev 7693) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2013-12-30 12:02:34 UTC (rev 7694) @@ -32,6 +32,7 @@ import com.bigdata.bop.BufferAnnotations; import com.bigdata.bop.IPredicate.Annotations; import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.ap.SampleIndex.SampleType; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.fed.QueryEngineFactory; @@ -90,6 +91,46 @@ QueryOptimizerEnum DEFAULT_OPTIMIZER = QueryOptimizerEnum.Static; /** + * The sampling bias for the runtime query optimizer. Dense sampling + * maximizes index locality but reduces robustness to correlations that do + * not exist in the head of the access path key range. Random sampling + * maximizes robustness, but pays a heavy IO cost. Even sampling also + * increases robustness, but will visit every Nth tuple and pays a heavy IO + * cost as a result. Thus dense sampling should be much faster but random or + * even sampling should detect bias that might not otherwise be exposed to + * the runtime query optimizer. + * + * @see SampleType + */ + String RTO_SAMPLE_TYPE = "RTO-sampleType"; + + SampleType DEFAULT_RTO_SAMPLE_TYPE = SampleType.DENSE; + + /** + * The limit for sampling a vertex and the initial limit for cutoff join + * evaluation (default {@value #DEFAULT_RTO_LIMIT}). A larger limit and a + * random sample will provide a more accurate estimate of the cost of the + * join paths but are increase the runtime overhead of the RTO optimizer. + */ + String RTO_LIMIT = "RTO-limit"; + + int DEFAULT_RTO_LIMIT = 20; + + /** + * The <i>nedges</i> edges of the join graph having the lowest cardinality + * will be used to generate the initial join paths (default + * {@value #DEFAULT_NEDGES}). This must be a positive integer. The edges in + * the join graph are sorted in order of increasing cardinality and up to + * <i>nedges</i> of those edges having the lowest cardinality are used to + * form the initial set of join paths. For each edge selected to form a join + * path, the starting vertex will be the vertex of that edge having the + * lower cardinality. + */ + String RTO_NEDGES = "RTO-nedges"; + + int DEFAULT_RTO_NEDGES = 2; + + /** * Query hint sets the optimistic threshold for the static join order * optimizer. */ Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2013-12-24 13:23:21 UTC (rev 7693) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2013-12-30 12:02:34 UTC (rev 7694) @@ -42,12 +42,15 @@ import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.ap.SampleIndex.SampleType; import com.bigdata.bop.joinGraph.rto.JGraph; import com.bigdata.bop.joinGraph.rto.JoinGraph; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.constraints.INeedsMaterialization; import com.bigdata.rdf.sparql.ast.IGroupMemberNode; import com.bigdata.rdf.sparql.ast.JoinGroupNode; +import com.bigdata.rdf.sparql.ast.QueryHints; +import com.bigdata.rdf.sparql.ast.QueryOptimizerEnum; import com.bigdata.rdf.sparql.ast.StatementPatternNode; import com.bigdata.rdf.sparql.ast.StaticAnalysis; @@ -228,6 +231,8 @@ * (unless we are going to run the RTO "bottom up") and build a hash * index. When the hash index is ready, we can execute the join group. */ + final SampleType sampleType = joinGroup.getProperty( + QueryHints.RTO_SAMPLE_TYPE, QueryHints.DEFAULT_RTO_SAMPLE_TYPE); left = new JoinGraph(leftOrEmpty(left),// new NV(BOp.Annotations.BOP_ID, ctx.nextId()),// new NV(BOp.Annotations.EVALUATION_CONTEXT, @@ -245,8 +250,7 @@ JoinGraph.Annotations.DEFAULT_LIMIT),// new NV(JoinGraph.Annotations.NEDGES, JoinGraph.Annotations.DEFAULT_NEDGES),// - new NV(JoinGraph.Annotations.SAMPLE_TYPE, - JoinGraph.Annotations.DEFAULT_SAMPLE_TYPE)// + new NV(JoinGraph.Annotations.SAMPLE_TYPE, sampleType.name())// ); // These joins were consumed. Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java 2013-12-24 13:23:21 UTC (rev 7693) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/QueryHintRegistry.java 2013-12-30 12:02:34 UTC (rev 7694) @@ -100,6 +100,9 @@ add(new RunLastHint()); add(new RunOnceHint()); add(new OptimizerQueryHint()); + add(new RTOSampleTypeQueryHint()); + add(new RTOLimitQueryHint()); + add(new RTONEdgesQueryHint()); add(new OptimisticQueryHint()); add(new AnalyticQueryHint()); Added: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTOLimitQueryHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTOLimitQueryHint.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTOLimitQueryHint.java 2013-12-30 12:02:34 UTC (rev 7694) @@ -0,0 +1,79 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 27, 2011 + */ + +package com.bigdata.rdf.sparql.ast.hints; + +import com.bigdata.bop.joinGraph.rto.JGraph; +import com.bigdata.rdf.sparql.ast.ASTBase; +import com.bigdata.rdf.sparql.ast.JoinGroupNode; +import com.bigdata.rdf.sparql.ast.QueryHints; +import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; + +/** + * The query hint governing the initial sample size for the RTO optimizer. + * + * @see JGraph + * @see QueryHints#RTO_LIMIT + */ +final class RTOLimitQueryHint extends AbstractIntQueryHint { + + public RTOLimitQueryHint() { + super(QueryHints.RTO_LIMIT, QueryHints.DEFAULT_RTO_LIMIT); + } + + @Override + public Integer validate(final String value) { + + final int i = Integer.valueOf(value); + + if (i <= 0) + throw new IllegalArgumentException("Must be positive: hint=" + + getName() + ", value=" + value); + + return i; + + } + + @Override + public void handle(final AST2BOpContext ctx, final QueryHintScope scope, + final ASTBase op, final Integer value) { + + switch (scope) { + case Group: + case GroupAndSubGroups: + case Query: + case SubQuery: + if (op instanceof JoinGroupNode) { + _setAnnotation(ctx, scope, op, getName(), value); + } + return; + } + throw new QueryHintException(scope, op, getName(), value); + + } + +} \ No newline at end of file Added: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTONEdgesQueryHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTONEdgesQueryHint.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTONEdgesQueryHint.java 2013-12-30 12:02:34 UTC (rev 7694) @@ -0,0 +1,80 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 27, 2011 + */ + +package com.bigdata.rdf.sparql.ast.hints; + +import com.bigdata.bop.joinGraph.rto.JGraph; +import com.bigdata.rdf.sparql.ast.ASTBase; +import com.bigdata.rdf.sparql.ast.JoinGroupNode; +import com.bigdata.rdf.sparql.ast.QueryHints; +import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; + +/** + * The query hint governing the choice of the number of initial edges for the + * exploration of join paths in the join graph. + * + * @see JGraph + * @see QueryHints#RTO_NEDGES + */ +final class RTONEdgesQueryHint extends AbstractIntQueryHint { + + public RTONEdgesQueryHint() { + super(QueryHints.RTO_NEDGES, QueryHints.DEFAULT_RTO_NEDGES); + } + + @Override + public Integer validate(final String value) { + + int i = Integer.valueOf(value); + + if (i <= 0) + throw new IllegalArgumentException("Must be positive: hint=" + + getName() + ", value=" + value); + + return i; + + } + + @Override + public void handle(final AST2BOpContext ctx, final QueryHintScope scope, + final ASTBase op, final Integer value) { + + switch (scope) { + case Group: + case GroupAndSubGroups: + case Query: + case SubQuery: + if (op instanceof JoinGroupNode) { + _setAnnotation(ctx, scope, op, getName(), value); + } + return; + } + throw new QueryHintException(scope, op, getName(), value); + + } + +} \ No newline at end of file Added: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTOSampleTypeQueryHint.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTOSampleTypeQueryHint.java (rev 0) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/hints/RTOSampleTypeQueryHint.java 2013-12-30 12:02:34 UTC (rev 7694) @@ -0,0 +1,76 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Nov 27, 2011 + */ + +package com.bigdata.rdf.sparql.ast.hints; + +import com.bigdata.bop.ap.SampleIndex; +import com.bigdata.bop.ap.SampleIndex.SampleType; +import com.bigdata.rdf.sparql.ast.ASTBase; +import com.bigdata.rdf.sparql.ast.JoinGroupNode; +import com.bigdata.rdf.sparql.ast.QueryHints; +import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; + +/** + * The query hint governing the choice of the sampling bais for the RTO + * optimizer. + * + * @see JGraph + * @see SampleType + * @see QueryHints#RTO_SAMPLE_TYPE + */ +final class RTOSampleTypeQueryHint extends AbstractQueryHint<SampleType> { + + public RTOSampleTypeQueryHint() { + super(QueryHints.RTO_SAMPLE_TYPE, QueryHints.DEFAULT_RTO_SAMPLE_TYPE); + } + + @Override + public SampleType validate(final String value) { + + return SampleType.valueOf(value); + + } + + @Override + public void handle(final AST2BOpContext ctx, final QueryHintScope scope, + final ASTBase op, final SampleType value) { + + switch (scope) { + case Group: + case GroupAndSubGroups: + case Query: + case SubQuery: + if (op instanceof JoinGroupNode) { + _setAnnotation(ctx, scope, op, getName(), value); + } + return; + } + throw new QueryHintException(scope, op, getName(), value); + + } + +} \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |