From: <tho...@us...> - 2011-02-21 22:01:59
|
Revision: 4218 http://bigdata.svn.sourceforge.net/bigdata/?rev=4218&view=rev Author: thompsonbry Date: 2011-02-21 22:01:49 +0000 (Mon, 21 Feb 2011) Log Message: ----------- - Working on join graphs and the runtime query optimizer. - Moved JoinGraph, NoSolutionsException, and PartitionedJoinGraph into the com.bigdata.bop.joinGraph package. - Moved IEvaluationPlan, IEvaluationPlanFactory, DefaultEvaluationPlan, etc. into the com.bigdata.bop.joinGraph package. - Moved BOpUtility into the com.bigdata.bop.util package. - Added partial support for dynamically identifying edges based on constraints and for accepting unconstrained edges once there are no other vertices available to extend the join graph. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IRuleState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IRuleStatisticsFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/RuleState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/RuleStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/MockJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/filter/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/eval/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/OwlSameAsPropertiesExpandingIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/InferenceEngine.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/AbstractJoinGraphTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/internal/constraints/TestInlineConstraints.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/magic/TestIRIS.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/AbstractRuleTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestOptionals.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestRuleExpansion.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPORelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOStarJoin.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/DefaultRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/FixedEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/IEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/IEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/IRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/NOPEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/NoReorderEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/NoSolutionsException.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/fast/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/fast/DefaultEvaluationPlan2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/fast/DefaultEvaluationPlanFactory2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/util/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/fast/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/fast/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/fast/TestDefaultEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/TestJGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/joinGraph/rto/TestJoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_canJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_canJoinUsingConstraints.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/util/TestBOpUtility_sharedVariables.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/NoSolutionsException.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/PartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultEvaluationPlan2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultEvaluationPlanFactory2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/FixedEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IRangeCountFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/NOPEvaluationPlanFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/NoReorderEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility_canJoinUsingConstraints.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestPartitionedJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/eval/TestDefaultEvaluationPlan.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-21 18:33:22 UTC (rev 4217) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-02-21 22:01:49 UTC (rev 4218) @@ -40,8 +40,8 @@ import org.apache.log4j.Logger; import com.bigdata.bop.BOp.Annotations; -import com.bigdata.bop.controller.PartitionedJoinGroup; import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.joinGraph.PartitionedJoinGroup; import com.bigdata.btree.AbstractNode; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -1381,6 +1381,9 @@ /* * Find the constraints that will run with each vertex of the new * join path. + * + * TODO This is a forward reference to a different package, so maybe + * move the canJoinWithConstraints() method to that package? */ final IConstraint[][] constraintRunArray = PartitionedJoinGroup .getJoinGraphConstraints(newPath, constraints); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-02-21 18:33:22 UTC (rev 4217) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2011-02-21 22:01:49 UTC (rev 4218) @@ -34,6 +34,7 @@ import com.bigdata.bop.ap.filter.BOpFilterBase; import com.bigdata.bop.ap.filter.BOpTupleFilter; import com.bigdata.bop.ap.filter.DistinctFilter; +import com.bigdata.bop.joinGraph.IEvaluationPlan; import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleCursor; @@ -47,7 +48,6 @@ import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.rule.IAccessPathExpander; import com.bigdata.relation.rule.IRule; -import com.bigdata.relation.rule.eval.IEvaluationPlan; import com.bigdata.relation.rule.eval.pipeline.JoinMasterTask; import com.bigdata.service.ndx.IClientIndex; import com.bigdata.striterator.IKeyOrder; Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-21 18:33:22 UTC (rev 4217) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-02-21 22:01:49 UTC (rev 4218) @@ -1,3260 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -/* - * Created on Aug 16, 2010 - */ - -package com.bigdata.bop.controller; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.Formatter; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.concurrent.FutureTask; - -import org.apache.log4j.Logger; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.BOpContextBase; -import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.BOpIdFactory; -import com.bigdata.bop.BOpUtility; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstraint; -import com.bigdata.bop.IElement; -import com.bigdata.bop.IPredicate; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.ap.SampleIndex; -import com.bigdata.bop.ap.SampleIndex.SampleType; -import com.bigdata.bop.bindingSet.HashBindingSet; -import com.bigdata.bop.engine.IRunningQuery; -import com.bigdata.bop.engine.LocalChunkMessage; -import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; -import com.bigdata.bop.rdf.join.DataSetJoin; -import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.BufferClosedException; -import com.bigdata.relation.accesspath.IAccessPath; -import com.bigdata.relation.accesspath.IAsynchronousIterator; -import com.bigdata.relation.accesspath.ThickAsynchronousIterator; -import com.bigdata.striterator.Dechunkerator; -import com.bigdata.striterator.IChunkedIterator; -import com.bigdata.util.concurrent.Haltable; - -/** - * A join graph with annotations for estimated cardinality and other details in - * support of runtime query optimization. A join graph is a collection of - * relations and joins which connect those relations. This boils down to a - * collection of {@link IPredicate}s (selects on relations), shared variables - * (which identify joins), and {@link IConstraint}s (which limit solutions). - * Operators other than standard joins (including optional joins, sort, order - * by, etc.) must be handled downstream from the join graph in a "tail plan". - * - * @see http://arxiv.org/PS_cache/arxiv/pdf/0810/0810.4809v1.pdf, XQuery Join - * Graph Isolation. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * - * @todo Examine the overhead of the runtime optimizer. Look at ways to prune - * its costs. For example, by pruning the search, by recognizing when the - * query is simple enough to execute directly, by recognizing when we have - * already materialized the answer to the query, etc. - * - * @todo Cumulative estimated cardinality is an estimate of the work to be done. - * However, the actual cost of a join depends on whether we will use - * nested index subquery or a hash join and the cost of that operation on - * the database. There could be counter examples where the cost of the - * hash join with a range scan using the unbound variable is LT the nested - * index subquery. For those cases, we will do the same amount of IO on - * the hash join but there will still be a lower cardinality to the join - * path since we are feeding in fewer solutions to be joined. - * - * @todo Look at the integration with the SAIL. We decorate the joins with some - * annotations. Those will have to be correctly propagated to the "edges" - * in order for edge sampling and incremental evaluation (or final - * evaluation) to work. The {@link DataSetJoin} essentially inlines one of - * its access paths. That should really be changed into an inline access - * path and a normal join operator so we can defer some of the details - * concerning the join operator annotations until we decide on the join - * path to be executed. An inline AP really implies an inline relation, - * which in turn implies that the query is a searchable context for - * query-local resources. - * <p> - * For s/o, when the AP is remote, the join evaluation context must be ANY - * and otherwise (for s/o) it must be SHARDED. - * <p> - * Since the join graph is fed the vertices (APs), it does not have access - * to the annotated joins so we need to generated appropriately annotated - * joins when sampling an edge and when evaluation a subquery. - * <p> - * One solution would be to always use the unpartitioned views of the - * indices for the runtime query optimizer, which is how we are estimating - * the range counts of the access paths right now. [Note that the static - * query optimizer ignores named and default graphs, while the runtime - * query optimizer SHOULD pay attention to these things and exploit their - * conditional selectivity for the query plan.] - * - * @todo Handle optional join graphs by first applying the runtime optimizer to - * the main join graph and obtaining a sample for the selected join path. - * That sample will then be feed into the the optional join graph in order - * to optimize the join order within the optional join graph (a join order - * which is selective in the optional join graph is better since it will - * result in faster rejections of intermediate results and hence do less - * work). - * <p> - * This is very much related to accepting a collection of non-empty - * binding sets when running the join graph. However, optional join graph - * should be presented in combination with the original join graph and the - * starting paths must be constrained to have the selected join path for - * the original join graph as a prefix. With this setup, the original join - * graph has been locked in to a specific join path and the sampling of - * edges and vertices for the optional join graph can proceed normally. - * <p> - * True optionals will always be appended as part of the "tail plan" for - * any join graph and can not be optimized as each optional join must run - * regardless (as long as the intermediate solution survives the - * non-optional joins). - * - * @todo There are two cases where a join graph must be optimized against a - * specific set of inputs. In one case, it is a sample (this is how - * optimization of an optional join group proceeds per above). In the - * other case, the set of inputs is fixed and is provided instead of a - * single empty binding set as the starting condition. This second case is - * actually a bit more complicated since we can not use a random sample of - * vertices unless the do not share any variables with the initial binding - * sets. When there is a shared variable, we need to do a cutoff join of - * the edge with the initial binding sets. When there is not a shared - * variable, we can sample the vertex and then do a cutoff join. - * - * @todo When we run into a cardinality estimation underflow (the expected - * cardinality goes to zero) we could double the sample size for just - * those join paths which hit a zero estimated cardinality and re-run them - * within the round. This would imply that we keep per join path limits. - * The vertex and edge samples are already aware of the limit at which - * they were last sampled so this should not cause any problems there. - * <p> - * A related option would be to deepen the samples only when we are in - * danger of cardinality estimation underflow. E.g., a per-path limit. - * Resampling vertices may only make sense when we increase the limit - * since otherwise we may find a different correlation with the new sample - * but the comparison of paths using one sample base with paths using a - * different sample base in a different round does not carry forward the - * cardinality estimates from the prior round (unless we do something like - * a weighted moving average). - * - * @todo When comparing choices among join paths having fully bound tails where - * the estimated cardinality has also gone to zero, we should prefer to - * evaluate vertices in the tail with better index locality first. For - * example, if one vertex had one variable in the original plan while - * another had two variables, then solutions which reach the 2-var vertex - * could be spread out over a much wider range of the selected index than - * those which reach the 1-var vertex. [In order to support this, we would - * need a means to indicate that a fully bound access path should use an - * index specified by the query optimizer rather than the primary index - * for the relation. In addition, this suggests that we should keep bloom - * filters for more than just the SPO(C) index in scale-out.] - * - * @todo Examine behavior when we do not have perfect covering indices. This - * will mean that some vertices can not be sampled using an index and that - * estimation of their cardinality will have to await the estimation of - * the cardinality of the edge(s) leading to that vertex. Still, the - * approach should be able to handle queries without perfect / covering - * automatically. Then experiment with carrying fewer statement indices - * for quads. - * - * @todo Unit test when there are no solutions to the query. In this case there - * will be no paths identified by the optimizer and the final path length - * becomes zero. - */ -public class JoinGraph extends PipelineOp { - - private static final transient Logger log = Logger - .getLogger(JoinGraph.class); - - private static final long serialVersionUID = 1L; - - /** - * Known annotations. - */ - public interface Annotations extends PipelineOp.Annotations { - - /** - * The vertices of the join graph, expressed an an {@link IPredicate}[] - * (required). - */ - String VERTICES = JoinGraph.class.getName() + ".vertices"; - - /** - * The constraints on the join graph, expressed an an - * {@link IConstraint}[] (optional, defaults to no constraints). - */ - String CONSTRAINTS = JoinGraph.class.getName() + ".constraints"; - - /** - * The initial limit for cutoff sampling (default - * {@value #DEFAULT_LIMIT}). - */ - String LIMIT = JoinGraph.class.getName() + ".limit"; - - int DEFAULT_LIMIT = 100; - - /** - * The <i>nedges</i> edges of the join graph having the lowest - * cardinality will be used to generate the initial join paths (default - * {@value #DEFAULT_NEDGES}). This must be a positive integer. - */ - String NEDGES = JoinGraph.class.getName() + ".nedges"; - - int DEFAULT_NEDGES = 2; - } - - /** - * @see Annotations#VERTICES - */ - public IPredicate<?>[] getVertices() { - - return (IPredicate[]) getRequiredProperty(Annotations.VERTICES); - - } - - /** - * @see Annotations#CONSTRAINTS - */ - public IConstraint[] getConstraints() { - - return (IConstraint[]) getProperty(Annotations.CONSTRAINTS, null/* none */); - - } - - /** - * @see Annotations#LIMIT - */ - public int getLimit() { - - return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); - - } - - /** - * @see Annotations#NEDGES - */ - public int getNEdges() { - - return getProperty(Annotations.NEDGES, Annotations.DEFAULT_NEDGES); - - } - - public JoinGraph(final BOp[] args, final NV... anns) { - - this(args, NV.asMap(anns)); - - } - - public JoinGraph(final BOp[] args, final Map<String, Object> anns) { - - super(args, anns); - - // required property. - final IPredicate<?>[] vertices = (IPredicate[]) getProperty(Annotations.VERTICES); - - if (vertices == null) - throw new IllegalArgumentException(Annotations.VERTICES); - - if (vertices.length == 0) - throw new IllegalArgumentException(Annotations.VERTICES); - - if (getLimit() <= 0) - throw new IllegalArgumentException(Annotations.LIMIT); - - if (getNEdges() <= 0) - throw new IllegalArgumentException(Annotations.NEDGES); - - if (!isController()) - throw new IllegalArgumentException(); - - switch (getEvaluationContext()) { - case CONTROLLER: - break; - default: - throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT - + "=" + getEvaluationContext()); - } - - } - - public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - - return new FutureTask<Void>(new JoinGraphTask(context)); - - } - - /** - * A sample of a {@link Vertex} (an access path). - */ - public static class VertexSample { - - /** - * Fast range count. This will be the same for each sample taken - * (assuming a read historical view or even a time scale of query which - * is significantly faster than update). - */ - public final long rangeCount; - - /** - * The limit used to produce the {@link #sample}. - */ - public final int limit; - - /** - * When <code>true</code>, the result is not a sample but the - * materialized access path. - * - * TODO When <code>true</code>, we could run the join against the sample - * rather than the disk. This would require wrapping the sample as an - * access path. Since all exact samples will be pretty small, this is - * not likely to have any great performance benefit. - */ - public final boolean exact; - - /** - * Sample. - */ - final Object[] sample; - - /** - * - * @param rangeCount - * @param limit - * @param exact - * @param sample - */ - public VertexSample(final long rangeCount, final int limit, - final boolean exact, final Object[] sample) { - - if (rangeCount < 0L) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - if (sample == null) - throw new IllegalArgumentException(); - - this.rangeCount = rangeCount; - - this.limit = limit; - - this.exact = exact; - - this.sample = sample; - - } - - public String toString() { - return "VertexSample{rangeCount=" + rangeCount + ",limit=" + limit - + ",exact=" + exact + ", sampleSize=" + sample.length + "}"; - } - - } - - /** - * A vertex of the join graph is an annotated relation (this corresponds to - * an {@link IPredicate} with additional annotations to support the adaptive - * query optimization algorithm). - * <p> - * The unique identifier for a {@link Vertex} (within a given join graph) is - * the {@link BOp.Annotations#BOP_ID} decorating its {@link IPredicate}. - * {@link #hashCode()} is defined in terms of this unique identifier so we - * can readily detect when a {@link Set} already contains a given - * {@link Vertex}. - */ - public static class Vertex implements Serializable { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public final IPredicate<?> pred; - - /** - * The most recently taken sample of the {@link Vertex}. - */ - transient VertexSample sample = null; - - Vertex(final IPredicate<?> pred) { - - if (pred == null) - throw new IllegalArgumentException(); - - this.pred = pred; - - } - - public String toString() { - - return "Vertex{pred=" + pred + ",sample=" + sample + "}"; - - } - - /** - * Equals is based on a reference test. - */ - public boolean equals(Object o) { - return this == o; - } - - /** - * The hash code is just the {@link BOp.Annotations#BOP_ID} of the - * associated {@link IPredicate}. - */ - public int hashCode() { - return pred.getId(); - } - - /** - * Take a sample of the vertex, updating {@link #sample} as a - * side-effect. If the sample is already exact, then this is a NOP. If - * the vertex was already sampled to that limit, then this is a NOP (you - * have to raise the limit to re-sample the vertex). - * - * @param limit - * The sample cutoff. - */ - public void sample(final QueryEngine queryEngine, final int limit) { - - if (queryEngine == null) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - final VertexSample oldSample = this.sample; - - if (oldSample != null && oldSample.exact) { - - /* - * The old sample is already the full materialization of the - * vertex. - */ - - return; - - } - - if (oldSample != null && oldSample.limit >= limit) { - - /* - * The vertex was already sampled to this limit. - */ - - return; - - } - - final BOpContextBase context = new BOpContextBase(queryEngine); - - final IRelation r = context.getRelation(pred); - - final IAccessPath ap = context.getAccessPath(r, pred); - - final long rangeCount = oldSample == null ? ap - .rangeCount(false/* exact */) : oldSample.rangeCount; - - if (rangeCount <= limit) { - - /* - * Materialize the access path. - * - * TODO This could be more efficient if we raised it onto the AP - * or if we overrode CHUNK_CAPACITY and the fully buffered - * iterator threshold such that everything was materialized as a - * single chunk. - */ - - final List<Object> tmp = new ArrayList<Object>((int) rangeCount); - - final IChunkedIterator<Object> itr = ap.iterator(); - - try { - - while (itr.hasNext()) { - - tmp.add(itr.next()); - - } - - } finally { - - itr.close(); - } - - sample = new VertexSample(rangeCount, limit, true/* exact */, - tmp.toArray(new Object[0])); - - } else { - - /* - * Materialize a random sample from the access path. - */ - -// final SampleType sampleType = SampleType.EVEN; - final SampleType sampleType = SampleType.RANDOM; - - final SampleIndex<?> sampleOp = new SampleIndex( - new BOp[] {}, // - NV.asMap(// - new NV(SampleIndex.Annotations.PREDICATE, pred),// - new NV(SampleIndex.Annotations.LIMIT, limit),// - new NV(SampleIndex.Annotations.SAMPLE_TYPE, sampleType.name())// - )); - - sample = new VertexSample(rangeCount, limit, false/* exact */, - sampleOp.eval(context)); - - } - - if (log.isTraceEnabled()) - log.trace("Sampled: " + sample); - - return; - - } - - } - - /** - * Type safe enumeration describes the edge condition (if any) for a - * cardinality estimate. - */ - public static enum EstimateEnum { - /** - * An estimate, but not any of the edge conditions. - */ - Normal(" "), - /** - * The cardinality estimate is exact. - */ - Exact("E"), - /** - * The cardinality estimation is a lower bound (the actual cardinality - * may be higher than the estimated value). - */ - LowerBound("L"), - /** - * Flag is set when the cardinality estimate underflowed (false zero - * (0)). - */ - Underflow("U"); - - private EstimateEnum(final String code) { - - this.code = code; - - } - - private final String code; - - public String getCode() { - - return code; - - } - - } // EstimateEnum - - /** - * A sample of an {@link Edge} (a join). - */ - public static class EdgeSample { - - /** - * The fast range count (aka cardinality) for the source vertex of the - * edge (whichever vertex has the lower cardinality). - */ - public final long rangeCount; - - /** - * <code>true</code> iff the source sample is exact (because the source - * is either a fully materialized vertex or an edge whose solutions have - * been fully materialized). - */ - public final boolean sourceSampleExact; - - /** - * The limit used to sample the edge (this is the limit on the #of - * solutions generated by the cutoff join used when this sample was - * taken). - */ - public final int limit; - - /** - * The #of binding sets out of the source sample vertex sample which - * were consumed. - */ - public final int inputCount; - - /** - * The #of binding sets generated before the join was cutoff. - * <p> - * Note: If the outputCount is zero then this is a good indicator that - * there is an error in the query such that the join will not select - * anything. This is not 100%, merely indicative. - */ - public final long outputCount; - - /** - * The ratio of the #of input samples consumed to the #of output samples - * generated (the join hit ratio or scale factor). - */ - public final double f; - - /** - * The estimated cardinality of the join. - */ - public final long estimatedCardinality; - - /** - * Indicates whether the estimate is exact, an upper bound, or a lower - * bound. - * - * TODO This field should be used to avoid needless re-computation of a - * join whose exact solution is already known. - */ - public final EstimateEnum estimateEnum; - - /** - * The sample of the solutions for the join path. - */ - private final IBindingSet[] sample; - - /** - * Create an object which encapsulates a sample of an edge. - * - * @param limit - * The limit used to sample the edge (this is the limit on - * the #of solutions generated by the cutoff join used when - * this sample was taken). - * @param sourceVertexSample - * The sample for source vertex of the edge (whichever vertex - * has the lower cardinality). - * @param inputCount - * The #of binding sets out of the source sample vertex - * sample which were consumed. - * @param outputCount - * The #of binding sets generated before the join was cutoff. - */ - EdgeSample( - // final VertexSample sourceVertexSample, - final long sourceSampleRangeCount,// - final boolean sourceSampleExact, // - final int sourceSampleLimit,// - final int limit,// - final int inputCount, // - final long outputCount,// - final double f, - final long estimatedCardinality, - final IBindingSet[] sample) { - - if (sample == null) - throw new IllegalArgumentException(); - - // this.rangeCount = sourceVertexSample.rangeCount; - this.rangeCount = sourceSampleRangeCount; - - this.sourceSampleExact = sourceSampleExact; - - this.limit = limit; - - this.inputCount = inputCount; - - this.outputCount = outputCount; - - this.f = f; - - this.estimatedCardinality = estimatedCardinality; - - if (sourceSampleExact && outputCount < limit) { - /* - * Note: If the entire source vertex is being fed into the - * cutoff join and the cutoff join outputCount is LT the limit, - * then the sample is the actual result of the join. That is, - * feeding all source solutions into the join gives fewer than - * the desired number of output solutions. - */ - estimateEnum = EstimateEnum.Exact; - } else if (inputCount == 1 && outputCount == limit) { - /* - * If the inputCount is ONE (1) and the outputCount is the - * limit, then the estimated cardinality is a lower bound as - * more than outputCount solutions might be produced by the join - * when presented with a single input solution. - */ - estimateEnum = EstimateEnum.LowerBound; - } else if (!sourceSampleExact - && inputCount == Math.min(sourceSampleLimit, rangeCount) - && outputCount == 0) { - /* - * When the source sample was not exact, the inputCount is EQ to - * the lesser of the source range count and the source sample - * limit, and the outputCount is ZERO (0), then feeding in all - * source solutions in is not sufficient to generate any output - * solutions. In this case, the estimated join hit ratio appears - * to be zero. However, the estimation of the join hit ratio - * actually underflowed and the real join hit ratio might be a - * small non-negative value. A real zero can only be identified - * by executing the full join. - * - * Note: An apparent join hit ratio of zero does NOT imply that - * the join will be empty (unless the source vertex sample is - * actually the fully materialized access path - this case is - * covered above). - */ - estimateEnum = EstimateEnum.Underflow; - } else { - estimateEnum = EstimateEnum.Normal; - } - - this.sample = sample; - } - - public String toString() { - return getClass().getName() // - + "{ rangeCount=" + rangeCount// - + ", sourceSampleExact=" + sourceSampleExact// - + ", limit=" + limit // - + ", inputCount=" + inputCount// - + ", outputCount=" + outputCount // - + ", f=" + f// - + ", estimatedCardinality=" + estimatedCardinality// - + ", estimateEnum=" + estimateEnum// -// + ", estimateIsLowerBound=" + estimateIsLowerBound// -// + ", estimateIsUpperBound=" + estimateIsUpperBound// -// + ", sampleIsExactSolution=" + estimateIsExact // - + "}"; - } - - }; - - /** - * An edge of the join graph is an annotated join operator. The edges of the - * join graph are undirected. Edges exist when the vertices share at least - * one variable. - * <p> - * {@link #hashCode()} is defined in terms of the unordered hash codes of - * the individual vertices. - */ - public static class Edge implements Serializable { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * The vertices connected by that edge. - */ - public final Vertex v1, v2; - - /** - * The set of shared variables. - */ - public final Set<IVariable<?>> shared; - - /** - * The last sample for this edge and <code>null</code> if the edge has - * not been sampled. - * <p> - * Note: This sample is only the one-step cutoff evaluation of the edge - * given a sample of its vertex having the lesser cardinality. It is NOT - * the cutoff sample of a join path having this edge except for the - * degenerate case where the edge is the first edge in the join path. - */ - transient EdgeSample sample = null; - - public Edge(final Vertex v1, final Vertex v2, - final Set<IVariable<?>> shared) { - if (v1 == null) - throw new IllegalArgumentException(); - if (v2 == null) - throw new IllegalArgumentException(); - if (shared == null) - throw new IllegalArgumentException(); - // Note: We need to allow edges which do not share variables -// if (shared.isEmpty()) -// throw new IllegalArgumentException(); - this.v1 = v1; - this.v2 = v2; - this.shared = shared; - } - - /** - * The edge label is formed from the {@link BOp.Annotations#BOP_ID} of - * its ordered vertices (v1,v2). - */ - public String getLabel() { - - return "(" + v1.pred.getId() + "," + v2.pred.getId() + ")"; - - } - - /** - * Note: The vertices of the edge are labeled using the - * {@link BOp.Annotations#BOP_ID} associated with the {@link IPredicate} - * for each vertex. - */ - public String toString() { - - return "Edge{ "+getLabel()+", estCard=" - + (sample == null ? "N/A" : sample.estimatedCardinality) - + ", shared=" + shared.toString() + ", sample=" + sample - + "}"; - - } - - /** - * Equality is determined by reference testing. - */ - public boolean equals(final Object o) { - - return this == o; - - } - - /** - * The hash code of an edge is the hash code of the vertex with the - * smaller hash code X 31 plus the hash code of the vertex with the - * larger hash code. This definition compensates for the arbitrary order - * in which the vertices may be expressed and also recognizes that the - * vertex hash codes are based on the bop ids, which are often small - * integers. - */ - public int hashCode() { - - if (hash == 0) { - - final int h1 = v1.hashCode(); - final int h2 = v2.hashCode(); - - final int h; - if (h1 < h2) { - - h = h1 * 31 + h2; - - } else { - - h = h2 * 31 + h1; - - } - - hash = h; - - } - return hash; - - } - - private int hash; - - /** - * Return the vertex with the smaller estimated cardinality. - * - * @throws IllegalStateException - * if either vertex has not been sampled. - */ - public Vertex getMinimumCardinalityVertex() { - - if (v1.sample == null) // vertex not sampled. - throw new IllegalStateException(); - - if (v2.sample == null) // vertex not sampled. - throw new IllegalStateException(); - - return (v1.sample.rangeCount < v2.sample.rangeCount) ? v1 : v2; - - } - - /** - * Return the vertex with the larger estimated cardinality (the vertex - * not returned by {@link #getMinimumCardinalityVertex()}). - * - * @throws IllegalStateException - * if either vertex has not been sampled. - */ - public Vertex getMaximumCardinalityVertex() { - - // The vertex with the minimum cardinality. - final Vertex o = getMinimumCardinalityVertex(); - - // Return the other vertex. - return (v1 == o) ? v2 : v1; - - } - - /** - * Estimate the cardinality of the edge, updating {@link #sample} as a - * side-effect. This is a NOP if the edge has already been sampled at - * that <i>limit</i>. This is a NOP if the edge sample is exact. - * - * @param context - * - * @return The new {@link EdgeSample} (this is also updated on - * {@link #sample} as a side-effect). - * - * @throws Exception - */ - public EdgeSample estimateCardinality(final QueryEngine queryEngine, - final int limit) throws Exception { - - if (limit <= 0) - throw new IllegalArgumentException(); - -// /* -// * Note: There is never a need to "re-sample" the edge. Unlike ROX, -// * we always can sample a vertex. This means that we can sample the -// * edges exactly once, during the initialization of the join graph. -// */ -// if (sample != null) -// throw new RuntimeException(); - - if (sample != null) { - - if (sample.limit >= limit) { - - // Already sampled at that limit. - return sample; - - } - - if (sample.estimateEnum == EstimateEnum.Exact) { - - // Sample is exact (fully materialized result). - return sample; - - } - - } - - /* - * Figure out which vertex has the smaller cardinality. The sample - * of that vertex is used since it is more representative than the - * sample of the other vertex. - */ - // vertex v, vprime - final Vertex v, vp; - if (v1.sample == null) // vertex not sampled. - throw new IllegalStateException(); - if (v2.sample == null) // vertex not sampled. - throw new IllegalStateException(); - /* - * FIXME CONSTRAINT ORDERING : If a variable only appears in a - * CONSTRAINT for one of the two vertices then that vertex must be - * evaluated second. (If the vertices both have this problem then - * the edge can not be evaluated until some other vertex causes the - * variables of either one [v1] or [v2] to become bound.) - */ - if (v1.sample.rangeCount < v2.sample.rangeCount) { - v = v1; - vp = v2; - } else { - v = v2; - vp = v1; - } - - /* - * Convert the source sample into an IBindingSet[]. - * - * TODO We might as well do this when we sample the vertex. - */ - final IBindingSet[] sourceSample = new IBindingSet[v.sample.sample.length]; - { - for (int i = 0; i < sourceSample.length; i++) { - final IBindingSet bset = new HashBindingSet(); - BOpContext.copyValues((IElement) v.sample.sample[i], - v.pred, bset); - sourceSample[i] = bset; - } - } - - // Sample the edge and save the sample on the edge as a side-effect. - this.sample = estimateCardinality(queryEngine, limit, v, vp, - v.sample.rangeCount, v.sample.exact, v.sample.limit, - sourceSample); - - return sample; - - } - - /** - * Estimate the cardinality of the edge given a sample of either a - * vertex or a join path leading up to that edge. - * <p> - * Note: The caller is responsible for protecting against needless - * re-sampling. - * - * @param queryEngine - * @param limit - * @param vSource - * The source vertex. - * @param vTarget - * The target vertex - * @param sourceSample - * The sample for the source vertex. When this is a one-step - * estimation of the cardinality of the edge, then this - * sample is taken from the {@link VertexSample}. When the - * edge (vSource,vTarget) extends some {@link Path}, then - * this is taken from the {@link EdgeSample} for that - * {@link Path}. - * - * @return The result of sampling that edge. - * - * @throws Exception - */ - public EdgeSample estimateCardinality(final QueryEngine queryEngine, - final int limit, final Vertex vSource, final Vertex vTarget, - final long sourceSampleRangeCount, - final boolean sourceSampleExact, - final int sourceSampleLimit, - final IBindingSet[] sourceSample) - throws Exception { - - if (limit <= 0) - throw new IllegalArgumentException(); - - /* - * Note: This sets up a cutoff pipeline join operator which makes an - * accurate estimate of the #of input solutions consumed and the #of - * output solutions generated. From that, we can directly compute - * the join hit ratio. This approach is preferred to injecting a - * "RowId" column as the estimates are taken based on internal - * counters in the join operator and the join operator knows how to - * cutoff evaluation as soon as the limit is satisfied, thus - * avoiding unnecessary effort. - * - * TODO Any constraints on the edge (other than those implied by - * shared variables) need to be annotated on the join. Constraints - * (other than range constraints which are directly coded by the - * predicate) will not reduce the effort to compute the join, but - * they can reduce the cardinality of the join and that is what we - * are trying to estimate here. - * - * TODO How can join constraints be moved around? Just attach them - * where ever a variable becomes bound? And when do we filter out - * variables which are not required downstream? Once we decide on a - * join path and execute it fully (rather than sampling that join - * path). - */ - final int joinId = 1; - final Map<String,Object> anns = NV.asMap(// - new NV(BOp.Annotations.BOP_ID, joinId),// - // @todo Why not use a factory which avoids bopIds already in use? - new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred.setBOpId(3)), - // disallow parallel evaluation of tasks. - new NV(PipelineOp.Annotations.MAX_PARALLEL,1), - // disallow parallel evaluation of chunks. - new NV(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS,0), - // disable access path coalescing - new NV(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,false), - // cutoff join. - new NV(PipelineJoin.Annotations.LIMIT,(long)limit), - /* - * Note: In order to have an accurate estimate of the join - * hit ratio we need to make sure that the join operator - * runs using a single PipelineJoinStats instance which will - * be visible to us when the query is cutoff. In turn, this - * implies that the join must be evaluated on the query - * controller. - * - * @todo This implies that sampling of scale-out joins must - * be done using remote access paths. - */ - new NV(PipelineJoin.Annotations.SHARED_STATE,true), - new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT,BOpEvaluationContext.CONTROLLER) - ); - - final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, anns); - - final PipelineOp queryOp = joinOp; - - // run the cutoff sampling of the edge. - final UUID queryId = UUID.randomUUID(); - final IRunningQuery runningQuery = queryEngine.eval(queryId, - queryOp, new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, joinOp.getId()/* startId */, - -1 /* partitionId */, - new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { sourceSample }))); - - final List<IBindingSet> result = new LinkedList<IBindingSet>(); - try { - try { - IBindingSet bset = null; - // Figure out the #of source samples consumed. - final Iterator<IBindingSet> itr = new Dechunkerator<IBindingSet>( - runningQuery.iterator()); - while (itr.hasNext()) { - bset = itr.next(); - result.add(bset); - } - } finally { - // verify no problems. - runningQuery.get(); - } - } finally { - runningQuery.cancel(true/* mayInterruptIfRunning */); - } - - // The join hit ratio can be computed directly from these stats. - final PipelineJoinStats joinStats = (PipelineJoinStats) runningQuery - .getStats().get(joinId); - - if (log.isTraceEnabled()) - log.trace(joinStats.toString()); - - /* - * TODO Improve comments here. See if it is possible to isolate a - * common base class which would simplify the setup of the cutoff - * join and the computation of the sample stats. - */ - - // #of solutions in. - final int nin = (int) joinStats.inputSolutions.get(); - - // #of solutions out. - long nout = joinStats.outputSolutions.get(); - - // cumulative range count of the sampled access paths. - final long sumRangeCount = joinStats.accessPathRangeCount.get(); - - if (nin == 1 && nout == limit) { - /* - * We are getting [limit] solutions out for one solution in. In - * this case, (nout/nin) is a lower bound for the estimated - * cardinality of the edge. In fact, this condition suggests - * that the upper bound is a must better estimate of the - * cardinality of this join. Therefore, we replace [nout] with - * the sum of the range counts for the as-bound predicates - * considered by the cutoff join. - * - * For example, consider a join feeding a rangeCount of 16 into - * a rangeCount of 175000. With a limit of 100, we estimated the - * cardinality at 1600L (lower bound). In fact, the cardinality - * is 16*175000. This falsely low estimate can cause solutions - * which are really better to be dropped. - * - * @todo we should mark [nout] when we do this show that it - * shows up in the trace! Also, the rangeCount is sometimes - * falsely high. However, that should be corrected by random - * resampling of the vertices and paths. - */ - nout = sumRangeCount; - - } - - final double f = nout == 0 ? 0 : (nout / (double) nin); - - final long estimatedCardinality = (long) (sourceSampleRangeCount * f); - - final EdgeSample edgeSample = new EdgeSample( - sourceSampleRangeCount, // - sourceSampleExact, // @todo redundant with sourceSampleLimit - sourceSampleLimit, // - limit, // - nin,// - nout, // - f, // - estimatedCardinality, // - result.toArray(new IBindingSet[result.size()])); - - if (log.isDebugEnabled()) - log.debug(getLabel() + " : newSample=" + edgeSample); - - return edgeSample; - - } - - } - - /** - * A sequence of {@link Edge}s (aka join steps). - */ - public static class Path { - - /** - * An immutable ordered list of the edges in the (aka the sequence of - * joins represented by this path). - */ - public final List<Edge> edges; - - /** - * The sample obtained by the step-wise cutoff evaluation of the ordered - * edges of the path. - * <p> - * Note: This sample is generated one edge at a time rather than by - * attempting the cutoff evaluation of the entire join path (the latter - * approach does allow us to limit the amount of work to be done to - * satisfy the cutoff). - */ - public EdgeSample sample; - - /** - * The cumulative estimated cardinality of the path. This is zero for an - * empty path. For a path consisting of a single edge, this is the - * estimated cardinality of that edge. When creating a new path by - * adding an edge to an existing path, the cumulative cardinality of the - * new path is the cumulative cardinality of the existing path plus the - * estimated cardinality of the cutoff join of the new edge given the - * input sample of the existing path. - * - * @todo track this per vertex as well as the total for more interesting - * traces in showPath(Path). - */ - final public long cumulativeEstimatedCardinality; - - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("Path{"); - boolean first = true; - for (Edge e : edges) { - if (!first) - sb.append(","); - sb.append(e.getLabel()); - first = false; - } - sb.append(",cumEstCard=" + cumulativeEstimatedCardinality - + ",sample=" + sample + "}"); - return sb.toString(); - } - - /** - * Create an empty path. - */ - public Path() { - this.edges = Collections.emptyList(); - this.cumulativeEstimatedCardinality = 0; - this.sample = null; - } - - /** - * Create a path from a single edge. - * - * @param e - * The edge. - */ - public Path(final Edge e) { - - if (e == null) - throw new IllegalArgumentException(); - - if (e.sample == null) - throw new IllegalArgumentException("Not sampled: " + e); - - this.edges = Collections.singletonList(e); - - this.sample = e.sample; - - this.cumulativeEstimatedCardinality = e.sample.estimatedCardinality; - - } - - /** - * Constructor used by {@link #addEdge(QueryEngine, int, Edge)} - * - * @param edges - * The edges in the new path. - * @param cumulativeEstimatedCardinality - * The cumulative estimated cardinality of the new path. - * @param sample - * The sample from the last - */ - private Path(final List<Edge> edges, - final long cumulativeEstimatedCardinality, - final EdgeSample sample) { - - if (edges == null) - throw new IllegalArgumentException(); - - if (cumulativeEstimatedCardinality < 0) - throw new IllegalArgumentException(); - - if (sample == null) - throw new IllegalArgumentException(); - - this.edges = Collections.unmodifiableList(edges); - - this.cumulativeEstimatedCardinality = cumulativeEstimatedCardinality; - - this.sample = sample; - - } - - /** - * Return <code>true</code> iff the {@link Path} contains at least one - * {@link Edge} for that {@link Vertex}. - * - * @param v - * The vertex - * - * @return true if the vertex is already part of the path. - */ - public boolean contains(final Vertex v) { - - if (v == null) - throw new IllegalArgumentException(); - - for (Edge e : edges) { - - if (e.v1 == v || e.v2 == v) - return true; - - } - - return false; - } - - /** - * Return <code>true</code> if this path is an unordered variant of the - * given path (same vertices in any order). - * - * @param p - * Another path. - * - * @return <code>true</code> if this path is an unordered variant of the - * given path. - */ - public boolean isUnorderedVariant(final Path p) { - - if (p == null) - throw new IllegalArgumentException(); - - if (edges.size() != p.edges.size()) { - /* - * Fast rejection. This assumes that each edge after the first - * adds one distinct vertex to the path. That assumption is - * enforced by #addEdge(). - */ - return false; - } - - final Vertex[] v1 = getVertices(); - final Vertex[] v2 = p.getVertices(); - - if (v1.length != v2.length) { - - // Reject (this case is also covered by the test above). - return false; - - } - - /* - * Scan the vertices of the caller's path. If any of those vertices - * are NOT found in this path the paths are not unordered variations - * of one another. - */ - for (int i = 0; i < v2.length; i++) { - - final Vertex tmp = v2[i]; - - boolean found = false; - for (int j = 0; j < v1.length; j++) { - - if (v1[j] == tmp) { - found = true; - break; - } - - } - - if (!found) { - return false; - } - - } - - return true; - - } - - /** - * Return the vertices in this path (in path order). For the first edge, - * the minimum cardinality vertex is always reported first (this is - * critical for producing the correct join plan). For the remaining - * edges in the path, the unvisited is reported. - * - * @return The vertices (in path order). - * - * TODO This could be rewritten without the toArray() using a - * method which visits the vertices of a path in any order. - * - * @todo unit test for the first vertex to be reported. - */ - public Vertex[] getVertices() { - - final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); - - for (Edge e : edges) { - - if (tmp.isEmpty()) { - /* - * The first edge is handled specially in order to report - * the minimum cardinality vertex first. - * - * FIXME CONSTRAINT ORDERING : A vertex can not run until - * all variables appearing in its CONSTRAINTS would be - * bound. This can cause us to use and report an ordering - * which does not place the minimum cardinality vertex 1st. - */ - tmp.add(e.getMinimumCardinalityVertex()); - tmp.add(e.getMaximumCardinalityVertex()); - - } else { - - tmp.add(e.v1); - - tmp.add(e.v2); - - } - - } - - final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); - - return a; - - } - - /** - * Return the {@link IPredicate}s associated with the vertices of the - * join path in path order. - * - * @see #getVertices() - */ - public IPredicate<?>[] getPredicates() { - - // The vertices in the selected evaluation order. - final Vertex[] vertices = getVertices(); - - // The predicates in the same order as the vertices. - final IPredicate<?>[] preds = new IPredicate[vertices.length]; - - for (int i = 0; i < vertices.length; i++) { - - preds[i] = vertices[i].pred; - - } - - return preds; - - } - - /** - * Return the {@link BOp} identifiers of the predicates associated with - * each vertex in path order. - */ - public int[] getVertexIds() { - - return getVertexIds(edges); - - } - - /** - * Return the {@link BOp} identifiers of the predicates associated with - * each vertex in path order. - */ - static public int[] getVertexIds(final List<Edge> edges) { - - final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); - - for (Edge e : edges) { - - tmp.add(e.v1); - - tmp.add(e.v2); - - } - - final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); - - final int[] b = new int[a.length]; - - for (int i = 0; i < a.length; i++) { - - b[i] = a[i].pred.getId(); - - } - - return b; - - } - - /** - * Return <code>true</code> if this path begins with the given path. - * - * @param p - * The given path. - * - * @return <code>true</code> if this path begins with the given path. - */ - public boolean beginsWith(final Path p) { - - if (p == null) - throw new IllegalArgumentException(); - - if (p.edges.size() > edges.size()) { - // Proven false since the caller's path is lon... [truncated message content] |