From: <tho...@us...> - 2010-11-12 20:48:01
Revision: 3945 http://bigdata.svn.sourceforge.net/bigdata/?rev=3945&view=rev Author: thompsonbry Date: 2010-11-12 20:47:51 +0000 (Fri, 12 Nov 2010) Log Message: ----------- More clean up on JoinGraph. Added LUBM Q9 to the test case. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:52:10 UTC (rev 3944) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 20:47:51 UTC (rev 3945) @@ -20,7 +20,7 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ + */ /* * Created on Aug 16, 2010 */ @@ -77,8 +77,9 @@ * support of runtime query optimization. A join graph is a collection of * relations and joins which connect those relations. This boils down to a * collection of {@link IPredicate}s (selects on relations) and shared variables - * (which identify joins). - * <p> + * (which identify joins). Operators other than standard joins (including + * optional joins, sort, order by, etc.) must be handled downstream from the + * join graph in a "tail plan". * * @see http://arxiv.org/PS_cache/arxiv/pdf/0810/0810.4809v1.pdf, XQuery Join * Graph Isolation. @@ -86,130 +87,98 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * - * TODO Some edges can be eliminated by transitivity. For example, given - * - * <pre> - * query: - * - * :- (A loves B), (B loves A), (B marriedTo C). - * - * vertices: - * - * v1=(A loves B); - * v2=(B loves A); - * v3=(B marriedTo C); - * - * edges: - * - * e1=(v1,v2) // JOIN( SCAN(A loves B), SCAN(B loves A)) - * e2=(v1,v3) // JOIN( SCAN(A loves B), SCAN(B marriedTo C)) - * e3=(v2,v3) // JOIN( SCAN(B loves A), SCAN(B marriedTo C)) - * - * It is necessary to execute e1 and either e2 or e3, but not both e2 and e3. - * </pre> - * - * TODO In order to combine pipelining with runtime query optimization we need - * to sample based on the first chunk(s) delivered by the pipeline. If - * necessary, we can buffer multiple chunks for semi-selective queries. - * However, for unselective queries we would accept as many buffers worth - * of bindings as were allowed for a given join and then sample the - * binding sets from left hand side (the buffers) and run those samples - * against the right hand side (the local shard). + * @todo Examine the overhead of the runtime optimizer. Look at ways to prune + * its costs. For example, by pruning the search, by recognizing when the + * query is simple enough to execute directly, by recognizing when we have + * already materialized the answer to the query, etc. */ public class JoinGraph extends PipelineOp { - private static final transient Logger log = Logger.getLogger(JoinGraph.class); - - private static final long serialVersionUID = 1L; + private static final transient Logger log = Logger + .getLogger(JoinGraph.class); - /** - * Known annotations. 
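A minimal usage sketch for these annotations (not part of this commit: p0..p2 are hypothetical IPredicates from the query, and the BOP_ID value is arbitrary). The operator is built from NV pairs via the JoinGraph(NV...) constructor; the evaluation context must be CONTROLLER, as the JoinGraph(BOp[], Map) constructor below enforces:

    final JoinGraph op = new JoinGraph(
            new NV(BOp.Annotations.BOP_ID, 1),
            new NV(JoinGraph.Annotations.VERTICES,
                    new IPredicate[] { p0, p1, p2 }),
            new NV(JoinGraph.Annotations.LIMIT, 100), // initial cutoff sample size
            new NV(BOp.Annotations.EVALUATION_CONTEXT,
                    BOpEvaluationContext.CONTROLLER));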
- */ - public interface Annotations extends PipelineOp.Annotations { + private static final long serialVersionUID = 1L; + + /** + * Known annotations. + */ + public interface Annotations extends PipelineOp.Annotations { + /** - * The vertices of the join graph expressed an an {@link IPredicate}[]. - */ + * The vertices of the join graph expressed an an {@link IPredicate}[]. + */ String VERTICES = JoinGraph.class.getName() + ".vertices"; - + /** - * The initial limit for cutoff sampling (default {@value #DEFAULT_LIMIT}). + * The initial limit for cutoff sampling (default + * {@value #DEFAULT_LIMIT}). */ String LIMIT = JoinGraph.class.getName() + ".limit"; - - int DEFAULT_LIMIT = 100; - } + int DEFAULT_LIMIT = 100; + } + /** - * @see Annotations#VERTICES - */ - public IPredicate[] getVertices() { - - return (IPredicate[]) getRequiredProperty(Annotations.VERTICES); - - } + * @see Annotations#VERTICES + */ + public IPredicate[] getVertices() { - /** - * @see Annotations#LIMIT - */ - public int getLimit() { - - return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); - - } - - public JoinGraph(final NV ...anns) { - + return (IPredicate[]) getRequiredProperty(Annotations.VERTICES); + + } + + /** + * @see Annotations#LIMIT + */ + public int getLimit() { + + return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); + + } + + public JoinGraph(final NV... anns) { + this(BOpBase.NOARGS, NV.asMap(anns)); - - } + } + /** * - * TODO We can derive the vertices from the join operators or the join - * operators from the vertices. However, if a specific kind of join - * operator is required then the question is whether we have better - * information to make that choice when the join graph is evaluated or - * before it is constructed. - * - * TODO How we will handle optional joins? Presumably they are outside of - * the code join graph as part of the tail attached to that join - * graph. - * * TODO How can join constraints be moved around? Just attach them where - * ever a variable becomes bound? And when do we filter out variables - * which are not required downstream? Once we decide on a join path - * and execute it fully (rather than sampling that join path). + * ever a variable becomes bound? And when do we filter out variables which + * are not required downstream? Once we decide on a join path and execute it + * fully (rather than sampling that join path). */ - public JoinGraph(final BOp[] args, final Map<String,Object> anns) { + public JoinGraph(final BOp[] args, final Map<String, Object> anns) { - super(args,anns); + super(args, anns); - switch (getEvaluationContext()) { - case CONTROLLER: - break; - default: - throw new UnsupportedOperationException( - Annotations.EVALUATION_CONTEXT + "=" - + getEvaluationContext()); - } + switch (getEvaluationContext()) { + case CONTROLLER: + break; + default: + throw new UnsupportedOperationException( + Annotations.EVALUATION_CONTEXT + "=" + + getEvaluationContext()); + } - } + } - public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - return new FutureTask<Void>(new JoinGraphTask(context)); + return new FutureTask<Void>(new JoinGraphTask(context)); - } + } - /** - * Used to assign row identifiers. - */ + /** + * Used to assign row identifiers. + */ static private final IVariable<Integer> ROWID = Var.var("__rowid"); - /** - * A sample of a {@link Vertex} (an access path). 
- */ - public static class VertexSample { + /** + * A sample of a {@link Vertex} (an access path). + */ + public static class VertexSample { /** * Fast range count. This will be the same for each sample taken @@ -218,23 +187,22 @@ */ public final long rangeCount; - /** + /** * The limit used to produce the {@link #sample}. */ - public final int limit; + public final int limit; /** * When <code>true</code>, the result is not a sample but the * materialized access path. * - * @todo When <code>true</code>, we could run the join against the - * sample rather than the disk. This would require wrapping the - * sample as an access path. Since all exact samples will be - * pretty small, this is not likely to have any great performance - * benefit. + * TODO When <code>true</code>, we could run the join against the sample + * rather than the disk. This would require wrapping the sample as an + * access path. Since all exact samples will be pretty small, this is + * not likely to have any great performance benefit. */ - public final boolean exact; - + public final boolean exact; + /** * Sample. */ @@ -247,34 +215,35 @@ * @param exact * @param sample */ - public VertexSample(final long rangeCount, final int limit, final boolean exact, final Object[] sample) { + public VertexSample(final long rangeCount, final int limit, + final boolean exact, final Object[] sample) { if (rangeCount < 0L) throw new IllegalArgumentException(); if (limit <= 0) throw new IllegalArgumentException(); - + if (sample == null) throw new IllegalArgumentException(); this.rangeCount = rangeCount; - + this.limit = limit; - + this.exact = exact; - + this.sample = sample; - + } public String toString() { return "VertexSample{rangeCount=" + rangeCount + ",limit=" + limit + ",exact=" + exact + ", sampleSize=" + sample.length + "}"; } - - } + } + /** * A vertex of the join graph is an annotated relation (this corresponds to * an {@link IPredicate} with additional annotations to support the adaptive @@ -299,20 +268,20 @@ * The most recently taken sample of the {@link Vertex}. */ VertexSample sample = null; - + Vertex(final IPredicate<?> pred) { if (pred == null) throw new IllegalArgumentException(); - + this.pred = pred; - + } - + public String toString() { return "Vertex{pred=" + pred + ",sample=" + sample + "}"; - + } /** @@ -334,30 +303,31 @@ * Take a sample of the vertex. If the sample is already exact, then * this is a NOP. * - * @param context * @param limit * The sample cutoff. */ - public void sample(final BOpContextBase context, final int limit) { + public void sample(final QueryEngine queryEngine, final int limit) { - if (context == null) + if (queryEngine == null) throw new IllegalArgumentException(); if (limit <= 0) throw new IllegalArgumentException(); - + final VertexSample oldSample = this.sample; - if(oldSample != null && oldSample.exact) { + if (oldSample != null && oldSample.exact) { /* * The old sample is already the full materialization of the * vertex. */ - + return; - + } + + final BOpContextBase context = new BOpContextBase(queryEngine); final IRelation r = context.getRelation(pred); @@ -371,12 +341,12 @@ /* * Materialize the access path. * - * @todo This could be more efficient if we raised it onto the - * AP or if we overrode CHUNK_CAPACITY and the fully buffered + * TODO This could be more efficient if we raised it onto the AP + * or if we overrode CHUNK_CAPACITY and the fully buffered * iterator threshold such that everything was materialized as a * single chunk. 
*/ - + final List<Object> tmp = new ArrayList<Object>((int) rangeCount); final IChunkedIterator<Object> itr = ap.iterator(); @@ -396,25 +366,31 @@ sample = new VertexSample(rangeCount, limit, true/* exact */, tmp.toArray(new Object[0])); - - return; - } + } else { - /* - * Materialize a random sample from the access path. - */ - - final SampleIndex sampleOp = new SampleIndex(new BOp[] {}, // - NV.asMap(// + /* + * Materialize a random sample from the access path. + */ + + final SampleIndex sampleOp = new SampleIndex( + new BOp[] {}, // + NV.asMap(// new NV(SampleIndex.Annotations.PREDICATE, pred),// new NV(SampleIndex.Annotations.LIMIT, limit))); - sample = new VertexSample(rangeCount, limit, false/*exact*/, sampleOp - .eval(context)); + sample = new VertexSample(rangeCount, limit, false/* exact */, + sampleOp.eval(context)); + } + + if (log.isInfoEnabled()) + log.info("Sampled: " + sample); + + return; + } - + } /** @@ -449,13 +425,13 @@ * anything. This is not 100%, merely indicative. */ public final int outputCount; - + /** * The ratio of the #of input samples consumed to the #of output samples * generated (the join hit ratio or scale factor). */ public final double f; - + /** * The estimated cardinality of the join. */ @@ -499,12 +475,12 @@ * join. That is, feeding all source tuples into the join gives fewer * than the desired number of output tuples. * - * @todo This field marks this condition and should be used to avoid - * needless recomputation of a join whose exact solution is - * already known. + * TODO This field marks this condition and should be used to avoid + * needless re-computation of a join whose exact solution is already + * known. */ - public final boolean exact; - + public final boolean exact; + /** * The sample of the solutions for the join path. */ @@ -526,40 +502,39 @@ * @param outputCount * The #of binding sets generated before the join was cutoff. */ - EdgeSample(//final VertexSample sourceVertexSample, + EdgeSample( + // final VertexSample sourceVertexSample, final long sourceSampleRangeCount, - final boolean sourceSampleExact, - final int limit, + final boolean sourceSampleExact, final int limit, final int inputCount, final int outputCount, final IBindingSet[] sample) { - if(sample == null) + if (sample == null) throw new IllegalArgumentException(); - -// this.rangeCount = sourceVertexSample.rangeCount; + + // this.rangeCount = sourceVertexSample.rangeCount; this.rangeCount = sourceSampleRangeCount; - + this.limit = limit; - + this.inputCount = inputCount; - + this.outputCount = outputCount; - + f = outputCount == 0 ? 
0 : (outputCount / (double) inputCount); estimatedCardinality = (long) (rangeCount * f); - + estimateIsLowerBound = inputCount == 1 && outputCount == limit; - -// final boolean sourceSampleExact = sourceVertexSample.exact; - estimateIsUpperBound = !sourceSampleExact - && outputCount < limit; - + + // final boolean sourceSampleExact = sourceVertexSample.exact; + estimateIsUpperBound = !sourceSampleExact && outputCount < limit; + this.exact = sourceSampleExact && outputCount < limit; - + this.sample = sample; } - + public String toString() { return getClass().getName() + "{inputRangeCount=" + rangeCount + ", limit=" + limit + ", inputCount=" + inputCount @@ -567,10 +542,9 @@ + ", estimatedCardinality=" + estimatedCardinality + ", estimateIsLowerBound=" + estimateIsLowerBound + ", estimateIsUpperBound=" + estimateIsUpperBound - + ", sampleIsExactSolution=" + exact - + "}"; + + ", sampleIsExactSolution=" + exact + "}"; } - + }; /** @@ -603,13 +577,14 @@ * not been sampled. */ public EdgeSample sample = null; - - public Edge(final Vertex v1, final Vertex v2, final Set<IVariable<?>> shared) { + + public Edge(final Vertex v1, final Vertex v2, + final Set<IVariable<?>> shared) { if (v1 == null) throw new IllegalArgumentException(); if (v2 == null) throw new IllegalArgumentException(); - if (shared==null) + if (shared == null) throw new IllegalArgumentException(); if (shared.isEmpty()) throw new IllegalArgumentException(); @@ -624,8 +599,10 @@ * for each vertex. */ public String toString() { - - return "Edge{ (V" + v1.pred.getId() + ",V" + v2.pred.getId() + ")" + + return "Edge{ (V" + v1.pred.getId() + ",V" + v2.pred.getId() + + "), estCard=" + + (sample == null ? "N/A" : sample.estimatedCardinality) + ", shared=" + shared.toString() + ", sample=" + sample + "}"; @@ -635,9 +612,9 @@ * Equality is determined by reference testing. */ public boolean equals(final Object o) { - + return this == o; - + } /** @@ -657,24 +634,25 @@ final int h; if (h1 < h2) { - + h = h1 * 31 + h2; - + } else { - + h = h2 * 31 + h1; - + } hash = h; } - return hash; + return hash; - } - private int hash; + } - /** + private int hash; + + /** * Return the vertex with the smaller estimated cardinality. * * @throws IllegalStateException @@ -684,15 +662,15 @@ if (v1.sample == null) // vertex not sampled. throw new IllegalStateException(); - + if (v2.sample == null) // vertex not sampled. throw new IllegalStateException(); - + return (v1.sample.rangeCount < v2.sample.rangeCount) ? v1 : v2; - + } - - /** + + /** * Return the vertex with the larger estimated cardinality (the vertex * not returned by {@link #getMinimumCardinalityVertex()}). * @@ -703,12 +681,12 @@ // The vertex with the minimum cardinality. final Vertex o = getMinimumCardinalityVertex(); - + // Return the other vertex. return (v1 == o) ? v2 : v1; - + } - + /** * Estimate the cardinality of the edge. * @@ -716,7 +694,7 @@ * * @return The estimated cardinality of the edge. * - * @throws Exception + * @throws Exception */ public long estimateCardinality(final QueryEngine queryEngine, final int limit) throws Exception { @@ -763,21 +741,22 @@ * both the input and the output of the cutoff evaluation of the * edge rather than rows of the materialized relation. * - * TODO On subsequent iterations we would probably re-sample [v] - * and we would run against the materialized intermediate result for + * TODO On subsequent iterations we would probably re-sample [v] and + * we would run against the materialized intermediate result for * [v']. 
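A worked example of the estimate computed above, with illustrative numbers only: suppose an edge is sampled with limit=100 against a source sample whose range count is 10,000, and the cutoff join consumes inputCount=50 source solutions before the slice halts at outputCount=100.

    // Mirrors the EdgeSample computation using the hypothetical numbers above.
    final double f = 100 / (double) 50;                   // join hit ratio = 2.0
    final long estimatedCardinality = (long) (10000 * f); // = 20,000
    // estimateIsLowerBound := inputCount == 1 && outputCount == limit -> false
    // estimateIsUpperBound := !sourceSampleExact && outputCount < limit -> false
    // exact := sourceSampleExact && outputCount < limit -> false

Had the join instead exhausted all 100 source solutions while producing only outputCount=40, then f=0.4 and the estimate is 4,000; that estimate is an upper bound when the source sample was itself only a sample, and it is the exact solution when the source sample was exact.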
*/ /* * Convert the source sample into an IBindingSet[]. * - * @todo We might as well do this when we sample the vertex. + * TODO We might as well do this when we sample the vertex. */ final IBindingSet[] sourceSample = new IBindingSet[v.sample.sample.length]; { for (int i = 0; i < sourceSample.length; i++) { final IBindingSet bset = new HashBindingSet(); - BOpContext.copyValues((IElement) v.sample.sample[i], v.pred, bset); + BOpContext.copyValues((IElement) v.sample.sample[i], + v.pred, bset); sourceSample[i] = bset; } } @@ -819,7 +798,7 @@ if (limit <= 0) throw new IllegalArgumentException(); - + // Inject a rowId column. sourceSample = BOpUtility.injectRowIdColumn(ROWID, 1/* start */, sourceSample); @@ -834,15 +813,14 @@ */ final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, // new NV(BOp.Annotations.BOP_ID, 1),// - new NV(PipelineJoin.Annotations.PREDICATE,vTarget.pred.setBOpId(3)) - ); + new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred + .setBOpId(3))); final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp },// NV.asMap(// new NV(BOp.Annotations.BOP_ID, 2), // - new NV(SliceOp.Annotations.LIMIT, (long)limit), // - new NV( - BOp.Annotations.EVALUATION_CONTEXT, + new NV(SliceOp.Annotations.LIMIT, (long) limit), // + new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER))); // run the cutoff sampling of the edge. @@ -875,31 +853,18 @@ .get()); } finally { // verify no problems. FIXME Restore test of the query. -// runningQuery.get(); + // runningQuery.get(); } } finally { runningQuery.cancel(true/* mayInterruptIfRunning */); } /* - * Note: This needs to be based on the source vertex having the - * minimum cardinality for the Path which is being extended which - * connects via some edge defined in the join graph. If a different - * vertex is chosen as the source then the estimated cardinality - * will be falsely high by whatever ratio the chosen vertex - * cardinality exceeds the one having the minimum cardinality which - * is connected via an edge to the target vertex). - * - * FIXME I am not convinced that this approach is quite right. I am - * also not convinced that this approach will correctly carry the - * additional metadata on the EdgeSample (exact, estimate overflow - * and underflow, etc). [This needs to be the estimated cardinality - * of the path which is being extended by an edge to the target - * vertex.] + * TODO Improve comments here. See if it is possible to isolate a + * common base class which would simplify the setup of the cutoff + * join and the computation of the sample stats. */ -// final VertexSample moreSelectiveVertexSample = vSource.sample.rangeCount < vTarget.sample.rangeCount ? vSource.sample -// : vTarget.sample; - + final EdgeSample edgeSample = new EdgeSample( sourceSampleRangeCount, sourceSampleExact, limit, inputCount, outputCount, result @@ -911,64 +876,14 @@ return edgeSample; } - + } -// /** -// * A path sample includes the materialized binding sets from the as-executed -// * join path. -// * -// * @todo The sample {@link IBindingSet}[] could be saved with the -// * {@link EdgeSample}. However, when we are sampling a join path we -// * want to associate the net sample with the path, not each edge in -// * that path, because we need to be able to generate join paths in -// * which the path is extended from any vertex already part of the path -// * to any vertex which has not yet incorporated in the path and has -// * not yet been executed. 
To do this we need to intermediate results -// * for the path, which includes all variables bound by each join for -// * each edge in the path, not just on an edge by edge basis. -// */ -// public static class PathSample extends EdgeSample { -// -// /** -// * <code>true</code> if the sample is the exact solution for the join path. -// */ -// private final boolean exact; -// -// /** -// * The sample of the solutions for the join path. -// */ -// private final IBindingSet[] sample; -// -// PathSample(final long inputRangeCount, final int limit, -// final int inputCount, final int outputCount, -// final boolean exact, final IBindingSet[] sample) { -// -// super(inputRangeCount, limit, inputCount, outputCount); -// -// if(sample == null) -// throw new IllegalArgumentException(); -// -// this.exact = exact; -// -// this.sample = sample; -// -// } -// -// public String toString() { -// -// return super.toString() + ":{exact=" + exact + ", sampleSize=" -// + sample.length + "}"; -// -// } -// -// } - /** * A sequence of {@link Edge}s (aka join steps). */ public static class Path { - + /** * An immutable ordered list of the edges in the (aka the sequence of * joins represented by this path). @@ -995,19 +910,6 @@ */ final public long cumulativeEstimatedCardinality; - /** - * The vertex at which the path from which this path was derived - * stopped. This is initialized to the source vertex when entering the - * chainSample() method. - * - * @todo This is used by ROX to only grow the path from its end. We - * could of course just look at the last edge in the path. - * However, I think that I prefer to grow a path from any - * branching vertex as long as the path does not duplicate any - * path already generated (including those which were pruned). - */ - private Vertex stopVertex; - public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("Path{"); @@ -1015,7 +917,8 @@ for (Edge e : edges) { if (!first) sb.append(","); - sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() + ")"); + sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() + + ")"); first = false; } sb.append(",cumEstCard=" + cumulativeEstimatedCardinality @@ -1042,23 +945,27 @@ if (e == null) throw new IllegalArgumentException(); - + if (e.sample == null) - throw new IllegalArgumentException("Not sampled: "+e); - + throw new IllegalArgumentException("Not sampled: " + e); + this.edges = Collections.singletonList(e); - + this.sample = e.sample; - + this.cumulativeEstimatedCardinality = e.sample.estimatedCardinality; - + } /** * Constructor used by {@link #addEdge(QueryEngine, int, Edge)} - * @param edges The edges in the new path. - * @param cumulativeEstimatedCardinality The cumulative estimated cardinality of the new path. - * @param sample The sample from the last + * + * @param edges + * The edges in the new path. + * @param cumulativeEstimatedCardinality + * The cumulative estimated cardinality of the new path. 
+ * @param sample + * The sample from the last */ private Path(final List<Edge> edges, final long cumulativeEstimatedCardinality, @@ -1066,19 +973,19 @@ if (edges == null) throw new IllegalArgumentException(); - + if (cumulativeEstimatedCardinality < 0) throw new IllegalArgumentException(); - + if (sample == null) throw new IllegalArgumentException(); this.edges = Collections.unmodifiableList(edges); - + this.cumulativeEstimatedCardinality = cumulativeEstimatedCardinality; - + this.sample = sample; - + } /** @@ -1132,7 +1039,7 @@ final Vertex[] v1 = getVertices(); final Vertex[] v2 = p.getVertices(); - + if (v1.length < v2.length) { // Proven false since the other set is larger. return false; @@ -1164,7 +1071,7 @@ } return true; - + } /** @@ -1172,8 +1079,8 @@ * * @return The vertices (in path order). * - * @todo this could be rewritten without the toArray() using a method - * which visits the vertices of a path in any order. + * TODO This could be rewritten without the toArray() using a + * method which visits the vertices of a path in any order. */ public Vertex[] getVertices() { final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); @@ -1190,7 +1097,7 @@ * * @param p * The given path. - * + * * @return <code>true</code> if this path begins with the given path. */ public boolean beginsWith(final Path p) { @@ -1210,10 +1117,10 @@ return false; } } - + return true; } - + /** * Add an edge to a path, computing the estimated cardinality of the new * path, and returning the new path. @@ -1272,11 +1179,11 @@ * cardinality then we should prefer join paths which achieve the * same reduction in cardinality with less 'intermediate * cardinality' - that is, by examining fewer possible solutions. + * [In fact, the estimated (cumulative) cardinality might not be a + * good reflection of the IOs to be done -- this needs more + * thought.] */ -// final IBindingSet[] sample = BOpUtility.injectRowIdColumn(ROWID, -// 0/* start */, this.sample.sample); - final EdgeSample edgeSample = e.estimateCardinality(queryEngine, limit, sourceVertex, targetVertex, this.sample.estimatedCardinality, this.sample.exact, @@ -1286,9 +1193,9 @@ final List<Edge> edges = new ArrayList<Edge>( this.edges.size() + 1); - + edges.addAll(this.edges); - + edges.add(e); final long cumulativeEstimatedCardinality = this.cumulativeEstimatedCardinality @@ -1303,58 +1210,58 @@ return tmp; } - + } - -// /** -// * Equality is defined by comparison of the unordered set of edges. -// */ -// public boolean equals(final Object o) { -// if (this == o) -// return true; -// if (!(o instanceof Path)) -// return false; -// final Path t = (Path) o; -// if (edges.length != t.edges.length) -// return false; -// for (Edge e : edges) { -// boolean found = false; -// for (Edge x : t.edges) { -// if (x.equals(e)) { -// found = true; -// break; -// } -// } -// if (!found) -// return false; -// } -// return true; -// } -// -// /** -// * The hash code of path is defined as the bit-wise XOR of the hash -// * codes of the edges in that path. -// */ -// public int hashCode() { -// -// if (hash == 0) { -// -// int result = 0; -// -// for(Edge e : edges) { -// -// result ^= e.hashCode(); -// -// } -// -// hash = result; -// -// } -// return hash; -// -// } -// private int hash; + // /** + // * Equality is defined by comparison of the unordered set of edges. 
+ // */ + // public boolean equals(final Object o) { + // if (this == o) + // return true; + // if (!(o instanceof Path)) + // return false; + // final Path t = (Path) o; + // if (edges.length != t.edges.length) + // return false; + // for (Edge e : edges) { + // boolean found = false; + // for (Edge x : t.edges) { + // if (x.equals(e)) { + // found = true; + // break; + // } + // } + // if (!found) + // return false; + // } + // return true; + // } + // + // /** + // * The hash code of path is defined as the bit-wise XOR of the hash + // * codes of the edges in that path. + // */ + // public int hashCode() { + // + // if (hash == 0) { + // + // int result = 0; + // + // for(Edge e : edges) { + // + // result ^= e.hashCode(); + // + // } + // + // hash = result; + // + // } + // return hash; + // + // } + // private int hash; + } /** @@ -1364,13 +1271,13 @@ * * @param a * An array of join paths. - * + * * @return A table with that data. */ static public String showTable(final Path[] a) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); - for(int i=0; i<a.length; i++) { + for (int i = 0; i < a.length; i++) { final Path x = a[i]; if (x.sample == null) { f.format("p[%2d] %7s, %10s %10s", "N/A", "N/A", "N/A", i); @@ -1381,18 +1288,18 @@ } sb.append(", ["); final Vertex[] vertices = x.getVertices(); - for(Vertex v : vertices) { + for (Vertex v : vertices) { f.format("%2d ", v.pred.getId()); } sb.append("]"); -// for (Edge e : x.edges) -// sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() -// + ")"); + // for (Edge e : x.edges) + // sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() + // + ")"); sb.append("\n"); } return sb.toString(); } - + /** * A join graph (data structure and methods only). * @@ -1442,9 +1349,6 @@ */ private final Edge[] E; - // The set of vertices which have been consumed by the query. - private final Set<Vertex> executedVertices = new LinkedHashSet<Vertex>(); - public List<Vertex> getVertices() { return Collections.unmodifiableList(Arrays.asList(V)); } @@ -1457,28 +1361,25 @@ final StringBuilder sb = new StringBuilder(); sb.append("JoinGraph"); sb.append("{V=["); - for(Vertex v : V) { - sb.append("\nV["+v.pred.getId()+"]="+v); + for (Vertex v : V) { + sb.append("\nV[" + v.pred.getId() + "]=" + v); } sb.append("],E=["); - for(Edge e : E) { - sb.append("\n"+e); + for (Edge e : E) { + sb.append("\n" + e); } - sb.append("\n],ExecutedVertices=["); - for(Vertex v : executedVertices) { - sb.append("\nV["+v.pred.getId()+"]="+v); - } sb.append("\n]}"); return sb.toString(); - -// return super.toString() + "{V=" + Arrays.toString(V) + ",E=" -// + Arrays.toString(E) + ", executedVertices="+executedVertices+"}"; + + // return super.toString() + "{V=" + Arrays.toString(V) + ",E=" + // + Arrays.toString(E) + + // ", executedVertices="+executedVertices+"}"; } - + public JGraph(final IPredicate[] v) { - if (v == null) - throw new IllegalArgumentException(); + if (v == null) + throw new IllegalArgumentException(); if (v.length < 2) throw new IllegalArgumentException(); @@ -1527,18 +1428,200 @@ } /** + * + * @param queryEngine + * @param limit + * The limit for sampling a vertex and the initial limit for + * cutoff join evaluation. A reasonable value is + * <code>100</code>. + * + * @throws Exception + */ + public void runtimeOptimizer(final QueryEngine queryEngine, + final int limit) throws Exception { + + // // The set of vertices which have been consumed by the query. 
+ // final Set<Vertex> executedVertices = new LinkedHashSet<Vertex>(); + + // Setup the join graph. + Path[] paths = round0(queryEngine, limit, 2/* nedges */); + + /* + * The input paths for the first round have two vertices (one edge + * is two vertices). Each round adds one more vertex, so we have + * three vertices by the end of round 1. We are done once we have + * generated paths which include all vertices. + * + * This occurs at round := nvertices - 1 + */ + + final int nvertices = V.length; + + int round = 1; + + while (round < nvertices - 1) { + + paths = expand(queryEngine, limit, round++, paths); + + } + + /* + * FIXME Choose the best join path and execute it (or return the + * evaluation order to the caller). + * + * FIXME This must either recognize each time a join path is known + * to dominate all other join paths and then execute it or iterator + * until the total join path is decided and then execute the + * original query using that join path. + * + * @todo When executing the query, it is actually being executed as + * a subquery. Therefore we have to take appropriate care to ensure + * that the results are copied out of the subquery and into the + * parent query. + * + * @todo When we execute the query, we should clear the references + * to the sample (unless they are exact, in which case they can be + * used as is) in order to release memory associated with those + * samples if the query is long running. + */ + + } + + /** + * Choose the starting vertices. + * + * @param nedges + * The maximum #of edges to choose. + */ + public Path[] choseStartingPaths(final int nedges) { + + final List<Path> tmp = new LinkedList<Path>(); + + // All edges in the graph. + final Edge[] edges = getEdges().toArray(new Edge[0]); + + // Sort them by ascending expected cardinality. + Arrays.sort(edges, 0, edges.length, + EstimatedEdgeCardinalityComparator.INSTANCE); + + // Choose the top-N edges (those with the least cardinality). + for (int i = 0; i < edges.length && i < nedges; i++) { + + tmp.add(new Path(edges[i])); + + } + + final Path[] a = tmp.toArray(new Path[tmp.size()]); + + return a; + + } + + /** + * Choose up to <i>nedges</i> edges to be the starting point. + * + * @param queryEngine + * The query engine. + * @param limit + * The cutoff used when sampling the vertices and when + * sampling the edges. + * @param nedges + * The maximum #of edges to choose. Those having the smallest + * expected cardinality will be chosen. + * + * @throws Exception + */ + public Path[] round0(final QueryEngine queryEngine, final int limit, + final int nedges) throws Exception { + + /* + * Sample the vertices. + */ + sampleVertices(queryEngine, limit); + + if (log.isInfoEnabled()) { + final StringBuilder sb = new StringBuilder(); + sb.append("Vertices:\n"); + for (Vertex v : V) { + sb.append(v.toString()); + sb.append("\n"); + } + log.info(sb.toString()); + } + + /* + * Estimate the cardinality for each edge. + * + * TODO It would be very interesting to see the variety and/or + * distribution of the values bound when the edge is sampled. This + * can be easily done using a hash map with a counter. That could + * tell us a lot about the cardinality of the next join path + * (sampling the join path also tells us a lot, but it does not + * explain it as much as seeing the histogram of the bound values). + * I believe that there are some interesting online algorithms for + * computing the N most frequent observations and the like which + * could be used here. 
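A sketch of that hash map counter (not part of this commit; it assumes IBindingSet.get(IVariable) for reading a binding, with [sample] and [var] standing in for an edge's sampled solutions and one of its shared variables):

    // Histogram of the values bound on [var] across the sampled solutions.
    final Map<Object, Integer> histogram = new HashMap<Object, Integer>();
    for (IBindingSet bset : sample) {
        final Object val = bset.get(var); // the bound value for [var]
        final Integer n = histogram.get(val);
        histogram.put(val, n == null ? 1 : n + 1);
    }
    // A few values with very high counts indicate skew, i.e., the join hit
    // ratio observed on the sample may not extrapolate linearly.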
+ */ + estimateEdgeWeights(queryEngine, limit); + + if (log.isInfoEnabled()) { + final StringBuilder sb = new StringBuilder(); + sb.append("Edges:\n"); + for (Edge e : E) { + sb.append(e.toString()); + sb.append("\n"); + } + log.info(sb.toString()); + } + + /* + * Choose the initial set of paths. + */ + final Path[] paths_t0 = choseStartingPaths(nedges); + + if (log.isInfoEnabled()) + log.info("\n*** Paths @ t0\n" + JoinGraph.showTable(paths_t0)); + + return paths_t0; + + } + + /** * Do one breadth first expansion. * * @param queryEngine + * The query engine. * @param limit + * The limit (this is automatically multiplied by the round + * to increase the sample size in each round). * @param round + * The round number in [1:n]. * @param a - * @return + * The set of paths from the previous round. For the first + * round, this is formed from the initial set of edges to + * consider. + * + * @return The set of paths which survived pruning in this round. + * * @throws Exception */ - final public Path[] expand(final QueryEngine queryEngine, final int limit, + public Path[] expand(final QueryEngine queryEngine, int limit, final int round, final Path[] a) throws Exception { + if (queryEngine == null) + throw new IllegalArgumentException(); + if (limit <= 0) + throw new IllegalArgumentException(); + if (round <= 0) + throw new IllegalArgumentException(); + if (a == null) + throw new IllegalArgumentException(); + if (a.length == 0) + throw new IllegalArgumentException(); + + // increment the limit by itself in each round. + limit *= round; + final List<Path> tmp = new LinkedList<Path>(); // First, copy all existing paths. @@ -1546,16 +1629,20 @@ tmp.add(x); } + // Vertices are inserted into this collection when they are resampled. + final Set<Vertex> resampled = new LinkedHashSet<Vertex>(); + // Then expand each path. for (Path x : a) { if (x.edges.size() < round) { - + // Path is from a previous round. continue; - } + // The set of vertices used to expand this path in this round. final Set<Vertex> used = new LinkedHashSet<Vertex>(); + // Check all edges in the graph. for (Edge edgeInGraph : E) { @@ -1573,18 +1660,27 @@ continue; } - final Vertex newVertex = v1Found ? edgeInGraph.v2 : edgeInGraph.v1; - - if(used.contains(newVertex)) { + final Vertex newVertex = v1Found ? edgeInGraph.v2 + : edgeInGraph.v1; + + if (used.contains(newVertex)) { // Vertex already used to extend this path. continue; } - + // add the new vertex to the set of used vertices. used.add(newVertex); + + if (!resampled.add(newVertex)&&round>1) { + /* + * Resample this vertex before we sample a new edge + * which targets this vertex. + */ + newVertex.sample(queryEngine, limit); + } // Extend the path to the new vertex. - final Path p = x.addEdge(queryEngine, limit * round, + final Path p = x.addEdge(queryEngine, limit, edgeInGraph); // Add to the set of paths for this round. 
@@ -1596,19 +1692,22 @@ final Path[] paths_tp1 = tmp.toArray(new Path[tmp.size()]); - System.err.println("\n*** Paths @ round=" + round + "\n" - + JoinGraph.showTable(paths_tp1)); + if (log.isDebugEnabled()) + log.debug("\n*** round=" + round + " : generated paths\n" + + JoinGraph.showTable(paths_tp1)); final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1); - System.err.println("\n*** Paths @ round=" + round - + " (after pruning)\n" - + JoinGraph.showTable(paths_tp1_pruned)); + if (log.isInfoEnabled()) + log.info("\n*** round=" + round + ": paths{in=" + a.length + + ",considered=" + paths_tp1.length + ",out=" + + paths_tp1_pruned.length + "}\n" + + JoinGraph.showTable(paths_tp1_pruned)); return paths_tp1_pruned; - + } - + /** * Return the {@link Vertex} whose {@link IPredicate} is associated with * the given {@link BOp.Annotations#BOP_ID}. @@ -1619,8 +1718,8 @@ * vertex in the join graph. */ public Vertex getVertex(int bopId) { - for(Vertex v : V) { - if(v.pred.getId()==bopId) + for (Vertex v : V) { + if (v.pred.getId() == bopId) return v; } return null; @@ -1639,7 +1738,7 @@ * the join graph. */ public Edge getEdge(Vertex v1, Vertex v2) { - for(Edge e : E) { + for (Edge e : E) { if (e.v1 == v1 && e.v2 == v2) return e; if (e.v1 == v2 && e.v2 == v1) @@ -1647,22 +1746,23 @@ } return null; } - + /** - * Obtain a sample and estimated cardinality (fast range count) for each vertex. + * Obtain a sample and estimated cardinality (fast range count) for each + * vertex. * - * @param context + * @param queryEngine * @param limit * The sample size. */ - public void sampleVertices(final BOpContextBase context, final int limit) { + public void sampleVertices(final QueryEngine queryEngine, final int limit) { for (Vertex v : V) { - v.sample(context, limit); - + v.sample(queryEngine, limit); + } - + } /** @@ -1687,8 +1787,7 @@ } - e.estimateCardinality( - queryEngine, limit); + e.estimateCardinality(queryEngine, limit); } @@ -1706,14 +1805,14 @@ * are no {@link Edge}s having an estimated cardinality. */ public Edge getMinimumCardinalityEdge(final Set<Vertex> visited) { - + long minCard = Long.MIN_VALUE; Edge minEdge = null; for (Edge e : E) { if (e.sample == null) { - + // Edge has not been sampled. continue; @@ -1721,12 +1820,12 @@ if (visited != null && (visited.contains(e.v1) || visited.contains(e.v2))) { - + // A vertex of that edge has already been consumed. continue; - + } - + final long estimatedCardinality = e.sample.estimatedCardinality; if (minEdge == null || estimatedCardinality < minCard) { @@ -1740,22 +1839,24 @@ } return minEdge; - + } -// /** -// * Return the {@link Edge} having the minimum estimated cardinality out -// * of those edges whose cardinality has been estimated. -// * -// * @return The minimum cardinality edge -or- <code>null</code> if there -// * are no {@link Edge}s having an estimated cardinality. -// */ -// public Edge getMinimumCardinalityEdge() { -// -// return getMinimumCardinalityEdge(null); -// -// } - + // /** + // * Return the {@link Edge} having the minimum estimated cardinality + // out + // * of those edges whose cardinality has been estimated. + // * + // * @return The minimum cardinality edge -or- <code>null</code> if + // there + // * are no {@link Edge}s having an estimated cardinality. 
+ // */ + // public Edge getMinimumCardinalityEdge() { + // + // return getMinimumCardinalityEdge(null); + // + // } + /** * Return the #of edges in which the given vertex appears where the * other vertex of the edge does not appear in the set of visited @@ -1765,7 +1866,7 @@ * The vertex. * @param visited * A set of vertices to be excluded from consideration. - * + * * @return The #of such edges. */ public int getEdgeCount(final Vertex v, final Set<Vertex> visited) { @@ -1787,17 +1888,17 @@ * @return Those edges. */ public List<Edge> getEdges(final Vertex v, final Set<Vertex> visited) { - + if (v == null) throw new IllegalArgumentException(); if (visited != null && visited.contains(v)) return Collections.emptyList(); - + final List<Edge> tmp = new LinkedList<Edge>(); - + for (Edge e : E) { - + if (v.equals(e.v1) || v.equals(e.v2)) { if (visited != null) { @@ -1811,464 +1912,16 @@ } tmp.add(e); - - } - - } - - return tmp; - - } - /** - * - * @param queryEngine - * @param limit - * The limit for sampling a vertex and the initial limit for - * cutoff join evaluation. A reasonable value is - * <code>100</code>. - * @param timeout - * The timeout for cutoff join path evaluation - * (milliseconds). A reasonable value is <code>100</code>ms. - * @throws Exception - * - * FIXME This must either return the query plan or copy the - * results as they are materialized to the sink for the join - * graph operator. - * - * - * @todo We do not need the [timeout] as long as we evaluate each cutoff - * join separately. The limited number of input solutions to the - * join automatically limits the amount of work the join can do. - * However, if we do cutoff evaluation of a series of edges then - * it is possible to do a lot of work in order to find [limit] - * solutions. In this case, a [timeout] protects us against join - * paths which have poor correlations and large cardinality for - * their vertices (a lot of solutions are considered to produce - * very few results). - */ - public void runtimeOptimizer(final QueryEngine queryEngine, - final int limit, final long timeout) throws Exception { - - final BOpContextBase context = new BOpContextBase(queryEngine); - - if (log.isInfoEnabled()) - log.info("limit=" + limit); - - /* - * Sample the vertices. - * - * TODO Sampling for scale-out not yet finished. - * - * FIXME Re-sampling will always produce the same sample depending - * on the sample operator impl (it should be random, but it is not). - */ - sampleVertices(context, limit); - - if(log.isDebugEnabled()) - log.debug("joinGraph=" + toString()); - - /* - * Estimate the cardinality and weights for each edge, obtaining the - * Edge with the minimum estimated cardinality. This will be the - * starting point for the join graph evaluation. - * - * @todo It would be very interesting to see the variety and/or - * distribution of the values bound when the edge is sampled. This - * can be easily done using a hash map with a counter. That could - * tell us a lot about the cardinality of the next join path - * (sampling the join path also tells us a lot, but it does not - * explain it as much as seeing the histogram of the bound values). - * I believe that there are some interesting online algorithms for - * computing the N most frequent observations and the like which - * could be used here. - * - * TODO ROX is choosing the starting edge based on the minimum - * estimated cardinality. 
However, it is possible for there to be - * more than one edge with an estimated cardinality which is - * substantially to the minimum estimated cardinality. It would be - * best to start from multiple vertices so we can explore join paths - * which begin with those alternative starting vertices as well. - * (LUBM Q2 is an example of such a query). - */ - estimateEdgeWeights(queryEngine, limit); - - while(moreEdgesToVisit(executedVertices)) { - - // Decide on the next join path to execute. - final Path p = chainSample(queryEngine, limit, timeout); - - for(Edge e : p.edges) { - - /* - * FIXME Finish the algorithm. - * - * Execute the edge. We have two choices here. If join path - * is currently materialized and the expected cardinality of - * the edge is small to moderate (LTE limit * 10) then we - * can simply materialize the result of evaluating the edge. - * - * In this case, we replace the sample for the vertex with - * the actual result of evaluating the edge. [This concept - * pre-supposes that a vertex sample is the set of matching - * elements and that we do not store the binding sets which - * satisfy the join path. I think that this is perhaps the - * primary point of difference for MonetDB/ROX and bigdata. - * bigdata is working with IBindingSet[]s and should - * associate the set of intermediate solutions which - * represent the materialized intermediate result with the - * join path, not the vertex or the edge.] - * - * Otherwise, either the join path is already only a sample - * or the expected cardinality of this edge is too large so - * we do the cutoff evaluation of the edge in order to - * propagate a sample. - * - * 1. exec(e,T1(v1),T2(v2)) - */ - - executedVertices.add(e.v1); - executedVertices.add(e.v2); - } - /* - * Re-sample edges branching from any point in the path which we - * just executed. The purpose of this is to improve the - * detection of correlations using a materialized sample of the - * intermediate results (which will be correlated) rather than - * independent samples of the vertices (which are not - * correlated). - * - * Also, note that ROX only samples vertices which satisfy the - * zero investment property and therefore there could be - * vertices which have not yet been sampled if some vertices are - * not associated with an index. - * - * @todo This could just be another call to sampleVertices() and - * estimateEdgeWeights() if those methods accepted the set of - * already executed vertices so they could make the proper - * exclusions (or if we had a method which returned the - * un-executed vertices and/or edges). - */ -// e.v1.sample(context, limit); -// e.v2.sample(context, limit); - } - } + return tmp; - /** - * Return <code>true</code> iff there exists at least one {@link Edge} - * branching from a vertex NOT found in the set of vertices which have - * visited. - * - * @param visited - * A set of vertices. - * - * @return <code>true</code> if there are more edges to explore. - */ - private boolean moreEdgesToVisit(final Set<Vertex> visited) { - - // Consider all edges. - for(Edge e : E) { - - if (visited.contains(e.v1) && visited.contains(e.v2)) { - /* - * Since both vertices for this edge have been executed the - * edge is now redundant. Either it was explicitly executed - * or another join path was used which implies the edge by - * transitivity in the join graph. - */ - continue; - } - - /* - * We found a counter example (an edge which has not been - * explored). 
- */ - if (log.isTraceEnabled()) - log.trace("Edge has not been explored: " + e); - - return true; - - } - - // No more edges to explore. - return false; - } /** - * E - * - * @param limit - * @return - * - * TODO How to indicate the set of edges which remain to be - * explored? - * - * @throws Exception - */ - public Path chainSample(final QueryEngine queryEngine, final int limit, - final long timeout) throws Exception { - - final Vertex source; - { - /* - * Find the edge having the minimum estimated cardinality. - */ - final Edge e = getMinimumCardinalityEdge(executedVertices); - - if (e == null) - throw new RuntimeException("No weighted edges."); - - /* - * Decide which vertex of that edge will be the starting point - * for chain sampling (if any). - */ - if (getEdgeCount(e.v1, executedVertices) > 1 - || getEdgeCount(e.v2, executedVertices) > 1) { - /* - * There is at least one vertex of that edge which branches. - * Chain sampling will begin with the vertex of that edge - * which has the lower estimated cardinality. - * - * TODO It could be that the minimum cardinality vertex does - * not branch. What happens for that code path? Do we just - * execute that edge and then reenter chain sampling? If so, - * it would be cleared to test for this condition explicitly - * up front. - */ - source = e.getMinimumCardinalityVertex(); - } else { - /* - * There is no vertex which branches for that edge. This is - * a stopping condition for chain sampling. The path - * consisting of just that edge is returned and should be - * executed by the caller. - */ - return new Path(e); - } - - } - - /* - * Setup some data structures for one or more breadth first - * expansions of the set of path(s) which are being sampled. This - * iteration will continue until we reach a stopping condition. - */ - - // The set of paths being considered. - final List<Path> paths = new LinkedList<Path>(); - - { - // The current path. - final Path p = new Path(); - - p.stopVertex = source; -// p.inputSample = source.sample; - paths.add(p); - } - - // initialize the cutoff to the limit used to sample the vertices. - int cutoff = limit; - long cutoffMillis = timeout; - - final Set<Vertex> unsampled = new LinkedHashSet<Vertex>( - executedVertices); - - /* - * One breadth first expansion of the join paths. - * - * Note: This expands each join path one vertex in each iteration. - * However, different join paths can expand from different vertices. - * - * For ROX, each join path is expanded from the last vertex which - * was added to that join path so the initial edge for each join - * path strongly determines the edges in the join graph along which - * that join path can grow. - * - * For bigdata, we can grow the path from any vertex already in the - * path to any vertex which (a) is not yet in the path; and (b) has - * not yet been evaluated. - * - * This suggests that this loop must consider each of the paths to - * decide whether that path can be extended. - */ - while (moreEdgesToVisit(unsampled)) { - - // increment the cutoff. - cutoff += limit; - cutoffMillis += timeout; - - // Consider each path. - for(Path p : paths) { - - /* - * The vertex at which we stopped expanding that path the - * last time. - * - * TODO ROX might have to traverse vertex to vertex along - * edges, but we can execute any edge whose preconditions - * have been satisfied. - */ - final Vertex v = p.stopVertex; - - // TODO depends on the notion of the paths remaining. 
- if (getEdgeCount(v, null/*executed+sampled(p)*/) > 0) { - /* - * This path branches at this vertex, so remove the old - * path 1st. - */ - paths.remove(p); - } - - // For each edge which is a neighbor of the vertex [v]. - final List<Edge> neighbors = null; - for(Edge e : neighbors) { - // 1. append the edge to the path - final Path p1 = p.addEdge(queryEngine, cutoff, e); - // 3. add the path to paths. - paths.add(p1); - } - - } - - final Path p = getSelectedJoinPath(paths.toArray(new Path[paths.size()])); - - if(p != null) { - - return p; - - } - - } // while(moreEdgesToSample) - - final Path p = getBestAlternativeJoinPath(paths.toArray(new Path[paths.size()])); - - if(p != null) { - - return p; - - } - - // TODO ROX as given can return null here, which looks like a bug. - return null; - - } // chainSample() - - /** - * Return the path which is selected by the termination criteria - * (looking for a path which dominates the alternatives). - * - * @param a - * An array of {@link Path}s to be tested. - * - * @return The selected path -or- <code>null</code> if none of the paths - * is selected. - * - * @todo Should we only explore beneath the diagonal? - * - * @todo What is the basis for comparing the expected cardinality of - * join paths? Where one path is not simply the one step extension - * of the other. - * <p> - * This rule might only be able to compare the costs for paths in - * which one path directly extends another. - * <p> - * It is not clear that this code is comparing all paths which - * need to be compared. - * - * @todo I have restated the termination rule as follows. - * <p> - * If there is a path [p] whose total cost is LTE the cost of - * executing just its last edge [e], then the path [p] dominates - * all paths beginning with edge [e]. The dominated paths should - * be pruned. - * <p> - * If there is a path, [p], which is an unordered extension of - * another path, [p1] (the vertices of p are a superset of the - * vertices of p1), and the cost of [p] is LTE the cost of [p1], - * then [p] dominates [p1]. The dominated paths should be pruned. - * <p> - * If there is a path, [p], which has the same vertices as a path - * [p1] and the cost of [p] is LTE the cost of [p1], then [p] - * dominates (or is equivalent to) [p1]. The path [p1] should be - * pruned. - * - * For a given path length [l], if no paths of length [l] remain - * then the minimum cost path of length GT [l] may be executed. - * - * @todo Due to sampling error and the desire to be robust to small - * differences in the expected cost of an operation, we should - * only consider two significant digits when comparing estimates - * of cost. E.g., 990 and 1000 should not be differentiated as - * they are the same within the sampling error. This should be - * used to chose all starting vertices which have the same minimum - * cardinality. - */ - public Path getSelectedJoinPath(final Path[] a) { - final StringBuilder sb = new StringBuilder(); - final Formatter f = new Formatter(sb); - Path p = null; - for (int i = 0; i < a.length; i++) { - final Path Pi = a[i]; - if (Pi.sample == null) - throw new RuntimeException("Not sampled: " + Pi); - for (int j = 0; j < a.length; j++) { - if (i == j) - continue; - final Path Pj = a[j]; - if (Pj.sample == null) - throw new RuntimeException("Not sampled: " + Pj); - /* - * FIXME This needs to compare the cost of Pj given path Pi - * against the cost of Pj when executed as a single edge (or - * by any other alternative join path sequence). 
The choice - * of Pi and Pj is not coherent and the same value of costPj - * is being used for both sides of the equation. - */ - final long costPi = Pi.sample.estimatedCardinality; - final double sfPi = Pi.sample.f; - final long costPj = Pj.sample.estimatedCardinality; - final long expectedCombinedCost = costPi - + (long) (sfPi * costPj); - /* - * @todo I think that LTE makes more sense here since having - * the same net cardinality for a given edge after - * performing more steps would appear to be worth while. - */ - final boolean lte = expectedCombinedCost <= costPj; - { - f - .format( - ... [truncated message content] |