From: <tho...@us...> - 2013-12-30 15:02:32
|
Revision: 7698 http://bigdata.svn.sourceforge.net/bigdata/?rev=7698&view=rev Author: thompsonbry Date: 2013-12-30 15:02:24 +0000 (Mon, 30 Dec 2013) Log Message: ----------- - Adjusted defaults for the RTO in QueryHints. - Added the limit, sampleType, and nedges to the explain view for the JoinGraph operator. - Added parallel sampling of vertices to the JGraph. See #64 Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2013-12-30 14:28:09 UTC (rev 7697) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2013-12-30 15:02:24 UTC (rev 7698) @@ -1162,9 +1162,13 @@ w.write(cdata(Arrays.toString(vars))); } if (bop instanceof JoinGraph) { - final Path p = ((JoinGraph) bop).getPath(q); - final Map<PathIds, EdgeSample> samples = ((JoinGraph) bop) + final JoinGraph t = ((JoinGraph) bop); + final Path p = t.getPath(q); + final Map<PathIds, EdgeSample> samples = t .getSamples(q); + w.write(cdata("sampleType=" + t.getSampleType())); + w.write(cdata(", limit=" + t.getLimit())); + w.write(cdata(", nedges=" + t.getNEdges())); if (p != null && samples != null) { // Show the RTO discovered join path. w.write("<pre>"); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2013-12-30 14:28:09 UTC (rev 7697) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2013-12-30 15:02:24 UTC (rev 7698) @@ -37,6 +37,9 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; import org.apache.log4j.Logger; @@ -50,6 +53,7 @@ import com.bigdata.bop.joinGraph.NoSolutionsException; import com.bigdata.bop.joinGraph.PartitionedJoinGroup; import com.bigdata.bop.rdf.join.DataSetJoin; +import com.bigdata.util.concurrent.ExecutionExceptions; /** * A runtime optimizer for a join graph. The {@link JoinGraph} bears some @@ -1225,19 +1229,91 @@ * not share a variable directly and hence will materialize the * full cross product before filtering which is *really* * expensive. - * */ public void sampleAllVertices(final QueryEngine queryEngine, final int limit) { + // Setup tasks to sample vertices. + final List<Callable<Void>> tasks = new LinkedList<Callable<Void>>(); for (Vertex v : V) { - v.sample(queryEngine, limit, sampleType); + tasks.add(new SampleVertexTask(queryEngine, v, limit, sampleType)); } + // Sample vertices in parallel. + final List<Future<Void>> futures; + try { + + futures = queryEngine.getIndexManager().getExecutorService() + .invokeAll(tasks); + + } catch (InterruptedException e) { + // propagate interrupt. + Thread.currentThread().interrupt(); + return; + } + + // Check futures for errors. + final List<Throwable> causes = new LinkedList<Throwable>(); + for (Future<Void> f : futures) { + try { + f.get(); + } catch (InterruptedException e) { + log.error(e); + causes.add(e); + } catch (ExecutionException e) { + log.error(e); + causes.add(e); + } + } + + /* + * If there were any errors, then throw an exception listing them. + */ + if (!causes.isEmpty()) { + // Throw exception back to the leader. + if (causes.size() == 1) + throw new RuntimeException(causes.get(0)); + throw new RuntimeException("nerrors=" + causes.size(), + new ExecutionExceptions(causes)); + } + } /** + * Task to sample a vertex. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + static private class SampleVertexTask implements Callable<Void> { + + private final QueryEngine queryEngine; + private final Vertex v; + private final int limit; + private final SampleType sampleType; + + public SampleVertexTask(final QueryEngine queryEngine, final Vertex v, + final int limit, final SampleType sampleType) { + + this.queryEngine = queryEngine; + this.v = v; + this.limit = limit; + this.sampleType = sampleType; + + } + + @Override + public Void call() throws Exception { + + v.sample(queryEngine, limit, sampleType); + + return null; + } + + } + + /** * Estimate the cardinality of each edge. This is only invoked by * {@link #round0(QueryEngine, int, int)} when it is trying to select the * minimum cardinality edges which it will use to create the initial set of @@ -1362,9 +1438,9 @@ paths.add(p); - } + } // next other vertex. - } + } // next vertex return paths.toArray(new Path[paths.size()]); @@ -1393,7 +1469,7 @@ */ public Path[] pruneJoinPaths(final Path[] a, final Map<PathIds, EdgeSample> edgeSamples) { - final boolean neverPruneUnderflow = true; + final boolean neverPruneUnderflow = true; /* * Find the length of the longest path(s). All shorter paths are * dropped in each round. Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2013-12-30 14:28:09 UTC (rev 7697) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java 2013-12-30 15:02:24 UTC (rev 7698) @@ -111,10 +111,18 @@ * evaluation (default {@value #DEFAULT_RTO_LIMIT}). A larger limit and a * random sample will provide a more accurate estimate of the cost of the * join paths but are increase the runtime overhead of the RTO optimizer. + * Smaller value can lead to underflow in the cardinality estimates of the + * cutoff joins resulting in a longer execution time for the RTO since more + * paths may be explored or the explored paths must be deepened in order to + * differentiate their costs. Values corresponding to up to the expected + * number of triples on an index page should have the same IO cost since + * there will be a single page read for the vertex and the output of the + * join will be cutoff once the desired number of join results has been + * produced. */ String RTO_LIMIT = "RTO-limit"; - int DEFAULT_RTO_LIMIT = 20; + int DEFAULT_RTO_LIMIT = 100; /** * The <i>nedges</i> edges of the join graph having the lowest cardinality @@ -124,11 +132,14 @@ * <i>nedges</i> of those edges having the lowest cardinality are used to * form the initial set of join paths. For each edge selected to form a join * path, the starting vertex will be the vertex of that edge having the - * lower cardinality. + * lower cardinality. If ONE (1), then only those join paths that start with + * the two vertices having the lowest cardinality will be explored (this was + * the published behavior for ROX). When greater than ONE, a broader search + * of the join paths will be carried out. */ String RTO_NEDGES = "RTO-nedges"; - int DEFAULT_RTO_NEDGES = 2; + int DEFAULT_RTO_NEDGES = 1; /** * Query hint sets the optimistic threshold for the static join order This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |