[Bigdata-commit] SF.net SVN: bigdata:[7698] branches/BIGDATA_RELEASE_1_3_0

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 7698
          http://bigdata.svn.sourceforge.net/bigdata/?rev=7698&view=rev
Author:   thompsonbry
Date:     2013-12-30 15:02:24 +0000 (Mon, 30 Dec 2013)
Log Message:
-----------
- Adjusted defaults for the RTO in QueryHints.

- Added the limit, sampleType, and nedges to the explain view for the JoinGraph operator.

- Added parallel sampling of vertices to the JGraph.

See #64

Modified Paths:
--------------
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java
    branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java
===================================================================

--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java	2013-12-30 14:28:09 UTC (rev 7697)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java	2013-12-30 15:02:24 UTC (rev 7698)
@@ -1162,9 +1162,13 @@
                 w.write(cdata(Arrays.toString(vars)));
             }
             if (bop instanceof JoinGraph) {
-                final Path p = ((JoinGraph) bop).getPath(q);
-                final Map<PathIds, EdgeSample> samples = ((JoinGraph) bop)
+                final JoinGraph t = ((JoinGraph) bop);
+                final Path p = t.getPath(q);
+                final Map<PathIds, EdgeSample> samples = t
                         .getSamples(q);
+                w.write(cdata("sampleType=" + t.getSampleType()));
+                w.write(cdata(", limit=" + t.getLimit()));
+                w.write(cdata(", nedges=" + t.getNEdges()));
                 if (p != null && samples != null) {
                     // Show the RTO discovered join path.
                     w.write("<pre>");

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java	2013-12-30 14:28:09 UTC (rev 7697)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java	2013-12-30 15:02:24 UTC (rev 7698)
@@ -37,6 +37,9 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.log4j.Logger;
@@ -50,6 +53,7 @@
 import com.bigdata.bop.joinGraph.NoSolutionsException;
 import com.bigdata.bop.joinGraph.PartitionedJoinGroup;
 import com.bigdata.bop.rdf.join.DataSetJoin;
+import com.bigdata.util.concurrent.ExecutionExceptions;
 
 /**
  * A runtime optimizer for a join graph. The {@link JoinGraph} bears some
@@ -1225,19 +1229,91 @@
 	 *            not share a variable directly and hence will materialize the
 	 *            full cross product before filtering which is *really*
 	 *            expensive.
-	 * 
 	 */
     public void sampleAllVertices(final QueryEngine queryEngine, final int limit) {
 
+        // Setup tasks to sample vertices.
+        final List<Callable<Void>> tasks = new LinkedList<Callable<Void>>();
         for (Vertex v : V) {
 
-            v.sample(queryEngine, limit, sampleType);
+            tasks.add(new SampleVertexTask(queryEngine, v, limit, sampleType));
 
         }
 
+        // Sample vertices in parallel.
+        final List<Future<Void>> futures;
+        try {
+
+            futures = queryEngine.getIndexManager().getExecutorService()
+                    .invokeAll(tasks);
+
+        } catch (InterruptedException e) {
+            // propagate interrupt.
+            Thread.currentThread().interrupt();
+            return;
+        }
+
+        // Check futures for errors.
+        final List<Throwable> causes = new LinkedList<Throwable>();
+        for (Future<Void> f : futures) {
+            try {
+                f.get();
+            } catch (InterruptedException e) {
+                log.error(e);
+                causes.add(e);
+            } catch (ExecutionException e) {
+                log.error(e);
+                causes.add(e);
+            }
+        }
+
+        /*
+         * If there were any errors, then throw an exception listing them.
+         */
+        if (!causes.isEmpty()) {
+            // Throw exception back to the leader.
+            if (causes.size() == 1)
+                throw new RuntimeException(causes.get(0));
+            throw new RuntimeException("nerrors=" + causes.size(),
+                    new ExecutionExceptions(causes));
+        }
+
     }
 
     /**
+     * Task to sample a vertex.
+     * 
+     * @author <a href="mailto:tho...@us...">Bryan
+     *         Thompson</a>
+     */
+    static private class SampleVertexTask implements Callable<Void> {
+
+        private final QueryEngine queryEngine;
+        private final Vertex v;
+        private final int limit;
+        private final SampleType sampleType;
+        
+        public SampleVertexTask(final QueryEngine queryEngine, final Vertex v,
+                final int limit, final SampleType sampleType) {
+
+            this.queryEngine = queryEngine;
+            this.v = v;
+            this.limit = limit;
+            this.sampleType = sampleType;
+
+        }
+        
+        @Override
+        public Void call() throws Exception {
+
+            v.sample(queryEngine, limit, sampleType);
+
+            return null;
+        }
+        
+    }
+    
+    /**
      * Estimate the cardinality of each edge. This is only invoked by
      * {@link #round0(QueryEngine, int, int)} when it is trying to select the
      * minimum cardinality edges which it will use to create the initial set of
@@ -1362,9 +1438,9 @@
 
 				paths.add(p);
 
-			}
+			} // next other vertex.
         
-        }
+        } // next vertex
         
         return paths.toArray(new Path[paths.size()]);
 
@@ -1393,7 +1469,7 @@
      */
     public Path[] pruneJoinPaths(final Path[] a,
             final Map<PathIds, EdgeSample> edgeSamples) {
-    	final boolean neverPruneUnderflow = true;
+        final boolean neverPruneUnderflow = true;
         /*
          * Find the length of the longest path(s). All shorter paths are
          * dropped in each round.

Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java
===================================================================
--- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java	2013-12-30 14:28:09 UTC (rev 7697)
+++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/QueryHints.java	2013-12-30 15:02:24 UTC (rev 7698)
@@ -111,10 +111,18 @@
      * evaluation (default {@value #DEFAULT_RTO_LIMIT}). A larger limit and a
      * random sample will provide a more accurate estimate of the cost of the
      * join paths but are increase the runtime overhead of the RTO optimizer.
+     * Smaller value can lead to underflow in the cardinality estimates of the
+     * cutoff joins resulting in a longer execution time for the RTO since more
+     * paths may be explored or the explored paths must be deepened in order to
+     * differentiate their costs. Values corresponding to up to the expected
+     * number of triples on an index page should have the same IO cost since
+     * there will be a single page read for the vertex and the output of the
+     * join will be cutoff once the desired number of join results has been
+     * produced.
      */
     String RTO_LIMIT = "RTO-limit";
 
-    int DEFAULT_RTO_LIMIT = 20;
+    int DEFAULT_RTO_LIMIT = 100;
 
     /**
      * The <i>nedges</i> edges of the join graph having the lowest cardinality
@@ -124,11 +132,14 @@
      * <i>nedges</i> of those edges having the lowest cardinality are used to
      * form the initial set of join paths. For each edge selected to form a join
      * path, the starting vertex will be the vertex of that edge having the
-     * lower cardinality.
+     * lower cardinality. If ONE (1), then only those join paths that start with
+     * the two vertices having the lowest cardinality will be explored (this was
+     * the published behavior for ROX). When greater than ONE, a broader search
+     * of the join paths will be carried out.
      */
     String RTO_NEDGES = "RTO-nedges";
 
-    int DEFAULT_RTO_NEDGES = 2;
+    int DEFAULT_RTO_NEDGES = 1;
 
     /**
      * Query hint sets the optimistic threshold for the static join order

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





[Bigdata-commit] SF.net SVN: bigdata:[7698] branches/BIGDATA_RELEASE_1_3_0

Fast, scalable, robust graph database platform

[Bigdata-commit] SF.net SVN: bigdata:[7698] branches/BIGDATA_RELEASE_1_3_0