From: <tho...@us...> - 2013-12-30 16:02:25
|
Revision: 7701 http://bigdata.svn.sourceforge.net/bigdata/?rev=7701&view=rev Author: thompsonbry Date: 2013-12-30 16:02:18 +0000 (Mon, 30 Dec 2013) Log Message: ----------- Added parallel resampling of the vertices in the join graph. See #64 (RTO) Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2013-12-30 15:36:02 UTC (rev 7700) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2013-12-30 16:02:18 UTC (rev 7701) @@ -799,16 +799,22 @@ } - for (Path x : a) { + // re-sample vertices. + sampleVertices(queryEngine, vertexLimit); + +// for (Map.Entry<Vertex, AtomicInteger> e : vertexLimit.entrySet()) { +// +//// final Vertex v = x.vertices[0]; +//// final int limit = vertexLimit.get(v).intValue(); +// +// final Vertex v = e.getKey(); +// +// final int limit = e.getValue().get(); +// +// v.sample(queryEngine, limit, sampleType); +// +// } - final Vertex v = x.vertices[0]; - - final int limit = vertexLimit.get(v).intValue(); - - v.sample(queryEngine, limit, sampleType); - - } - } /* @@ -830,15 +836,15 @@ int nunderflow = 0; for (Path x : a) { - /* - * Get the new sample limit for the path. - * - * TODO We only need to increase the sample limit starting at the - * vertex where we have a cardinality underflow or variability in - * the cardinality estimate. This is increasing the limit in each - * round of expansion, which means that we are reading more data - * than we really need to read. - */ + /* + * Get the new sample limit for the path. + * + * TODO We only need to increase the sample limit starting at the + * vertex where we have a cardinality underflow or variability in + * the cardinality estimate. This is increasing the limit in each + * round of expansion, which means that we are reading more data + * than we really need to read. + */ final int limit = x.getNewLimit(limitIn); // The cutoff join sample of the one step shorter path segment. @@ -1289,10 +1295,44 @@ */ public void sampleAllVertices(final QueryEngine queryEngine, final int limit) { + final Map<Vertex, AtomicInteger> vertexLimit = new LinkedHashMap<Vertex, AtomicInteger>(); + + for (Vertex v : V) { + + vertexLimit.put(v,new AtomicInteger(limit)); + + } + + sampleVertices(queryEngine, vertexLimit); + + } + + /** + * (Re-)sample a set of vertices. Sampling is done in parallel. + * <p> + * Note: A request to re-sample a vertex is a NOP unless the limit has been + * increased since the last time the vertex was sampled. It is also a NOP if + * the vertex has been fully materialized. + * + * @param queryEngine + * @param vertexLimit + * A map whose keys are the {@link Vertex vertices} to be + * (re-)samples and whose values are the <code>limit</code> to be + * used when sampling the associated vertex. This map is + * read-only so it only needs to be thread-safe for concurrent + * readers. + */ + private void sampleVertices(final QueryEngine queryEngine, + final Map<Vertex, AtomicInteger> vertexLimit) { + // Setup tasks to sample vertices. final List<Callable<Void>> tasks = new LinkedList<Callable<Void>>(); - for (Vertex v : V) { + for (Map.Entry<Vertex, AtomicInteger> e : vertexLimit.entrySet()) { + final Vertex v = e.getKey(); + + final int limit = e.getValue().get(); + tasks.add(new SampleVertexTask(queryEngine, v, limit, sampleType)); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-14 12:55:21
|
Revision: 7790 http://bigdata.svn.sourceforge.net/bigdata/?rev=7790&view=rev Author: thompsonbry Date: 2014-01-14 12:55:11 +0000 (Tue, 14 Jan 2014) Log Message: ----------- More information on cardinality underflow case. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 12:45:59 UTC (rev 7789) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 12:55:11 UTC (rev 7790) @@ -542,11 +542,27 @@ } + /* + * Show information about the paths and the paths that are + * experiencing cardinality underflow. + */ + log.warn("Cardinality estimate underflow - resampling: round=" + round + ", npaths=" + paths.length + ", nunderflow=" + nunderflow + ", limit=" + limit + "\n" + showTable(paths)); + for(Path p : paths) { + final EdgeSample edgeSample; + synchronized(edgeSamples) { + edgeSample = edgeSamples.get(p.getVertexIds()); + } + if (edgeSample.isUnderflow()) { + log.warn("Underflow on path::" + + showPath(p, edgeSamples)); + } + } + } if (nunderflow > 0) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-14 13:34:28
|
Revision: 7793 http://bigdata.svn.sourceforge.net/bigdata/?rev=7793&view=rev Author: thompsonbry Date: 2014-01-14 13:34:22 +0000 (Tue, 14 Jan 2014) Log Message: ----------- Added logic to select the join path having the minimum sumEstCard if there is cardinality underflow for some of the join paths. This addresses a corner case where the RTO has multiple possible paths. If all paths have underflow, it accepts the first path. Note: This does not eliminate paths for the same sets of vertices where some (or all) paths have underflow during the expansion rounds. Therefore it might do too much work on such queries. Note: We should, perhaps, throw out a NoSolutionsException instead when all paths have underflow. See #64 (RTO). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 12:59:09 UTC (rev 7792) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 13:34:22 UTC (rev 7793) @@ -498,7 +498,8 @@ final int nvertices = V.length; - int round = 1; + int round = 1; // #of rounds. + int nunderflow = 0; // #of paths with card. underflow (no solutions). while (paths.length > 0 && round < nvertices - 1) { @@ -528,8 +529,6 @@ * limit, e.g., by specifying a MAX_LIMIT annotation on the * JoinGraph operator. */ - int nunderflow = 0; - for (int i = 0; i < 3; i++) { nunderflow = resamplePaths(queryEngine, limit, round, paths, @@ -558,7 +557,7 @@ edgeSample = edgeSamples.get(new PathIds(p)); } if (edgeSample.isUnderflow()) { - log.warn("Underflow on path::" + log.warn("Underflow on path::\n" + showPath(p, edgeSamples)); } } @@ -584,24 +583,84 @@ throw new NoSolutionsException(); } + + /* + * In general, there should be one winner. However, if there are paths + * with cardinality estimate underflow (that is, paths that do not have + * any solutions when sampling the path) then there can be multiple + * solutions. + * + * When this occurs we have a choice. We can either the path with the + * minimum cost (min sumEstCard) or we can take a path that does not + * have a cardinality estimate underflow because we actually have an + * estimate for that path. In principle, the path with the minimum + * estimated cost should be the better choice, but we do not have any + * information about the order of the joins beyond the point where the + * cardinality underflow begins on that path. If we take a path that + * does not have a cardinality estimate underflow, then at least we know + * that the join order has been optimized for the entire path. + */ + + final Path selectedPath; - // Should be one winner. if (paths.length != 1) { - throw new AssertionError("Expected one path but have " - + paths.length + " paths."); + log.warn("Multiple paths exist: npaths=" + paths.length + + ", nunderflow=" + nunderflow + "\n" + + showTable(paths, null/* pruned */, edgeSamples)); + + Path t = null; + for (Path p : paths) { + + if (p.edgeSample.isUnderflow()) { + + /* + * Skip paths with cardinality estimate underflow. They are + * not fully tested in the data since no solutions have made + * it through all of the joins. + */ + + continue; + + } + + if (t == null || p.sumEstCard < t.sumEstCard) { + + // Accept path with the least cost. + t = p; + + } + + } + + if (t == null) { + + /* Arbitrary choice if all paths underflow. + * + * TODO Or throw out NoSolutionsException? + */ + t = paths[0]; + + } + + selectedPath = t; + + } else { + + selectedPath = paths[0]; + } if (log.isInfoEnabled()) { log.info("\n*** Selected join path: " - + Arrays.toString(paths[0].getVertexIds()) + "\n" - + showPath(paths[0], edgeSamples)); + + Arrays.toString(selectedPath.getVertexIds()) + "\n" + + showPath(selectedPath, edgeSamples)); } - return paths[0]; + return selectedPath; } @@ -1941,9 +2000,9 @@ } /** - * Comma delimited table showing the estimated join hit ratio, the estimated - * cardinality, and the set of vertices for each of the specified join - * paths. + * Return a comma delimited table showing the estimated join hit ratio, the + * estimated cardinality, and the set of vertices for each of the specified + * join paths. * * @param a * A set of paths (typically those before pruning). @@ -1954,8 +2013,35 @@ * * @return A table with that data. */ - static public String showTable(final Path[] a,final Path[] pruned) { - final StringBuilder sb = new StringBuilder(); + static public String showTable(final Path[] a, final Path[] pruned) { + + return showTable(a, pruned, null/* edgeSamples */); + + } + + /** + * Return a comma delimited table showing the estimated join hit ratio, the + * estimated cardinality, and the set of vertices for each of the specified + * join paths. + * + * @param a + * A set of paths (typically those before pruning). + * @param pruned + * The set of paths after pruning (those which were retained) + * (optional). When given, the paths which were pruned are marked + * in the table. + * @param edgeSamples + * When non-<code>null</code>, the details will be shown (using + * {@link #showPath(Path, Map)}) for each path that is + * experiencing cardinality estimate underflow (no solutions in + * the data). + * + * @return A table with that data. + */ + static public String showTable(final Path[] a, final Path[] pruned, + final Map<PathIds, EdgeSample> edgeSamples) { + final StringBuilder sb = new StringBuilder(128 * a.length); + final List<Path> underflowPaths = new LinkedList<Path>(); final Formatter f = new Formatter(sb); f.format("%-4s %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = %10s %10s%1s : %10s %10s %10s %10s", "path",// @@ -2030,8 +2116,28 @@ // sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() // + ")"); sb.append("\n"); + if(x.edgeSample.isUnderflow()) { + underflowPaths.add(x); + } } + + if (edgeSamples != null && !underflowPaths.isEmpty()) { + + // Show paths with cardinality estimate underflow. + sb.append("\nPaths with cardinality estimate underflow::\n"); + + for (Path p : underflowPaths) { + + sb.append(showPath(p, edgeSamples)); + + sb.append("----\n"); + + } + + } + return sb.toString(); + } /** @@ -2039,14 +2145,17 @@ * join hit ratio for each step in the path. * * @param p - * The join path. + * The join path (required). * @param edgeSamples - * A map containing the samples utilized by the {@link Path}. + * A map containing the samples utilized by the {@link Path} + * (required). */ static public String showPath(final Path x, final Map<PathIds, EdgeSample> edgeSamples) { if (x == null) throw new IllegalArgumentException(); + if (edgeSamples == null) + throw new IllegalArgumentException(); final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2014-01-14 13:57:34
|
Revision: 7794 http://bigdata.svn.sourceforge.net/bigdata/?rev=7794&view=rev Author: thompsonbry Date: 2014-01-14 13:57:27 +0000 (Tue, 14 Jan 2014) Log Message: ----------- removed redundant logging for cardinality underflow. javadoc edits. Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 13:34:22 UTC (rev 7793) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2014-01-14 13:57:27 UTC (rev 7794) @@ -545,23 +545,12 @@ * Show information about the paths and the paths that are * experiencing cardinality underflow. */ - + log.warn("Cardinality estimate underflow - resampling: round=" + round + ", npaths=" + paths.length + ", nunderflow=" + nunderflow + ", limit=" + limit + "\n" - + showTable(paths)); + + showTable(paths, null/* pruned */, edgeSamples)); - for(Path p : paths) { - final EdgeSample edgeSample; - synchronized(edgeSamples) { - edgeSample = edgeSamples.get(new PathIds(p)); - } - if (edgeSample.isUnderflow()) { - log.warn("Underflow on path::\n" - + showPath(p, edgeSamples)); - } - } - } if (nunderflow > 0) { @@ -1474,6 +1463,11 @@ * * TODO Only sample vertices with an index. * + * TODO If any required join has a vertex with a proven exact + * cardinality of zero, then there are no solutions for the join + * group. Throw a {@link NoSolutionsException} and have the + * caller handle it? + * * TODO Consider other cases where we can avoid sampling a vertex * or an initial edge. * <p> @@ -1486,21 +1480,16 @@ * not share a variable directly and hence will materialize the * full cross product before filtering which is *really* * expensive. - * - * FIXME We need attach any access path filters that are required - * for named graphs or scale-out for the RTO to function in those - * environments. We DO NOT need to attach SPARQL FILTERs here - - * those get applied when we evaluate the cutoff joins from one - * vertex to another. */ - private void sampleAllVertices(final QueryEngine queryEngine, final int limit) { + private void sampleAllVertices(final QueryEngine queryEngine, + final int limit) { final Map<Vertex, AtomicInteger> vertexLimit = new LinkedHashMap<Vertex, AtomicInteger>(); - + for (Vertex v : V) { - vertexLimit.put(v,new AtomicInteger(limit)); - + vertexLimit.put(v, new AtomicInteger(limit)); + } sampleVertices(queryEngine, vertexLimit); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |