From: <tho...@us...> - 2010-11-12 17:28:16
|
Revision: 3943 http://bigdata.svn.sourceforge.net/bigdata/?rev=3943&view=rev Author: thompsonbry Date: 2010-11-12 17:28:10 +0000 (Fri, 12 Nov 2010) Log Message: ----------- more on runtime query optimizer Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:17:33 UTC (rev 3942) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:28:10 UTC (rev 3943) @@ -1184,6 +1184,35 @@ final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); return a; } + + /** + * Return <code>true</code> if this path begins with the given path. + * + * @param p + * The given path. + * + * @return <code>true</code> if this path begins with the given path. + */ + public boolean beginsWith(final Path p) { + + if (p == null) + throw new IllegalArgumentException(); + + if (p.edges.size() > edges.size()) { + // Proven false since the caller's path is longer. + return false; + } + + for (int i = 0; i < p.edges.size(); i++) { + final Edge eSelf = edges.get(i); + final Edge eOther = p.edges.get(i); + if (eSelf != eOther) { + return false; + } + } + + return true; + } /** * Add an edge to a path, computing the estimated cardinality of the new @@ -1498,6 +1527,78 @@ } /** + * Do one breadth first expansion. + * + * @param queryEngine + * @param limit + * @param round + * @param a + * @return + * @throws Exception + */ + final public Path[] expand(final QueryEngine queryEngine, final int limit, + final int round, final Path[] a) throws Exception { + + final List<Path> tmp = new LinkedList<Path>(); + + // First, copy all existing paths. + for (Path x : a) { + tmp.add(x); + } + + // Then expand each path. + for (Path x : a) { + + if (x.edges.size() < round) { + + continue; + + } + + final Set<Vertex> used = new LinkedHashSet<Vertex>(); + + for (Edge edgeInGraph : E) { + + // Figure out which vertices are already part of this path. + final boolean v1Found = x.contains(edgeInGraph.v1); + final boolean v2Found = x.contains(edgeInGraph.v2); + + if (!v1Found && !v2Found) { + // Edge is not connected to this path. + continue; + } + + if (v1Found && v2Found) { + // Edge is already present in this path. + continue; + } + + final Vertex newVertex = v1Found ? edgeInGraph.v2 : edgeInGraph.v1; + + if(used.contains(newVertex)) { + // Vertex already used to extend this path. + continue; + } + + // add the new vertex to the set of used vertices. + used.add(newVertex); + + // Extend the path to the new vertex. + final Path p = x.addEdge(queryEngine, limit * round, + edgeInGraph); + + // Add to the set of paths for this round. + tmp.add(p); + + } + + } + + return tmp.toArray(new Path[tmp.size()]); + + } + + /** * Return the {@link Vertex} whose {@link IPredicate} is associated with * the given {@link BOp.Annotations#BOP_ID}. * @@ -2209,12 +2310,16 @@ final Path Pi = a[i]; if (Pi.sample == null) throw new RuntimeException("Not sampled: " + Pi); + if (pruned.contains(Pi)) + continue; for (int j = 0; j < a.length; j++) { if (i == j) continue; final Path Pj = a[j]; if (Pj.sample == null) throw new RuntimeException("Not sampled: " + Pj); + if (pruned.contains(Pj)) + continue; final boolean isPiSuperSet = Pi.isUnorderedSuperSet(Pj); if(!isPiSuperSet) { // Can not directly compare these join paths. @@ -2223,23 +2328,35 @@ final long costPi = Pi.cumulativeEstimatedCardinality; final long costPj = Pj.cumulativeEstimatedCardinality; final boolean lte = costPi <= costPj; + List<Integer> prunedByThisPath = null; + if (lte) { + prunedByThisPath = new LinkedList<Integer>(); + if (pruned.add(Pj)) + prunedByThisPath.add(j); + for (int k = 0; k < a.length; k++) { + final Path x = a[k]; + if (x.beginsWith(Pj)) { + if (pruned.add(x)) + prunedByThisPath.add(k); + } + } + } { f .format( "Comparing: P[%2d] with P[%2d] : %10d %2s %10d %s", i, j, costPi, (lte ? "<=" : ">"), - costPj, lte ? " **prune P["+j+"]**" : ""); + costPj, lte ? " *** pruned " + + prunedByThisPath : ""); System.err.println(sb); sb.setLength(0); } - if (lte) { - pruned.add(Pj); - } } // Pj } // Pi - System.err.println("Pruned "+pruned.size()+" of out "+a.length+" paths"); + System.err.println("Pruned " + pruned.size() + " of out " + + a.length + " paths"); final Set<Path> keep = new LinkedHashSet<Path>(); - for(Path p : a) { + for (Path p : a) { if(pruned.contains(p)) continue; keep.add(p); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 17:17:33 UTC (rev 3942) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 17:28:10 UTC (rev 3943) @@ -641,47 +641,116 @@ // } /* - * The set of one step extensions of those paths. - * - * @todo build this programmatically by finding the set of edges - * branching from the existing paths to a vertex not already part of - * the existing paths and having a total set of vertices which is - * distinct from all other paths already generated in this breadth - * first expansion of the search space. (ROX further constrains the - * new paths to extend the stop vertex of the path from which they - * are derived.) - * - * @todo always label edges by either minimum bopId or minimum - * estimated cardinality (with tie broken by bopId)? When extending - * a path in which more than one edge can reach the target vertex, - * always chose the edge having the source vertex with the minimum - * cardinality? + * t1 */ - final Path[] paths_t1 = new Path[] {// - // t0 - p0, // (2,3) - p1, // (2,4) - p2, // (4,1) - // t1 - p0.addEdge(queryEngine, limit, g.getEdge(v2, v4)), // aka (v3,v4) - p0.addEdge(queryEngine, limit, g.getEdge(v3, v0)), // - p0.addEdge(queryEngine, limit, g.getEdge(v3, v5)), // - // - p1.addEdge(queryEngine, limit, g.getEdge(v4, v1)), // - p1.addEdge(queryEngine, limit, g.getEdge(v4, v3)), // - p1.addEdge(queryEngine, limit, g.getEdge(v4, v5)), // - // - p2.addEdge(queryEngine, limit, g.getEdge(v1, v5)), // aka (4,5) - p2.addEdge(queryEngine, limit, g.getEdge(v4, v3)), // - p2.addEdge(queryEngine, limit, g.getEdge(v4, v2)), // + +// /* +// * The set of one step extensions of those paths. +// * +// * @todo build this programmatically by finding the set of edges +// * branching from the existing paths to a vertex not already part of +// * the existing paths and having a total set of vertices which is +// * distinct from all other paths already generated in this breadth +// * first expansion of the search space. (ROX further constrains the +// * new paths to extend the stop vertex of the path from which they +// * are derived.) +// * +// * @todo always label edges by either minimum bopId or minimum +// * estimated cardinality (with tie broken by bopId)? When extending +// * a path in which more than one edge can reach the target vertex, +// * always chose the edge having the source vertex with the minimum +// * cardinality? +// */ +// final Path[] paths_t1 = new Path[] {// +// // t0 +// p0, // (2,3) +// p1, // (2,4) +// p2, // (4,1) +// // t1 +// p0.addEdge(queryEngine, limit*2, g.getEdge(v2, v4)), // aka (v3,v4) +// p0.addEdge(queryEngine, limit*2, g.getEdge(v3, v0)), // +// p0.addEdge(queryEngine, limit*2, g.getEdge(v3, v5)), // +// // +// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v1)), // +// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v3)), // +// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v5)), // +// // +// p2.addEdge(queryEngine, limit*2, g.getEdge(v1, v5)), // aka (4,5) +// p2.addEdge(queryEngine, limit*2, g.getEdge(v4, v3)), // +// p2.addEdge(queryEngine, limit*2, g.getEdge(v4, v2)), // +// + /* + * *** Paths @ t1 - }; +p[ 1] 0.69, 68931 168831, [ 2 3 0 ] +p[ 2] 1.00, 99900 199800, [ 2 3 4 ] +p[ 3] 1.00, 99900 199800, [ 2 3 5 ] +p[ 5] 1.00, 999 1998, [ 2 4 1 ] +p[ 6] 100.00, 99900 100899, [ 2 4 3 ] +p[ 7] 20.00, 19980 20979, [ 2 4 5 ] +p[ 9] 16.67, 40650 43089, [ 1 4 5 ] +p[10] 1.00, 2439 4878, [ 1 4 2 ] +p[11] 5.00, 12195 14634, [ 1 4 3 ] + */ +// }; + int round = 1; + + final Path[] paths_t1 = g.expand(queryEngine, limit, round, paths_t0); + System.err.println("\n*** Paths @ t1\n" + JoinGraph.showTable(paths_t1)); - g.pruneJoinPaths(paths_t1); + final Path[] paths_t1_pruned = g.pruneJoinPaths(paths_t1); + + System.err.println("\n*** Paths @ t1 (after pruning)\n" + + JoinGraph.showTable(paths_t1_pruned)); + + /* + * t2 + */ + final Path[] paths_t2 = g.expand(queryEngine, limit, round++, paths_t1_pruned); + + System.err.println("\n*** Paths @ t2\n" + + JoinGraph.showTable(paths_t2)); + + final Path[] paths_t2_pruned = g.pruneJoinPaths(paths_t2); + + System.err.println("\n*** Paths @ t2 (after pruning)\n" + + JoinGraph.showTable(paths_t2_pruned)); + + +/* +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (0 5) +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (2 4) +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (3 4) +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (3 5) + +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (0 3) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (0 5) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (1 5) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (2 4) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (3 4) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (4 5) + +p[ 6] 1.00, 999 1998, (2 4) (1 4) (1 5) +p[ 6] 1.00, 999 1998, (2 4) (1 4) (2 3) +p[ 6] 1.00, 999 1998, (2 4) (1 4) (3 4) +p[ 6] 1.00, 999 1998, (2 4) (1 4) (4 5) + +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (0 3) +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (1 4) +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (3 5) +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (4 5) + +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (0 5) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (1 4) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (1 5) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (2 3) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (3 4) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (3 5) */ + // final Path selected_t1 = g.getSelectedJoinPath(paths_t1); // // if (selected_t1 != null) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |