From: <tho...@us...> - 2011-01-16 00:59:24
|
Revision: 4107 http://bigdata.svn.sourceforge.net/bigdata/?rev=4107&view=rev Author: thompsonbry Date: 2011-01-16 00:59:18 +0000 (Sun, 16 Jan 2011) Log Message: ----------- Change to JoinGraph to let it work with predicates which do not share any variables with other predicates in the join graph. This raises the exploration cost significantly for such queries (e.g., BSBM Q5) but it now produces a good answer (2x faster join path). I've updated the RTO issue (#64) to reflect some additional questions about how to handle predicates which do not share variables and how to handle variables which are "shared" only in the sense that they appear in a constraint on another predicate, but not as part of the access path pattern. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-15 23:53:24 UTC (rev 4106) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-16 00:59:18 UTC (rev 4107) @@ -779,8 +779,9 @@ throw new IllegalArgumentException(); if (shared == null) throw new IllegalArgumentException(); - if (shared.isEmpty()) - throw new IllegalArgumentException(); + // Note: We need to allow edges which do not share variables +// if (shared.isEmpty()) +// throw new IllegalArgumentException(); this.v1 = v1; this.v2 = v2; this.shared = shared; @@ -1782,6 +1783,11 @@ /* * Identify the edges by looking for shared variables among the * predicates. + * + * Note: If a vertex does not share ANY variables then it is paired + * with every other vertex. Such joins will always produce a full + * cross product and they can be taken paired with any of the other + * vertices. */ { @@ -1789,10 +1795,15 @@ for (int i = 0; i < v.length; i++) { + // consider a source vertex. final IPredicate<?> p1 = v[i]; + // #of vertices which share a variable with source vertex. + int nmatched = 0; + for (int j = i + 1; j < v.length; j++) { + // consider a possible target vertex. final IPredicate<?> p2 = v[j]; final Set<IVariable<?>> shared = Rule.getSharedVars(p1, @@ -1800,12 +1811,34 @@ if (shared != null && !shared.isEmpty()) { + // the source and target vertices share var(s). tmp.add(new Edge(V[i], V[j], shared)); + + nmatched++; } } + if (nmatched == 0) { + + /* + * The source vertex does not share any variables. In + * order to explore join paths which include that vertex + * we therefore pair it with each of the other vertices. + */ + for (int j = 0; j < v.length; j++) { + + if (j == i) + continue; + + tmp.add(new Edge(V[i], V[j], + Collections.EMPTY_SET)); + + } + + } + } E = tmp.toArray(new Edge[0]); @@ -2698,8 +2731,10 @@ this.context = context; + // The initial cutoff sampling limit. limit = getLimit(); + // The initial number of edges (1 step paths) to explore. nedges = getNEdges(); if (limit <= 0) Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-15 23:53:24 UTC (rev 4106) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBSBMData.java 2011-01-16 00:59:18 UTC (rev 4107) @@ -100,7 +100,7 @@ private AbstractTripleStore database; /** The initial sampling limit. */ - private final int limit = 1000; + private final int limit = 100; /** The #of edges considered for the initial paths. */ private final int nedges = 2; @@ -113,15 +113,20 @@ * When true, do a warm up run of the plan generated by the static query * optimizer. */ - private final boolean warmUp = true; + private final boolean warmUp = false; /** * The #of times to run each query. Use N GT ONE (1) if you want to converge * onto the hot query performance. */ - private final int ntrials = 1; + private final int ntrials = 3; /** + * When <code>true</code> runs the query in the given order. + */ + private final boolean runGivenOrder = false; + + /** * When <code>true</code> runs the dynamic query optimizer and then evaluates * the generated query plan. */ @@ -501,7 +506,7 @@ final String GIVEN = getName() + " : given ["+i+"] :"; - if (true/* originalOrder */) { + if (runGivenOrder) { runQuery(GIVEN, queryEngine, preds); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |