From: <tho...@us...> - 2010-11-17 21:37:40
|
Revision: 3953 http://bigdata.svn.sourceforge.net/bigdata/?rev=3953&view=rev Author: thompsonbry Date: 2010-11-17 21:37:33 +0000 (Wed, 17 Nov 2010) Log Message: ----------- JoinGraph - javadoc identifying some issues from a call with MikeP. BOpStats - now tracks the #of tasks which have been executed for a given operator. QueryLog - added the opCount column and some code reorganization. QueryEngineTestAnnotations - javadoc. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-17 17:32:16 UTC (rev 3952) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-17 21:37:33 UTC (rev 3953) @@ -123,7 +123,38 @@ * Since the join graph is fed the vertices (APs), it does not have access * to the annotated joins so we need to generated appropriately annotated * joins when sampling an edge and when evaluation a subquery. + * <p> + * One solution would be to always use the unpartitioned views of the + * indices for the runtime query optimizer, which is how we are estimating + * the range counts of the access paths right now. [Note that the static + * query optimizer ignores named and default graphs, while the runtime + * query optimizer SHOULD pay attention to these things and exploit their + * conditional selectivity for the query plan.] * + * @todo When there are optional join graphs, are we going to handle that by + * materializing a sample (or all) of the joins feeding that join graph + * and then apply the runtime optimizer to the optional join graph, + * getting out a sample to feed onto any downstream join graph? + * + * @todo When we run into a cardinality estimation underflow (the expected + * cardinality goes to zero) we could double the sample size for just + * those join paths which hit a zero estimated cardinality and re-run them + * within the round. This would imply that we keep per join path limits. + * The vertex and edge samples are already aware of the limit at which + * they were last sampled so this should not cause any problems there. + * + * @todo When comparing choices among join paths having fully bound tails where + * the estimated cardinality has also gone to zero, we should prefer to + * evaluate vertices in the tail with better index locality first. For + * example, if one vertex had one variable in the original plan while + * another had two variables, then solutions which reach the 2-var vertex + * could be spread out over a much wider range of the selected index than + * those which reach the 1-var vertex. [In order to support this, we would + * need a means to indicate that a fully bound access path should use an + * index specified by the query optimizer rather than the primary index + * for the relation. In addition, this suggests that we should keep bloom + * filters for more than just the SPO(C) index in scale-out.] + * * @todo Examine behavior when we do not have perfect covering indices. This * will mean that some vertices can not be sampled using an index and that * estimation of their cardinality will have to await the estimation of Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java 2010-11-17 17:32:16 UTC (rev 3952) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java 2010-11-17 21:37:33 UTC (rev 3953) @@ -54,6 +54,13 @@ */ final public CAT elapsed = new CAT(); + /** + * The #of instances of a given operator which have been created for a given + * query. This provides interesting information about the #of task instances + * for each operator which were required to execute a query. + */ + final public CAT opCount = new CAT(); + /** * #of chunks in. */ @@ -83,7 +90,9 @@ * Constructor. */ public BOpStats() { - + + opCount.increment(); + } /** @@ -98,21 +107,18 @@ return; } elapsed.add(o.elapsed.get()); + opCount.add(o.opCount.get()); chunksIn.add(o.chunksIn.get()); unitsIn.add(o.unitsIn.get()); unitsOut.add(o.unitsOut.get()); chunksOut.add(o.chunksOut.get()); -// chunksIn.addAndGet(o.chunksIn.get()); -// unitsIn.addAndGet(o.unitsIn.get()); -// unitsOut.addAndGet(o.unitsOut.get()); -// chunksOut.addAndGet(o.chunksOut.get()); } - public String toString() { final StringBuilder sb = new StringBuilder(); sb.append(super.toString()); sb.append("{elapsed=" + elapsed.get()); + sb.append(",opCount=" + opCount.get()); sb.append(",chunksIn=" + chunksIn.get()); sb.append(",unitsIn=" + unitsIn.get()); sb.append(",chunksOut=" + chunksOut.get()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java 2010-11-17 17:32:16 UTC (rev 3952) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java 2010-11-17 21:37:33 UTC (rev 3953) @@ -69,9 +69,20 @@ boolean DEFAULT_ONE_MESSAGE_PER_CHUNK = false; + /** + * This option may be used to place an optional limit on the #of concurrent + * tasks which may run for the same (bopId,shardId) for a given query. The + * query is guaranteed to make progress as long as this is some positive + * integer. Limiting this value can limit the concurrency with which certain + * operators are evaluated and that can have a negative effect on the + * throughput for a given query. + */ String MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD = QueryEngineTestAnnotations.class.getName() + ".maxConcurrentTasksPerOperatorAndShard"; + /** + * The default is essentially unlimited. + */ int DEFAULT_MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD = Integer.MAX_VALUE; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-11-17 17:32:16 UTC (rev 3952) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2010-11-17 21:37:33 UTC (rev 3953) @@ -56,6 +56,10 @@ .getLogger(QueryLog.class); static { + logTableHeader(); + } + + static public void logTableHeader() { if(log.isInfoEnabled()) log.info(QueryLog.getTableHeader()); } @@ -74,27 +78,10 @@ try { -// if (log.isDebugEnabled()) { + logDetailRows(q); - /* - * Detail row for each operator in the query. - */ - final Integer[] order = BOpUtility.getEvaluationOrder(q - .getQuery()); - - int orderIndex = 0; - for (Integer bopId : order) { - log - .info(getTableRow(q, orderIndex, bopId, false/* summary */)); - orderIndex++; - } - -// } - - // summary row. - log.info(getTableRow(q, -1/* orderIndex */, q.getQuery().getId(), - true/* summary */)); - + logSummaryRow(q); + } catch (RuntimeException t) { log.error(t,t); @@ -105,6 +92,34 @@ } + /** + * Log a detail row for each operator in the query. + */ + static private void logDetailRows(final IRunningQuery q) { + + final Integer[] order = BOpUtility.getEvaluationOrder(q.getQuery()); + + int orderIndex = 0; + + for (Integer bopId : order) { + + log.info(getTableRow(q, orderIndex, bopId, false/* summary */)); + + orderIndex++; + + } + + } + + /** + * Log a summary row for the query. + */ + static private void logSummaryRow(final IRunningQuery q) { + + log.info(getTableRow(q, -1/* orderIndex */, q.getQuery().getId(), true/* summary */)); + + } + static private String getTableHeader() { final StringBuilder sb = new StringBuilder(); @@ -135,6 +150,7 @@ // dynamics (aggregated for totals as well). sb.append("\tfanIO"); sb.append("\tsumMillis"); // cumulative milliseconds for eval of this operator. + sb.append("\topCount"); // cumulative #of invocations of tasks for this operator. sb.append("\tchunksIn"); sb.append("\tunitsIn"); sb.append("\tchunksOut"); @@ -305,6 +321,8 @@ sb.append('\t'); sb.append(stats.elapsed.get()); sb.append('\t'); + sb.append(stats.opCount.get()); + sb.append('\t'); sb.append(stats.chunksIn.get()); sb.append('\t'); sb.append(stats.unitsIn.get()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |