From: <tho...@us...> - 2013-12-30 13:41:47
|
Revision: 7696 http://bigdata.svn.sourceforge.net/bigdata/?rev=7696&view=rev Author: thompsonbry Date: 2013-12-30 13:41:38 +0000 (Mon, 30 Dec 2013) Log Message: ----------- BOpBase - @Override annotations. QueryLog - pretty print of the RTO computed join path. JGraph - exposed the edge samples to JoinGraph operator. See #64 (RTO). Modified Paths: -------------- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/BOpBase.java 2013-12-30 12:27:34 UTC (rev 7695) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/BOpBase.java 2013-12-30 13:41:38 UTC (rev 7696) @@ -175,6 +175,7 @@ } + @Override final public Map<String, Object> annotations() { return Collections.unmodifiableMap(annotations); @@ -234,6 +235,7 @@ } + @Override public BOp get(final int index) { return args[index]; @@ -286,6 +288,7 @@ } + @Override public int arity() { return args.length; @@ -297,6 +300,7 @@ * <p> * Note: This is much less efficient than {@link #argIterator()}. */ + @Override final public List<BOp> args() { return Collections.unmodifiableList(Arrays.asList(args)); @@ -309,6 +313,7 @@ * The iterator does not support removal. (This is more efficient than * #args()). */ + @Override final public Iterator<BOp> argIterator() { return new ArgIterator(); @@ -339,6 +344,7 @@ } // shallow copy + @Override public BOp[] toArray() { final BOp[] a = new BOp[args.length]; @@ -475,6 +481,7 @@ // // } + @Override public Object getProperty(final String name) { return annotations.get(name); @@ -543,6 +550,7 @@ } + @Override public BOpBase setProperty(final String name, final Object value) { final BOpBase tmp = (BOpBase) this.clone(); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2013-12-30 12:27:34 UTC (rev 7695) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/engine/QueryLog.java 2013-12-30 13:41:38 UTC (rev 7696) @@ -51,6 +51,11 @@ import com.bigdata.bop.join.IHashJoinUtility; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.join.PipelineJoinStats; +import com.bigdata.bop.joinGraph.rto.EdgeSample; +import com.bigdata.bop.joinGraph.rto.JGraph; +import com.bigdata.bop.joinGraph.rto.JoinGraph; +import com.bigdata.bop.joinGraph.rto.Path; +import com.bigdata.bop.joinGraph.rto.PathIds; import com.bigdata.bop.rdf.join.ChunkedMaterializationOp; import com.bigdata.counters.render.XHTMLRenderer; import com.bigdata.rawstore.Bytes; @@ -768,9 +773,9 @@ w.write("<th>evalOrder</th>"); // [0..n-1] if (clusterStats) { w.write("<th>evalContext</th>"); - w.write("<th>controller</th>"); } if (detailedStats) { + w.write("<th>controller</th>"); w.write("<th>bopId</th>"); w.write("<th>predId</th>"); } @@ -996,9 +1001,9 @@ w.write(TDx); if (clusterStats) { w.write(TD); w.write(TDx); // evalContext - w.write(TD); w.write(TDx); // controller? } if (detailedStats) { + w.write(TD); w.write(TDx); // controller w.write(TD); w.write("total"); // bopId w.write(TDx); @@ -1035,12 +1040,12 @@ w.write(TD); w.write(cdata(bop.getEvaluationContext().toString())); w.write(TDx); + } + if (detailedStats) { w.write(TD); w.write(cdata(bop.getProperty(BOp.Annotations.CONTROLLER, - BOp.Annotations.DEFAULT_CONTROLLER).toString())); + BOp.Annotations.DEFAULT_CONTROLLER).toString())); w.write(TDx); - } - if (detailedStats) { w.write(TD); w.write(Integer.toString(bopId)); w.write(TDx); @@ -1074,6 +1079,7 @@ } w.write(TDx); + // summary w.write(TD); if (pred != null) { w.write(cdata(pred.getClass().getSimpleName())); @@ -1153,7 +1159,18 @@ .getProperty(ChunkedMaterializationOp.Annotations.VARS); w.write(cdata(Arrays.toString(vars))); } - w.write(TDx); + if (bop instanceof JoinGraph) { + final Path p = ((JoinGraph) bop).getPath(q); + final Map<PathIds, EdgeSample> samples = ((JoinGraph) bop) + .getSamples(q); + if (p != null && samples != null) { + // Show the RTO discovered join path. + w.write("<pre>"); + w.write(cdata(JGraph.showPath(p, samples))); + w.write("</pre>"); + } + } + w.write(TDx); // end summary /* * Static optimizer metadata. @@ -1432,13 +1449,13 @@ } - private static String cdata(String s) { + private static String cdata(final String s) { return XHTMLRenderer.cdata(s); } - private static String attrib(String s) { + private static String attrib(final String s) { return XHTMLRenderer.attrib(s); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2013-12-30 12:27:34 UTC (rev 7695) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JGraph.java 2013-12-30 13:41:38 UTC (rev 7696) @@ -234,6 +234,7 @@ return Collections.unmodifiableList(Arrays.asList(V)); } + @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("JoinGraph"); @@ -354,19 +355,88 @@ * @throws IllegalArgumentException * if <i>nedges</i> is non-positive. * @throws Exception + */ + public Path runtimeOptimizer(final QueryEngine queryEngine, + final int limit, final int nedges) throws NoSolutionsException, + Exception { + + /* + * This map is used to associate join path segments (expressed as an + * ordered array of bopIds) with edge sample to avoid redundant effort. + * + * FIXME RTO: HEAP MANAGMENT : This map holds references to the cutoff + * join samples. To ensure that the map has the minimum heap footprint, + * it must be scanned each time we prune the set of active paths and any + * entry which is not a prefix of an active path should be removed. + * + * TODO RTO: MEMORY MANAGER : When an entry is cleared from this map, + * the corresponding allocation in the memory manager (if any) must be + * released. The life cycle of the map needs to be bracketed by a + * try/finally in order to ensure that all allocations associated with + * the map are released no later than when we leave the lexicon scope of + * that clause. + */ + final Map<PathIds, EdgeSample> edgeSamples = new LinkedHashMap<PathIds, EdgeSample>(); + + return runtimeOptimizer(queryEngine, limit, nedges, edgeSamples); + + } + + /** + * Find a good join path in the data given the join graph. The join path is + * not guaranteed to be the best join path (the search performed by the + * runtime optimizer is not exhaustive) but it should always be a "good" + * join path and may often be the "best" join path. * - * @todo It is possible that this could throw a {@link NoSolutionsException} - * if the cutoff joins do not use a large enough sample to find a join - * path which produces at least one solution (except that no solutions - * for an optional join do not cause the total to fail, nor do no - * solutions for some part of a UNION). + * @param queryEngine + * The query engine. + * @param limit + * The limit for sampling a vertex and the initial limit for + * cutoff join evaluation. + * @param nedges + * The edges in the join graph are sorted in order of increasing + * cardinality and up to <i>nedges</i> of the edges having the + * lowest cardinality are used to form the initial set of join + * paths. For each edge selected to form a join path, the + * starting vertex will be the vertex of that edge having the + * lower cardinality. + * @param sampleType + * Type safe enumeration indicating the algorithm which will be + * used to sample the initial vertices. + * @param edgeSamples + * A map that will be populated with the samples associated with + * each non-pruned join path. This map is used to associate join + * path segments (expressed as an ordered array of bopIds) with + * edge sample to avoid redundant effort. * - * TODO We need to automatically increase the depth of search for - * queries where we have cardinality estimation underflows or punt to - * another method to decide the join order. + * @return The join path identified by the runtime query optimizer as the + * best path given the join graph and the data. + * + * @throws NoSolutionsException + * If there are no solutions for the join graph in the data (the + * query does not have any results). + * @throws IllegalArgumentException + * if <i>queryEngine</i> is <code>null</code>. + * @throws IllegalArgumentException + * if <i>limit</i> is non-positive. + * @throws IllegalArgumentException + * if <i>nedges</i> is non-positive. + * @throws Exception + * + * TODO It is possible that this could throw a + * {@link NoSolutionsException} if the cutoff joins do not use a + * large enough sample to find a join path which produces at + * least one solution (except that no solutions for an optional + * join do not cause the total to fail, nor do no solutions for + * some part of a UNION). + * + * TODO We need to automatically increase the depth of search + * for queries where we have cardinality estimation underflows + * or punt to another method to decide the join order. */ public Path runtimeOptimizer(final QueryEngine queryEngine, - final int limit, final int nedges) + final int limit, final int nedges, + final Map<PathIds, EdgeSample> edgeSamples) throws Exception, NoSolutionsException { if (queryEngine == null) @@ -375,6 +445,8 @@ throw new IllegalArgumentException(); if (nedges <= 0) throw new IllegalArgumentException(); + if (edgeSamples == null) + throw new IllegalArgumentException(); // Setup the join graph. Path[] paths = round0(queryEngine, limit, nedges); @@ -396,24 +468,6 @@ int round = 1; - /* - * This map is used to associate join path segments (expressed as an - * ordered array of bopIds) with edge sample to avoid redundant effort. - * - * FIXME HEAP MANAGMENT : This map holds references to the cutoff join - * samples. To ensure that the map has the minimum heap footprint, it - * must be scanned each time we prune the set of active paths and any - * entry which is not a prefix of an active path should be removed. - * - * TODO MEMORY MANAGER : When an entry is cleared from this map, the - * corresponding allocation in the memory manager (if any) must be - * released. The life cycle of the map needs to be bracketed by a - * try/finally in order to ensure that all allocations associated with - * the map are released no later than when we leave the lexicon scope of - * that clause. - */ - final Map<PathIds, EdgeSample> edgeSamples = new LinkedHashMap<PathIds, EdgeSample>(); - while (paths.length > 0 && round < nvertices - 1) { /* @@ -1027,6 +1081,7 @@ continue; } + // FIXME RTO: Replace with StaticAnalysis. if (!PartitionedJoinGroup.canJoinUsingConstraints(// x.getPredicates(),// path tVertex.pred,// vertex @@ -1616,7 +1671,8 @@ * @param edgeSamples * A map containing the samples utilized by the {@link Path}. */ - static String showPath(final Path x, final Map<PathIds, EdgeSample> edgeSamples) { + static public String showPath(final Path x, + final Map<PathIds, EdgeSample> edgeSamples) { if (x == null) throw new IllegalArgumentException(); final StringBuilder sb = new StringBuilder(); @@ -1672,20 +1728,20 @@ predId,// NA, "", NA, NA, NA, NA, NA, NA, NA, NA, NA, "", NA, NA);//,NA,NA); } else if(sample instanceof VertexSample) { - /* - * Show the vertex sample for the initial vertex. - * - * Note: we do not store all fields for a vertex sample - * which are stored for an edge sample because so many of - * the values are redundant for a vertex sample. Therefore, - * this sets up local variables which are equivalent to the - * various edge sample columns that we will display. - */ - final long sumRangeCount = sample.estCard; - final long estRead = sample.estCard; - final long tuplesRead = Math.min(sample.estCard, sample.limit); - final long outputCount = Math.min(sample.estCard, sample.limit); - final long adjCard = Math.min(sample.estCard, sample.limit); + /* + * Show the vertex sample for the initial vertex. + * + * Note: we do not store all fields for a vertex sample + * which are stored for an edge sample because so many of + * the values are redundant for a vertex sample. Therefore, + * this sets up local variables which are equivalent to the + * various edge sample columns that we will display. + */ + final long sumRangeCount = sample.estCard; + final long estRead = sample.estCard; + final long tuplesRead = Math.min(sample.estCard, sample.limit); + final long outputCount = Math.min(sample.estCard, sample.limit); + final long adjCard = Math.min(sample.estCard, sample.limit); f.format("% 4d %10s%1s * %10s (%8s %8s %8s %8s %8s %8s) = % 10d % 10d%1s : %10d %10d",// %10d %10s",// predId,// " ",//srcSample.estCard Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2013-12-30 12:27:34 UTC (rev 7695) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/JoinGraph.java 2013-12-30 13:41:38 UTC (rev 7696) @@ -27,6 +27,7 @@ package com.bigdata.bop.joinGraph.rto; +import java.util.LinkedHashMap; import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; @@ -132,9 +133,32 @@ String SAMPLE_TYPE = JoinGraph.class.getName() + ".sampleType"; String DEFAULT_SAMPLE_TYPE = SampleType.RANDOM.name(); - + } + /** + * Query attribute names for the {@link JoinGraph}. The fully qualified name + * of the attribute is formed by appending the attribute name to the + * "bopId-", where <code>bopId</code> is the value returned by + * {@link BOp#getId()} + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + public interface Attributes { + + /** + * The join path selected by the RTO (output). + */ + String PATH = JoinGraph.class.getName() + ".path"; + + /** + * The samples associated with join path selected by the RTO (output). + */ + String SAMPLES = JoinGraph.class.getName() + ".samples"; + + } + /** * @see Annotations#SELECTED */ @@ -189,7 +213,55 @@ Annotations.DEFAULT_SAMPLE_TYPE)); } - + + /** + * Return the computed join path. + * + * @see Attributes#PATH + */ + public Path getPath(final IRunningQuery q) { + + return (Path) q.getAttributes().get(getId() + "-" + Attributes.PATH); + + } + + /** + * Return the samples associated with the computed join path. + * + * @see Annotations#SAMPLES + */ + @SuppressWarnings("unchecked") + public Map<PathIds, EdgeSample> getSamples(final IRunningQuery q) { + + return (Map<PathIds, EdgeSample>) q.getAttributes().get( + getId() + "-" + Attributes.SAMPLES); + + } + + private void setPath(final IRunningQuery q, final Path p) { + + q.getAttributes().put(getId() + "-" + Attributes.PATH, p); + + } + + private void setSamples(final IRunningQuery q, + final Map<PathIds, EdgeSample> samples) { + + q.getAttributes().put(getId() + "-" + Attributes.SAMPLES, samples); + + } + + /** + * Deep copy constructor. + * + * @param op + */ + public JoinGraph(final JoinGraph op) { + + super(op); + + } + public JoinGraph(final BOp[] args, final NV... anns) { this(args, NV.asMap(anns)); @@ -257,11 +329,11 @@ // private final JGraph g; - final private int limit; - - final private int nedges; - - private final SampleType sampleType; +// final private int limit; +// +// final private int nedges; +// +// final private SampleType sampleType; JoinGraphTask(final BOpContext<IBindingSet> context) { @@ -270,13 +342,13 @@ this.context = context; - // The initial cutoff sampling limit. - limit = getLimit(); - - // The initial number of edges (1 step paths) to explore. - nedges = getNEdges(); - - sampleType = getSampleType(); +// // The initial cutoff sampling limit. +// limit = getLimit(); +// +// // The initial number of edges (1 step paths) to explore. +// nedges = getNEdges(); +// +// sampleType = getSampleType(); // if (limit <= 0) // throw new IllegalArgumentException(); @@ -303,14 +375,38 @@ final long begin = System.nanoTime(); - // Create the join graph. + // Create the join graph. final JGraph g = new JGraph(getVertices(), getConstraints(), - sampleType); + getSampleType()); - // Find the best join path. - final Path p = g.runtimeOptimizer(context.getRunningQuery() - .getQueryEngine(), limit, nedges); + /* + * This map is used to associate join path segments (expressed as an + * ordered array of bopIds) with edge sample to avoid redundant effort. + * + * FIXME RTO: HEAP MANAGMENT : This map holds references to the cutoff + * join samples. To ensure that the map has the minimum heap footprint, + * it must be scanned each time we prune the set of active paths and any + * entry which is not a prefix of an active path should be removed. + * + * TODO RTO: MEMORY MANAGER : When an entry is cleared from this map, + * the corresponding allocation in the memory manager (if any) must be + * released. The life cycle of the map needs to be bracketed by a + * try/finally in order to ensure that all allocations associated with + * the map are released no later than when we leave the lexicon scope of + * that clause. + */ + final Map<PathIds, EdgeSample> edgeSamples = new LinkedHashMap<PathIds, EdgeSample>(); + // Find the best join path. + final Path p = g.runtimeOptimizer(context.getRunningQuery() + .getQueryEngine(), getLimit(), getNEdges(), edgeSamples); + + // Set attribute for the join path result. + setPath(context.getRunningQuery(), p); + + // Set attribute for the join path samples. + setSamples(context.getRunningQuery(), edgeSamples); + final long mark = System.nanoTime(); final long elapsed_queryOptimizer = mark - begin; Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2013-12-30 12:27:34 UTC (rev 7695) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/Path.java 2013-12-30 13:41:38 UTC (rev 7696) @@ -181,6 +181,7 @@ } + @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("Path{["); @@ -751,7 +752,7 @@ if (sourceSample.getSample() == null) throw new IllegalArgumentException(); - // Figure out which constraints attach to each predicate. + // Figure out which constraints attach to each predicate. FIXME RTO Replace with StaticAnalysis. final IConstraint[][] constraintAttachmentArray = PartitionedJoinGroup .getJoinGraphConstraints(path, constraints, null/*knownBound*/, pathIsComplete); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java 2013-12-30 12:27:34 UTC (rev 7695) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata/src/java/com/bigdata/bop/joinGraph/rto/SampleBase.java 2013-12-30 13:41:38 UTC (rev 7696) @@ -118,7 +118,7 @@ /** * Release the sampled solution set. * - * TODO MEMORY MANAGER : release. + * FIXME RTO : MEMORY MANAGER : release. */ void releaseSample() { @@ -183,6 +183,7 @@ // NOP } + @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append(getClass().getSimpleName()); Modified: branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java =================================================================== --- branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2013-12-30 12:27:34 UTC (rev 7695) +++ branches/BIGDATA_RELEASE_1_3_0/bigdata-rdf/src/java/com/bigdata/rdf/sparql/ast/eval/AST2BOpRTO.java 2013-12-30 13:41:38 UTC (rev 7696) @@ -50,9 +50,7 @@ import com.bigdata.rdf.sparql.ast.IGroupMemberNode; import com.bigdata.rdf.sparql.ast.JoinGroupNode; import com.bigdata.rdf.sparql.ast.QueryHints; -import com.bigdata.rdf.sparql.ast.QueryOptimizerEnum; import com.bigdata.rdf.sparql.ast.StatementPatternNode; -import com.bigdata.rdf.sparql.ast.StaticAnalysis; /** * Integration with the Runtime Optimizer (RTO). @@ -122,7 +120,10 @@ final StatementPatternNode sp = (StatementPatternNode) child; final boolean optional = sp.isOptional(); if(optional) { - // TODO Handle optional SPs in joinGraph. + /* + * TODO Handle optional SPs in joinGraph (by ordering them + * in the tail so as to minimize the cost function). + */ break; } @@ -165,8 +166,6 @@ // Something the RTO can handle. sps.add(sp); /* - * TODO Assign predId? - * * FIXME Handle Triples vs Quads, Default vs Named Graph, and * DataSet. This probably means pushing more logic down into * the RTO from AST2BOpJoins. @@ -237,7 +236,7 @@ new NV(BOp.Annotations.BOP_ID, ctx.nextId()),// new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// - new NV(BOp.Annotations.CONTROLLER, true),// TODO DROP the "CONTROLLER" annotation. The concept is not required. + new NV(BOp.Annotations.CONTROLLER, true),// Drop "CONTROLLER" annotation? // new NV(PipelineOp.Annotations.MAX_PARALLEL, 1),// // new NV(PipelineOp.Annotations.LAST_PASS, true),// required new NV(JoinGraph.Annotations.SELECTED, selectVars This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |