From: <tho...@us...> - 2010-11-15 18:13:15
Revision: 3949 http://bigdata.svn.sourceforge.net/bigdata/?rev=3949&view=rev Author: thompsonbry Date: 2010-11-15 18:13:06 +0000 (Mon, 15 Nov 2010) Log Message: ----------- More work on the runtime query optimizer. It now converges onto a single solution. Modified PipelineJoin to support a cutoff (LIMIT annotation). Modified PipelineJoin to accurately track the counters required to compute the join hit ratio. Modified PipelineJoin to quietly ignore empty binding set chunks. Modified the QueryEngine to track recently terminated queries in a 'doneQueries' LRU. This is used to detect events which arrive late for a recently terminated query. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-11-14 16:45:55 UTC (rev 3948) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-11-15 18:13:06 UTC (rev 3949) @@ -192,13 +192,16 @@ super(runningQuery.getFederation(), runningQuery.getIndexManager()); - this.runningQuery = runningQuery; if (stats == null) throw new IllegalArgumentException(); + if (source == null) throw new IllegalArgumentException(); + if (sink == null) throw new IllegalArgumentException(); + + this.runningQuery = runningQuery; this.partitionId = partitionId; this.stats = stats; this.source = source; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-11-14 16:45:55 UTC (rev 3948) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-11-15 18:13:06 UTC (rev 3949) @@ -68,6 +68,23 @@ String ALT_SINK_REF = PipelineOp.class.getName() + ".altSinkRef"; + /** + * The value reported by {@link PipelineOp#isSharedState()} (default + * {@value #DEFAULT_SHARED_STATE}). This may be overridden to + * <code>true</code> to have instances operators evaluated in the same + * query engine context share the same {@link BOpStats} instance. + * <p> + * Note: {@link BOp#getEvaluationContext()} MUST be overridden to return + * {@link BOpEvaluationContext#CONTROLLER} if this annotation is + * overridden to <code>true</code>. + * <p> + * When <code>true</code>, the {@link QueryEngine} will impose the + * necessary constraints when the operator is evaluated. 
+ */ + String SHARED_STATE = PipelineOp.class.getName() + ".sharedState"; + + boolean DEFAULT_SHARED_STATE = false; + } /** @@ -135,18 +152,14 @@ /** * Return <code>true</code> iff {@link #newStats()} must be shared across * all invocations of {@link #eval(BOpContext)} for this operator for a - * given query (default <code>false</code>). - * <p> - * Note: {@link BOp#getEvaluationContext()} MUST be overridden to return - * {@link BOpEvaluationContext#CONTROLLER} if this method is overridden to - * return <code>true</code>. - * <p> - * When <code>true</code>, the {@link QueryEngine} will impose the necessary - * constraints when the operator is evaluated. + * given query. + * + * @see Annotations#SHARED_STATE */ public boolean isSharedState() { - - return false; + + return getProperty(Annotations.SHARED_STATE, + Annotations.DEFAULT_SHARED_STATE); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-11-14 16:45:55 UTC (rev 3948) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-11-15 18:13:06 UTC (rev 3949) @@ -216,7 +216,7 @@ final long rangeCount = accessPath.rangeCount(false/* exact */); - if (limit > rangeCount) { + if (limit >= rangeCount) { /* * The sample will contain everything in the access path. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-14 16:45:55 UTC (rev 3948) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-15 18:13:06 UTC (rev 3949) @@ -50,21 +50,20 @@ import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpContextBase; import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IElement; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.Var; import com.bigdata.bop.ap.SampleIndex; import com.bigdata.bop.bindingSet.HashBindingSet; import com.bigdata.bop.engine.LocalChunkMessage; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.engine.RunningQuery; import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; +import com.bigdata.bop.rdf.join.DataSetJoin; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; @@ -91,6 +90,41 @@ * its costs. For example, by pruning the search, by recognizing when the * query is simple enough to execute directly, by recognizing when we have * already materialized the answer to the query, etc. + * + * @todo Cumulative estimated cardinality is an estimate of the work to be done. + * However, the actual cost of a join depends on whether we will use + * nested index subquery or a hash join and the cost of that operation on + * the database. There could be counter examples where the cost of the + * hash join with a range scan using the unbound variable is LT the nested + * index subquery. 
For those cases, we will do the same amount of IO on + * the hash join but there will still be a lower cardinality to the join + * path since we are feeding in fewer solutions to be joined. + * + * @todo Look at the integration with the SAIL. We decorate the joins with some + * annotations. Those will have to be correctly propagated to the "edges" + * in order for edge sampling and incremental evaluation (or final + * evaluation) to work. The {@link DataSetJoin} essentially inlines one of + * its access paths. That should really be changed into an inline access + * path and a normal join operator so we can defer some of the details + * concerning the join operator annotations until we decide on the join + * path to be executed. An inline AP really implies an inline relation, + * which in turn implies that the query is a searchable context for + * query-local resources. + * <p> + * For s/o, when the AP is remote, the join evaluation context must be ANY + * and otherwise (for s/o) it must be SHARDED. + * <p> + * Since the join graph is fed the vertices (APs), it does not have access + * to the annotated joins so we need to generated appropriately annotated + * joins when sampling an edge and when evaluation a subquery. + * + * @todo Examine behavior when we do not have perfect covering indices. This + * will mean that some vertices can not be sampled using an index and that + * estimation of their cardinality will have to await the estimation of + * the cardinality of the edge(s) leading to that vertex. Still, the + * approach should be able to handle queries without perfect / covering + * automatically. Then experiment with carrying fewer statement indices + * for quads. */ public class JoinGraph extends PipelineOp { @@ -170,10 +204,10 @@ } - /** - * Used to assign row identifiers. - */ - static private final IVariable<Integer> ROWID = Var.var("__rowid"); +// /** +// * Used to assign row identifiers. +// */ +// static private final IVariable<Integer> ROWID = Var.var("__rowid"); /** * A sample of a {@link Vertex} (an access path). @@ -301,7 +335,8 @@ /** * Take a sample of the vertex. If the sample is already exact, then - * this is a NOP. + * this is a NOP. If the vertex was already sampled to that limit, then + * this is a NOP (you have to raise the limit to re-sample the vertex). * * @param limit * The sample cutoff. @@ -327,6 +362,16 @@ } + if (oldSample != null && oldSample.limit >= limit) { + + /* + * The vertex was already sampled to this limit. + */ + + return; + + } + final BOpContextBase context = new BOpContextBase(queryEngine); final IRelation r = context.getRelation(pred); @@ -384,8 +429,8 @@ } - if (log.isInfoEnabled()) - log.info("Sampled: " + sample); + if (log.isTraceEnabled()) + log.trace("Sampled: " + sample); return; @@ -394,6 +439,46 @@ } /** + * Type safe enumeration describes the edge condition (if any) for a + * cardinality estimate. + */ + public static enum EstimateEnum { + /** + * An estimate, but not any of the edge conditions. + */ + Normal(" "), + /** + * The cardinality estimate is exact. + */ + Exact("E"), + /** + * The cardinality estimation is a lower bound (the actual cardinality + * may be higher than the estimated value). + */ + LowerBound("L"), + /** + * Flag is set when the cardinality estimate underflowed (false zero + * (0)). 
+ */ + Underflow("U"); + + private EstimateEnum(final String code) { + + this.code = code; + + } + + private final String code; + + public String getCode() { + + return code; + + } + + } // EstimateEnum + + /** * A sample of an {@link Edge} (a join). */ public static class EdgeSample { @@ -405,6 +490,13 @@ public final long rangeCount; /** + * <code>true</code> iff the source sample is exact (because the source + * is either a fully materialized vertex or an edge whose solutions have + * been fully materialized). + */ + public final boolean sourceSampleExact; + + /** * The limit used to sample the edge (this is the limit on the #of * solutions generated by the cutoff join used when this sample was * taken). @@ -438,49 +530,14 @@ public final long estimatedCardinality; /** - * Flag is set when the estimate is likely to be a lower bound for the - * cardinality of the edge. - * <p> - * If the {@link #inputCount} is ONE (1) and the {@link #outputCount} is - * the {@link #limit} then the {@link #estimatedCardinality} is a lower - * bound as more than {@link #outputCount} solutions could have been - * produced by the join against a single input solution. - */ - public final boolean estimateIsLowerBound; - - /** - * Flag indicates that the {@link #estimatedCardinality} underflowed. - * <p> - * Note: When the source vertex sample was not exact, then it is - * possible for the cardinality estimate to underflow. When, in - * addition, {@link #outputCount} is LT {@link #limit}, then feeding the - * sample of source tuples in is not sufficient to generated the desired - * #of output tuples. In this case, {@link #f join hit ratio} will be - * low. It may even be that zero output tuples were generated, in which - * case the join hit ratio will appear to be zero. However, the join hit - * ratio actually underflowed and an apparent join hit ratio of zero - * does not imply that the join will be empty unless the source vertex - * sample is actually the fully materialized access path - see - * {@link VertexSample#exact} and {@link #exact}. - */ - public final boolean estimateIsUpperBound; - - /** - * <code>true</code> if the sample is the exact solution for the join - * path. - * <p> - * Note: If the entire source vertex is being feed into the sample, - * {@link VertexSample#exact} flags this condition, and outputCount is - * also LT the limit, then the edge sample is the actual result of the - * join. That is, feeding all source tuples into the join gives fewer - * than the desired number of output tuples. + * Indicates whether the estimate is exact, an upper bound, or a lower + * bound. * - * TODO This field marks this condition and should be used to avoid - * needless re-computation of a join whose exact solution is already - * known. + * TODO This field should be used to avoid needless re-computation of a + * join whose exact solution is already known. */ - public final boolean exact; - + public final EstimateEnum estimateEnum; + /** * The sample of the solutions for the join path. 
*/ @@ -504,9 +561,12 @@ */ EdgeSample( // final VertexSample sourceVertexSample, - final long sourceSampleRangeCount, - final boolean sourceSampleExact, final int limit, - final int inputCount, final int outputCount, + final long sourceSampleRangeCount,// + final boolean sourceSampleExact, // + final int sourceSampleLimit,// + final int limit,// + final int inputCount, // + final int outputCount,// final IBindingSet[] sample) { if (sample == null) @@ -514,6 +574,8 @@ // this.rangeCount = sourceVertexSample.rangeCount; this.rangeCount = sourceSampleRangeCount; + + this.sourceSampleExact = sourceSampleExact; this.limit = limit; @@ -525,24 +587,64 @@ estimatedCardinality = (long) (rangeCount * f); - estimateIsLowerBound = inputCount == 1 && outputCount == limit; + if (sourceSampleExact && outputCount < limit) { + /* + * Note: If the entire source vertex is being fed into the + * cutoff join and the cutoff join outputCount is LT the limit, + * then the sample is the actual result of the join. That is, + * feeding all source solutions into the join gives fewer than + * the desired number of output solutions. + */ + estimateEnum = EstimateEnum.Exact; + } else if (inputCount == 1 && outputCount == limit) { + /* + * If the inputCount is ONE (1) and the outputCount is the + * limit, then the estimated cardinality is a lower bound as + * more than outputCount solutions might be produced by the join + * when presented with a single input solution. + */ + estimateEnum = EstimateEnum.LowerBound; + } else if (!sourceSampleExact + && inputCount == Math.min(sourceSampleLimit, rangeCount) + && outputCount == 0) { + /* + * When the source sample was not exact, the inputCount is EQ to + * the lesser of the source range count and the source sample + * limit, and the outputCount is ZERO (0), then feeding in all + * source solutions in is not sufficient to generate any output + * solutions. In this case, the estimated join hit ratio appears + * to be zero. However, the estimation of the join hit ratio + * actually underflowed and the real join hit ratio might be a + * small non-negative value. A real zero can only be identified + * by executing the full join. + * + * Note: An apparent join hit ratio of zero does NOT imply that + * the join will be empty (unless the source vertex sample is + * actually the fully materialized access path - this case is + * covered above). 
+ */ + estimateEnum = EstimateEnum.Underflow; + } else { + estimateEnum = EstimateEnum.Normal; + } - // final boolean sourceSampleExact = sourceVertexSample.exact; - estimateIsUpperBound = !sourceSampleExact && outputCount < limit; - - this.exact = sourceSampleExact && outputCount < limit; - this.sample = sample; } public String toString() { - return getClass().getName() + "{inputRangeCount=" + rangeCount - + ", limit=" + limit + ", inputCount=" + inputCount - + ", outputCount=" + outputCount + ", f=" + f - + ", estimatedCardinality=" + estimatedCardinality - + ", estimateIsLowerBound=" + estimateIsLowerBound - + ", estimateIsUpperBound=" + estimateIsUpperBound - + ", sampleIsExactSolution=" + exact + "}"; + return getClass().getName() // + + "{ rangeCount=" + rangeCount// + + ", sourceSampleExact=" + sourceSampleExact// + + ", limit=" + limit // + + ", inputCount=" + inputCount// + + ", outputCount=" + outputCount // + + ", f=" + f// + + ", estimatedCardinality=" + estimatedCardinality// + + ", estimateEnum=" + estimateEnum// +// + ", estimateIsLowerBound=" + estimateIsLowerBound// +// + ", estimateIsUpperBound=" + estimateIsUpperBound// +// + ", sampleIsExactSolution=" + estimateIsExact // + + "}"; } }; @@ -703,6 +805,14 @@ throw new IllegalArgumentException(); /* + * Note: There is never a need to "re-sample" the edge. Unlike ROX, + * we always can sample a vertex. This means that we can sample the + * edges exactly once, during the initialization of the join graph. + */ + if (sample != null) + throw new RuntimeException(); + + /* * Figure out which vertex has the smaller cardinality. The sample * of that vertex is used since it is more representative than the * sample of the other vertex. @@ -722,7 +832,7 @@ } /* - * TODO This is difficult to setup because we do not have a concept + * TODO This is awkward to setup because we do not have a concept * (or class) corresponding to a fly weight relation and we do not * have a general purpose relation, just arrays or sequences of * IBindingSets. Also, all relations are persistent. Temporary @@ -740,10 +850,6 @@ * Together, this means that we are dealing with IBindingSet[]s for * both the input and the output of the cutoff evaluation of the * edge rather than rows of the materialized relation. - * - * TODO On subsequent iterations we would probably re-sample [v] and - * we would run against the materialized intermediate result for - * [v']. */ /* @@ -763,7 +869,8 @@ // Sample the edge and save the sample on the edge as a side-effect. this.sample = estimateCardinality(queryEngine, limit, v, vp, - v.sample.rangeCount, v.sample.exact, sourceSample); + v.sample.rangeCount, v.sample.exact, v.sample.limit, + sourceSample); return sample.estimatedCardinality; @@ -793,17 +900,28 @@ public EdgeSample estimateCardinality(final QueryEngine queryEngine, final int limit, final Vertex vSource, final Vertex vTarget, final long sourceSampleRangeCount, - final boolean sourceSampleExact, IBindingSet[] sourceSample) + final boolean sourceSampleExact, + final int sourceSampleLimit, + final IBindingSet[] sourceSample) throws Exception { if (limit <= 0) throw new IllegalArgumentException(); - // Inject a rowId column. - sourceSample = BOpUtility.injectRowIdColumn(ROWID, 1/* start */, - sourceSample); +// // Inject a rowId column. 
+// sourceSample = BOpUtility.injectRowIdColumn(ROWID, 1/* start */, +// sourceSample); /* + * Note: This sets up a cutoff pipeline join operator which makes an + * accurate estimate of the #of input solutions consumed and the #of + * output solutions generated. From that, we can directly compute + * the join hit ratio. This approach is preferred to injecting a + * "RowId" column as the estimates are taken based on internal + * counters in the join operator and the join operator knows how to + * cutoff evaluation as soon as the limit is satisfied, thus + * avoiding unnecessary effort. + * * TODO Any constraints on the edge (other than those implied by * shared variables) need to be annotated on the join. Constraints * (other than range constraints which are directly coded by the @@ -811,31 +929,61 @@ * they can reduce the cardinality of the join and that is what we * are trying to estimate here. */ + final int joinId = 1; final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, // - new NV(BOp.Annotations.BOP_ID, 1),// - new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred - .setBOpId(3))); + new NV(BOp.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred + .setBOpId(3)), + // disallow parallel evaluation. + new NV(PipelineJoin.Annotations.MAX_PARALLEL,0), + // disable access path coalescing + new NV(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,false), + // cutoff join. + new NV(PipelineJoin.Annotations.LIMIT,(long)limit), + /* + * Note: In order to have an accurate estimate of the join + * hit ratio we need to make sure that the join operator + * runs using a single PipelineJoinStats instance which will + * be visible to us when the query is cutoff. In turn, this + * implies that the join must be evaluated on the query + * controller. + * + * @todo This implies that sampling of scale-out joins must + * be done using remote access paths. + */ + new NV(PipelineJoin.Annotations.SHARED_STATE,true), + new NV(PipelineJoin.Annotations.EVALUATION_CONTEXT,BOpEvaluationContext.CONTROLLER) +// // make sure the chunks are large enough to hold the result. +// new NV(PipelineJoin.Annotations.CHUNK_CAPACITY,limit), +// // no chunk timeout +// new NV(PipelineJoin.Annotations.CHUNK_TIMEOUT,Long.MAX_VALUE) + ); - final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp },// - NV.asMap(// - new NV(BOp.Annotations.BOP_ID, 2), // - new NV(SliceOp.Annotations.LIMIT, (long) limit), // - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER))); +// BOpContext context = new BOpContext(runningQuery, partitionId, stats, source, sink, sink2); +// joinOp.eval(context); + +// final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp },// +// NV.asMap(// +// new NV(BOp.Annotations.BOP_ID, 2), // +// new NV(SliceOp.Annotations.LIMIT, (long) limit), // +// new NV(BOp.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER))); + final PipelineOp queryOp = joinOp; + // run the cutoff sampling of the edge. final UUID queryId = UUID.randomUUID(); final RunningQuery runningQuery = queryEngine.eval(queryId, - sliceOp, new LocalChunkMessage<IBindingSet>(queryEngine, + queryOp, new LocalChunkMessage<IBindingSet>(queryEngine, queryId, joinOp.getId()/* startId */, -1 /* partitionId */, new ThickAsynchronousIterator<IBindingSet[]>( new IBindingSet[][] { sourceSample }))); - // #of source samples consumed. - int inputCount = 0; - // #of output samples generated. - int outputCount = 0; +// // #of source samples consumed. 
+// int inputCount; +// // #of output samples generated. +// int outputCount = 0; final List<IBindingSet> result = new LinkedList<IBindingSet>(); try { try { @@ -845,20 +993,30 @@ runningQuery.iterator()); while (itr.hasNext()) { bset = itr.next(); +// final int rowid = (Integer) bset.get(ROWID).get(); +// if (rowid > inputCount) +// inputCount = rowid; result.add(bset); - outputCount++; +// outputCount++; } - // #of input rows consumed. - inputCount = bset == null ? 0 : ((Integer) bset.get(ROWID) - .get()); +// // #of input rows consumed. +// inputCount = bset == null ? 0 : ((Integer) bset.get(ROWID) +// .get()); } finally { - // verify no problems. FIXME Restore test of the query. - // runningQuery.get(); + // verify no problems. + runningQuery.get(); } } finally { runningQuery.cancel(true/* mayInterruptIfRunning */); } + // The join hit ratio can be computed directly from these stats. + final PipelineJoinStats joinStats = (PipelineJoinStats) runningQuery + .getStats().get(joinId); + + if (log.isDebugEnabled()) + log.debug(joinStats.toString()); + /* * TODO Improve comments here. See if it is possible to isolate a * common base class which would simplify the setup of the cutoff @@ -866,12 +1024,16 @@ */ final EdgeSample edgeSample = new EdgeSample( - sourceSampleRangeCount, sourceSampleExact, limit, - inputCount, outputCount, result - .toArray(new IBindingSet[result.size()])); + sourceSampleRangeCount, // + sourceSampleExact, // @todo redundant with sourceSampleLimit + sourceSampleLimit, // + limit, // + (int) joinStats.inputSolutions.get(),// + (int) joinStats.outputSolutions.get(), // + result.toArray(new IBindingSet[result.size()])); - if (log.isInfoEnabled()) - log.info("edge=" + this + ", sample=" + edgeSample); + if (log.isTraceEnabled()) + log.trace("edge=" + this + ", sample=" + edgeSample); return edgeSample; @@ -892,12 +1054,14 @@ /** * The sample obtained by the step-wise cutoff evaluation of the ordered - * edges of the path. This sample is generated one edge at a time rather - * than by attempting the cutoff evaluation of the entire join path (the - * latter approach does allow us to limit the amount of work to be done - * to satisfy the cutoff). + * edges of the path. + * <p> + * Note: This sample is generated one edge at a time rather than by + * attempting the cutoff evaluation of the entire join path (the latter + * approach does allow us to limit the amount of work to be done to + * satisfy the cutoff). */ - final public EdgeSample sample; + public EdgeSample sample; /** * The cumulative estimated cardinality of the path. This is zero for an @@ -1012,23 +1176,84 @@ return false; } +// /** +// * Return <code>true</code> if this path is an unordered super set of +// * the given path. In the case where both paths have the same vertices +// * this will also return <code>true</code>. +// * +// * @param p +// * Another path. +// * +// * @return <code>true</code> if this path is an unordered super set of +// * the given path. +// */ +// public boolean isUnorderedSuperSet(final Path p) { +// +// if (p == null) +// throw new IllegalArgumentException(); +// +// if (edges.size() < p.edges.size()) { +// /* +// * Fast rejection. This assumes that each edge after the first +// * adds one distinct vertex to the path. That assumption is +// * enforced by #addEdge(). +// */ +// return false; +// } +// +// final Vertex[] v1 = getVertices(); +// final Vertex[] v2 = p.getVertices(); +// +// if (v1.length < v2.length) { +// // Proven false since the other set is larger. 
+// return false; +// } +// +// /* +// * Scan the vertices of the caller's path. If any of those vertices +// * are NOT found in this path then the caller's path can not be a +// * subset of this path. +// */ +// for (int i = 0; i < v2.length; i++) { +// +// final Vertex tmp = v2[i]; +// +// boolean found = false; +// for (int j = 0; j < v1.length; j++) { +// +// if (v1[j] == tmp) { +// found = true; +// break; +// } +// +// } +// +// if (!found) { +// return false; +// } +// +// } +// +// return true; +// +// } + /** - * Return <code>true</code> if this path is an unordered super set of - * the given path. In the case where both paths have the same vertices - * this will also return <code>true</code>. + * Return <code>true</code> if this path is an unordered variant of the + * given path (same vertices in any order). * * @param p * Another path. * - * @return <code>true</code> if this path is an unordered super set of - * the given path. + * @return <code>true</code> if this path is an unordered variant of the + * given path. */ - public boolean isUnorderedSuperSet(final Path p) { + public boolean isUnorderedVariant(final Path p) { if (p == null) throw new IllegalArgumentException(); - if (edges.size() < p.edges.size()) { + if (edges.size() != p.edges.size()) { /* * Fast rejection. This assumes that each edge after the first * adds one distinct vertex to the path. That assumption is @@ -1040,15 +1265,17 @@ final Vertex[] v1 = getVertices(); final Vertex[] v2 = p.getVertices(); - if (v1.length < v2.length) { - // Proven false since the other set is larger. + if (v1.length != v2.length) { + + // Reject (this case is also covered by the test above). return false; + } /* * Scan the vertices of the caller's path. If any of those vertices - * are NOT found in this path then the caller's path can not be a - * subset of this path. + * are NOT found in this path the paths are not unordered variations + * of one aother. */ for (int i = 0; i < v2.length; i++) { @@ -1170,24 +1397,24 @@ * the new join path we have to do a one step cutoff evaluation of * the new Edge, given the sample available on the current Path. * - * TODO It is possible for the path sample to be empty. Unless the + * FIXME It is possible for the path sample to be empty. Unless the * sample also happens to be exact, this is an indication that the - * estimated cardinality has underflowed. How are we going to deal - * with this situation?!? What would appear to matter is the amount - * of work being performed by the join in achieving that low - * cardinality. If we have to do a lot of work to get a small - * cardinality then we should prefer join paths which achieve the - * same reduction in cardinality with less 'intermediate - * cardinality' - that is, by examining fewer possible solutions. - * [In fact, the estimated (cumulative) cardinality might not be a - * good reflection of the IOs to be done -- this needs more - * thought.] + * estimated cardinality has underflowed. We track the estimated + * cumulative cardinality, so this does not make the join path an + * immediate winner, but it does mean that we can not probe further + * on that join path as we lack any intermediate solutions to feed + * into the downstream joins. [If we re-sampled the edges in the + * join path in each round then this would help to establish a + * better estimate in successive rounds.] 
*/ final EdgeSample edgeSample = e.estimateCardinality(queryEngine, limit, sourceVertex, targetVertex, - this.sample.estimatedCardinality, this.sample.exact, - this.sample.sample); + this.sample.estimatedCardinality, + this.sample.estimateEnum == EstimateEnum.Exact, + this.sample.limit,// + this.sample.sample// + ); { @@ -1275,23 +1502,77 @@ * @return A table with that data. */ static public String showTable(final Path[] a) { + + return showTable(a, null/* pruned */); + + } + + /** + * Comma delimited table showing the estimated join hit ratio, the estimated + * cardinality, and the set of vertices for each of the specified join + * paths. + * + * @param a + * A set of paths (typically those before pruning). + * @param pruned + * The set of paths after pruning (those which were retained) + * (optional). When given, the paths which were pruned are marked + * in the table. + * + * @return A table with that data. + */ + static public String showTable(final Path[] a,final Path[] pruned) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); + f.format("%5s %10s%1s * %7s (%3s/%3s) = %10s%1s : %10s %10s", + "path",// + "rangeCount",// + "",// sourceSampleExact + "f",// + "out",// + "in",// + "estCard",// + "",// estimateIs(Exact|LowerBound|UpperBound) + "sumEstCard",// + "joinPath\n" + ); for (int i = 0; i < a.length; i++) { final Path x = a[i]; + // true iff the path survived pruning. + Boolean prune = null; + if (pruned != null) { + prune = Boolean.TRUE; + for (Path y : pruned) { + if (y == x) { + prune = Boolean.FALSE; + break; + } + } + } if (x.sample == null) { - f.format("p[%2d] %7s, %10s %10s", "N/A", "N/A", "N/A", i); + f.format("p[%2d] %10d%1s * %7s (%3s/%3s) = %10s%1s : %10s", i, "N/A", "", "N/A", "N/A", "N/A", "N/A", "", "N/A"); } else { - f.format("p[%2d] % 7.2f, % 10d % 10d", i, x.sample.f, - x.sample.estimatedCardinality, - x.cumulativeEstimatedCardinality); + f.format("p[%2d] %10d%1s * % 7.2f (%3d/%3d) = % 10d%1s : % 10d", i, + x.sample.rangeCount,// + x.sample.sourceSampleExact?"E":"",// + x.sample.f,// + x.sample.outputCount,// + x.sample.inputCount,// + x.sample.estimatedCardinality,// + x.sample.estimateEnum.getCode(),// + x.cumulativeEstimatedCardinality// + ); } - sb.append(", ["); + sb.append(" ["); final Vertex[] vertices = x.getVertices(); for (Vertex v : vertices) { f.format("%2d ", v.pred.getId()); } sb.append("]"); + if (pruned != null) { + if (prune) + sb.append(" pruned"); + } // for (Edge e : x.edges) // sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() // + ")"); @@ -1326,16 +1607,6 @@ * the timeout should be used to protect against join paths which take a * long time to materialize <i>cutoff</i> solutions rather than to fine tune * the running time of the query optimizer. - * - * TODO Runtime query optimization is probably useless (or else should rely - * on materialization of intermediate results) when the cardinality of the - * vertices and edges for the query is small. This would let us balance the - * design characteristics of MonetDB and bigdata. For this purpose, we need - * to flag when a {@link VertexSample} is complete (e.g., the cutoff is GTE - * the actual range count). This also needs to be done for each join path so - * we can decide when the sample for the path is in fact the exact solution - * rather than an estimate of the cardinality of the solution together with - * a sample of the solution. 
*/ public static class JGraph { @@ -1432,8 +1703,7 @@ * @param queryEngine * @param limit * The limit for sampling a vertex and the initial limit for - * cutoff join evaluation. A reasonable value is - * <code>100</code>. + * cutoff join evaluation. * * @throws Exception */ @@ -1474,7 +1744,7 @@ * @todo When executing the query, it is actually being executed as * a subquery. Therefore we have to take appropriate care to ensure * that the results are copied out of the subquery and into the - * parent query. + * parent query. See SubqueryTask for how this is done. * * @todo When we execute the query, we should clear the references * to the sample (unless they are exact, in which case they can be @@ -1588,7 +1858,7 @@ * * @param queryEngine * The query engine. - * @param limit + * @param limitIn * The limit (this is automatically multiplied by the round * to increase the sample size in each round). * @param round @@ -1602,12 +1872,12 @@ * * @throws Exception */ - public Path[] expand(final QueryEngine queryEngine, int limit, + public Path[] expand(final QueryEngine queryEngine, int limitIn, final int round, final Path[] a) throws Exception { if (queryEngine == null) throw new IllegalArgumentException(); - if (limit <= 0) + if (limitIn <= 0) throw new IllegalArgumentException(); if (round <= 0) throw new IllegalArgumentException(); @@ -1617,7 +1887,7 @@ throw new IllegalArgumentException(); // increment the limit by itself in each round. - limit *= round; + final int limit = round * limitIn; final List<Path> tmp = new LinkedList<Path>(); @@ -1628,15 +1898,41 @@ // Vertices are inserted into this collection when they are resampled. final Set<Vertex> resampled = new LinkedHashSet<Vertex>(); - + // Then expand each path. for (Path x : a) { - if (x.edges.size() < round) { + final int nedges = x.edges.size(); + + if (nedges < round) { + // Path is from a previous round. continue; + } + /* + * The only way to increase the accuracy of our estimates for + * edges as we extend the join paths is to re-sample each edge + * in the join path in path order. + * + * Note: An edge must be sampled for each distinct join path + * prefix in which it appears within each round. However, it is + * common for surviving paths to share a join path prefix, so do + * not re-sample a given path prefix more than once per round. + * Also, do not re-sample paths which are from rounds before the + * immediately previous round as those paths will not be + * extended in this round. + * + * FIXME Find all vertices in use by all paths which survived + * into this round. Re-sample those vertices to the new limit + * (resampling a vertex is a NOP if it has been resampled to the + * desired limit so we can do this incrementally rather than up + * front). For each edge of each path in path order, re-sample + * the edge. Shared prefix samples should be reused, but samples + * of the same edge with a different prefix must not be shared. + */ + // The set of vertices used to expand this path in this round. final Set<Vertex> used = new LinkedHashSet<Vertex>(); @@ -1657,28 +1953,32 @@ continue; } - final Vertex newVertex = v1Found ? edgeInGraph.v2 + // the target vertex for the new edge. + final Vertex tVertex = v1Found ? edgeInGraph.v2 : edgeInGraph.v1; - if (used.contains(newVertex)) { +// // the source vertex for the new edge. +// final Vertex sVertex = v1Found ? edgeInGraph.v1 +// : edgeInGraph.v2; + + if (used.contains(tVertex)) { // Vertex already used to extend this path. 
continue; } // add the new vertex to the set of used vertices. - used.add(newVertex); + used.add(tVertex); - if (!resampled.add(newVertex)&&round>1) { + if (resampled.add(tVertex)) { /* - * Resample this vertex before we sample a new edge + * (Re-)sample this vertex before we sample a new edge * which targets this vertex. */ - newVertex.sample(queryEngine, limit); + tVertex.sample(queryEngine, limit); } // Extend the path to the new vertex. - final Path p = x.addEdge(queryEngine, limit, - edgeInGraph); + final Path p = x.addEdge(queryEngine, limit, edgeInGraph); // Add to the set of paths for this round. tmp.add(p); @@ -1689,17 +1989,18 @@ final Path[] paths_tp1 = tmp.toArray(new Path[tmp.size()]); + final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1); + if (log.isDebugEnabled()) - log.debug("\n*** round=" + round + " : generated paths\n" - + JoinGraph.showTable(paths_tp1)); + log.debug("\n*** round=" + round + ", limit=" + limit + + " : generated paths\n" + + JoinGraph.showTable(paths_tp1, paths_tp1_pruned)); - final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1); - if (log.isInfoEnabled()) - log.info("\n*** round=" + round + ": paths{in=" + a.length - + ",considered=" + paths_tp1.length + ",out=" - + paths_tp1_pruned.length + "}\n" - + JoinGraph.showTable(paths_tp1_pruned)); + log.info("\n*** round=" + round + ", limit=" + limit + + ": paths{in=" + a.length + ",considered=" + + paths_tp1.length + ",out=" + paths_tp1_pruned.length + + "}\n" + JoinGraph.showTable(paths_tp1_pruned)); return paths_tp1_pruned; @@ -1919,52 +2220,34 @@ } /** - * Prune paths which are dominated by other paths. Start the algorithm - * by passing in all edges which have the minimum cardinality (when - * comparing their expected cardinality after rounding to 2 significant - * digits). + * Prune paths which are dominated by other paths. Paths are extended in + * each round. Paths from previous rounds are always pruned. Of the new + * paths in each round, the following rule is applied to prune the + * search to just those paths which are known to dominate the other + * paths covering the same set of vertices: * <p> - * If there is a path [p] whose total cost is LTE the cost of executing - * just its last edge [e], then the path [p] dominates all paths - * beginning with edge [e]. The dominated paths should be pruned. [This - * is a degenerate case of the next rule.] - * <p> - * If there is a path, [p] != [p1], where [p] is an unordered superset - * of [p1] (that is the vertices of p are a superset of the vertices of - * p1, but allowing the special case where the set of vertices are the - * same), and the cumulative cost of [p] is LTE the cumulative cost of - * [p1], then [p] dominates (or is equivalent to) [p1] and p1 should be + * If there is a path, [p] != [p1], where [p] is an unordered variant of + * [p1] (that is the vertices of p are the same as the vertices of p1), + * and the cumulative cost of [p] is LTE the cumulative cost of [p1], + * then [p] dominates (or is equivalent to) [p1] and p1 should be * pruned. - * <p> - * If there is a path, [p], which has the same vertices as a path [p1] - * and the cumulative cost of [p] is LTE the cumulative cost of [p1], - * then [p] dominates (or is equivalent to) [p1]. The path [p1] should - * be pruned. [This is a degenerate case of the prior rule.] * * @param a * A set of paths. * * @return The set of paths with all dominated paths removed. - * - * FIXME This does not give us a stopping condition unless the - * set of paths becomes empty. 
I think it will tend to search - * too far for a best path, running the risk of increasing - * inaccuracy introduced by propagation of samples. Resampling - * the vertices and increasing the vertex and edge cutoff at - * each iteration of the search could compensate for that. - * - * TODO Cumulative estimated cardinality is an estimate of the - * work to be done. However, the actual cost of a join depends - * on whether we will use nested index subquery or a hash join - * and the cost of that operation on the database. There could - * be counter examples where the cost of the hash join with a - * range scan using the unbound variable is LT the nested index - * subquery. For those cases, we will do the same amount of IO - * on the hash join but there will still be a lower cardinality - * to the join path since we are feeding in fewer solutions to - * be joined. */ public Path[] pruneJoinPaths(final Path[] a) { + /* + * Find the length of the longest path(s). All shorter paths are + * dropped in each round. + */ + int maxPathLen = 0; + for(Path p : a) { + if(p.edges.size()>maxPathLen) { + maxPathLen = p.edges.size(); + } + } final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); final Set<Path> pruned = new LinkedHashSet<Path>(); @@ -1972,6 +2255,14 @@ final Path Pi = a[i]; if (Pi.sample == null) throw new RuntimeException("Not sampled: " + Pi); + if (Pi.edges.size() < maxPathLen) { + /* + * Only the most recently generated set of paths survive to + * the next round. + */ + pruned.add(Pi); + continue; + } if (pruned.contains(Pi)) continue; for (int j = 0; j < a.length; j++) { @@ -1982,7 +2273,7 @@ throw new RuntimeException("Not sampled: " + Pj); if (pruned.contains(Pj)) continue; - final boolean isPiSuperSet = Pi.isUnorderedSuperSet(Pj); + final boolean isPiSuperSet = Pi.isUnorderedVariant(Pj); if (!isPiSuperSet) { // Can not directly compare these join paths. continue; @@ -2071,63 +2362,23 @@ } - // /** - // * Return <code>true</code> iff there exists at least one {@link Edge} - // * branching from a vertex NOT found in the set of vertices which have - // * visited. - // * - // * @param visited - // * A set of vertices. - // * - // * @return <code>true</code> if there are more edges to explore. - // */ - // private boolean moreEdgesToVisit(final Set<Vertex> visited) { - // - // // Consider all edges. - // for(Edge e : E) { - // - // if (visited.contains(e.v1) && visited.contains(e.v2)) { - // /* - // * Since both vertices for this edge have been executed the - // * edge is now redundant. Either it was explicitly executed - // * or another join path was used which implies the edge by - // * transitivity in the join graph. - // */ - // continue; - // } - // - // /* - // * We found a counter example (an edge which has not been - // * explored). - // */ - // if (log.isTraceEnabled()) - // log.trace("Edge has not been explored: " + e); - // - // return true; - // - // } - // - // // No more edges to explore. - // return false; - // - // } - } - private static double roundToSignificantFigures(final double num, - final int n) { - if (num == 0) { - return 0; - } +// @todo Could be used to appropriately ignore false precision in cardinality estimates. +// private static double roundToSignificantFigures(final double num, +// final int n) { +// if (num == 0) { +// return 0; +// } +// +// final double d = Math.ceil(Math.log10(num < 0 ? 
-num : num)); +// final int power = n - (int) d; +// +// final double magnitude = Math.pow(10, power); +// final long shifted = Math.round(num * magnitude); +// return shifted / magnitude; +// } - final double d = Math.ceil(Math.log10(num < 0 ? -num : num)); - final int power = n - (int) d; - - final double magnitude = Math.pow(10, power); - final long shifted = Math.round(num * magnitude); - return shifted / magnitude; - } - /** * Places vertices into order by the {@link BOp#getId()} associated with * their {@link IPredicate}. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-11-14 16:45:55 UTC (rev 3948) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-11-15 18:13:06 UTC (rev 3949) @@ -29,9 +29,12 @@ import java.rmi.RemoteException; import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.Map; import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -39,6 +42,8 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.PriorityBlockingQueue; import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; import org.apache.log4j.Logger; @@ -298,22 +303,68 @@ } /** + * Lock used to guard register / halt of a query. + */ + private final ReentrantLock lock = new ReentrantLock(); + + /** + * Signaled when no queries are running. + */ + private final Condition nothingRunning = lock.newCondition(); + + /** * The currently executing queries. */ - final protected ConcurrentHashMap<UUID/* queryId */, RunningQuery> runningQueries = new ConcurrentHashMap<UUID, RunningQuery>(); + final private ConcurrentHashMap<UUID/* queryId */, RunningQuery> runningQueries = new ConcurrentHashMap<UUID, RunningQuery>(); - /** - * A queue of {@link RunningQuery}s having binding set chunks available for - * consumption. - * - * @todo Be careful when testing out a {@link PriorityBlockingQueue} here. - * First, that collection is intrinsically bounded (it is backed by an - * array) so it will BLOCK under heavy load and could be expected to - * have some resize costs if the queue size becomes too large. Second, - * either {@link RunningQuery} needs to implement an appropriate - * {@link Comparator} or we need to pass one into the constructor for - * the queue. - */ + /** + * LRU cache used to handle problems with asynchronous termination of + * running queries. + * <p> + * Note: Holding onto the query references here might pin memory retained by + * those queries. However, all we really need is the Haltable (Future) of + * that query in this map. + * + * @todo This should not be much of a hot spot even though it is not thread + * safe but the synchronized() call could force cache stalls anyway. A + * concurrent hash map with an approximate LRU access policy might be + * a better choice. + * + * @todo The maximum cache capacity here is a SWAG. It should be large + * enough that we can not have a false cache miss on a system which is + * heavily loaded by a bunch of light queries. 
+ */ + private LinkedHashMap<UUID, Future<Void>> doneQueries = new LinkedHashMap<UUID,Future<Void>>( + 16/* initialCapacity */, .75f/* loadFactor */, true/* accessOrder */) { + + private static final long serialVersionUID = 1L; + + @Override + protected boolean removeEldestEntry(Map.Entry<UUID, Future<Void>> eldest) { + + return size() > 100/* maximumCacheCapacity */; + + } + }; + + /** + * A queue of {@link RunningQuery}s having binding set chunks available for + * consumption. + * + * @todo Handle priority for selective queries based on the time remaining + * until the timeout. + * <p> + * Handle priority for unselective queries based on the order in which + * they are submitted? + * <p> + * Be careful when testing out a {@link PriorityBlockingQueue} here. + * First, that collection is intrinsically bounded (it is backed by an + * array) so it will BLOCK under heavy load and could be expected to + * have some resize costs if the queue size becomes too large. Second, + * either {@link RunningQuery} needs to implement an appropriate + * {@link Comparator} or we need to pass one into the constructor for + * the queue. + */ final private BlockingQueue<RunningQuery> priorityQueue = new LinkedBlockingQueue<RunningQuery>(); // final private BlockingQueue<RunningQuery> priorityQueue = new PriorityBlockingQueue<RunningQuery>( // ); @@ -432,27 +483,6 @@ * for the JVM to finalize the {@link QueryEngine} if the application no * longer holds a hard reference to it. The {@link QueryEngine} is then * automatically closed from within its finalizer method. - * - * @todo Handle priority for selective queries based on the time remaining - * until the timeout. - * <p> - * Handle priority for unselective queries based on the order in which - * they are submitted? - * - * @todo The approach taken by the {@link QueryEngine} executes one task per - * pipeline bop per chunk. Outside of how the tasks are scheduled, - * this corresponds closely to the historical pipeline query - * evaluation. - * <p> - * Chunk concatenation could be performed here if we (a) mark the - * {@link LocalChunkMessage} with a flag to indicate when it has been - * accepted; and (b) rip through the incoming chunks for the query for - * the target bop and combine them to feed the task. Chunks which have - * already been assigned would be dropped when take() discovers them. - * [The chunk combination could also be done when we output the chunk - * if the sink has not been taken, e.g., by combining the chunk into - * the same target ByteBuffer, or when we add the chunk to the - * RunningQuery.] */ static private class QueryEngineTask implements Runnable { @@ -523,18 +553,12 @@ if (!msg.isMaterialized()) throw new IllegalStateException(); - final RunningQuery q = runningQueries.get(msg.getQueryId()); + final RunningQuery q = getRunningQuery(msg.getQueryId()); if(q == null) { /* * The query is not registered on this node. - * - * FIXME We should recognize the difference between a query which - * was never registered (and throw an error here) and a query which - * is done and has been removed from runningQueries. One way to do - * this is with an LRU of recently completed queries. */ -// return false; throw new IllegalStateException(); } @@ -561,27 +585,24 @@ /** * Shutdown the {@link QueryEngine} (blocking). The {@link QueryEngine} will * not accept new queries, but existing queries will run to completion. - * - * @todo This sleeps until {@link #runningQueries} is empty. 
It could be - * signaled when that collection becomes empty if we protected the - * collection with a lock for mutation (or if we just notice each time - * a query terminates). However, that would restrict the concurrency - * for query start/stop. */ public void shutdown() { // normal termination. shutdown = true; - while(!runningQueries.isEmpty()) { - - try { - Thread.sleep(100/*ms*/); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - - } + lock.lock(); + try { + while (!runningQueries.isEmpty()) { + try { + nothingRunning.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } finally { + lock.unlock(); + } // hook for subclasses. didShutdown(); @@ -643,37 +664,6 @@ } - /** - * The query is no longer running. Resources associated with the query - * should be released. - * - * @todo A race is possible where a query is cancelled on a node where the - * node receives notice to start the query after the cancelled message - * has arrived. To avoid having such queries linger, we should have a - * a concurrent hash set with an approximate LRU policy containing the - * identifiers for queries which have been cancelled, possibly paired - * with the cause (null if normal execution). That will let us handle - * any reasonable concurrent indeterminism between cancel and start - * notices for a query. - * <p> - * Another way in which this might be addressed is by involving the - * client each time a query start is propagated to a node. If we - * notify the client that the query will start on the node first, then - * the client can always issue the cancel notices [unless the client - * dies, in which case we still want to kill the query which could be - * done based on a service disappearing from a jini registry or - * zookeeper.] - */ - protected void halt(final RunningQuery q) { - - // remove from the set of running queries. - runningQueries.remove(q.getQueryId(), q); - - if (log.isInfoEnabled()) - log.info("Removed entry for query: " + q.getQueryId()); - - } - /* * IQueryPeer */ @@ -690,13 +680,33 @@ } + /** + * {@inheritDoc} + * <p> + * The default implementation is a NOP. + */ + public void cancelQuery(UUID queryId, Throwable cause) { + // NOP + } + /* * IQueryClient */ + public PipelineOp getQuery(final UUID queryId) { + + final RunningQuery q = getRunningQuery(queryId); + + if (q == null) + throw new IllegalArgumentException(); + + return q.getQuery(); + + } + public void startOp(final StartOpMessage msg) throws RemoteException { - final RunningQuery q = runningQueries.get(msg.queryId); + final RunningQuery q = getRunningQuery(msg.queryId); if (q != null) { @@ -708,7 +718,7 @@ public void haltOp(final HaltOpMessage msg) throws RemoteException { - final RunningQuery q = runningQueries.get(msg.queryId); + final RunningQuery q = getRunningQuery(msg.queryId); if (q != null) { @@ -829,7 +839,7 @@ assertRunning(); - putRunningQuery(queryId, runningQuery); + putIfAbsent(queryId, runningQuery); runningQuery.startQuery(msg); @@ -839,56 +849,224 @@ } - /** - * Return the {@link RunningQuery} associated with that query identifier. - * - * @param queryId - * The query identifier. - * - * @return The {@link RunningQuery} -or- <code>null</code> if there is no - * query associated with that query identifier. + /* + * Management of running queries. */ + + /** + * Places the {@link RunningQuery} object into the internal map. + * + * @param queryId + * The query identifier. + * @param runningQuery + * The {@link RunningQuery}. 
+ * + * @return The {@link RunningQuery} -or- another {@link RunningQuery} iff + * one exists with the same {@link UUID}. + */ + protected RunningQuery putIfAbsent(final UUID queryId, + final RunningQuery runningQuery) { + + if (queryId == null) + throw new IllegalArgumentException(); + + if (runningQuery == null) + throw new IllegalArgumentException(); + + // First, check [runningQueries] w/o acquiring a lock. + { + final RunningQuery tmp = runningQueries.get(queryId); + + if (tmp != null) { + + // Found existing query. + return tmp; + + } + + } + + /* + * A lock is used to address a race condition here with the concurrent + * registration and halt of a query. + */ + + lock.lock(); + + try { + + // Test for a recently terminated query. + final Future<Void> doneQueryFuture = doneQueries.get(queryId); + + if (doneQueryFuture != null) { + + // Throw out an appropriate exception for a halted query. + handleDoneQuery(queryId, doneQueryFuture); + + // Should never get here. + throw n... [truncated message content] |
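The EdgeSample changes above classify each cutoff-join cardinality estimate as Normal, Exact, LowerBound, or Underflow from the counters reported by the cutoff PipelineJoin, and derive the estimated cardinality from the observed join hit ratio. The following is a minimal, self-contained sketch of that classification rule for readers who do not want to reconstruct it from the diff; the class and some field handling (e.g., the zero-input guard) are illustrative stand-ins rather than the actual bigdata types.

/**
 * Sketch of the cardinality-estimate classification used by the runtime
 * query optimizer's cutoff joins (rev 3949). Illustrative only; the real
 * logic lives in JoinGraph.EdgeSample.
 */
public class EdgeSampleSketch {

    enum EstimateEnum { Normal, Exact, LowerBound, Underflow }

    final double f;                  // join hit ratio = outputCount / inputCount
    final long estimatedCardinality; // rangeCount * f
    final EstimateEnum estimateEnum;

    EdgeSampleSketch(final long rangeCount, final boolean sourceSampleExact,
            final int sourceSampleLimit, final int limit,
            final int inputCount, final int outputCount) {

        // Join hit ratio observed by the cutoff join (guard is illustrative).
        f = inputCount == 0 ? 0d : outputCount / (double) inputCount;

        // Estimated output cardinality of the full join.
        estimatedCardinality = (long) (rangeCount * f);

        if (sourceSampleExact && outputCount < limit) {
            // The entire (exact) source sample was fed in and the cutoff was
            // not reached, so the sample is the actual result of the join.
            estimateEnum = EstimateEnum.Exact;
        } else if (inputCount == 1 && outputCount == limit) {
            // A single input solution saturated the cutoff: more solutions
            // might have been produced, so the estimate is a lower bound.
            estimateEnum = EstimateEnum.LowerBound;
        } else if (!sourceSampleExact
                && inputCount == Math.min(sourceSampleLimit, rangeCount)
                && outputCount == 0) {
            // The whole (inexact) source sample produced no output: the join
            // hit ratio underflowed; a zero estimate does not prove the join
            // is empty.
            estimateEnum = EstimateEnum.Underflow;
        } else {
            estimateEnum = EstimateEnum.Normal;
        }
    }
}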
From: <btm...@us...> - 2010-11-14 16:46:02
Revision: 3948 http://bigdata.svn.sourceforge.net/bigdata/?rev=3948&view=rev Author: btmurphy Date: 2010-11-14 16:45:55 +0000 (Sun, 14 Nov 2010) Log Message: ----------- [branch dev-btm]: CHECKPOINT - phase 1 of callable executor (client service) smart proxy work. Added code to deal with the new asynchronous initializaztion of the shard (data) service; that is, code that tests to determine whether the shard service is not ready because it's still initializing, and then performs a set of retries to allow the shard service to complete its startup processing Modified Paths: -------------- branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/EmbeddedShardLocator.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/EmbeddedShardService.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/ServiceImpl.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/util/Util.java branches/dev-btm/bigdata-jini/src/test/com/bigdata/service/jini/TestBigdataClient.java Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/EmbeddedShardLocator.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/EmbeddedShardLocator.java 2010-11-12 21:29:20 UTC (rev 3947) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/EmbeddedShardLocator.java 2010-11-14 16:45:55 UTC (rev 3948) @@ -1338,10 +1338,10 @@ /* * Note: By setting this to null we are indicating to * the RegisterIndexTask on the data service that it - * needs to set the resourceMetadata[] when the index is - * actually registered based on the live journal as of - * the when the task actually executes on the data - * service. + * needs to set the resourceMetadata[] when the index + * is actually registered based on the live journal + * as of the when the task actually executes on the + * data service. */ null,//[resources] Signal to the RegisterIndexTask. null //[cause] Signal to RegisterIndexTask @@ -1350,11 +1350,53 @@ // */ // ,"createScaleOutIndex(name="+scaleOutIndexName+") " )); - - dataServices[i].registerIndex - (Util.getIndexPartitionName(scaleOutIndexName, - pmd.getPartitionId()), - md); + + // The shard service (as currently implemented) may not be + // completely initialized if it is just being started + // when this method is called (for example, in a test + // environment). This is because the shard service creates + // a StoreManager (via a Resource), which depends on + // discovering a transaction service; and it sets up + // counters, which depend on discovering a load balancer. 
+ // Thus, to address the case where the shard service is + // not yet ready, test for such a situation; and apply a + // retry-to-failure strategy + boolean registered = false; + try { + dataServices[i].registerIndex + ( Util.getIndexPartitionName(scaleOutIndexName, + pmd.getPartitionId()), + md ); + registered = true; + } catch(Throwable t1) { + if ( !Util.causeNoSuchObject(t1) ) { + throw new Exception(t1); + } + //wait for data service to finish initializing + int nWait = 5; + for(int n=0; n<nWait; n++) { + Util.delayMS(1000L); + try { + dataServices[i].registerIndex + ( Util.getIndexPartitionName + (scaleOutIndexName, + pmd.getPartitionId()), + md ); + registered = true; + break; + } catch(Throwable t2) { + if ( !Util.causeNoSuchObject(t2) ) { + throw new Exception(t2); + } + } + } + } + if (!registered) {// try one last time + dataServices[i].registerIndex + ( Util.getIndexPartitionName(scaleOutIndexName, + pmd.getPartitionId()), + md ); + } partitions[i] = pmd; } Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/EmbeddedShardService.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/EmbeddedShardService.java 2010-11-12 21:29:20 UTC (rev 3947) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/EmbeddedShardService.java 2010-11-14 16:45:55 UTC (rev 3948) @@ -273,8 +273,8 @@ ((this.localResources).getScheduledExecutor()) .scheduleWithFixedDelay (deferredInitTask, - 20L*1000L,//initial delay - 30L*1000L,//period + 1L*1000L,//initial delay + 3L*1000L,//period TimeUnit.MILLISECONDS); } } @@ -318,7 +318,9 @@ * not complete within a timeout. */ public boolean isOpen() { - return ( (concurrencyMgr != null) && (concurrencyMgr.isOpen()) ); + return ( (concurrencyMgr != null) && + (concurrencyMgr.isOpen()) && + deferredInitDone ); } synchronized public void shutdown() { Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/ServiceImpl.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/ServiceImpl.java 2010-11-12 21:29:20 UTC (rev 3947) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/ServiceImpl.java 2010-11-14 16:45:55 UTC (rev 3948) @@ -86,6 +86,7 @@ import java.io.File; import java.io.IOException; +import java.rmi.NoSuchObjectException; import java.rmi.RemoteException; import java.util.ArrayList; import java.util.Arrays; @@ -188,7 +189,7 @@ throws RemoteException, IOException, InterruptedException, ExecutionException { - readyState.check(); + readyCheck(); embeddedShardService.registerIndex(name, metadata); } @@ -196,14 +197,14 @@ throws RemoteException, IOException, InterruptedException, ExecutionException { - readyState.check(); + readyCheck(); embeddedShardService.dropIndex(name); } public IBlock readBlock(IResourceMetadata resource, long addr) throws RemoteException, IOException { - readyState.check(); + readyCheck(); return embeddedShardService.readBlock(resource, addr); } @@ -211,7 +212,7 @@ throws RemoteException, IOException, InterruptedException, ExecutionException { - readyState.check(); + readyCheck(); return embeddedShardService.getIndexMetadata(name, timestamp); } @@ -225,7 +226,7 @@ throws RemoteException, IOException, InterruptedException, ExecutionException { - readyState.check(); + readyCheck(); return embeddedShardService.rangeIterator (tx, name, fromKey, toKey, capacity, flags, filter); } @@ -234,7 +235,7 @@ public <T> Future<T> submit(IDataServiceCallable<T> 
task) throws RemoteException { - readyState.check(); + readyCheck(); Exporter exporter = null; try { exporter = Util.getExporter(config, @@ -256,7 +257,7 @@ public Future submit(long tx, String name, IIndexProcedure proc) throws RemoteException { - readyState.check(); + readyCheck(); Exporter exporter = null; try { exporter = Util.getExporter(config, @@ -279,19 +280,19 @@ public boolean purgeOldResources(long timeout, boolean truncateJournal) throws RemoteException, InterruptedException { - readyState.check(); + readyCheck(); return embeddedShardService.purgeOldResources(timeout,truncateJournal); } public void setReleaseTime(long releaseTime) throws RemoteException, IOException { - readyState.check(); + readyCheck(); embeddedShardService.setReleaseTime(releaseTime); } public void abort(long tx) throws RemoteException, IOException { - readyState.check(); + readyCheck(); embeddedShardService.abort(tx); } @@ -299,14 +300,14 @@ throws RemoteException, IOException, InterruptedException, ExecutionException { - readyState.check(); + readyCheck(); return embeddedShardService.singlePhaseCommit(tx); } public void prepare(long tx, long revisionTime) throws RemoteException, IOException, Throwable { - readyState.check(); + readyCheck(); embeddedShardService.prepare(tx, revisionTime); } @@ -314,19 +315,19 @@ throws RemoteException, IOException, InterruptedException, ExecutionException { - readyState.check(); + readyCheck(); embeddedShardService.forceOverflow(immediate, compactingMerge); } public long getAsynchronousOverflowCounter() throws RemoteException, IOException { - readyState.check(); + readyCheck(); return embeddedShardService.getAsynchronousOverflowCounter(); } public boolean isOverflowActive() throws RemoteException, IOException { - readyState.check(); + readyCheck(); return embeddedShardService.isOverflowActive(); } @@ -668,10 +669,40 @@ readyState.ready();//ready to accept calls from clients } + // Private methods + + // Throws NoSuchObjectException if the service has either + // not completed initialization and registration with the + // lookup service, or not created the concurrency manager + // and resource (which depends on discovering the transaction + // service) + private void readyCheck() { + readyState.check();//completed service init? + + // created concurrency and resource managers? + if ( !embeddedShardService.isOpen() ) { + throw new RemoteExceptionWrapper + (new NoSuchObjectException("not ready")); + } + } + private void shutdownDo(ShutdownType type) { (new ShutdownThread(type)).start(); } + private void killDo(int status) { + String[] groups = ((DiscoveryGroupManagement)ldm).getGroups(); + LookupLocator[] locs = ((DiscoveryLocatorManagement)ldm).getLocators(); + logger.log(Level.INFO, killStr+" [groups=" + +Util.writeGroupArrayToString(groupsToJoin) + +", locators=" + +Util.writeArrayElementsToString(locatorsToJoin)+"]"); + + System.exit(status); + } + + // Nested classes + /** * Used to shutdown the service asynchronously. 
*/ @@ -745,15 +776,15 @@ } } - private void killDo(int status) { - String[] groups = ((DiscoveryGroupManagement)ldm).getGroups(); - LookupLocator[] locs = ((DiscoveryLocatorManagement)ldm).getLocators(); - logger.log(Level.INFO, killStr+" [groups=" - +Util.writeGroupArrayToString(groupsToJoin) - +", locators=" - +Util.writeArrayElementsToString(locatorsToJoin)+"]"); - - System.exit(status); + private static class RemoteExceptionWrapper extends RuntimeException { + private static final long serialVersionUID = 1L; + private final RemoteException wrapped; + public RemoteExceptionWrapper(RemoteException wrapped) { + this.wrapped = wrapped; + } + private Object writeReplace() { + return wrapped; + } } /** Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/util/Util.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/util/Util.java 2010-11-12 21:29:20 UTC (rev 3947) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/util/Util.java 2010-11-14 16:45:55 UTC (rev 3948) @@ -73,6 +73,7 @@ import net.jini.lookup.ServiceDiscoveryManager; import java.io.IOException; +import java.rmi.NoSuchObjectException; import java.rmi.Remote; import java.rmi.server.ExportException; import java.util.Collection; @@ -110,6 +111,26 @@ return min; } + public static void delayMS(long nMS) { + try { + Thread.sleep(nMS); + } catch (InterruptedException e) { } + } + + public static boolean causeNoSuchObject(Throwable t) { + if (t instanceof NoSuchObjectException) return true; + + // test cause chain for NoSuchObjectException + Throwable cause = t.getCause(); + while ( (cause != null) && + !(cause instanceof NoSuchObjectException) ) + { + cause = cause.getCause(); + } + if (cause == null) return false; + return (cause instanceof NoSuchObjectException); + } + /* Convenience method that can be called when a service exits, or * when failure occurs during the service's initialization process. 
* This method un-does any work that may have already been completed; Modified: branches/dev-btm/bigdata-jini/src/test/com/bigdata/service/jini/TestBigdataClient.java =================================================================== --- branches/dev-btm/bigdata-jini/src/test/com/bigdata/service/jini/TestBigdataClient.java 2010-11-12 21:29:20 UTC (rev 3947) +++ branches/dev-btm/bigdata-jini/src/test/com/bigdata/service/jini/TestBigdataClient.java 2010-11-14 16:45:55 UTC (rev 3948) @@ -132,7 +132,33 @@ metadata.setDeleteMarkers(true); - fed.registerIndex(metadata); +//BTM - PRE_CLIENT_SERVICE - BEGIN +//BTM - PRE_CLIENT_SERVICE fed.registerIndex(metadata); + boolean registered = false; + try { + fed.registerIndex(metadata); + registered = true; + } catch(Throwable t1) { + if ( !Util.causeNoSuchObject(t1) ) { + throw new Exception(t1); + } + //wait for data service to finish initializing + int nWait = 5; + for(int i=0; i<nWait; i++) { + Util.delayMS(1000L); + try { + fed.registerIndex(metadata); + registered = true; + break; + } catch(Throwable t2) { + if ( !Util.causeNoSuchObject(t2) ) { + throw new Exception(t2); + } + } + } + } + assertTrue("failed to register metadata", registered); +//BTM - PRE_CLIENT_SERVICE - END final IIndex ndx = fed.getIndex(name, ITx.UNISOLATED); @@ -166,9 +192,7 @@ if (dataService0 == null) { for(int i=0; i<nWait; i++) { - try { - Thread.sleep(1L*1000L); - } catch (InterruptedException e) { } + Util.delayMS(1000L); dataService0 = helper.getDataService0(); if (dataService0 != null) break; } @@ -182,9 +206,7 @@ if (dataService1 == null) { for(int i=0; i<nWait; i++) { - try { - Thread.sleep(1L*1000L); - } catch (InterruptedException e) { } + Util.delayMS(1000L); dataService1 = helper.getDataService1(); if (dataService1 != null) break; } @@ -194,19 +216,60 @@ } //BTM - END --------------------------------------------------- - final UUID indexUUID = fed.registerIndex( metadata, // - // separator keys. - new byte[][] { - new byte[]{}, - TestKeyBuilder.asSortKey(500) - },// - // data service assignments. - new UUID[] { // -//BTM helper.getDataService0().getServiceUUID(),// -//BTM helper.getDataService1().getServiceUUID() // -dataService0UUID, -dataService1UUID - }); +//BTM - PRE_CLIENT_SERVICE - BEGIN +//BTM - PRE_CLIENT_SERVICE final UUID indexUUID = fed.registerIndex( metadata, // +//BTM - PRE_CLIENT_SERVICE // separator keys. +//BTM - PRE_CLIENT_SERVICE new byte[][] { +//BTM - PRE_CLIENT_SERVICE new byte[]{}, +//BTM - PRE_CLIENT_SERVICE TestKeyBuilder.asSortKey(500) +//BTM - PRE_CLIENT_SERVICE },// +//BTM - PRE_CLIENT_SERVICE // data service assignments. 
+//BTM - PRE_CLIENT_SERVICE new UUID[] { // +//BTM - PRE_CLIENT_SERVICE dataService0UUID, +//BTM - PRE_CLIENT_SERVICE dataService1UUID +//BTM - PRE_CLIENT_SERVICE }); + UUID indexUUID = null; + boolean registered = false; + try { + indexUUID = + fed.registerIndex + ( metadata, + new byte[][] + { new byte[]{}, + TestKeyBuilder.asSortKey(500) }, + new UUID[] { dataService0UUID, + dataService1UUID } + ); + registered = true; + } catch(Throwable t1) { + if ( !Util.causeNoSuchObject(t1) ) { + throw new Exception(t1); + } + //wait for data service to finish initializing + nWait = 5; + for(int i=0; i<nWait; i++) { + Util.delayMS(1000L); + try { + indexUUID = + fed.registerIndex + ( metadata, + new byte[][] + { new byte[]{}, + TestKeyBuilder.asSortKey(500) }, + new UUID[] { dataService0UUID, + dataService1UUID } + ); + registered = true; + break; + } catch(Throwable t2) { + if ( !Util.causeNoSuchObject(t2) ) { + throw new Exception(t2); + } + } + } + } + assertTrue("failed to register metadata", registered); +//BTM - PRE_CLIENT_SERVICE - END final IIndex ndx = fed.getIndex(name, ITx.UNISOLATED); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
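The catch/delay/retry sequence above is repeated almost verbatim in EmbeddedShardLocator and twice in TestBigdataClient. A sketch of how that pattern could be factored into a single helper built on the Util.causeNoSuchObject() and Util.delayMS() methods added in this revision; the RetryUtil class name and the Callable-based signature are illustrative assumptions, not part of this commit:

import java.util.concurrent.Callable;

import com.bigdata.util.Util; // the Util class extended in this revision.

// Sketch: retry an operation while the target service reports NoSuchObjectException,
// i.e. while it is still completing its asynchronous initialization.
class RetryUtil {

    static <T> T retryWhileInitializing(final Callable<T> op,
                                        final int nRetries,
                                        final long delayMillis) throws Exception {
        Throwable lastCause = null;
        for (int i = 0; i <= nRetries; i++) {
            try {
                return op.call();
            } catch (Throwable t) {
                if (!Util.causeNoSuchObject(t)) {
                    // Not a "still initializing" condition - fail immediately.
                    throw new Exception(t);
                }
                lastCause = t;
                // Wait for the service to finish its startup processing.
                Util.delayMS(delayMillis);
            }
        }
        throw new Exception("service did not become ready", lastCause);
    }
}

Call sites would then wrap fed.registerIndex(...) (or dataServices[i].registerIndex(...)) in an anonymous Callable and pass the retry count and delay, rather than duplicating the loop at each registration point.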
From: <tho...@us...> - 2010-11-12 21:29:27
Revision: 3947 http://bigdata.svn.sourceforge.net/bigdata/?rev=3947&view=rev Author: thompsonbry Date: 2010-11-12 21:29:20 +0000 (Fri, 12 Nov 2010) Log Message: ----------- Added tearDown() to the unit test to close the journal. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 21:02:48 UTC (rev 3946) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 21:29:20 UTC (rev 3947) @@ -1440,9 +1440,6 @@ public void runtimeOptimizer(final QueryEngine queryEngine, final int limit) throws Exception { - // // The set of vertices which have been consumed by the query. - // final Set<Vertex> executedVertices = new LinkedHashSet<Vertex>(); - // Setup the join graph. Path[] paths = round0(queryEngine, limit, 2/* nedges */); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 21:02:48 UTC (rev 3946) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 21:29:20 UTC (rev 3947) @@ -213,6 +213,18 @@ } + protected void tearDown() throws Exception { + + if(jnl != null) { + jnl.close(); + jnl = null; + } + + super.tearDown(); + + } + + /** * Loads the data into the closureStore and computes the closure. */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-11-12 21:02:59
Revision: 3946 http://bigdata.svn.sourceforge.net/bigdata/?rev=3946&view=rev Author: thompsonbry Date: 2010-11-12 21:02:48 +0000 (Fri, 12 Nov 2010) Log Message: ----------- Added LUBM Q8 to the test class. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 20:47:51 UTC (rev 3945) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 21:02:48 UTC (rev 3946) @@ -616,4 +616,144 @@ } // test_Q9 + /** + * LUBM Query 8 + * + * <pre> + * PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + * PREFIX ub: <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#> + * SELECT ?x ?y ?z + * WHERE{ + * ?y a ub:Department . + * ?x a ub:Student; + * ub:memberOf ?y . + * ?y ub:subOrganizationOf <http://www.University0.edu> . + * ?x ub:emailAddress ?z . + * } + * </pre> + * @throws Exception + */ + public void test_query8() throws Exception { + final AbstractTripleStore database = (AbstractTripleStore) jnl + .getResourceLocator() + .locate(namespace, jnl.getLastCommitTime()); + + if (database == null) + throw new RuntimeException("Not found: " + namespace); + + /* + * Resolve terms against the lexicon. + */ + final BigdataValueFactory f = database.getLexiconRelation() + .getValueFactory(); + + final BigdataURI rdfType = f + .createURI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); + + final BigdataURI department = f + .createURI("http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#Department"); + + final BigdataURI student = f + .createURI("http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#Student"); + + final BigdataURI memberOf = f + .createURI("http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#memberOf"); + + final BigdataURI subOrganizationOf = f + .createURI("http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#subOrganizationOf"); + + final BigdataURI emailAddress = f + .createURI("http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#emailAddress"); + + final BigdataURI university0 = f + .createURI("http://www.University0.edu"); + + final BigdataValue[] terms = new BigdataValue[] { rdfType, department, + student, memberOf, subOrganizationOf, emailAddress, university0 }; + + // resolve terms. + database.getLexiconRelation() + .addTerms(terms, terms.length, true/* readOnly */); + + { + for (BigdataValue tmp : terms) { + System.out.println(tmp + " : " + tmp.getIV()); + if (tmp.getIV() == null) + throw new RuntimeException("Not defined: " + tmp); + } + } + + final IPredicate[] preds; + { + final IVariable<?> x = Var.var("x"); + final IVariable<?> y = Var.var("y"); + final IVariable<?> z = Var.var("z"); + + // The name space for the SPO relation. + final String[] relation = new String[] { namespace + ".spo" }; + + final long timestamp = jnl.getLastCommitTime(); + + int nextId = 0; + + // ?y a ub:Department . 
+ final IPredicate p0 = new SPOPredicate(new BOp[] { y, + new Constant(rdfType.getIV()), + new Constant(department.getIV()) },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(IPredicate.Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?x a ub:Student; + final IPredicate p1 = new SPOPredicate(new BOp[] { x, + new Constant(rdfType.getIV()), + new Constant(student.getIV()) },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(IPredicate.Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // (?x) ub:memberOf ?y . + final IPredicate p2 = new SPOPredicate(new BOp[] { x, + new Constant(memberOf.getIV()), y },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(IPredicate.Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?y ub:subOrganizationOf <http://www.University0.edu> . + final IPredicate p3 = new SPOPredicate(new BOp[] { y, + new Constant(subOrganizationOf.getIV()), + new Constant(university0.getIV()) },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(IPredicate.Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // ?x ub:emailAddress ?z . + final IPredicate p4 = new SPOPredicate(new BOp[] { x, + new Constant(emailAddress.getIV()), z },// + new NV(BOp.Annotations.BOP_ID, nextId++),// + new NV(IPredicate.Annotations.TIMESTAMP, timestamp),// + new NV(IPredicate.Annotations.RELATION_NAME, relation)// + ); + + // the vertices of the join graph (the predicates). + preds = new IPredicate[] { p0, p1, p2, p3, p4 }; + } + + { + final int limit = 100; + + final QueryEngine queryEngine = QueryEngineFactory + .getQueryController(jnl/* indexManager */); + + final JGraph g = new JGraph(preds); + + g.runtimeOptimizer(queryEngine, limit); + + } + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-11-12 20:48:01
Revision: 3945 http://bigdata.svn.sourceforge.net/bigdata/?rev=3945&view=rev Author: thompsonbry Date: 2010-11-12 20:47:51 +0000 (Fri, 12 Nov 2010) Log Message: ----------- More clean up on JoinGraph. Added LUBM Q9 to the test case. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:52:10 UTC (rev 3944) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 20:47:51 UTC (rev 3945) @@ -20,7 +20,7 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ + */ /* * Created on Aug 16, 2010 */ @@ -77,8 +77,9 @@ * support of runtime query optimization. A join graph is a collection of * relations and joins which connect those relations. This boils down to a * collection of {@link IPredicate}s (selects on relations) and shared variables - * (which identify joins). - * <p> + * (which identify joins). Operators other than standard joins (including + * optional joins, sort, order by, etc.) must be handled downstream from the + * join graph in a "tail plan". * * @see http://arxiv.org/PS_cache/arxiv/pdf/0810/0810.4809v1.pdf, XQuery Join * Graph Isolation. @@ -86,130 +87,98 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * - * TODO Some edges can be eliminated by transitivity. For example, given - * - * <pre> - * query: - * - * :- (A loves B), (B loves A), (B marriedTo C). - * - * vertices: - * - * v1=(A loves B); - * v2=(B loves A); - * v3=(B marriedTo C); - * - * edges: - * - * e1=(v1,v2) // JOIN( SCAN(A loves B), SCAN(B loves A)) - * e2=(v1,v3) // JOIN( SCAN(A loves B), SCAN(B marriedTo C)) - * e3=(v2,v3) // JOIN( SCAN(B loves A), SCAN(B marriedTo C)) - * - * It is necessary to execute e1 and either e2 or e3, but not both e2 and e3. - * </pre> - * - * TODO In order to combine pipelining with runtime query optimization we need - * to sample based on the first chunk(s) delivered by the pipeline. If - * necessary, we can buffer multiple chunks for semi-selective queries. - * However, for unselective queries we would accept as many buffers worth - * of bindings as were allowed for a given join and then sample the - * binding sets from left hand side (the buffers) and run those samples - * against the right hand side (the local shard). + * @todo Examine the overhead of the runtime optimizer. Look at ways to prune + * its costs. For example, by pruning the search, by recognizing when the + * query is simple enough to execute directly, by recognizing when we have + * already materialized the answer to the query, etc. */ public class JoinGraph extends PipelineOp { - private static final transient Logger log = Logger.getLogger(JoinGraph.class); - - private static final long serialVersionUID = 1L; + private static final transient Logger log = Logger + .getLogger(JoinGraph.class); - /** - * Known annotations. 
- */ - public interface Annotations extends PipelineOp.Annotations { + private static final long serialVersionUID = 1L; + + /** + * Known annotations. + */ + public interface Annotations extends PipelineOp.Annotations { + /** - * The vertices of the join graph expressed an an {@link IPredicate}[]. - */ + * The vertices of the join graph expressed an an {@link IPredicate}[]. + */ String VERTICES = JoinGraph.class.getName() + ".vertices"; - + /** - * The initial limit for cutoff sampling (default {@value #DEFAULT_LIMIT}). + * The initial limit for cutoff sampling (default + * {@value #DEFAULT_LIMIT}). */ String LIMIT = JoinGraph.class.getName() + ".limit"; - - int DEFAULT_LIMIT = 100; - } + int DEFAULT_LIMIT = 100; + } + /** - * @see Annotations#VERTICES - */ - public IPredicate[] getVertices() { - - return (IPredicate[]) getRequiredProperty(Annotations.VERTICES); - - } + * @see Annotations#VERTICES + */ + public IPredicate[] getVertices() { - /** - * @see Annotations#LIMIT - */ - public int getLimit() { - - return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); - - } - - public JoinGraph(final NV ...anns) { - + return (IPredicate[]) getRequiredProperty(Annotations.VERTICES); + + } + + /** + * @see Annotations#LIMIT + */ + public int getLimit() { + + return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); + + } + + public JoinGraph(final NV... anns) { + this(BOpBase.NOARGS, NV.asMap(anns)); - - } + } + /** * - * TODO We can derive the vertices from the join operators or the join - * operators from the vertices. However, if a specific kind of join - * operator is required then the question is whether we have better - * information to make that choice when the join graph is evaluated or - * before it is constructed. - * - * TODO How we will handle optional joins? Presumably they are outside of - * the code join graph as part of the tail attached to that join - * graph. - * * TODO How can join constraints be moved around? Just attach them where - * ever a variable becomes bound? And when do we filter out variables - * which are not required downstream? Once we decide on a join path - * and execute it fully (rather than sampling that join path). + * ever a variable becomes bound? And when do we filter out variables which + * are not required downstream? Once we decide on a join path and execute it + * fully (rather than sampling that join path). */ - public JoinGraph(final BOp[] args, final Map<String,Object> anns) { + public JoinGraph(final BOp[] args, final Map<String, Object> anns) { - super(args,anns); + super(args, anns); - switch (getEvaluationContext()) { - case CONTROLLER: - break; - default: - throw new UnsupportedOperationException( - Annotations.EVALUATION_CONTEXT + "=" - + getEvaluationContext()); - } + switch (getEvaluationContext()) { + case CONTROLLER: + break; + default: + throw new UnsupportedOperationException( + Annotations.EVALUATION_CONTEXT + "=" + + getEvaluationContext()); + } - } + } - public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - return new FutureTask<Void>(new JoinGraphTask(context)); + return new FutureTask<Void>(new JoinGraphTask(context)); - } + } - /** - * Used to assign row identifiers. - */ + /** + * Used to assign row identifiers. + */ static private final IVariable<Integer> ROWID = Var.var("__rowid"); - /** - * A sample of a {@link Vertex} (an access path). 
- */ - public static class VertexSample { + /** + * A sample of a {@link Vertex} (an access path). + */ + public static class VertexSample { /** * Fast range count. This will be the same for each sample taken @@ -218,23 +187,22 @@ */ public final long rangeCount; - /** + /** * The limit used to produce the {@link #sample}. */ - public final int limit; + public final int limit; /** * When <code>true</code>, the result is not a sample but the * materialized access path. * - * @todo When <code>true</code>, we could run the join against the - * sample rather than the disk. This would require wrapping the - * sample as an access path. Since all exact samples will be - * pretty small, this is not likely to have any great performance - * benefit. + * TODO When <code>true</code>, we could run the join against the sample + * rather than the disk. This would require wrapping the sample as an + * access path. Since all exact samples will be pretty small, this is + * not likely to have any great performance benefit. */ - public final boolean exact; - + public final boolean exact; + /** * Sample. */ @@ -247,34 +215,35 @@ * @param exact * @param sample */ - public VertexSample(final long rangeCount, final int limit, final boolean exact, final Object[] sample) { + public VertexSample(final long rangeCount, final int limit, + final boolean exact, final Object[] sample) { if (rangeCount < 0L) throw new IllegalArgumentException(); if (limit <= 0) throw new IllegalArgumentException(); - + if (sample == null) throw new IllegalArgumentException(); this.rangeCount = rangeCount; - + this.limit = limit; - + this.exact = exact; - + this.sample = sample; - + } public String toString() { return "VertexSample{rangeCount=" + rangeCount + ",limit=" + limit + ",exact=" + exact + ", sampleSize=" + sample.length + "}"; } - - } + } + /** * A vertex of the join graph is an annotated relation (this corresponds to * an {@link IPredicate} with additional annotations to support the adaptive @@ -299,20 +268,20 @@ * The most recently taken sample of the {@link Vertex}. */ VertexSample sample = null; - + Vertex(final IPredicate<?> pred) { if (pred == null) throw new IllegalArgumentException(); - + this.pred = pred; - + } - + public String toString() { return "Vertex{pred=" + pred + ",sample=" + sample + "}"; - + } /** @@ -334,30 +303,31 @@ * Take a sample of the vertex. If the sample is already exact, then * this is a NOP. * - * @param context * @param limit * The sample cutoff. */ - public void sample(final BOpContextBase context, final int limit) { + public void sample(final QueryEngine queryEngine, final int limit) { - if (context == null) + if (queryEngine == null) throw new IllegalArgumentException(); if (limit <= 0) throw new IllegalArgumentException(); - + final VertexSample oldSample = this.sample; - if(oldSample != null && oldSample.exact) { + if (oldSample != null && oldSample.exact) { /* * The old sample is already the full materialization of the * vertex. */ - + return; - + } + + final BOpContextBase context = new BOpContextBase(queryEngine); final IRelation r = context.getRelation(pred); @@ -371,12 +341,12 @@ /* * Materialize the access path. * - * @todo This could be more efficient if we raised it onto the - * AP or if we overrode CHUNK_CAPACITY and the fully buffered + * TODO This could be more efficient if we raised it onto the AP + * or if we overrode CHUNK_CAPACITY and the fully buffered * iterator threshold such that everything was materialized as a * single chunk. 
*/ - + final List<Object> tmp = new ArrayList<Object>((int) rangeCount); final IChunkedIterator<Object> itr = ap.iterator(); @@ -396,25 +366,31 @@ sample = new VertexSample(rangeCount, limit, true/* exact */, tmp.toArray(new Object[0])); - - return; - } + } else { - /* - * Materialize a random sample from the access path. - */ - - final SampleIndex sampleOp = new SampleIndex(new BOp[] {}, // - NV.asMap(// + /* + * Materialize a random sample from the access path. + */ + + final SampleIndex sampleOp = new SampleIndex( + new BOp[] {}, // + NV.asMap(// new NV(SampleIndex.Annotations.PREDICATE, pred),// new NV(SampleIndex.Annotations.LIMIT, limit))); - sample = new VertexSample(rangeCount, limit, false/*exact*/, sampleOp - .eval(context)); + sample = new VertexSample(rangeCount, limit, false/* exact */, + sampleOp.eval(context)); + } + + if (log.isInfoEnabled()) + log.info("Sampled: " + sample); + + return; + } - + } /** @@ -449,13 +425,13 @@ * anything. This is not 100%, merely indicative. */ public final int outputCount; - + /** * The ratio of the #of input samples consumed to the #of output samples * generated (the join hit ratio or scale factor). */ public final double f; - + /** * The estimated cardinality of the join. */ @@ -499,12 +475,12 @@ * join. That is, feeding all source tuples into the join gives fewer * than the desired number of output tuples. * - * @todo This field marks this condition and should be used to avoid - * needless recomputation of a join whose exact solution is - * already known. + * TODO This field marks this condition and should be used to avoid + * needless re-computation of a join whose exact solution is already + * known. */ - public final boolean exact; - + public final boolean exact; + /** * The sample of the solutions for the join path. */ @@ -526,40 +502,39 @@ * @param outputCount * The #of binding sets generated before the join was cutoff. */ - EdgeSample(//final VertexSample sourceVertexSample, + EdgeSample( + // final VertexSample sourceVertexSample, final long sourceSampleRangeCount, - final boolean sourceSampleExact, - final int limit, + final boolean sourceSampleExact, final int limit, final int inputCount, final int outputCount, final IBindingSet[] sample) { - if(sample == null) + if (sample == null) throw new IllegalArgumentException(); - -// this.rangeCount = sourceVertexSample.rangeCount; + + // this.rangeCount = sourceVertexSample.rangeCount; this.rangeCount = sourceSampleRangeCount; - + this.limit = limit; - + this.inputCount = inputCount; - + this.outputCount = outputCount; - + f = outputCount == 0 ? 
0 : (outputCount / (double) inputCount); estimatedCardinality = (long) (rangeCount * f); - + estimateIsLowerBound = inputCount == 1 && outputCount == limit; - -// final boolean sourceSampleExact = sourceVertexSample.exact; - estimateIsUpperBound = !sourceSampleExact - && outputCount < limit; - + + // final boolean sourceSampleExact = sourceVertexSample.exact; + estimateIsUpperBound = !sourceSampleExact && outputCount < limit; + this.exact = sourceSampleExact && outputCount < limit; - + this.sample = sample; } - + public String toString() { return getClass().getName() + "{inputRangeCount=" + rangeCount + ", limit=" + limit + ", inputCount=" + inputCount @@ -567,10 +542,9 @@ + ", estimatedCardinality=" + estimatedCardinality + ", estimateIsLowerBound=" + estimateIsLowerBound + ", estimateIsUpperBound=" + estimateIsUpperBound - + ", sampleIsExactSolution=" + exact - + "}"; + + ", sampleIsExactSolution=" + exact + "}"; } - + }; /** @@ -603,13 +577,14 @@ * not been sampled. */ public EdgeSample sample = null; - - public Edge(final Vertex v1, final Vertex v2, final Set<IVariable<?>> shared) { + + public Edge(final Vertex v1, final Vertex v2, + final Set<IVariable<?>> shared) { if (v1 == null) throw new IllegalArgumentException(); if (v2 == null) throw new IllegalArgumentException(); - if (shared==null) + if (shared == null) throw new IllegalArgumentException(); if (shared.isEmpty()) throw new IllegalArgumentException(); @@ -624,8 +599,10 @@ * for each vertex. */ public String toString() { - - return "Edge{ (V" + v1.pred.getId() + ",V" + v2.pred.getId() + ")" + + return "Edge{ (V" + v1.pred.getId() + ",V" + v2.pred.getId() + + "), estCard=" + + (sample == null ? "N/A" : sample.estimatedCardinality) + ", shared=" + shared.toString() + ", sample=" + sample + "}"; @@ -635,9 +612,9 @@ * Equality is determined by reference testing. */ public boolean equals(final Object o) { - + return this == o; - + } /** @@ -657,24 +634,25 @@ final int h; if (h1 < h2) { - + h = h1 * 31 + h2; - + } else { - + h = h2 * 31 + h1; - + } hash = h; } - return hash; + return hash; - } - private int hash; + } - /** + private int hash; + + /** * Return the vertex with the smaller estimated cardinality. * * @throws IllegalStateException @@ -684,15 +662,15 @@ if (v1.sample == null) // vertex not sampled. throw new IllegalStateException(); - + if (v2.sample == null) // vertex not sampled. throw new IllegalStateException(); - + return (v1.sample.rangeCount < v2.sample.rangeCount) ? v1 : v2; - + } - - /** + + /** * Return the vertex with the larger estimated cardinality (the vertex * not returned by {@link #getMinimumCardinalityVertex()}). * @@ -703,12 +681,12 @@ // The vertex with the minimum cardinality. final Vertex o = getMinimumCardinalityVertex(); - + // Return the other vertex. return (v1 == o) ? v2 : v1; - + } - + /** * Estimate the cardinality of the edge. * @@ -716,7 +694,7 @@ * * @return The estimated cardinality of the edge. * - * @throws Exception + * @throws Exception */ public long estimateCardinality(final QueryEngine queryEngine, final int limit) throws Exception { @@ -763,21 +741,22 @@ * both the input and the output of the cutoff evaluation of the * edge rather than rows of the materialized relation. * - * TODO On subsequent iterations we would probably re-sample [v] - * and we would run against the materialized intermediate result for + * TODO On subsequent iterations we would probably re-sample [v] and + * we would run against the materialized intermediate result for * [v']. 
*/ /* * Convert the source sample into an IBindingSet[]. * - * @todo We might as well do this when we sample the vertex. + * TODO We might as well do this when we sample the vertex. */ final IBindingSet[] sourceSample = new IBindingSet[v.sample.sample.length]; { for (int i = 0; i < sourceSample.length; i++) { final IBindingSet bset = new HashBindingSet(); - BOpContext.copyValues((IElement) v.sample.sample[i], v.pred, bset); + BOpContext.copyValues((IElement) v.sample.sample[i], + v.pred, bset); sourceSample[i] = bset; } } @@ -819,7 +798,7 @@ if (limit <= 0) throw new IllegalArgumentException(); - + // Inject a rowId column. sourceSample = BOpUtility.injectRowIdColumn(ROWID, 1/* start */, sourceSample); @@ -834,15 +813,14 @@ */ final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, // new NV(BOp.Annotations.BOP_ID, 1),// - new NV(PipelineJoin.Annotations.PREDICATE,vTarget.pred.setBOpId(3)) - ); + new NV(PipelineJoin.Annotations.PREDICATE, vTarget.pred + .setBOpId(3))); final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp },// NV.asMap(// new NV(BOp.Annotations.BOP_ID, 2), // - new NV(SliceOp.Annotations.LIMIT, (long)limit), // - new NV( - BOp.Annotations.EVALUATION_CONTEXT, + new NV(SliceOp.Annotations.LIMIT, (long) limit), // + new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER))); // run the cutoff sampling of the edge. @@ -875,31 +853,18 @@ .get()); } finally { // verify no problems. FIXME Restore test of the query. -// runningQuery.get(); + // runningQuery.get(); } } finally { runningQuery.cancel(true/* mayInterruptIfRunning */); } /* - * Note: This needs to be based on the source vertex having the - * minimum cardinality for the Path which is being extended which - * connects via some edge defined in the join graph. If a different - * vertex is chosen as the source then the estimated cardinality - * will be falsely high by whatever ratio the chosen vertex - * cardinality exceeds the one having the minimum cardinality which - * is connected via an edge to the target vertex). - * - * FIXME I am not convinced that this approach is quite right. I am - * also not convinced that this approach will correctly carry the - * additional metadata on the EdgeSample (exact, estimate overflow - * and underflow, etc). [This needs to be the estimated cardinality - * of the path which is being extended by an edge to the target - * vertex.] + * TODO Improve comments here. See if it is possible to isolate a + * common base class which would simplify the setup of the cutoff + * join and the computation of the sample stats. */ -// final VertexSample moreSelectiveVertexSample = vSource.sample.rangeCount < vTarget.sample.rangeCount ? vSource.sample -// : vTarget.sample; - + final EdgeSample edgeSample = new EdgeSample( sourceSampleRangeCount, sourceSampleExact, limit, inputCount, outputCount, result @@ -911,64 +876,14 @@ return edgeSample; } - + } -// /** -// * A path sample includes the materialized binding sets from the as-executed -// * join path. -// * -// * @todo The sample {@link IBindingSet}[] could be saved with the -// * {@link EdgeSample}. However, when we are sampling a join path we -// * want to associate the net sample with the path, not each edge in -// * that path, because we need to be able to generate join paths in -// * which the path is extended from any vertex already part of the path -// * to any vertex which has not yet incorporated in the path and has -// * not yet been executed. 
To do this we need to intermediate results -// * for the path, which includes all variables bound by each join for -// * each edge in the path, not just on an edge by edge basis. -// */ -// public static class PathSample extends EdgeSample { -// -// /** -// * <code>true</code> if the sample is the exact solution for the join path. -// */ -// private final boolean exact; -// -// /** -// * The sample of the solutions for the join path. -// */ -// private final IBindingSet[] sample; -// -// PathSample(final long inputRangeCount, final int limit, -// final int inputCount, final int outputCount, -// final boolean exact, final IBindingSet[] sample) { -// -// super(inputRangeCount, limit, inputCount, outputCount); -// -// if(sample == null) -// throw new IllegalArgumentException(); -// -// this.exact = exact; -// -// this.sample = sample; -// -// } -// -// public String toString() { -// -// return super.toString() + ":{exact=" + exact + ", sampleSize=" -// + sample.length + "}"; -// -// } -// -// } - /** * A sequence of {@link Edge}s (aka join steps). */ public static class Path { - + /** * An immutable ordered list of the edges in the (aka the sequence of * joins represented by this path). @@ -995,19 +910,6 @@ */ final public long cumulativeEstimatedCardinality; - /** - * The vertex at which the path from which this path was derived - * stopped. This is initialized to the source vertex when entering the - * chainSample() method. - * - * @todo This is used by ROX to only grow the path from its end. We - * could of course just look at the last edge in the path. - * However, I think that I prefer to grow a path from any - * branching vertex as long as the path does not duplicate any - * path already generated (including those which were pruned). - */ - private Vertex stopVertex; - public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("Path{"); @@ -1015,7 +917,8 @@ for (Edge e : edges) { if (!first) sb.append(","); - sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() + ")"); + sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() + + ")"); first = false; } sb.append(",cumEstCard=" + cumulativeEstimatedCardinality @@ -1042,23 +945,27 @@ if (e == null) throw new IllegalArgumentException(); - + if (e.sample == null) - throw new IllegalArgumentException("Not sampled: "+e); - + throw new IllegalArgumentException("Not sampled: " + e); + this.edges = Collections.singletonList(e); - + this.sample = e.sample; - + this.cumulativeEstimatedCardinality = e.sample.estimatedCardinality; - + } /** * Constructor used by {@link #addEdge(QueryEngine, int, Edge)} - * @param edges The edges in the new path. - * @param cumulativeEstimatedCardinality The cumulative estimated cardinality of the new path. - * @param sample The sample from the last + * + * @param edges + * The edges in the new path. + * @param cumulativeEstimatedCardinality + * The cumulative estimated cardinality of the new path. 
+ * @param sample + * The sample from the last */ private Path(final List<Edge> edges, final long cumulativeEstimatedCardinality, @@ -1066,19 +973,19 @@ if (edges == null) throw new IllegalArgumentException(); - + if (cumulativeEstimatedCardinality < 0) throw new IllegalArgumentException(); - + if (sample == null) throw new IllegalArgumentException(); this.edges = Collections.unmodifiableList(edges); - + this.cumulativeEstimatedCardinality = cumulativeEstimatedCardinality; - + this.sample = sample; - + } /** @@ -1132,7 +1039,7 @@ final Vertex[] v1 = getVertices(); final Vertex[] v2 = p.getVertices(); - + if (v1.length < v2.length) { // Proven false since the other set is larger. return false; @@ -1164,7 +1071,7 @@ } return true; - + } /** @@ -1172,8 +1079,8 @@ * * @return The vertices (in path order). * - * @todo this could be rewritten without the toArray() using a method - * which visits the vertices of a path in any order. + * TODO This could be rewritten without the toArray() using a + * method which visits the vertices of a path in any order. */ public Vertex[] getVertices() { final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); @@ -1190,7 +1097,7 @@ * * @param p * The given path. - * + * * @return <code>true</code> if this path begins with the given path. */ public boolean beginsWith(final Path p) { @@ -1210,10 +1117,10 @@ return false; } } - + return true; } - + /** * Add an edge to a path, computing the estimated cardinality of the new * path, and returning the new path. @@ -1272,11 +1179,11 @@ * cardinality then we should prefer join paths which achieve the * same reduction in cardinality with less 'intermediate * cardinality' - that is, by examining fewer possible solutions. + * [In fact, the estimated (cumulative) cardinality might not be a + * good reflection of the IOs to be done -- this needs more + * thought.] */ -// final IBindingSet[] sample = BOpUtility.injectRowIdColumn(ROWID, -// 0/* start */, this.sample.sample); - final EdgeSample edgeSample = e.estimateCardinality(queryEngine, limit, sourceVertex, targetVertex, this.sample.estimatedCardinality, this.sample.exact, @@ -1286,9 +1193,9 @@ final List<Edge> edges = new ArrayList<Edge>( this.edges.size() + 1); - + edges.addAll(this.edges); - + edges.add(e); final long cumulativeEstimatedCardinality = this.cumulativeEstimatedCardinality @@ -1303,58 +1210,58 @@ return tmp; } - + } - -// /** -// * Equality is defined by comparison of the unordered set of edges. -// */ -// public boolean equals(final Object o) { -// if (this == o) -// return true; -// if (!(o instanceof Path)) -// return false; -// final Path t = (Path) o; -// if (edges.length != t.edges.length) -// return false; -// for (Edge e : edges) { -// boolean found = false; -// for (Edge x : t.edges) { -// if (x.equals(e)) { -// found = true; -// break; -// } -// } -// if (!found) -// return false; -// } -// return true; -// } -// -// /** -// * The hash code of path is defined as the bit-wise XOR of the hash -// * codes of the edges in that path. -// */ -// public int hashCode() { -// -// if (hash == 0) { -// -// int result = 0; -// -// for(Edge e : edges) { -// -// result ^= e.hashCode(); -// -// } -// -// hash = result; -// -// } -// return hash; -// -// } -// private int hash; + // /** + // * Equality is defined by comparison of the unordered set of edges. 
+ // */ + // public boolean equals(final Object o) { + // if (this == o) + // return true; + // if (!(o instanceof Path)) + // return false; + // final Path t = (Path) o; + // if (edges.length != t.edges.length) + // return false; + // for (Edge e : edges) { + // boolean found = false; + // for (Edge x : t.edges) { + // if (x.equals(e)) { + // found = true; + // break; + // } + // } + // if (!found) + // return false; + // } + // return true; + // } + // + // /** + // * The hash code of path is defined as the bit-wise XOR of the hash + // * codes of the edges in that path. + // */ + // public int hashCode() { + // + // if (hash == 0) { + // + // int result = 0; + // + // for(Edge e : edges) { + // + // result ^= e.hashCode(); + // + // } + // + // hash = result; + // + // } + // return hash; + // + // } + // private int hash; + } /** @@ -1364,13 +1271,13 @@ * * @param a * An array of join paths. - * + * * @return A table with that data. */ static public String showTable(final Path[] a) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); - for(int i=0; i<a.length; i++) { + for (int i = 0; i < a.length; i++) { final Path x = a[i]; if (x.sample == null) { f.format("p[%2d] %7s, %10s %10s", "N/A", "N/A", "N/A", i); @@ -1381,18 +1288,18 @@ } sb.append(", ["); final Vertex[] vertices = x.getVertices(); - for(Vertex v : vertices) { + for (Vertex v : vertices) { f.format("%2d ", v.pred.getId()); } sb.append("]"); -// for (Edge e : x.edges) -// sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() -// + ")"); + // for (Edge e : x.edges) + // sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() + // + ")"); sb.append("\n"); } return sb.toString(); } - + /** * A join graph (data structure and methods only). * @@ -1442,9 +1349,6 @@ */ private final Edge[] E; - // The set of vertices which have been consumed by the query. - private final Set<Vertex> executedVertices = new LinkedHashSet<Vertex>(); - public List<Vertex> getVertices() { return Collections.unmodifiableList(Arrays.asList(V)); } @@ -1457,28 +1361,25 @@ final StringBuilder sb = new StringBuilder(); sb.append("JoinGraph"); sb.append("{V=["); - for(Vertex v : V) { - sb.append("\nV["+v.pred.getId()+"]="+v); + for (Vertex v : V) { + sb.append("\nV[" + v.pred.getId() + "]=" + v); } sb.append("],E=["); - for(Edge e : E) { - sb.append("\n"+e); + for (Edge e : E) { + sb.append("\n" + e); } - sb.append("\n],ExecutedVertices=["); - for(Vertex v : executedVertices) { - sb.append("\nV["+v.pred.getId()+"]="+v); - } sb.append("\n]}"); return sb.toString(); - -// return super.toString() + "{V=" + Arrays.toString(V) + ",E=" -// + Arrays.toString(E) + ", executedVertices="+executedVertices+"}"; + + // return super.toString() + "{V=" + Arrays.toString(V) + ",E=" + // + Arrays.toString(E) + + // ", executedVertices="+executedVertices+"}"; } - + public JGraph(final IPredicate[] v) { - if (v == null) - throw new IllegalArgumentException(); + if (v == null) + throw new IllegalArgumentException(); if (v.length < 2) throw new IllegalArgumentException(); @@ -1527,18 +1428,200 @@ } /** + * + * @param queryEngine + * @param limit + * The limit for sampling a vertex and the initial limit for + * cutoff join evaluation. A reasonable value is + * <code>100</code>. + * + * @throws Exception + */ + public void runtimeOptimizer(final QueryEngine queryEngine, + final int limit) throws Exception { + + // // The set of vertices which have been consumed by the query. 
+ // final Set<Vertex> executedVertices = new LinkedHashSet<Vertex>(); + + // Setup the join graph. + Path[] paths = round0(queryEngine, limit, 2/* nedges */); + + /* + * The input paths for the first round have two vertices (one edge + * is two vertices). Each round adds one more vertex, so we have + * three vertices by the end of round 1. We are done once we have + * generated paths which include all vertices. + * + * This occurs at round := nvertices - 1 + */ + + final int nvertices = V.length; + + int round = 1; + + while (round < nvertices - 1) { + + paths = expand(queryEngine, limit, round++, paths); + + } + + /* + * FIXME Choose the best join path and execute it (or return the + * evaluation order to the caller). + * + * FIXME This must either recognize each time a join path is known + * to dominate all other join paths and then execute it or iterator + * until the total join path is decided and then execute the + * original query using that join path. + * + * @todo When executing the query, it is actually being executed as + * a subquery. Therefore we have to take appropriate care to ensure + * that the results are copied out of the subquery and into the + * parent query. + * + * @todo When we execute the query, we should clear the references + * to the sample (unless they are exact, in which case they can be + * used as is) in order to release memory associated with those + * samples if the query is long running. + */ + + } + + /** + * Choose the starting vertices. + * + * @param nedges + * The maximum #of edges to choose. + */ + public Path[] choseStartingPaths(final int nedges) { + + final List<Path> tmp = new LinkedList<Path>(); + + // All edges in the graph. + final Edge[] edges = getEdges().toArray(new Edge[0]); + + // Sort them by ascending expected cardinality. + Arrays.sort(edges, 0, edges.length, + EstimatedEdgeCardinalityComparator.INSTANCE); + + // Choose the top-N edges (those with the least cardinality). + for (int i = 0; i < edges.length && i < nedges; i++) { + + tmp.add(new Path(edges[i])); + + } + + final Path[] a = tmp.toArray(new Path[tmp.size()]); + + return a; + + } + + /** + * Choose up to <i>nedges</i> edges to be the starting point. + * + * @param queryEngine + * The query engine. + * @param limit + * The cutoff used when sampling the vertices and when + * sampling the edges. + * @param nedges + * The maximum #of edges to choose. Those having the smallest + * expected cardinality will be chosen. + * + * @throws Exception + */ + public Path[] round0(final QueryEngine queryEngine, final int limit, + final int nedges) throws Exception { + + /* + * Sample the vertices. + */ + sampleVertices(queryEngine, limit); + + if (log.isInfoEnabled()) { + final StringBuilder sb = new StringBuilder(); + sb.append("Vertices:\n"); + for (Vertex v : V) { + sb.append(v.toString()); + sb.append("\n"); + } + log.info(sb.toString()); + } + + /* + * Estimate the cardinality for each edge. + * + * TODO It would be very interesting to see the variety and/or + * distribution of the values bound when the edge is sampled. This + * can be easily done using a hash map with a counter. That could + * tell us a lot about the cardinality of the next join path + * (sampling the join path also tells us a lot, but it does not + * explain it as much as seeing the histogram of the bound values). + * I believe that there are some interesting online algorithms for + * computing the N most frequent observations and the like which + * could be used here. 
+ */ + estimateEdgeWeights(queryEngine, limit); + + if (log.isInfoEnabled()) { + final StringBuilder sb = new StringBuilder(); + sb.append("Edges:\n"); + for (Edge e : E) { + sb.append(e.toString()); + sb.append("\n"); + } + log.info(sb.toString()); + } + + /* + * Choose the initial set of paths. + */ + final Path[] paths_t0 = choseStartingPaths(nedges); + + if (log.isInfoEnabled()) + log.info("\n*** Paths @ t0\n" + JoinGraph.showTable(paths_t0)); + + return paths_t0; + + } + + /** * Do one breadth first expansion. * * @param queryEngine + * The query engine. * @param limit + * The limit (this is automatically multiplied by the round + * to increase the sample size in each round). * @param round + * The round number in [1:n]. * @param a - * @return + * The set of paths from the previous round. For the first + * round, this is formed from the initial set of edges to + * consider. + * + * @return The set of paths which survived pruning in this round. + * * @throws Exception */ - final public Path[] expand(final QueryEngine queryEngine, final int limit, + public Path[] expand(final QueryEngine queryEngine, int limit, final int round, final Path[] a) throws Exception { + if (queryEngine == null) + throw new IllegalArgumentException(); + if (limit <= 0) + throw new IllegalArgumentException(); + if (round <= 0) + throw new IllegalArgumentException(); + if (a == null) + throw new IllegalArgumentException(); + if (a.length == 0) + throw new IllegalArgumentException(); + + // increment the limit by itself in each round. + limit *= round; + final List<Path> tmp = new LinkedList<Path>(); // First, copy all existing paths. @@ -1546,16 +1629,20 @@ tmp.add(x); } + // Vertices are inserted into this collection when they are resampled. + final Set<Vertex> resampled = new LinkedHashSet<Vertex>(); + // Then expand each path. for (Path x : a) { if (x.edges.size() < round) { - + // Path is from a previous round. continue; - } + // The set of vertices used to expand this path in this round. final Set<Vertex> used = new LinkedHashSet<Vertex>(); + // Check all edges in the graph. for (Edge edgeInGraph : E) { @@ -1573,18 +1660,27 @@ continue; } - final Vertex newVertex = v1Found ? edgeInGraph.v2 : edgeInGraph.v1; - - if(used.contains(newVertex)) { + final Vertex newVertex = v1Found ? edgeInGraph.v2 + : edgeInGraph.v1; + + if (used.contains(newVertex)) { // Vertex already used to extend this path. continue; } - + // add the new vertex to the set of used vertices. used.add(newVertex); + + if (!resampled.add(newVertex)&&round>1) { + /* + * Resample this vertex before we sample a new edge + * which targets this vertex. + */ + newVertex.sample(queryEngine, limit); + } // Extend the path to the new vertex. - final Path p = x.addEdge(queryEngine, limit * round, + final Path p = x.addEdge(queryEngine, limit, edgeInGraph); // Add to the set of paths for this round. 
@@ -1596,19 +1692,22 @@ final Path[] paths_tp1 = tmp.toArray(new Path[tmp.size()]); - System.err.println("\n*** Paths @ round=" + round + "\n" - + JoinGraph.showTable(paths_tp1)); + if (log.isDebugEnabled()) + log.debug("\n*** round=" + round + " : generated paths\n" + + JoinGraph.showTable(paths_tp1)); final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1); - System.err.println("\n*** Paths @ round=" + round - + " (after pruning)\n" - + JoinGraph.showTable(paths_tp1_pruned)); + if (log.isInfoEnabled()) + log.info("\n*** round=" + round + ": paths{in=" + a.length + + ",considered=" + paths_tp1.length + ",out=" + + paths_tp1_pruned.length + "}\n" + + JoinGraph.showTable(paths_tp1_pruned)); return paths_tp1_pruned; - + } - + /** * Return the {@link Vertex} whose {@link IPredicate} is associated with * the given {@link BOp.Annotations#BOP_ID}. @@ -1619,8 +1718,8 @@ * vertex in the join graph. */ public Vertex getVertex(int bopId) { - for(Vertex v : V) { - if(v.pred.getId()==bopId) + for (Vertex v : V) { + if (v.pred.getId() == bopId) return v; } return null; @@ -1639,7 +1738,7 @@ * the join graph. */ public Edge getEdge(Vertex v1, Vertex v2) { - for(Edge e : E) { + for (Edge e : E) { if (e.v1 == v1 && e.v2 == v2) return e; if (e.v1 == v2 && e.v2 == v1) @@ -1647,22 +1746,23 @@ } return null; } - + /** - * Obtain a sample and estimated cardinality (fast range count) for each vertex. + * Obtain a sample and estimated cardinality (fast range count) for each + * vertex. * - * @param context + * @param queryEngine * @param limit * The sample size. */ - public void sampleVertices(final BOpContextBase context, final int limit) { + public void sampleVertices(final QueryEngine queryEngine, final int limit) { for (Vertex v : V) { - v.sample(context, limit); - + v.sample(queryEngine, limit); + } - + } /** @@ -1687,8 +1787,7 @@ } - e.estimateCardinality( - queryEngine, limit); + e.estimateCardinality(queryEngine, limit); } @@ -1706,14 +1805,14 @@ * are no {@link Edge}s having an estimated cardinality. */ public Edge getMinimumCardinalityEdge(final Set<Vertex> visited) { - + long minCard = Long.MIN_VALUE; Edge minEdge = null; for (Edge e : E) { if (e.sample == null) { - + // Edge has not been sampled. continue; @@ -1721,12 +1820,12 @@ if (visited != null && (visited.contains(e.v1) || visited.contains(e.v2))) { - + // A vertex of that edge has already been consumed. continue; - + } - + final long estimatedCardinality = e.sample.estimatedCardinality; if (minEdge == null || estimatedCardinality < minCard) { @@ -1740,22 +1839,24 @@ } return minEdge; - + } -// /** -// * Return the {@link Edge} having the minimum estimated cardinality out -// * of those edges whose cardinality has been estimated. -// * -// * @return The minimum cardinality edge -or- <code>null</code> if there -// * are no {@link Edge}s having an estimated cardinality. -// */ -// public Edge getMinimumCardinalityEdge() { -// -// return getMinimumCardinalityEdge(null); -// -// } - + // /** + // * Return the {@link Edge} having the minimum estimated cardinality + // out + // * of those edges whose cardinality has been estimated. + // * + // * @return The minimum cardinality edge -or- <code>null</code> if + // there + // * are no {@link Edge}s having an estimated cardinality. 
+ // */ + // public Edge getMinimumCardinalityEdge() { + // + // return getMinimumCardinalityEdge(null); + // + // } + /** * Return the #of edges in which the given vertex appears where the * other vertex of the edge does not appear in the set of visited @@ -1765,7 +1866,7 @@ * The vertex. * @param visited * A set of vertices to be excluded from consideration. - * + * * @return The #of such edges. */ public int getEdgeCount(final Vertex v, final Set<Vertex> visited) { @@ -1787,17 +1888,17 @@ * @return Those edges. */ public List<Edge> getEdges(final Vertex v, final Set<Vertex> visited) { - + if (v == null) throw new IllegalArgumentException(); if (visited != null && visited.contains(v)) return Collections.emptyList(); - + final List<Edge> tmp = new LinkedList<Edge>(); - + for (Edge e : E) { - + if (v.equals(e.v1) || v.equals(e.v2)) { if (visited != null) { @@ -1811,464 +1912,16 @@ } tmp.add(e); - - } - - } - - return tmp; - - } - /** - * - * @param queryEngine - * @param limit - * The limit for sampling a vertex and the initial limit for - * cutoff join evaluation. A reasonable value is - * <code>100</code>. - * @param timeout - * The timeout for cutoff join path evaluation - * (milliseconds). A reasonable value is <code>100</code>ms. - * @throws Exception - * - * FIXME This must either return the query plan or copy the - * results as they are materialized to the sink for the join - * graph operator. - * - * - * @todo We do not need the [timeout] as long as we evaluate each cutoff - * join separately. The limited number of input solutions to the - * join automatically limits the amount of work the join can do. - * However, if we do cutoff evaluation of a series of edges then - * it is possible to do a lot of work in order to find [limit] - * solutions. In this case, a [timeout] protects us against join - * paths which have poor correlations and large cardinality for - * their vertices (a lot of solutions are considered to produce - * very few results). - */ - public void runtimeOptimizer(final QueryEngine queryEngine, - final int limit, final long timeout) throws Exception { - - final BOpContextBase context = new BOpContextBase(queryEngine); - - if (log.isInfoEnabled()) - log.info("limit=" + limit); - - /* - * Sample the vertices. - * - * TODO Sampling for scale-out not yet finished. - * - * FIXME Re-sampling will always produce the same sample depending - * on the sample operator impl (it should be random, but it is not). - */ - sampleVertices(context, limit); - - if(log.isDebugEnabled()) - log.debug("joinGraph=" + toString()); - - /* - * Estimate the cardinality and weights for each edge, obtaining the - * Edge with the minimum estimated cardinality. This will be the - * starting point for the join graph evaluation. - * - * @todo It would be very interesting to see the variety and/or - * distribution of the values bound when the edge is sampled. This - * can be easily done using a hash map with a counter. That could - * tell us a lot about the cardinality of the next join path - * (sampling the join path also tells us a lot, but it does not - * explain it as much as seeing the histogram of the bound values). - * I believe that there are some interesting online algorithms for - * computing the N most frequent observations and the like which - * could be used here. - * - * TODO ROX is choosing the starting edge based on the minimum - * estimated cardinality. 
However, it is possible for there to be - * more than one edge with an estimated cardinality which is - * substantially to the minimum estimated cardinality. It would be - * best to start from multiple vertices so we can explore join paths - * which begin with those alternative starting vertices as well. - * (LUBM Q2 is an example of such a query). - */ - estimateEdgeWeights(queryEngine, limit); - - while(moreEdgesToVisit(executedVertices)) { - - // Decide on the next join path to execute. - final Path p = chainSample(queryEngine, limit, timeout); - - for(Edge e : p.edges) { - - /* - * FIXME Finish the algorithm. - * - * Execute the edge. We have two choices here. If join path - * is currently materialized and the expected cardinality of - * the edge is small to moderate (LTE limit * 10) then we - * can simply materialize the result of evaluating the edge. - * - * In this case, we replace the sample for the vertex with - * the actual result of evaluating the edge. [This concept - * pre-supposes that a vertex sample is the set of matching - * elements and that we do not store the binding sets which - * satisfy the join path. I think that this is perhaps the - * primary point of difference for MonetDB/ROX and bigdata. - * bigdata is working with IBindingSet[]s and should - * associate the set of intermediate solutions which - * represent the materialized intermediate result with the - * join path, not the vertex or the edge.] - * - * Otherwise, either the join path is already only a sample - * or the expected cardinality of this edge is too large so - * we do the cutoff evaluation of the edge in order to - * propagate a sample. - * - * 1. exec(e,T1(v1),T2(v2)) - */ - - executedVertices.add(e.v1); - executedVertices.add(e.v2); - } - /* - * Re-sample edges branching from any point in the path which we - * just executed. The purpose of this is to improve the - * detection of correlations using a materialized sample of the - * intermediate results (which will be correlated) rather than - * independent samples of the vertices (which are not - * correlated). - * - * Also, note that ROX only samples vertices which satisfy the - * zero investment property and therefore there could be - * vertices which have not yet been sampled if some vertices are - * not associated with an index. - * - * @todo This could just be another call to sampleVertices() and - * estimateEdgeWeights() if those methods accepted the set of - * already executed vertices so they could make the proper - * exclusions (or if we had a method which returned the - * un-executed vertices and/or edges). - */ -// e.v1.sample(context, limit); -// e.v2.sample(context, limit); - } - } + return tmp; - /** - * Return <code>true</code> iff there exists at least one {@link Edge} - * branching from a vertex NOT found in the set of vertices which have - * visited. - * - * @param visited - * A set of vertices. - * - * @return <code>true</code> if there are more edges to explore. - */ - private boolean moreEdgesToVisit(final Set<Vertex> visited) { - - // Consider all edges. - for(Edge e : E) { - - if (visited.contains(e.v1) && visited.contains(e.v2)) { - /* - * Since both vertices for this edge have been executed the - * edge is now redundant. Either it was explicitly executed - * or another join path was used which implies the edge by - * transitivity in the join graph. - */ - continue; - } - - /* - * We found a counter example (an edge which has not been - * explored). 
- */ - if (log.isTraceEnabled()) - log.trace("Edge has not been explored: " + e); - - return true; - - } - - // No more edges to explore. - return false; - } /** - * E - * - * @param limit - * @return - * - * TODO How to indicate the set of edges which remain to be - * explored? - * - * @throws Exception - */ - public Path chainSample(final QueryEngine queryEngine, final int limit, - final long timeout) throws Exception { - - final Vertex source; - { - /* - * Find the edge having the minimum estimated cardinality. - */ - final Edge e = getMinimumCardinalityEdge(executedVertices); - - if (e == null) - throw new RuntimeException("No weighted edges."); - - /* - * Decide which vertex of that edge will be the starting point - * for chain sampling (if any). - */ - if (getEdgeCount(e.v1, executedVertices) > 1 - || getEdgeCount(e.v2, executedVertices) > 1) { - /* - * There is at least one vertex of that edge which branches. - * Chain sampling will begin with the vertex of that edge - * which has the lower estimated cardinality. - * - * TODO It could be that the minimum cardinality vertex does - * not branch. What happens for that code path? Do we just - * execute that edge and then reenter chain sampling? If so, - * it would be cleared to test for this condition explicitly - * up front. - */ - source = e.getMinimumCardinalityVertex(); - } else { - /* - * There is no vertex which branches for that edge. This is - * a stopping condition for chain sampling. The path - * consisting of just that edge is returned and should be - * executed by the caller. - */ - return new Path(e); - } - - } - - /* - * Setup some data structures for one or more breadth first - * expansions of the set of path(s) which are being sampled. This - * iteration will continue until we reach a stopping condition. - */ - - // The set of paths being considered. - final List<Path> paths = new LinkedList<Path>(); - - { - // The current path. - final Path p = new Path(); - - p.stopVertex = source; -// p.inputSample = source.sample; - paths.add(p); - } - - // initialize the cutoff to the limit used to sample the vertices. - int cutoff = limit; - long cutoffMillis = timeout; - - final Set<Vertex> unsampled = new LinkedHashSet<Vertex>( - executedVertices); - - /* - * One breadth first expansion of the join paths. - * - * Note: This expands each join path one vertex in each iteration. - * However, different join paths can expand from different vertices. - * - * For ROX, each join path is expanded from the last vertex which - * was added to that join path so the initial edge for each join - * path strongly determines the edges in the join graph along which - * that join path can grow. - * - * For bigdata, we can grow the path from any vertex already in the - * path to any vertex which (a) is not yet in the path; and (b) has - * not yet been evaluated. - * - * This suggests that this loop must consider each of the paths to - * decide whether that path can be extended. - */ - while (moreEdgesToVisit(unsampled)) { - - // increment the cutoff. - cutoff += limit; - cutoffMillis += timeout; - - // Consider each path. - for(Path p : paths) { - - /* - * The vertex at which we stopped expanding that path the - * last time. - * - * TODO ROX might have to traverse vertex to vertex along - * edges, but we can execute any edge whose preconditions - * have been satisfied. - */ - final Vertex v = p.stopVertex; - - // TODO depends on the notion of the paths remaining. 
- if (getEdgeCount(v, null/*executed+sampled(p)*/) > 0) { - /* - * This path branches at this vertex, so remove the old - * path 1st. - */ - paths.remove(p); - } - - // For each edge which is a neighbor of the vertex [v]. - final List<Edge> neighbors = null; - for(Edge e : neighbors) { - // 1. append the edge to the path - final Path p1 = p.addEdge(queryEngine, cutoff, e); - // 3. add the path to paths. - paths.add(p1); - } - - } - - final Path p = getSelectedJoinPath(paths.toArray(new Path[paths.size()])); - - if(p != null) { - - return p; - - } - - } // while(moreEdgesToSample) - - final Path p = getBestAlternativeJoinPath(paths.toArray(new Path[paths.size()])); - - if(p != null) { - - return p; - - } - - // TODO ROX as given can return null here, which looks like a bug. - return null; - - } // chainSample() - - /** - * Return the path which is selected by the termination criteria - * (looking for a path which dominates the alternatives). - * - * @param a - * An array of {@link Path}s to be tested. - * - * @return The selected path -or- <code>null</code> if none of the paths - * is selected. - * - * @todo Should we only explore beneath the diagonal? - * - * @todo What is the basis for comparing the expected cardinality of - * join paths? Where one path is not simply the one step extension - * of the other. - * <p> - * This rule might only be able to compare the costs for paths in - * which one path directly extends another. - * <p> - * It is not clear that this code is comparing all paths which - * need to be compared. - * - * @todo I have restated the termination rule as follows. - * <p> - * If there is a path [p] whose total cost is LTE the cost of - * executing just its last edge [e], then the path [p] dominates - * all paths beginning with edge [e]. The dominated paths should - * be pruned. - * <p> - * If there is a path, [p], which is an unordered extension of - * another path, [p1] (the vertices of p are a superset of the - * vertices of p1), and the cost of [p] is LTE the cost of [p1], - * then [p] dominates [p1]. The dominated paths should be pruned. - * <p> - * If there is a path, [p], which has the same vertices as a path - * [p1] and the cost of [p] is LTE the cost of [p1], then [p] - * dominates (or is equivalent to) [p1]. The path [p1] should be - * pruned. - * - * For a given path length [l], if no paths of length [l] remain - * then the minimum cost path of length GT [l] may be executed. - * - * @todo Due to sampling error and the desire to be robust to small - * differences in the expected cost of an operation, we should - * only consider two significant digits when comparing estimates - * of cost. E.g., 990 and 1000 should not be differentiated as - * they are the same within the sampling error. This should be - * used to chose all starting vertices which have the same minimum - * cardinality. - */ - public Path getSelectedJoinPath(final Path[] a) { - final StringBuilder sb = new StringBuilder(); - final Formatter f = new Formatter(sb); - Path p = null; - for (int i = 0; i < a.length; i++) { - final Path Pi = a[i]; - if (Pi.sample == null) - throw new RuntimeException("Not sampled: " + Pi); - for (int j = 0; j < a.length; j++) { - if (i == j) - continue; - final Path Pj = a[j]; - if (Pj.sample == null) - throw new RuntimeException("Not sampled: " + Pj); - /* - * FIXME This needs to compare the cost of Pj given path Pi - * against the cost of Pj when executed as a single edge (or - * by any other alternative join path sequence). 
The choice - * of Pi and Pj is not coherent and the same value of costPj - * is being used for both sides of the equation. - */ - final long costPi = Pi.sample.estimatedCardinality; - final double sfPi = Pi.sample.f; - final long costPj = Pj.sample.estimatedCardinality; - final long expectedCombinedCost = costPi - + (long) (sfPi * costPj); - /* - * @todo I think that LTE makes more sense here since having - * the same net cardinality for a given edge after - * performing more steps would appear to be worth while. - */ - final boolean lte = expectedCombinedCost <= costPj; - { - f - .format( - ... [truncated message content] |
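The cutoff join sampling referred to throughout the JoinGraph code above derives an edge's estimated cardinality from a limited evaluation: the join is fed a sample of source solutions and stopped once [limit] outputs have been produced, and the estimate then scales the range count of the source by the observed join hit ratio (outputCount / inputCount). The following is a minimal sketch of that estimate under those assumptions; the bound flags mirror the conditions visible in the EdgeSample constructor in revision 3941 further down this page, but the class and field names here are illustrative only and are not the bigdata API.

/**
 * Minimal sketch of a cutoff-join cardinality estimate. Illustrative only.
 */
public class CutoffJoinEstimateSketch {

    final double joinHitRatio;

    final long estimatedCardinality;

    final boolean estimateIsLowerBound;

    final boolean estimateIsUpperBound;

    CutoffJoinEstimateSketch(final long sourceRangeCount,
            final boolean sourceSampleExact, final int limit,
            final int inputCount, final int outputCount) {

        // The join hit ratio observed during the cutoff evaluation.
        this.joinHitRatio = inputCount == 0 ? 0d : (double) outputCount
                / inputCount;

        // Scale the source range count by the join hit ratio (assumed
        // formula; the real EdgeSample may differ in detail).
        this.estimatedCardinality = (long) (sourceRangeCount * joinHitRatio);

        // A single input solution hit the cutoff, so the estimate is a
        // lower bound (the true cardinality may be larger).
        this.estimateIsLowerBound = inputCount == 1 && outputCount == limit;

        // The source was itself only a sample and the join completed
        // without hitting the cutoff.
        this.estimateIsUpperBound = !sourceSampleExact && outputCount < limit;
    }

    public static void main(final String[] args) {
        // E.g., 100 sampled inputs produced 20 outputs against a source
        // with a fast range count of 50000.
        final CutoffJoinEstimateSketch e = new CutoffJoinEstimateSketch(
                50000L, false, 100, 100, 20);
        System.out.println("estimatedCardinality=" + e.estimatedCardinality
                + ", lowerBound=" + e.estimateIsLowerBound
                + ", upperBound=" + e.estimateIsUpperBound);
    }
}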
From: <tho...@us...> - 2010-11-12 17:52:18
Revision: 3944 http://bigdata.svn.sourceforge.net/bigdata/?rev=3944&view=rev Author: thompsonbry Date: 2010-11-12 17:52:10 +0000 (Fri, 12 Nov 2010) Log Message: ----------- Added the path prunning logic into JGraph.expand() Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:28:10 UTC (rev 3943) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:52:10 UTC (rev 3944) @@ -1554,9 +1554,9 @@ continue; } - + // The set of vertices used to expand this path in this round. final Set<Vertex> used = new LinkedHashSet<Vertex>(); - + // Check all edges in the graph. for (Edge edgeInGraph : E) { // Figure out which vertices are already part of this path. @@ -1589,12 +1589,23 @@ // Add to the set of paths for this round. tmp.add(p); - + } } - return tmp.toArray(new Path[tmp.size()]); + final Path[] paths_tp1 = tmp.toArray(new Path[tmp.size()]); + + System.err.println("\n*** Paths @ round=" + round + "\n" + + JoinGraph.showTable(paths_tp1)); + + final Path[] paths_tp1_pruned = pruneJoinPaths(paths_tp1); + + System.err.println("\n*** Paths @ round=" + round + + " (after pruning)\n" + + JoinGraph.showTable(paths_tp1_pruned)); + + return paths_tp1_pruned; } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 17:28:10 UTC (rev 3943) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 17:52:10 UTC (rev 3944) @@ -632,133 +632,12 @@ System.err.println("\n*** Paths @ t0\n" + JoinGraph.showTable(paths_t0)); -// final Path selected_t0 = g.getSelectedJoinPath(paths_t0); -// -// if (selected_t0 != null) { -// -// System.err.println("Selected path: " + selected_t0); -// -// } - - /* - * t1 - */ - -// /* -// * The set of one step extensions of those paths. -// * -// * @todo build this programmatically by finding the set of edges -// * branching from the existing paths to a vertex not already part of -// * the existing paths and having a total set of vertices which is -// * distinct from all other paths already generated in this breadth -// * first expansion of the search space. (ROX further constrains the -// * new paths to extend the stop vertex of the path from which they -// * are derived.) -// * -// * @todo always label edges by either minimum bopId or minimum -// * estimated cardinality (with tie broken by bopId)? When extending -// * a path in which more than one edge can reach the target vertex, -// * always chose the edge having the source vertex with the minimum -// * cardinality? 
-// */ -// final Path[] paths_t1 = new Path[] {// -// // t0 -// p0, // (2,3) -// p1, // (2,4) -// p2, // (4,1) -// // t1 -// p0.addEdge(queryEngine, limit*2, g.getEdge(v2, v4)), // aka (v3,v4) -// p0.addEdge(queryEngine, limit*2, g.getEdge(v3, v0)), // -// p0.addEdge(queryEngine, limit*2, g.getEdge(v3, v5)), // -// // -// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v1)), // -// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v3)), // -// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v5)), // -// // -// p2.addEdge(queryEngine, limit*2, g.getEdge(v1, v5)), // aka (4,5) -// p2.addEdge(queryEngine, limit*2, g.getEdge(v4, v3)), // -// p2.addEdge(queryEngine, limit*2, g.getEdge(v4, v2)), // -// - /* - * *** Paths @ t1 - -p[ 1] 0.69, 68931 168831, [ 2 3 0 ] -p[ 2] 1.00, 99900 199800, [ 2 3 4 ] -p[ 3] 1.00, 99900 199800, [ 2 3 5 ] -p[ 5] 1.00, 999 1998, [ 2 4 1 ] -p[ 6] 100.00, 99900 100899, [ 2 4 3 ] -p[ 7] 20.00, 19980 20979, [ 2 4 5 ] -p[ 9] 16.67, 40650 43089, [ 1 4 5 ] -p[10] 1.00, 2439 4878, [ 1 4 2 ] -p[11] 5.00, 12195 14634, [ 1 4 3 ] - - */ -// }; int round = 1; + final Path[] paths_t1 = g.expand(queryEngine, limit, round++, paths_t0); + final Path[] paths_t2 = g.expand(queryEngine, limit, round++, paths_t1); + final Path[] paths_t3 = g.expand(queryEngine, limit, round++, paths_t2); + final Path[] paths_t4 = g.expand(queryEngine, limit, round++, paths_t3); - final Path[] paths_t1 = g.expand(queryEngine, limit, round, paths_t0); - - System.err.println("\n*** Paths @ t1\n" - + JoinGraph.showTable(paths_t1)); - - final Path[] paths_t1_pruned = g.pruneJoinPaths(paths_t1); - - System.err.println("\n*** Paths @ t1 (after pruning)\n" - + JoinGraph.showTable(paths_t1_pruned)); - - /* - * t2 - */ - - final Path[] paths_t2 = g.expand(queryEngine, limit, round++, paths_t1_pruned); - - System.err.println("\n*** Paths @ t2\n" - + JoinGraph.showTable(paths_t2)); - - final Path[] paths_t2_pruned = g.pruneJoinPaths(paths_t2); - - System.err.println("\n*** Paths @ t2 (after pruning)\n" - + JoinGraph.showTable(paths_t2_pruned)); - - -/* -p[ 4] 0.69, 68931 168831, (2 3) (0 3) (0 5) -p[ 4] 0.69, 68931 168831, (2 3) (0 3) (2 4) -p[ 4] 0.69, 68931 168831, (2 3) (0 3) (3 4) -p[ 4] 0.69, 68931 168831, (2 3) (0 3) (3 5) - -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (0 3) -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (0 5) -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (1 5) -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (2 4) -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (3 4) -p[ 5] 1.00, 99900 199800, (2 3) (3 5) (4 5) - -p[ 6] 1.00, 999 1998, (2 4) (1 4) (1 5) -p[ 6] 1.00, 999 1998, (2 4) (1 4) (2 3) -p[ 6] 1.00, 999 1998, (2 4) (1 4) (3 4) -p[ 6] 1.00, 999 1998, (2 4) (1 4) (4 5) - -p[ 7] 100.00, 99900 100899, (2 4) (3 4) (0 3) -p[ 7] 100.00, 99900 100899, (2 4) (3 4) (1 4) -p[ 7] 100.00, 99900 100899, (2 4) (3 4) (3 5) -p[ 7] 100.00, 99900 100899, (2 4) (3 4) (4 5) - -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (0 5) -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (1 4) -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (1 5) -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (2 3) -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (3 4) -p[ 8] 20.00, 19980 20979, (2 4) (4 5) (3 5) */ - -// final Path selected_t1 = g.getSelectedJoinPath(paths_t1); -// -// if (selected_t1 != null) { -// -// System.err.println("Selected path: " + selected_t1); -// -// } - } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
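Revision 3944 above wires pruneJoinPaths() into expand(). The pruning rule, as spelled out in the javadoc later on this page, is that a path dominates another when its vertices are an unordered superset of the other's and its cumulative estimated cardinality is less than or equal. Below is a minimal, self-contained sketch of that rule over a simplified path type; it omits sampling entirely and the beginsWith() propagation shown in revision 3943 below, and none of the names belong to the bigdata codebase. The cumulative costs in main() are taken from the path tables printed by the test above.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/** Simplified join path: vertex (predicate) ids in join order plus a cumulative cost. */
class SimplePath {

    final List<Integer> vertices;

    final long cumulativeEstimatedCardinality;

    SimplePath(final List<Integer> vertices, final long cost) {
        this.vertices = vertices;
        this.cumulativeEstimatedCardinality = cost;
    }

    /** Return true iff this path visits every vertex of [p] (order ignored). */
    boolean isUnorderedSuperSet(final SimplePath p) {
        return new HashSet<Integer>(vertices).containsAll(p.vertices);
    }
}

public class PrunePathsSketch {

    /** Remove every path which is dominated by some other path. */
    static List<SimplePath> prune(final List<SimplePath> in) {
        final Set<SimplePath> pruned = new LinkedHashSet<SimplePath>();
        for (SimplePath pi : in) {
            if (pruned.contains(pi))
                continue;
            for (SimplePath pj : in) {
                if (pi == pj || pruned.contains(pj))
                    continue;
                // [pi] dominates [pj] when it covers at least the same
                // vertices for no more estimated work.
                if (pi.isUnorderedSuperSet(pj)
                        && pi.cumulativeEstimatedCardinality <= pj.cumulativeEstimatedCardinality) {
                    pruned.add(pj);
                }
            }
        }
        final List<SimplePath> keep = new ArrayList<SimplePath>();
        for (SimplePath p : in) {
            if (!pruned.contains(p))
                keep.add(p);
        }
        return keep;
    }

    public static void main(final String[] args) {
        // Two orderings over the same vertices: the cheaper one dominates.
        final SimplePath a = new SimplePath(Arrays.asList(2, 4, 1), 1998L);
        final SimplePath b = new SimplePath(Arrays.asList(1, 4, 2), 4878L);
        System.out.println(prune(Arrays.asList(a, b)).size()); // prints 1
    }
}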
From: <tho...@us...> - 2010-11-12 17:28:16
Revision: 3943 http://bigdata.svn.sourceforge.net/bigdata/?rev=3943&view=rev Author: thompsonbry Date: 2010-11-12 17:28:10 +0000 (Fri, 12 Nov 2010) Log Message: ----------- more on runtime query optimizer Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:17:33 UTC (rev 3942) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 17:28:10 UTC (rev 3943) @@ -1184,6 +1184,35 @@ final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); return a; } + + /** + * Return <code>true</code> if this path begins with the given path. + * + * @param p + * The given path. + * + * @return <code>true</code> if this path begins with the given path. + */ + public boolean beginsWith(final Path p) { + + if (p == null) + throw new IllegalArgumentException(); + + if (p.edges.size() > edges.size()) { + // Proven false since the caller's path is longer. + return false; + } + + for (int i = 0; i < p.edges.size(); i++) { + final Edge eSelf = edges.get(i); + final Edge eOther = p.edges.get(i); + if (eSelf != eOther) { + return false; + } + } + + return true; + } /** * Add an edge to a path, computing the estimated cardinality of the new @@ -1498,6 +1527,78 @@ } /** + * Do one breadth first expansion. + * + * @param queryEngine + * @param limit + * @param round + * @param a + * @return + * @throws Exception + */ + final public Path[] expand(final QueryEngine queryEngine, final int limit, + final int round, final Path[] a) throws Exception { + + final List<Path> tmp = new LinkedList<Path>(); + + // First, copy all existing paths. + for (Path x : a) { + tmp.add(x); + } + + // Then expand each path. + for (Path x : a) { + + if (x.edges.size() < round) { + + continue; + + } + + final Set<Vertex> used = new LinkedHashSet<Vertex>(); + + for (Edge edgeInGraph : E) { + + // Figure out which vertices are already part of this path. + final boolean v1Found = x.contains(edgeInGraph.v1); + final boolean v2Found = x.contains(edgeInGraph.v2); + + if (!v1Found && !v2Found) { + // Edge is not connected to this path. + continue; + } + + if (v1Found && v2Found) { + // Edge is already present in this path. + continue; + } + + final Vertex newVertex = v1Found ? edgeInGraph.v2 : edgeInGraph.v1; + + if(used.contains(newVertex)) { + // Vertex already used to extend this path. + continue; + } + + // add the new vertex to the set of used vertices. + used.add(newVertex); + + // Extend the path to the new vertex. + final Path p = x.addEdge(queryEngine, limit * round, + edgeInGraph); + + // Add to the set of paths for this round. + tmp.add(p); + + } + + } + + return tmp.toArray(new Path[tmp.size()]); + + } + + /** * Return the {@link Vertex} whose {@link IPredicate} is associated with * the given {@link BOp.Annotations#BOP_ID}. 
* @@ -2209,12 +2310,16 @@ final Path Pi = a[i]; if (Pi.sample == null) throw new RuntimeException("Not sampled: " + Pi); + if (pruned.contains(Pi)) + continue; for (int j = 0; j < a.length; j++) { if (i == j) continue; final Path Pj = a[j]; if (Pj.sample == null) throw new RuntimeException("Not sampled: " + Pj); + if (pruned.contains(Pj)) + continue; final boolean isPiSuperSet = Pi.isUnorderedSuperSet(Pj); if(!isPiSuperSet) { // Can not directly compare these join paths. @@ -2223,23 +2328,35 @@ final long costPi = Pi.cumulativeEstimatedCardinality; final long costPj = Pj.cumulativeEstimatedCardinality; final boolean lte = costPi <= costPj; + List<Integer> prunedByThisPath = null; + if (lte) { + prunedByThisPath = new LinkedList<Integer>(); + if (pruned.add(Pj)) + prunedByThisPath.add(j); + for (int k = 0; k < a.length; k++) { + final Path x = a[k]; + if (x.beginsWith(Pj)) { + if (pruned.add(x)) + prunedByThisPath.add(k); + } + } + } { f .format( "Comparing: P[%2d] with P[%2d] : %10d %2s %10d %s", i, j, costPi, (lte ? "<=" : ">"), - costPj, lte ? " **prune P["+j+"]**" : ""); + costPj, lte ? " *** pruned " + + prunedByThisPath : ""); System.err.println(sb); sb.setLength(0); } - if (lte) { - pruned.add(Pj); - } } // Pj } // Pi - System.err.println("Pruned "+pruned.size()+" of out "+a.length+" paths"); + System.err.println("Pruned " + pruned.size() + " of out " + + a.length + " paths"); final Set<Path> keep = new LinkedHashSet<Path>(); - for(Path p : a) { + for (Path p : a) { if(pruned.contains(p)) continue; keep.add(p); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 17:17:33 UTC (rev 3942) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 17:28:10 UTC (rev 3943) @@ -641,47 +641,116 @@ // } /* - * The set of one step extensions of those paths. - * - * @todo build this programmatically by finding the set of edges - * branching from the existing paths to a vertex not already part of - * the existing paths and having a total set of vertices which is - * distinct from all other paths already generated in this breadth - * first expansion of the search space. (ROX further constrains the - * new paths to extend the stop vertex of the path from which they - * are derived.) - * - * @todo always label edges by either minimum bopId or minimum - * estimated cardinality (with tie broken by bopId)? When extending - * a path in which more than one edge can reach the target vertex, - * always chose the edge having the source vertex with the minimum - * cardinality? + * t1 */ - final Path[] paths_t1 = new Path[] {// - // t0 - p0, // (2,3) - p1, // (2,4) - p2, // (4,1) - // t1 - p0.addEdge(queryEngine, limit, g.getEdge(v2, v4)), // aka (v3,v4) - p0.addEdge(queryEngine, limit, g.getEdge(v3, v0)), // - p0.addEdge(queryEngine, limit, g.getEdge(v3, v5)), // - // - p1.addEdge(queryEngine, limit, g.getEdge(v4, v1)), // - p1.addEdge(queryEngine, limit, g.getEdge(v4, v3)), // - p1.addEdge(queryEngine, limit, g.getEdge(v4, v5)), // - // - p2.addEdge(queryEngine, limit, g.getEdge(v1, v5)), // aka (4,5) - p2.addEdge(queryEngine, limit, g.getEdge(v4, v3)), // - p2.addEdge(queryEngine, limit, g.getEdge(v4, v2)), // + +// /* +// * The set of one step extensions of those paths. 
+// * +// * @todo build this programmatically by finding the set of edges +// * branching from the existing paths to a vertex not already part of +// * the existing paths and having a total set of vertices which is +// * distinct from all other paths already generated in this breadth +// * first expansion of the search space. (ROX further constrains the +// * new paths to extend the stop vertex of the path from which they +// * are derived.) +// * +// * @todo always label edges by either minimum bopId or minimum +// * estimated cardinality (with tie broken by bopId)? When extending +// * a path in which more than one edge can reach the target vertex, +// * always chose the edge having the source vertex with the minimum +// * cardinality? +// */ +// final Path[] paths_t1 = new Path[] {// +// // t0 +// p0, // (2,3) +// p1, // (2,4) +// p2, // (4,1) +// // t1 +// p0.addEdge(queryEngine, limit*2, g.getEdge(v2, v4)), // aka (v3,v4) +// p0.addEdge(queryEngine, limit*2, g.getEdge(v3, v0)), // +// p0.addEdge(queryEngine, limit*2, g.getEdge(v3, v5)), // +// // +// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v1)), // +// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v3)), // +// p1.addEdge(queryEngine, limit*2, g.getEdge(v4, v5)), // +// // +// p2.addEdge(queryEngine, limit*2, g.getEdge(v1, v5)), // aka (4,5) +// p2.addEdge(queryEngine, limit*2, g.getEdge(v4, v3)), // +// p2.addEdge(queryEngine, limit*2, g.getEdge(v4, v2)), // +// + /* + * *** Paths @ t1 - }; +p[ 1] 0.69, 68931 168831, [ 2 3 0 ] +p[ 2] 1.00, 99900 199800, [ 2 3 4 ] +p[ 3] 1.00, 99900 199800, [ 2 3 5 ] +p[ 5] 1.00, 999 1998, [ 2 4 1 ] +p[ 6] 100.00, 99900 100899, [ 2 4 3 ] +p[ 7] 20.00, 19980 20979, [ 2 4 5 ] +p[ 9] 16.67, 40650 43089, [ 1 4 5 ] +p[10] 1.00, 2439 4878, [ 1 4 2 ] +p[11] 5.00, 12195 14634, [ 1 4 3 ] + */ +// }; + int round = 1; + + final Path[] paths_t1 = g.expand(queryEngine, limit, round, paths_t0); + System.err.println("\n*** Paths @ t1\n" + JoinGraph.showTable(paths_t1)); - g.pruneJoinPaths(paths_t1); + final Path[] paths_t1_pruned = g.pruneJoinPaths(paths_t1); + + System.err.println("\n*** Paths @ t1 (after pruning)\n" + + JoinGraph.showTable(paths_t1_pruned)); + + /* + * t2 + */ + final Path[] paths_t2 = g.expand(queryEngine, limit, round++, paths_t1_pruned); + + System.err.println("\n*** Paths @ t2\n" + + JoinGraph.showTable(paths_t2)); + + final Path[] paths_t2_pruned = g.pruneJoinPaths(paths_t2); + + System.err.println("\n*** Paths @ t2 (after pruning)\n" + + JoinGraph.showTable(paths_t2_pruned)); + + +/* +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (0 5) +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (2 4) +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (3 4) +p[ 4] 0.69, 68931 168831, (2 3) (0 3) (3 5) + +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (0 3) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (0 5) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (1 5) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (2 4) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (3 4) +p[ 5] 1.00, 99900 199800, (2 3) (3 5) (4 5) + +p[ 6] 1.00, 999 1998, (2 4) (1 4) (1 5) +p[ 6] 1.00, 999 1998, (2 4) (1 4) (2 3) +p[ 6] 1.00, 999 1998, (2 4) (1 4) (3 4) +p[ 6] 1.00, 999 1998, (2 4) (1 4) (4 5) + +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (0 3) +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (1 4) +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (3 5) +p[ 7] 100.00, 99900 100899, (2 4) (3 4) (4 5) + +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (0 5) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (1 4) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (1 5) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (2 3) +p[ 8] 20.00, 19980 20979, (2 
4) (4 5) (3 4) +p[ 8] 20.00, 19980 20979, (2 4) (4 5) (3 5) */ + // final Path selected_t1 = g.getSelectedJoinPath(paths_t1); // // if (selected_t1 != null) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
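Revision 3943 above introduces expand(), which performs one breadth-first expansion of the candidate join paths: each path is extended by every edge that connects a vertex already on the path to a vertex not yet on it, and a new vertex is used at most once per path per round. A minimal sketch of just that expansion step over plain integer vertex ids follows; it leaves out the per-round limit, the cutoff sampling, and the pruning, and the names are illustrative rather than the bigdata API.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/** An undirected edge between two vertex (predicate) ids. */
class SimpleEdge {
    final int v1, v2;
    SimpleEdge(final int v1, final int v2) {
        this.v1 = v1;
        this.v2 = v2;
    }
}

public class ExpandPathsSketch {

    /**
     * One breadth-first round: extend each path by every edge connecting a
     * vertex already on the path to a vertex not yet on it, using each new
     * vertex at most once per path.
     */
    static List<List<Integer>> expand(final List<SimpleEdge> graph,
            final List<List<Integer>> paths) {
        final List<List<Integer>> out = new ArrayList<List<Integer>>();
        for (List<Integer> path : paths) {
            // Vertices already used to extend this path in this round.
            final Set<Integer> used = new LinkedHashSet<Integer>();
            for (SimpleEdge e : graph) {
                final boolean v1Found = path.contains(e.v1);
                final boolean v2Found = path.contains(e.v2);
                if (v1Found == v2Found) {
                    // Either not connected to this path or already in it.
                    continue;
                }
                final int newVertex = v1Found ? e.v2 : e.v1;
                if (!used.add(newVertex)) {
                    // Vertex already used to extend this path.
                    continue;
                }
                final List<Integer> extended = new ArrayList<Integer>(path);
                extended.add(newVertex);
                out.add(extended);
            }
        }
        return out;
    }

    public static void main(final String[] args) {
        final List<SimpleEdge> graph = Arrays.asList(new SimpleEdge(2, 3),
                new SimpleEdge(2, 4), new SimpleEdge(4, 1), new SimpleEdge(3, 5));
        // Expand the single-edge path [2, 4] by one vertex.
        System.out.println(expand(graph,
                Collections.singletonList(Arrays.asList(2, 4))));
        // prints [[2, 4, 3], [2, 4, 1]]
    }
}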
From: <mar...@us...> - 2010-11-12 17:17:40
Revision: 3942 http://bigdata.svn.sourceforge.net/bigdata/?rev=3942&view=rev Author: martyncutcher Date: 2010-11-12 17:17:33 +0000 (Fri, 12 Nov 2010) Log Message: ----------- Remove BlobAllocator and handle deferFree Blobs with addressing sign convention Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/IStore.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Removed Paths: ------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java Deleted: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java 2010-11-12 16:33:28 UTC (rev 3941) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java 2010-11-12 17:17:33 UTC (rev 3942) @@ -1,343 +0,0 @@ -package com.bigdata.rwstore; - -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.log4j.Logger; - -import com.bigdata.rwstore.RWStore.AllocationStats; -import com.bigdata.util.ChecksumUtility; - -/** - * BlobAllocator. - * - * Manages Blob allocations using a list of {@link FixedAllocator}s. - * - * The main advantage of this is for re-allocation, since the - * {@link FixedAllocator}s can be efficiently re-cycled where a fixed Blob - * creates issues of best fit and fragmentation. - * - * Some simple patterns would cause un-reallocatable storage, consider a Blob - * that always re-allocated to a larger size, or a pattern where several blobs - * got larger together, in these scenarios, smaller allocations would never be - * re-used, whilst the mechanism of component based allocation is easily - * re-used. - * - * @author mgc - */ -public class BlobAllocator implements Allocator { - - private static final transient Logger log = Logger.getLogger(BlobAllocator.class); - - final private int[] m_hdrs = new int[254]; - final private RWStore m_store; - private int m_diskAddr; - private int m_index; - private int m_sortAddr; - private ArrayList m_freeList; - private long m_startAddr; - /** - * There are 256 ints in a BlobAllocator, the first is used to provide the - * sortAddr, and the last for the checksum, leaving 254 BlobHdr addresses - */ - private int m_freeSpots = 254; - - public BlobAllocator(final RWStore store, final int sortAddr) { - m_store = store; - m_sortAddr = sortAddr; - - if (log.isInfoEnabled()) - log.info("New BlobAllocator"); - } - - public void addAddresses(final ArrayList addrs) { - // not relevant for BlobAllocators - } - - public boolean addressInRange(final int addr) { - // not relevant for BlobAllocators - return false; - } - - /** - * Should not be called directly since the PSOutputStream - * manages the blob allocations. 
- */ - public int alloc(final RWStore store, final int size, final IAllocationContext context) { - throw new UnsupportedOperationException("Blob allocators do not allocate addresses directly"); - } - - public boolean free(final int addr, final int sze) { - if (sze < (m_store.m_maxFixedAlloc-4)) - throw new IllegalArgumentException("Unexpected address size"); - final int alloc = m_store.m_maxFixedAlloc-4; - final int blcks = (alloc - 1 + sze)/alloc; - - int hdr_idx = (-addr) & RWStore.OFFSET_BITS_MASK; - if (hdr_idx > m_hdrs.length) - throw new IllegalArgumentException("free BlobAllocation problem, hdr offset: " + hdr_idx + ", avail:" + m_hdrs.length); - - final int hdr_addr = m_hdrs[hdr_idx]; - - if (hdr_addr == 0) { - return false; - } - - // read in header block, then free each reference - final byte[] hdr = new byte[(blcks+1) * 4 + 4]; // add space for checksum - m_store.getData(hdr_addr, hdr); - - final DataInputStream instr = new DataInputStream( - new ByteArrayInputStream(hdr, 0, hdr.length-4) ); - try { - final int allocs = instr.readInt(); - for (int i = 0; i < allocs; i++) { - final int nxt = instr.readInt(); - m_store.free(nxt, m_store.m_maxFixedAlloc); - } - m_store.free(hdr_addr, hdr.length); - m_hdrs[hdr_idx] = 0; - if (m_freeSpots++ == 0) { - m_freeList.add(this); - } - - return true; - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } - } - - public int getFirstFixedForBlob(final int addr, final int sze) { - if (sze < (m_store.m_maxFixedAlloc-4)) - throw new IllegalArgumentException("Unexpected address size: " + sze); - - final int alloc = m_store.m_maxFixedAlloc-4; - final int blcks = (alloc - 1 + sze)/alloc; - - final int hdr_idx = (-addr) & RWStore.OFFSET_BITS_MASK; - if (hdr_idx > m_hdrs.length) - throw new IllegalArgumentException("free BlobAllocation problem, hdr offset: " + hdr_idx + ", avail:" + m_hdrs.length); - - final int hdr_addr = m_hdrs[hdr_idx]; - - if (hdr_addr == 0) { - throw new IllegalArgumentException("getFirstFixedForBlob called with unallocated address"); - } - - // read in header block, then free each reference - final byte[] hdr = new byte[(blcks+1) * 4 + 4]; // add space for checksum - m_store.getData(hdr_addr, hdr); - - final DataInputStream instr = new DataInputStream( - new ByteArrayInputStream(hdr, 0, hdr.length-4) ); - try { - final int nallocs = instr.readInt(); - final int faddr = instr.readInt(); - - return faddr; - - } catch (IOException ioe) { - throw new RuntimeException("Unable to retrieve first fixed address", ioe); - } - } - - public int getBlockSize() { - // Not relevant for Blobs - return 0; - } - - public int getDiskAddr() { - return m_diskAddr; - } - - /** - * returns physical address of blob header if any. - */ - public long getPhysicalAddress(final int offset) { - return m_store.physicalAddress(m_hdrs[offset]); - } - - /** - * Since the Blob Allocator simply manages access to FixedAllocation blocks it does not manage any - * allocations directly. 
- */ - public int getPhysicalSize(final int offset) { - return 0; - } - - /** - * The startAddr - */ - public long getStartAddr() { - // not relevant for blob - return RWStore.convertAddr(m_sortAddr); - } - - public String getStats(final AtomicLong counter) { - return ""; - } - - /** - * hasFree if there are any non-zero entries in the m_hdr array; - */ - public boolean hasFree() { - return m_freeSpots > 0; - } - - public void preserveSessionData() { - // all data held by fixed allocators - } - - /** - * FIXME: There is a symmetry problem with read/write where one takes a Stream and the other - * return a byte[]. This is problematical with using the checksums. - */ - public void read(final DataInputStream str) { - m_freeSpots = 0; - try { - for (int i = 0; i < 254; i++) { - m_hdrs[i] = str.readInt(); - if (m_hdrs[i] == 0) m_freeSpots++; - } - final int chk = str.readInt(); - // checksum int chk = ChecksumUtility.getCHK().checksum(buf, str.size()); - - } catch (IOException e) { - log.error(e,e); - throw new IllegalStateException(e); - } - } - - public void setDiskAddr(final int addr) { - m_diskAddr = addr; - } - - public void setFreeList(final ArrayList list) { - m_freeList = list; - - if (hasFree()) { - m_freeList.add(this); - } - } - - /** - * setIndex is called in two places, firstly to set the original index and secondly on restore - * from storage to re-establish the order. - * - * When called initially, the m_startAddr will be zero and so must be set by retrieving the - * m_startAddr of the previous block (if any). Now, since a Blob must use fixed allocations we - * are guaranteed that a BlobAllocator will not be the first allocator. To derive a startAddr that - * can safely be used to sort a BlobAllocator against the previous (and subsequent) allocators we - * access the previous allocators address. - */ - public void setIndex(final int index) { - m_index = index; - } - - // @todo why is this a NOP? Javadoc. - public boolean verify(final int addr) { - // TODO Auto-generated method stub - return false; - } - - public byte[] write() { - try { - final byte[] buf = new byte[1024]; // @todo why this const? - final DataOutputStream str = new DataOutputStream(new FixedOutputStream(buf)); - - str.writeInt(m_sortAddr); - - for (int i = 0; i < 254; i++) { // @todo why this const? - str.writeInt(m_hdrs[i]); - } - - // add checksum - final int chk = ChecksumUtility.getCHK().checksum(buf, str.size()); - str.writeInt(chk); - - return buf; - } catch (IOException ioe) { - throw new IllegalStateException(ioe); - } - } - - public int compareTo(final Object o) { - final Allocator alloc = (Allocator) o; - - assert getStartAddr() != alloc.getStartAddr(); - - return (getStartAddr() < alloc.getStartAddr()) ? 
-1 : 1; - } - - public int register(final int addr) { - assert m_freeSpots > 0; - - m_store.addToCommit(this); - - for (int i = 0; i < 254; i++) { - if (m_hdrs[i] == 0) { - m_hdrs[i] = addr; - - if (--m_freeSpots == 0) { - m_freeList.remove(this); - } - - final int ret = -((m_index << RWStore.OFFSET_BITS) + i); - if (((-ret) & RWStore.OFFSET_BITS_MASK) > m_hdrs.length) - throw new IllegalStateException("Invalid blob offset: " + ((-ret) & RWStore.OFFSET_BITS_MASK)); - - return ret; - } - } - - throw new IllegalStateException("BlobAllocator unable to find free slot"); - } - - public int getRawStartAddr() { - return m_sortAddr; - } - - public int getIndex() { - return m_index; - } - - public int getBlobHdrAddress(final int hdrIndex) { - return m_hdrs[hdrIndex]; - } - - public void appendShortStats(final StringBuilder str, final AllocationStats[] stats) { - if (stats == null) { - str.append("Index: " + m_index + ", address: " + getStartAddr() + ", BLOB\n"); - } else { - stats[stats.length-1].m_filledSlots += 254 - m_freeSpots; - stats[stats.length-1].m_reservedSlots += 254; - } - } - - public boolean isAllocated(final int offset) { - return m_hdrs[offset] != 0; - } - - /** - * This is okay as a NOP. The true allocation is managed by the - * FixedAllocators. - */ - public void detachContext(final IAllocationContext context) { - // NOP - } - - /** - * Since the real allocation is in the FixedAllocators, this should delegate - * to the first address, in which case - */ - public boolean canImmediatelyFree(final int addr, final int size, final IAllocationContext context) { - final int faddr = this.getFirstFixedForBlob(addr, size); - - return m_store.getBlockByAddress(faddr).canImmediatelyFree(faddr, 0, context); - } - -} Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/IStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/IStore.java 2010-11-12 16:33:28 UTC (rev 3941) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/IStore.java 2010-11-12 17:17:33 UTC (rev 3942) @@ -117,18 +117,18 @@ */ public File getStoreFile(); - /** - * Called by the PSOutputStream to register the header block of a blob. The - * store must return a new address that is used to retrieve the blob header. - * This double indirection is required to be able to manage the blobs, since - * the blob header itself is of variable size and is handled by the standard - * FixedAllocators in the RWStore. - * - * @param addr - * The address of the header block of the blob. - * - * @return The - */ - public int registerBlob(int addr); +// /** +// * Called by the PSOutputStream to register the header block of a blob. The +// * store must return a new address that is used to retrieve the blob header. +// * This double indirection is required to be able to manage the blobs, since +// * the blob header itself is of variable size and is handled by the standard +// * FixedAllocators in the RWStore. +// * +// * @param addr +// * The address of the header block of the blob. 
+// * +// * @return The +// */ +// public int registerBlob(int addr); } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-12 16:33:28 UTC (rev 3941) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-12 17:17:33 UTC (rev 3942) @@ -320,7 +320,7 @@ private ArrayList<FixedAllocator> m_freeFixed[]; /** lists of free blob allocators. */ - private final ArrayList<BlobAllocator> m_freeBlobs; + // private final ArrayList<BlobAllocator> m_freeBlobs; /** lists of blocks requiring commitment. */ private final ArrayList<Allocator> m_commitList; @@ -520,7 +520,7 @@ m_allocs = new ArrayList<Allocator>(); - m_freeBlobs = new ArrayList<BlobAllocator>(); + // m_freeBlobs = new ArrayList<BlobAllocator>(); try { final RandomAccessFile m_raf = fileMetadata.getRandomAccessFile(); @@ -915,20 +915,17 @@ final int allocSize = strBuf.readInt(); // if Blob < 0 final Allocator allocator; final ArrayList<? extends Allocator> freeList; - if (allocSize > 0) { - int index = 0; - int fixedSize = m_minFixedAlloc; - while (fixedSize < allocSize) - fixedSize = 64 * m_allocSizes[++index]; + assert allocSize > 0; - allocator = new FixedAllocator(this, allocSize);//, m_writeCache); + int index = 0; + int fixedSize = m_minFixedAlloc; + while (fixedSize < allocSize) + fixedSize = 64 * m_allocSizes[++index]; - freeList = m_freeFixed[index]; - } else { - allocator = new BlobAllocator(this, allocSize); - freeList = m_freeBlobs; - } + allocator = new FixedAllocator(this, allocSize);//, m_writeCache); + freeList = m_freeFixed[index]; + allocator.read(strBuf); allocator.setDiskAddr(i); // store bit, not physical // address! @@ -1757,7 +1754,7 @@ m_commitList.clear(); m_allocs.clear(); - m_freeBlobs.clear(); + // m_freeBlobs.clear(); final int numFixed = m_allocSizes.length; for (int i = 0; i < numFixed; i++) { @@ -2904,34 +2901,34 @@ * blob data, the blob allocator retrieves the blob header and reads the * data from that into the passed byte array. 
*/ - public int registerBlob(final int addr) { - m_allocationLock.lock(); - try { - BlobAllocator ba = null; - if (m_freeBlobs.size() > 0) { - ba = (BlobAllocator) m_freeBlobs.get(0); - } - if (ba == null) { - final Allocator lalloc = (Allocator) m_allocs.get(m_allocs.size() - 1); - // previous block start address - final int psa = lalloc.getRawStartAddr(); - assert (psa - 1) > m_nextAllocation; - ba = new BlobAllocator(this, psa - 1); - ba.setFreeList(m_freeBlobs); // will add itself to the free list - ba.setIndex(m_allocs.size()); - m_allocs.add(ba); - } +// public int registerBlob(final int addr) { +// m_allocationLock.lock(); +// try { +// BlobAllocator ba = null; +// if (m_freeBlobs.size() > 0) { +// ba = (BlobAllocator) m_freeBlobs.get(0); +// } +// if (ba == null) { +// final Allocator lalloc = (Allocator) m_allocs.get(m_allocs.size() - 1); +// // previous block start address +// final int psa = lalloc.getRawStartAddr(); +// assert (psa - 1) > m_nextAllocation; +// ba = new BlobAllocator(this, psa - 1); +// ba.setFreeList(m_freeBlobs); // will add itself to the free list +// ba.setIndex(m_allocs.size()); +// m_allocs.add(ba); +// } +// +// if (!m_commitList.contains(ba)) { +// m_commitList.add(ba); +// } +// +// return ba.register(addr); +// } finally { +// m_allocationLock.unlock(); +// } +// } - if (!m_commitList.contains(ba)) { - m_commitList.add(ba); - } - - return ba.register(addr); - } finally { - m_allocationLock.unlock(); - } - } - public void addToCommit(final Allocator allocator) { if (!m_commitList.contains(allocator)) { m_commitList.add(allocator); @@ -3108,11 +3105,11 @@ public void deferFree(final int rwaddr, final int sze) { m_allocationLock.lock(); try { - m_deferredFreeOut.writeInt(rwaddr); - - final Allocator alloc = getBlockByAddress(rwaddr); - if (alloc instanceof BlobAllocator) { + if (sze > this.m_maxFixedAlloc) { + m_deferredFreeOut.writeInt(-rwaddr); m_deferredFreeOut.writeInt(sze); + } else { + m_deferredFreeOut.writeInt(rwaddr); } } catch (IOException e) { throw new RuntimeException("Could not free: rwaddr=" + rwaddr @@ -3207,12 +3204,11 @@ while (nxtAddr != 0) { // while (false && addrs-- > 0) { - final Allocator alloc = getBlock(nxtAddr); - if (alloc instanceof BlobAllocator) { + if (nxtAddr > 0) { // Blob final int bloblen = strBuf.readInt(); assert bloblen > 0; // a Blob address MUST have a size - immediateFree(nxtAddr, bloblen); + immediateFree(-nxtAddr, bloblen); } else { immediateFree(nxtAddr, 0); // size ignored for FreeAllocators } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
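Revision 3942 above removes the BlobAllocator and records deferred frees with a sign convention instead: since RWStore addresses are negative ints (see BlobAllocator.register(), which returns a negated offset), a blob free is written as the negated, hence positive, address followed by the blob size, while a fixed allocation is written as the bare address alone. A small self-contained sketch of that encode/decode convention follows, assuming a zero entry terminates the log as in the reading loop above; the method names are illustrative and this is not the RWStore API.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

/**
 * Illustrative encode/decode of a deferred-free log using the sign
 * convention: blobs are written as a negated address plus a size, fixed
 * allocations as the address only (the allocator already knows the size).
 */
public class DeferredFreeLogSketch {

    static void writeDeferredFree(final DataOutputStream out, final int rwaddr,
            final int size, final int maxFixedAlloc) throws IOException {
        if (size > maxFixedAlloc) {
            out.writeInt(-rwaddr); // blob: negated (hence positive) address...
            out.writeInt(size);    // ...followed by its size
        } else {
            out.writeInt(rwaddr);  // fixed allocation: address only
        }
    }

    static void readDeferredFrees(final DataInputStream in) throws IOException {
        int nxtAddr = in.readInt();
        while (nxtAddr != 0) {     // a zero entry terminates the log
            if (nxtAddr > 0) {
                // Blob entry: restore the negative address, read the size.
                final int blobLen = in.readInt();
                System.out.println("free blob addr=" + (-nxtAddr) + " len=" + blobLen);
            } else {
                // Fixed allocation: size is implied by the allocator.
                System.out.println("free fixed addr=" + nxtAddr);
            }
            nxtAddr = in.readInt();
        }
    }

    public static void main(final String[] args) throws IOException {
        final ByteArrayOutputStream buf = new ByteArrayOutputStream();
        final DataOutputStream out = new DataOutputStream(buf);
        writeDeferredFree(out, -1024, 64, 8192);      // a fixed allocation
        writeDeferredFree(out, -2048, 1000000, 8192); // a blob
        out.writeInt(0); // terminator
        readDeferredFrees(new DataInputStream(
                new ByteArrayInputStream(buf.toByteArray())));
    }
}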
From: <tho...@us...> - 2010-11-12 16:33:34
Revision: 3941 http://bigdata.svn.sourceforge.net/bigdata/?rev=3941&view=rev Author: thompsonbry Date: 2010-11-12 16:33:28 +0000 (Fri, 12 Nov 2010) Log Message: ----------- more work on runtime query optimization Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 15:48:11 UTC (rev 3940) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 16:33:28 UTC (rev 3941) @@ -31,6 +31,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.Formatter; import java.util.Iterator; import java.util.LinkedHashSet; @@ -525,14 +526,18 @@ * @param outputCount * The #of binding sets generated before the join was cutoff. */ - EdgeSample(final VertexSample sourceVertexSample, final int limit, + EdgeSample(//final VertexSample sourceVertexSample, + final long sourceSampleRangeCount, + final boolean sourceSampleExact, + final int limit, final int inputCount, final int outputCount, final IBindingSet[] sample) { if(sample == null) throw new IllegalArgumentException(); - this.rangeCount = sourceVertexSample.rangeCount; +// this.rangeCount = sourceVertexSample.rangeCount; + this.rangeCount = sourceSampleRangeCount; this.limit = limit; @@ -546,10 +551,11 @@ estimateIsLowerBound = inputCount == 1 && outputCount == limit; - estimateIsUpperBound = !sourceVertexSample.exact +// final boolean sourceSampleExact = sourceVertexSample.exact; + estimateIsUpperBound = !sourceSampleExact && outputCount < limit; - this.exact = sourceVertexSample.exact && outputCount < limit; + this.exact = sourceSampleExact && outputCount < limit; this.sample = sample; } @@ -777,8 +783,9 @@ } // Sample the edge and save the sample on the edge as a side-effect. - this.sample = estimateCardinality(queryEngine, limit, v, vp, sourceSample); - + this.sample = estimateCardinality(queryEngine, limit, v, vp, + v.sample.rangeCount, v.sample.exact, sourceSample); + return sample.estimatedCardinality; } @@ -806,7 +813,9 @@ */ public EdgeSample estimateCardinality(final QueryEngine queryEngine, final int limit, final Vertex vSource, final Vertex vTarget, - IBindingSet[] sourceSample) throws Exception { + final long sourceSampleRangeCount, + final boolean sourceSampleExact, IBindingSet[] sourceSample) + throws Exception { if (limit <= 0) throw new IllegalArgumentException(); @@ -884,13 +893,15 @@ * FIXME I am not convinced that this approach is quite right. I am * also not convinced that this approach will correctly carry the * additional metadata on the EdgeSample (exact, estimate overflow - * and underflow, etc). + * and underflow, etc). [This needs to be the estimated cardinality + * of the path which is being extended by an edge to the target + * vertex.] */ - final VertexSample moreSelectiveVertexSample = vSource.sample.rangeCount < vTarget.sample.rangeCount ? 
vSource.sample - : vTarget.sample; +// final VertexSample moreSelectiveVertexSample = vSource.sample.rangeCount < vTarget.sample.rangeCount ? vSource.sample +// : vTarget.sample; final EdgeSample edgeSample = new EdgeSample( - moreSelectiveVertexSample/* vSource.sample */, limit, + sourceSampleRangeCount, sourceSampleExact, limit, inputCount, outputCount, result .toArray(new IBindingSet[result.size()])); @@ -958,25 +969,42 @@ */ public static class Path { + /** + * An immutable ordered list of the edges in the (aka the sequence of + * joins represented by this path). + */ public final List<Edge> edges; - /* - * These fields carry state used by chainSample. It would be better to - * have that state on a data structure which is purely local to - * chainSample, but perhaps Path is that data structure. + /** + * The sample obtained by the step-wise cutoff evaluation of the ordered + * edges of the path. This sample is generated one edge at a time rather + * than by attempting the cutoff evaluation of the entire join path (the + * latter approach does allow us to limit the amount of work to be done + * to satisfy the cutoff). */ - - public EdgeSample sample = null; + final public EdgeSample sample; -// /** -// * Input to the next round of sampling. -// */ -// private VertexSample inputSample; + /** + * The cumulative estimated cardinality of the path. This is zero for an + * empty path. For a path consisting of a single edge, this is the + * estimated cardinality of that edge. When creating a new path adding + * an edge to an existing path, the cumulative cardinality of the new + * path is the cumulative cardinality of the existing path plus the + * estimated cardinality of the cutoff join of the new edge given the + * input sample of the existing path. + */ + final public long cumulativeEstimatedCardinality; /** * The vertex at which the path from which this path was derived * stopped. This is initialized to the source vertex when entering the * chainSample() method. + * + * @todo This is used by ROX to only grow the path from its end. We + * could of course just look at the last edge in the path. + * However, I think that I prefer to grow a path from any + * branching vertex as long as the path does not duplicate any + * path already generated (including those which were pruned). */ private Vertex stopVertex; @@ -990,7 +1018,8 @@ sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() + ")"); first = false; } - sb.append(",sample=" + sample + "}"); + sb.append(",cumEstCard=" + cumulativeEstimatedCardinality + + ",sample=" + sample + "}"); return sb.toString(); } @@ -998,7 +1027,9 @@ * Create an empty path. */ public Path() { - this.edges = new LinkedList<Edge>(); + this.edges = Collections.emptyList(); + this.cumulativeEstimatedCardinality = 0; + this.sample = null; } /** @@ -1008,14 +1039,49 @@ * The edge. */ public Path(final Edge e) { + if (e == null) throw new IllegalArgumentException(); - this.edges = new LinkedList<Edge>(); - this.edges.add(e); + + if (e.sample == null) + throw new IllegalArgumentException("Not sampled: "+e); + + this.edges = Collections.singletonList(e); + this.sample = e.sample; + + this.cumulativeEstimatedCardinality = e.sample.estimatedCardinality; + } /** + * Constructor used by {@link #addEdge(QueryEngine, int, Edge)} + * @param edges The edges in the new path. + * @param cumulativeEstimatedCardinality The cumulative estimated cardinality of the new path. 
+ * @param sample The sample from the last + */ + private Path(final List<Edge> edges, + final long cumulativeEstimatedCardinality, + final EdgeSample sample) { + + if (edges == null) + throw new IllegalArgumentException(); + + if (cumulativeEstimatedCardinality < 0) + throw new IllegalArgumentException(); + + if (sample == null) + throw new IllegalArgumentException(); + + this.edges = Collections.unmodifiableList(edges); + + this.cumulativeEstimatedCardinality = cumulativeEstimatedCardinality; + + this.sample = sample; + + } + + /** * Return <code>true</code> iff the {@link Path} contains at least one * {@link Edge} for that {@link Vertex}. * @@ -1038,6 +1104,86 @@ return false; } + + /** + * Return <code>true</code> if this path is an unordered super set of + * the given path. In the case where both paths have the same vertices + * this will also return <code>true</code>. + * + * @param p + * Another path. + * + * @return <code>true</code> if this path is an unordered super set of + * the given path. + */ + public boolean isUnorderedSuperSet(final Path p) { + + if (p == null) + throw new IllegalArgumentException(); + + if (edges.size() < p.edges.size()) { + /* + * Fast rejection. This assumes that each edge after the first + * adds one distinct vertex to the path. That assumption is + * enforced by #addEdge(). + */ + return false; + } + + final Vertex[] v1 = getVertices(); + final Vertex[] v2 = p.getVertices(); + + if (v1.length < v2.length) { + // Proven false since the other set is larger. + return false; + } + + /* + * Scan the vertices of the caller's path. If any of those vertices + * are NOT found in this path then the caller's path can not be a + * subset of this path. + */ + for (int i = 0; i < v2.length; i++) { + + final Vertex tmp = v2[i]; + + boolean found = false; + for (int j = 0; j < v1.length; j++) { + + if (v1[j] == tmp) { + found = true; + break; + } + + } + + if (!found) { + return false; + } + + } + + return true; + + } + + /** + * Return the vertices in this path (in path order). + * + * @return The vertices (in path order). + * + * @todo this could be rewritten without the toArray() using a method + * which visits the vertices of a path in any order. + */ + public Vertex[] getVertices() { + final Set<Vertex> tmp = new LinkedHashSet<Vertex>(); + for (Edge e : edges) { + tmp.add(e.v1); + tmp.add(e.v2); + } + final Vertex[] a = tmp.toArray(new Vertex[tmp.size()]); + return a; + } /** * Add an edge to a path, computing the estimated cardinality of the new @@ -1078,13 +1224,6 @@ // The new vertex, which is not part of this path. final Vertex targetVertex = v1Found ? e.v2 : e.v1; - // Extend the path. - final Path tmp = new Path(); - - tmp.edges.addAll(edges); - - tmp.edges.add(e); - /* * Chain sample the edge. * @@ -1110,14 +1249,32 @@ // 0/* start */, this.sample.sample); final EdgeSample edgeSample = e.estimateCardinality(queryEngine, - limit, sourceVertex, targetVertex, this.sample.sample); + limit, sourceVertex, targetVertex, + this.sample.estimatedCardinality, this.sample.exact, + this.sample.sample); - tmp.sample = edgeSample; + { + + final List<Edge> edges = new ArrayList<Edge>( + this.edges.size() + 1); + + edges.addAll(this.edges); + + edges.add(e); + + final long cumulativeEstimatedCardinality = this.cumulativeEstimatedCardinality + + edgeSample.estimatedCardinality; + + // Extend the path. 
+ final Path tmp = new Path(edges, + cumulativeEstimatedCardinality, edgeSample); + + // tmp.stopVertex = e.getMaximumCardinalityVertex(); + + return tmp; + + } -// tmp.stopVertex = e.getMaximumCardinalityVertex(); - - return tmp; - } // /** @@ -1184,17 +1341,24 @@ static public String showTable(final Path[] a) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); - for (Path x : a) { + for(int i=0; i<a.length; i++) { + final Path x = a[i]; if (x.sample == null) { - f.format("%7s, %10s", "N/A", "N/A"); + f.format("p[%2d] %7s, %10s %10s", "N/A", "N/A", "N/A", i); } else { - f.format("% 7.2f, % 10d", x.sample.f, - x.sample.estimatedCardinality); + f.format("p[%2d] % 7.2f, % 10d % 10d", i, x.sample.f, + x.sample.estimatedCardinality, + x.cumulativeEstimatedCardinality); } - sb.append(","); - for (Edge e : x.edges) - sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() - + ")"); + sb.append(", ["); + final Vertex[] vertices = x.getVertices(); + for(Vertex v : vertices) { + f.format("%2d ", v.pred.getId()); + } + sb.append("]"); +// for (Edge e : x.edges) +// sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() +// + ")"); sb.append("\n"); } return sb.toString(); @@ -1903,12 +2067,40 @@ * <p> * It is not clear that this code is comparing all paths which * need to be compared. + * + * @todo I have restated the termination rule as follows. + * <p> + * If there is a path [p] whose total cost is LTE the cost of + * executing just its last edge [e], then the path [p] dominates + * all paths beginning with edge [e]. The dominated paths should + * be pruned. + * <p> + * If there is a path, [p], which is an unordered extension of + * another path, [p1] (the vertices of p are a superset of the + * vertices of p1), and the cost of [p] is LTE the cost of [p1], + * then [p] dominates [p1]. The dominated paths should be pruned. + * <p> + * If there is a path, [p], which has the same vertices as a path + * [p1] and the cost of [p] is LTE the cost of [p1], then [p] + * dominates (or is equivalent to) [p1]. The path [p1] should be + * pruned. + * + * For a given path length [l], if no paths of length [l] remain + * then the minimum cost path of length GT [l] may be executed. + * + * @todo Due to sampling error and the desire to be robust to small + * differences in the expected cost of an operation, we should + * only consider two significant digits when comparing estimates + * of cost. E.g., 990 and 1000 should not be differentiated as + * they are the same within the sampling error. This should be + * used to chose all starting vertices which have the same minimum + * cardinality. */ public Path getSelectedJoinPath(final Path[] a) { final StringBuilder sb = new StringBuilder(); final Formatter f = new Formatter(sb); + Path p = null; for (int i = 0; i < a.length; i++) { - Path p = null; final Path Pi = a[i]; if (Pi.sample == null) throw new RuntimeException("Not sampled: " + Pi); @@ -1918,31 +2110,45 @@ final Path Pj = a[j]; if (Pj.sample == null) throw new RuntimeException("Not sampled: " + Pj); + /* + * FIXME This needs to compare the cost of Pj given path Pi + * against the cost of Pj when executed as a single edge (or + * by any other alternative join path sequence). The choice + * of Pi and Pj is not coherent and the same value of costPj + * is being used for both sides of the equation. 
+ */ final long costPi = Pi.sample.estimatedCardinality; final double sfPi = Pi.sample.f; final long costPj = Pj.sample.estimatedCardinality; final long expectedCombinedCost = costPi + (long) (sfPi * costPj); - final boolean lt = expectedCombinedCost < costPj; + /* + * @todo I think that LTE makes more sense here since having + * the same net cardinality for a given edge after + * performing more steps would appear to be worth while. + */ + final boolean lte = expectedCombinedCost <= costPj; { f .format( - "Comparing: P[% 2d] with P[% 2d] : % 10d + (% 7.2f * % 10d) %2s %10d", - i, j, costPi, sfPi, costPj, (lt ? "<" - : ">="), costPj); + "Comparing: P[%2d] with P[%2d] : (% 10d + (% 7.2f * % 10d) = %10d) %2s %10d", + i, j, costPi, sfPi, costPj, expectedCombinedCost, (lte ? "<=" + : ">"), costPj); System.err.println(sb); sb.setLength(0); } - if (lt) { + if (lte) { p = Pi; - } else { - p = null; +// } else { +// p = null; break; } } // Pj - if (p != null) - return p; +// if (p != null) +// return p; } // Pi + if (p != null) + return p; /* * None of the paths is a winner according to the selection * criteria. @@ -1951,6 +2157,98 @@ } /** + * Prune paths which are dominated by other paths. Start the algorithm + * by passing in all edges which have the minimum cardinality (when + * comparing their expected cardinality after rounding to 2 significant + * digits). + * <p> + * If there is a path [p] whose total cost is LTE the cost of executing + * just its last edge [e], then the path [p] dominates all paths + * beginning with edge [e]. The dominated paths should be pruned. [This + * is a degenerate case of the next rule.] + * <p> + * If there is a path, [p] != [p1], where [p] is an unordered superset + * of [p1] (that is the vertices of p are a superset of the vertices of + * p1, but allowing the special case where the set of vertices are the + * same), and the cumulative cost of [p] is LTE the cumulative cost of + * [p1], then [p] dominates (or is equivalent to) [p1] and p1 should be + * pruned. + * <p> + * If there is a path, [p], which has the same vertices as a path [p1] + * and the cumulative cost of [p] is LTE the cumulative cost of [p1], + * then [p] dominates (or is equivalent to) [p1]. The path [p1] should + * be pruned. [This is a degenerate case of the prior rule.] + * + * @param a + * A set of paths. + * + * @return The set of paths with all dominated paths removed. + * + * @todo This does not give us a stopping condition unless the set of + * paths becomes empty. I think it will tend to search too far for + * a best path, running the risk of increasing inaccuracy + * introduced by propagation of samples. Resampling the vertices + * and increasing the vertex and edge cutoff at each iteration of + * the search could compensate for that. + * + * @todo Cumulative estimated cardinality is an estimate of the work to + * be done. However, the actual cost of a join depends on whether + * we will use nested index subquery or a hash join and the cost + * of that operation on the database. There could be counter + * examples where the cost of the hash join with a range scan + * using the unbound variable is LT the nested index subquery. For + * those cases, we will do the same amount of IO on the hash join + * but there will still be a lower cardinality to the join path + * since we are feeding in fewer solutions to be joined. 
+ */ + public Path[] pruneJoinPaths(final Path[] a) { + final StringBuilder sb = new StringBuilder(); + final Formatter f = new Formatter(sb); + final Set<Path> pruned = new LinkedHashSet<Path>(); + for (int i = 0; i < a.length; i++) { + final Path Pi = a[i]; + if (Pi.sample == null) + throw new RuntimeException("Not sampled: " + Pi); + for (int j = 0; j < a.length; j++) { + if (i == j) + continue; + final Path Pj = a[j]; + if (Pj.sample == null) + throw new RuntimeException("Not sampled: " + Pj); + final boolean isPiSuperSet = Pi.isUnorderedSuperSet(Pj); + if(!isPiSuperSet) { + // Can not directly compare these join paths. + continue; + } + final long costPi = Pi.cumulativeEstimatedCardinality; + final long costPj = Pj.cumulativeEstimatedCardinality; + final boolean lte = costPi <= costPj; + { + f + .format( + "Comparing: P[%2d] with P[%2d] : %10d %2s %10d %s", + i, j, costPi, (lte ? "<=" : ">"), + costPj, lte ? " **prune P["+j+"]**" : ""); + System.err.println(sb); + sb.setLength(0); + } + if (lte) { + pruned.add(Pj); + } + } // Pj + } // Pi + System.err.println("Pruned "+pruned.size()+" of out "+a.length+" paths"); + final Set<Path> keep = new LinkedHashSet<Path>(); + for(Path p : a) { + if(pruned.contains(p)) + continue; + keep.add(p); + } + final Path[] b = keep.toArray(new Path[keep.size()]); + return b; + } + + /** * Termination condition if no more edges to sample. This * breaks the deadlock by preferring the path whose .... */ @@ -2044,4 +2342,39 @@ } + private static double roundToSignificantFigures(final double num, + final int n) { + if (num == 0) { + return 0; + } + + final double d = Math.ceil(Math.log10(num < 0 ? -num : num)); + final int power = n - (int) d; + + final double magnitude = Math.pow(10, power); + final long shifted = Math.round(num * magnitude); + return shifted / magnitude; + } + + /** + * Places vertices into order by the {@link BOp#getId()} associated + * with their {@link IPredicate}. + */ + private static class BOpIdComparator implements Comparator<Vertex> { + + private static final transient Comparator<Vertex> INSTANCE = new BOpIdComparator(); + + @Override + public int compare(Vertex o1, Vertex o2) { + final int id1 = o1.pred.getId(); + final int id2 = o2.pred.getId(); + if (id1 < id2) + return 1; + if (id2 > id1) + return -1; + return 0; + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 15:48:11 UTC (rev 3940) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2010-11-12 16:33:28 UTC (rev 3941) @@ -627,7 +627,8 @@ */ final Path p0 = new Path(g.getEdge(v2, v3)); final Path p1 = new Path(g.getEdge(v2, v4)); - final Path[] paths_t0 = new Path[] { p0, p1 }; + final Path p2 = new Path(g.getEdge(v4, v1)); + final Path[] paths_t0 = new Path[] { p0, p1, p2 }; System.err.println("\n*** Paths @ t0\n" + JoinGraph.showTable(paths_t0)); @@ -638,7 +639,7 @@ // System.err.println("Selected path: " + selected_t0); // // } - + /* * The set of one step extensions of those paths. * @@ -648,28 +649,47 @@ * distinct from all other paths already generated in this breadth * first expansion of the search space. (ROX further constrains the * new paths to extend the stop vertex of the path from which they - * are derived.) + * are derived.) 
+ * + * @todo always label edges by either minimum bopId or minimum + * estimated cardinality (with tie broken by bopId)? When extending + * a path in which more than one edge can reach the target vertex, + * always chose the edge having the source vertex with the minimum + * cardinality? */ final Path[] paths_t1 = new Path[] {// + // t0 + p0, // (2,3) + p1, // (2,4) + p2, // (4,1) + // t1 p0.addEdge(queryEngine, limit, g.getEdge(v2, v4)), // aka (v3,v4) p0.addEdge(queryEngine, limit, g.getEdge(v3, v0)), // p0.addEdge(queryEngine, limit, g.getEdge(v3, v5)), // + // p1.addEdge(queryEngine, limit, g.getEdge(v4, v1)), // p1.addEdge(queryEngine, limit, g.getEdge(v4, v3)), // p1.addEdge(queryEngine, limit, g.getEdge(v4, v5)), // + // + p2.addEdge(queryEngine, limit, g.getEdge(v1, v5)), // aka (4,5) + p2.addEdge(queryEngine, limit, g.getEdge(v4, v3)), // + p2.addEdge(queryEngine, limit, g.getEdge(v4, v2)), // + }; System.err.println("\n*** Paths @ t1\n" + JoinGraph.showTable(paths_t1)); - final Path selected_t1 = g.getSelectedJoinPath(paths_t1); - - if (selected_t1 != null) { + g.pruneJoinPaths(paths_t1); + +// final Path selected_t1 = g.getSelectedJoinPath(paths_t1); +// +// if (selected_t1 != null) { +// +// System.err.println("Selected path: " + selected_t1); +// +// } - System.err.println("Selected path: " + selected_t1); - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java 2010-11-12 15:48:11 UTC (rev 3940) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java 2010-11-12 16:33:28 UTC (rev 3941) @@ -286,79 +286,79 @@ if (startEdge == null) throw new RuntimeException("No weighted edges."); - /* - * Generate a set of paths by extending that starting vertex in one - * step in each possible direction. For the initial one-step - * extension of the starting vertex we can reuse the estimated - * cardinality of each edge in the join graph, which was already - * computed above. - */ - final Path[] paths; - { +// /* +// * Generate a set of paths by extending that starting vertex in one +// * step in each possible direction. For the initial one-step +// * extension of the starting vertex we can reuse the estimated +// * cardinality of each edge in the join graph, which was already +// * computed above. +// */ +// final Path[] paths; +// { +// +// System.err.println("startEdge="+startEdge); +// +// // The starting vertex is the one with the minimum est. +// // cardinality. +// final Vertex startVertex = startEdge +// .getMinimumCardinalityVertex(); +// +// System.err.println("startVertex=" + startVertex); +// +// // Find the set of edges branching from the starting vertex. +// final List<Edge> branches = g +// .getEdges(startVertex, null/* visited */); +// +// if (branches.isEmpty()) { +// +// // No vertices remain to be explored so we should just execute something. +// throw new RuntimeException("Paths can not be extended"); +// +// } else if (branches.size() == 1) { +// +// final Edge e = branches.get(0); +// +// final Path path = new Path(e); +// +// // The initial sample is just the sample for that edge. 
+// path.sample = e.sample; +// +// System.err.println("path=" + path); +// +// paths = new Path[] { path }; +// +// } else { +// +// final List<Path> list = new LinkedList<Path>(); +// +// // Create one path for each of those branches. +// for (Edge e : branches) { +// +// if (e.v1 != startVertex && e.v2 != startVertex) +// continue; +// +// // Create a one step path. +// final Path path = new Path(e); +// +// // The initial sample is just the sample for that edge. +// path.sample = e.sample; +// +// System.err +// .println("path[" + list.size() + "]: " + path); +// +// list.add(path); +// +// } +// +// paths = list.toArray(new Path[list.size()]); +// +// } +// +// System.err.println("selectedJoinPath: " +// + g.getSelectedJoinPath(paths)); +// +// } - System.err.println("startEdge="+startEdge); - - // The starting vertex is the one with the minimum est. - // cardinality. - final Vertex startVertex = startEdge - .getMinimumCardinalityVertex(); - - System.err.println("startVertex=" + startVertex); - - // Find the set of edges branching from the starting vertex. - final List<Edge> branches = g - .getEdges(startVertex, null/* visited */); - - if (branches.isEmpty()) { - - // No vertices remain to be explored so we should just execute something. - throw new RuntimeException("Paths can not be extended"); - - } else if (branches.size() == 1) { - - final Edge e = branches.get(0); - - final Path path = new Path(e); - - // The initial sample is just the sample for that edge. - path.sample = e.sample; - - System.err.println("path=" + path); - - paths = new Path[] { path }; - - } else { - - final List<Path> list = new LinkedList<Path>(); - - // Create one path for each of those branches. - for (Edge e : branches) { - - if (e.v1 != startVertex && e.v2 != startVertex) - continue; - - // Create a one step path. - final Path path = new Path(e); - - // The initial sample is just the sample for that edge. - path.sample = e.sample; - - System.err - .println("path[" + list.size() + "]: " + path); - - list.add(path); - - } - - paths = list.toArray(new Path[list.size()]); - - } - - System.err.println("selectedJoinPath: " - + g.getSelectedJoinPath(paths)); - - } - /* * FIXME Now extend the initial paths some more and explore the * termination criteria and how they handle paths which are extended This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
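To make the estimation arithmetic in the patch above concrete, here is a minimal, self-contained Java sketch (not the bigdata API; every class, field and method name here is illustrative only): a cutoff join consumes inputCount sampled solutions and produces outputCount solutions before reaching the LIMIT, the join hit ratio f = outputCount / inputCount scales the source range count into an estimated cardinality for that edge, and the cumulative cost of a join path is the sum of the estimates of its edges. The pruning rule restated in the commit is sketched with each path's vertex set simplified to a bit mask.

import java.util.Arrays;
import java.util.List;

public class CutoffJoinSketch {

    /** Statistics for one cutoff-evaluated edge (join). */
    static final class EdgeEstimate {
        final long rangeCount;   // range count of the more selective source vertex
        final int limit;         // cutoff (LIMIT) used when sampling the join
        final int inputCount;    // sampled source solutions consumed
        final int outputCount;   // solutions produced before the cutoff
        final double f;          // join hit ratio
        final long estimatedCardinality;

        EdgeEstimate(long rangeCount, int limit, int inputCount, int outputCount) {
            this.rangeCount = rangeCount;
            this.limit = limit;
            this.inputCount = inputCount;
            this.outputCount = outputCount;
            this.f = outputCount == 0 ? 0 : (outputCount / (double) inputCount);
            this.estimatedCardinality = (long) (rangeCount * f);
        }
    }

    /** Cumulative estimated cardinality of a join path (zero for an empty path). */
    static long cumulativeCost(List<EdgeEstimate> path) {
        long sum = 0L;
        for (EdgeEstimate e : path) {
            sum += e.estimatedCardinality;
        }
        return sum;
    }

    /**
     * The pruning rule restated above, with each path's vertex set reduced to a
     * bit mask: p dominates p1 iff p's vertices are an (unordered) superset of
     * p1's vertices and p's cumulative cost is LTE p1's cumulative cost.
     */
    static boolean dominates(List<EdgeEstimate> p, long pVertexMask,
            List<EdgeEstimate> p1, long p1VertexMask) {
        final boolean superset = (pVertexMask & p1VertexMask) == p1VertexMask;
        return superset && cumulativeCost(p) <= cumulativeCost(p1);
    }

    public static void main(String[] args) {
        // 100 sampled inputs against a source vertex with rangeCount 10000
        // produced 250 outputs before the cutoff: f = 2.5, estimate = 25000.
        final EdgeEstimate e = new EdgeEstimate(10000L, 100, 100, 250);
        System.out.println("f=" + e.f + ", estCard=" + e.estimatedCardinality);
        System.out.println("pathCost=" + cumulativeCost(Arrays.asList(e, e)));
    }
}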
From: <mar...@us...> - 2010-11-12 15:48:17

Revision: 3940 http://bigdata.svn.sourceforge.net/bigdata/?rev=3940&view=rev Author: martyncutcher Date: 2010-11-12 15:48:11 +0000 (Fri, 12 Nov 2010) Log Message: ----------- BlobAllocatorless blob implementation Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSOutputStream.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSOutputStream.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSOutputStream.java 2010-11-12 00:58:16 UTC (rev 3939) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSOutputStream.java 2010-11-12 15:48:11 UTC (rev 3940) @@ -24,10 +24,13 @@ package com.bigdata.rwstore; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; import java.io.FilterOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.ArrayList; import org.apache.log4j.Logger; @@ -140,7 +143,7 @@ * PSOutputStream impl. */ - private int[] m_blobHeader = null; + private ArrayList<Integer> m_blobHeader = null; private byte[] m_buf = null; private boolean m_isSaved = false; // private long m_headAddr = 0; @@ -176,11 +179,8 @@ m_blobThreshold = maxAlloc-4; // allow for checksum - final int maxHdrSize = RWStore.BLOB_FIXED_ALLOCS * 4; - final int bufSize = m_blobThreshold > maxHdrSize ? m_blobThreshold : maxHdrSize; - - if (m_buf == null || m_buf.length != bufSize) - m_buf = new byte[bufSize]; + if (m_buf == null || m_buf.length != m_blobThreshold) + m_buf = new byte[m_blobThreshold]; reset(); } @@ -218,12 +218,16 @@ if (m_count == m_blobThreshold && !m_writingHdr) { if (m_blobHeader == null) { - m_blobHeader = new int[RWStore.BLOB_FIXED_ALLOCS]; // max 16K - m_blobHdrIdx = 0; + int hdrSize = m_blobThreshold/4; + if (hdrSize > RWStore.BLOB_FIXED_ALLOCS) + hdrSize = RWStore.BLOB_FIXED_ALLOCS; + m_blobHeader = new ArrayList<Integer>(); // only support header + // m_blobHdrIdx = 0; } final int curAddr = (int) m_store.alloc(m_buf, m_count, m_context); - m_blobHeader[m_blobHdrIdx++] = curAddr; + // m_blobHeader[m_blobHdrIdx++] = curAddr; + m_blobHeader.add(curAddr); m_count = 0; } @@ -324,28 +328,40 @@ if (m_blobHeader != null) { try { m_writingHdr = true; // ensure that header CAN be a BLOB - m_blobHeader[m_blobHdrIdx++] = addr; + // m_blobHeader[m_blobHdrIdx++] = addr; + m_blobHeader.add(addr); final int precount = m_count; m_count = 0; try { - writeInt(m_blobHdrIdx); - for (int i = 0; i < m_blobHdrIdx; i++) { - writeInt(m_blobHeader[i]); +// writeInt(m_blobHdrIdx); +// for (int i = 0; i < m_blobHdrIdx; i++) { +// writeInt(m_blobHeader[i]); +// } + int hdrBufSize = 4*(m_blobHeader.size() + 1); + ByteArrayOutputStream hdrbuf = new ByteArrayOutputStream(hdrBufSize); + DataOutputStream hdrout = new DataOutputStream(hdrbuf); + hdrout.writeInt(m_blobHeader.size()); + for (int i = 0; i < m_blobHeader.size(); i++) { + hdrout.writeInt(m_blobHeader.get(i)); } - addr = (int) m_store.alloc(m_buf, m_count, m_context); + hdrout.flush(); + + byte[] outbuf = hdrbuf.toByteArray(); + addr = (int) m_store.alloc(outbuf, hdrBufSize, m_context); - if (m_blobHdrIdx != ((m_blobThreshold - 1 + m_bytesWritten - m_count) / m_blobThreshold)) { - throw new IllegalStateException( - "PSOutputStream.save at : " + addr - + ", 
bytes: " + m_bytesWritten - + ", blocks: " + m_blobHdrIdx - + ", last alloc: " + precount); - } +// if (m_blobHdrIdx != ((m_blobThreshold - 1 + m_bytesWritten - m_count) / m_blobThreshold)) { +// throw new IllegalStateException( +// "PSOutputStream.save at : " + addr +// + ", bytes: " + m_bytesWritten +// + ", blocks: " + m_blobHdrIdx +// + ", last alloc: " + precount); +// } if (log.isDebugEnabled()) log.debug("Writing BlobHdrIdx with " + m_blobHdrIdx + " allocations"); - addr = m_store.registerBlob(addr); // returns handle + // DO NOT USE BLOB ALLOCATOR + // addr = m_store.registerBlob(addr); // returns handle } catch (IOException e) { e.printStackTrace(); } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-12 00:58:16 UTC (rev 3939) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-12 15:48:11 UTC (rev 3940) @@ -281,6 +281,8 @@ static final int ALLOCATION_SCALEUP = 16; // multiplier to convert allocations based on minimum allocation of 32k static private final int META_ALLOCATION = 8; // 8 * 32K is size of meta Allocation + // Maximum fixed allocs in a BLOB, but do restrict to size that will fit within a single fixed allocation + // Ignored static final int BLOB_FIXED_ALLOCS = 2048; // private ICommitCallback m_commitCallback; // @@ -578,11 +580,14 @@ } final int maxBlockLessChk = m_maxFixedAlloc-4; - // set this at blob header references max 4096 fixed allocs - // meaning that header may itself be a blob if max fixed is - // less than 16K - m_maxBlobAllocSize = (BLOB_FIXED_ALLOCS * maxBlockLessChk); + // ensure that BLOB header cannot itself be a BLOB +// int blobFixedAlocs = maxBlockLessChk/4; +// if (blobFixedAlocs > RWStore.BLOB_FIXED_ALLOCS) +// blobFixedAlocs = RWStore.BLOB_FIXED_ALLOCS; +// m_maxBlobAllocSize = ((maxBlockLessChk/4) * maxBlockLessChk); + m_maxBlobAllocSize = Integer.MAX_VALUE; + assert m_maxFixedAlloc > 0; m_deferredFreeOut = PSOutputStream.getNew(this, m_maxFixedAlloc, null); @@ -754,8 +759,9 @@ final DataInputStream strBuf = new DataInputStream(new ByteArrayInputStream(buf)); + // Can handle minor store version incompatibility final int storeVersion = strBuf.readInt(); - if (storeVersion != cVersion) { + if ((storeVersion & 0xFF00) != (cVersion & 0xFF00)) { throw new IllegalStateException("Incompatible RWStore header version"); } m_lastDeferredReleaseTime = strBuf.readLong(); @@ -1105,13 +1111,14 @@ } } - final Allocator na = getBlock((int) addr); - if (! (na instanceof BlobAllocator)) { - throw new IllegalStateException("Invalid Allocator index"); - } - final BlobAllocator ba = (BlobAllocator) na; - final int hdraddr = ba.getBlobHdrAddress(getOffset((int) addr)); - getData(hdraddr, hdrbuf); // read in header - could itself be a blob! +// final Allocator na = getBlock((int) addr); +// if (! (na instanceof BlobAllocator)) { +// throw new IllegalStateException("Invalid Allocator index"); +// } +// final BlobAllocator ba = (BlobAllocator) na; +// final int hdraddr = ba.getBlobHdrAddress(getOffset((int) addr)); +// getData(hdraddr, hdrbuf); // read in header - could itself be a blob! + getData(addr, hdrbuf); // fine but MUST NOT allow header to be a BLOB! 
final DataInputStream hdrstr = new DataInputStream(new ByteArrayInputStream(hdrbuf)); final int rhdrs = hdrstr.readInt(); if (rhdrs != nblocks) { @@ -1369,23 +1376,27 @@ } m_allocationLock.lock(); try { - final Allocator alloc = getBlockByAddress(addr); - /* - * There are a few conditions here. If the context owns the - * allocator and the allocation was made by this context then - * it can be freed immediately. - * The problem comes when the context is null and the allocator - * is NOT owned, BUT there are active AllocationContexts, in this - * situation, the free must ALWAYS be deferred. - */ - final boolean alwaysDefer = context == null && m_contexts.size() > 0; - if (alwaysDefer) - if (log.isDebugEnabled()) - log.debug("Should defer " + physicalAddress(addr)); - if (/*alwaysDefer ||*/ !alloc.canImmediatelyFree(addr, sze, context)) { - deferFree(addr, sze); + if (sze > m_maxFixedAlloc) { + freeBlob(addr, sze, context); } else { - immediateFree(addr, sze); + final Allocator alloc = getBlockByAddress(addr); + /* + * There are a few conditions here. If the context owns the + * allocator and the allocation was made by this context then + * it can be freed immediately. + * The problem comes when the context is null and the allocator + * is NOT owned, BUT there are active AllocationContexts, in this + * situation, the free must ALWAYS be deferred. + */ + final boolean alwaysDefer = context == null && m_contexts.size() > 0; + if (alwaysDefer) + if (log.isDebugEnabled()) + log.debug("Should defer " + physicalAddress(addr)); + if (/*alwaysDefer ||*/ !alloc.canImmediatelyFree(addr, sze, context)) { + deferFree(addr, sze); + } else { + immediateFree(addr, sze); + } } } finally { m_allocationLock.unlock(); @@ -1393,7 +1404,34 @@ } -// private long immediateFreeCount = 0; + private boolean freeBlob(final int hdr_addr, final int sze, final IAllocationContext context) { + if (sze < (m_maxFixedAlloc-4)) + throw new IllegalArgumentException("Unexpected address size"); + + final int alloc = m_maxFixedAlloc-4; + final int blcks = (alloc - 1 + sze)/alloc; + + // read in header block, then free each reference + final byte[] hdr = new byte[(blcks+1) * 4 + 4]; // add space for checksum + getData(hdr_addr, hdr); + + final DataInputStream instr = new DataInputStream( + new ByteArrayInputStream(hdr, 0, hdr.length-4) ); + try { + final int allocs = instr.readInt(); + for (int i = 0; i < allocs; i++) { + final int nxt = instr.readInt(); + free(nxt, m_maxFixedAlloc); + } + free(hdr_addr, hdr.length); + + return true; + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + + // private long immediateFreeCount = 0; private void immediateFree(final int addr, final int sze) { switch (addr) { @@ -1573,9 +1611,9 @@ if (size > (m_maxFixedAlloc - 4)) { - if (size > (BLOB_FIXED_ALLOCS * (m_maxFixedAlloc - 4))) + if (size > getMaxBlobSize()) throw new IllegalArgumentException( - "Allocation request beyond maximum BLOB"); + "Allocation request beyond maximum BLOB of " + getMaxBlobSize()); if (log.isTraceEnabled()) log.trace("BLOB ALLOC: " + size); @@ -1585,8 +1623,8 @@ try { int i = 0; - final int lsize = size - 512; - while (i < lsize) { + final int blocks = size/512; + for (int b = 0; b < blocks; b++) { psout.write(buf, i, 512); // add 512 bytes at a time i += 512; } @@ -1984,8 +2022,14 @@ * Use BCD-style numbering so * 0x0200 == 2.00 * 0x0320 == 3.20 + * + * The minor byte values should maintain binary compatibility, with + * major bytes + * Versions + * 0x0300 - extended header to include reserved ints 
+ * 0x0400 - removed explicit BlobAllocators */ - final private int cVersion = 0x0300; + final private int cVersion = 0x0400; /** * MetaBits Header @@ -2419,12 +2463,10 @@ * number of filled slots | store used */ public void showAllocators(final StringBuilder str) { - final AllocationStats[] stats = new AllocationStats[m_allocSizes.length+1]; - for (int i = 0; i < stats.length-1; i++) { + final AllocationStats[] stats = new AllocationStats[m_allocSizes.length]; + for (int i = 0; i < stats.length; i++) { stats[i] = new AllocationStats(m_allocSizes[i]*64); } - // for BLOBs - stats[stats.length-1] = new AllocationStats(0); final Iterator<Allocator> allocs = m_allocs.iterator(); while (allocs.hasNext()) { @@ -2456,7 +2498,7 @@ tfilled += filled; tfilledSlots += stats[i].m_filledSlots; } - for (int i = 0; i < stats.length-1; i++) { + for (int i = 0; i < stats.length; i++) { final long reserved = stats[i].m_reservedSlots * stats[i].m_blockSize; final long filled = stats[i].m_filledSlots * stats[i].m_blockSize; str.append(padRight("" + stats[i].m_blockSize, 10)); @@ -2466,9 +2508,6 @@ str.append(padLeft("" + (treserved==0?0:(reserved * 100 / treserved)) + "%", 8)); str.append("\n"); } - // lastly some BLOB stats - only interested in used/reserved slots - str.append(padRight("BLOB", 10)); - str.append(padLeft("" + stats[stats.length-1].m_filledSlots, 12) + padLeft("" + stats[stats.length-1].m_reservedSlots, 12)); str.append("\n"); str.append(padRight("Totals", 10)); @@ -3405,6 +3444,11 @@ int ret = m_minFixedAlloc; while (data_len > ret) { i++; + // If we write directly to the writeCache then the data_len + // may be larger than largest slot + if (i == m_allocSizes.length) + return data_len; + ret = 64 * m_allocSizes[i]; } @@ -4059,7 +4103,7 @@ } public int getMaxBlobSize() { - return this.m_maxBlobAllocSize-4; + return m_maxBlobAllocSize-4; // allow for checksum } } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java 2010-11-12 00:58:16 UTC (rev 3939) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/rwstore/TestRWJournal.java 2010-11-12 15:48:11 UTC (rev 3940) @@ -126,6 +126,15 @@ properties.setProperty(Options.WRITE_CACHE_ENABLED, "" + writeCacheEnabled); + // number of bits in FixedAllocators + properties.setProperty(com.bigdata.rwstore.RWStore.Options.DEFAULT_FREE_BITS_THRESHOLD, "1000"); + + // Size of META_BITS_BLOCKS + properties.setProperty(com.bigdata.rwstore.RWStore.Options.DEFAULT_META_BITS_SIZE, "9"); + + // properties.setProperty(RWStore.Options.ALLOCATION_SIZES, "1,2,3,5,8,12,16,32"); // 2K max + properties.setProperty(RWStore.Options.ALLOCATION_SIZES, "1,2,3,5,8,12,16"); // 1K + return properties; } @@ -238,6 +247,8 @@ } public Properties getProperties() { + + System.out.println("TestRWJournal:getProperties"); final Properties properties = super.getProperties(); @@ -254,10 +265,26 @@ properties.setProperty(Options.WRITE_CACHE_ENABLED, "" + writeCacheEnabled); + // number of bits in FixedAllocators + properties.setProperty(RWStore.Options.FREE_BITS_THRESHOLD, "50"); + + // Size of META_BITS_BLOCKS + properties.setProperty(RWStore.Options.META_BITS_SIZE, "9"); + + // properties.setProperty(RWStore.Options.ALLOCATION_SIZES, "1,2,3,5,8,12,16,32,48,64,128"); // 8K - max blob = 2K * 8K = 16M + // properties.setProperty(RWStore.Options.ALLOCATION_SIZES, "1,2,3,5,8,12,16,32,48,64,128"); 
// 2K max + properties.setProperty(RWStore.Options.ALLOCATION_SIZES, "1,2,3,5,8,12,16"); // 2K max + return properties; } + protected IRawStore getStore() { + + return new Journal(getProperties()); + + } + // /** // * Test that allocate() pre-extends the store when a record is allocated // * which would overflow the current user extent. @@ -758,10 +785,10 @@ int endBlob = 1024 * 1256; int[] faddrs = allocBatchBuffer(rw, 100, startBlob, endBlob); - System.out.println("Final allocation: " + rw.physicalAddress(faddrs[99]) - + ", allocations: " + (rw.getTotalAllocations() - numAllocs) - + ", allocated bytes: " + (rw.getTotalAllocationsSize() - startAllocations)); - } finally { + final StringBuilder str = new StringBuilder(); + rw.showAllocators(str); + System.out.println(str); + } finally { store.destroy(); @@ -776,17 +803,17 @@ final Journal store = (Journal) getStore(); try { + final RWStrategy bs = (RWStrategy) store + .getBufferStrategy(); - byte[] buf = new byte[1024 * 2048]; // 2Mb buffer of random data + final RWStore rw = bs.getRWStore(); + + + byte[] buf = new byte[2 * 1024 * 1024]; // 5Mb buffer of random data r.nextBytes(buf); ByteBuffer bb = ByteBuffer.wrap(buf); - RWStrategy bs = (RWStrategy) store - .getBufferStrategy(); - - RWStore rw = bs.getRWStore(); - long faddr = bs.write(bb); // rw.alloc(buf, buf.length); log.info("Blob Allocation at " + rw.convertFromAddr(faddr)); @@ -842,6 +869,12 @@ assertEquals(bb, rdBuf); + // now delete the memory + bs.delete(faddr); // immediateFree! + + faddr = bs.write(bb); // rw.alloc(buf, buf.length); + bb.position(0); + System.out.println("Now commit to disk"); store.commit(); @@ -862,12 +895,12 @@ rw.checkDeferredFrees(true, store); try { - rdBuf = bs.read(faddr); // should fail with illegal state + rdBuf = bs.read(faddr); // should fail with illegal argument throw new RuntimeException("Fail"); } catch (Exception ise) { - assertTrue("Expected IllegalStateException reading from " + (faddr >> 32) + " instead got: " + ise, ise instanceof IllegalStateException); + assertTrue("Expected IllegalArgumentException reading from " + (faddr >> 32) + " instead got: " + ise, ise instanceof IllegalArgumentException); } - + } finally { store.destroy(); @@ -1038,9 +1071,9 @@ // allocBatch(store, 1, 32, 650, 100000000); allocBatch(store, 1, 32, 650, 50000); store.commit(); - System.out.println("Final allocations: " + rw.getTotalAllocations() - + ", allocated bytes: " + rw.getTotalAllocationsSize() + ", file length: " - + rw.getStoreFile().length()); + final StringBuilder str = new StringBuilder(); + rw.showAllocators(str); + System.out.println(str); store.close(); System.out.println("Re-open Journal"); store = (Journal) getStore(); @@ -1065,7 +1098,7 @@ long realAddr = 0; try { // allocBatch(store, 1, 32, 650, 100000000); - pureAllocBatch(store, 1, 32, 3075, 300000); // cover wider range of blocks + pureAllocBatch(store, 1, 32, rw.m_maxFixedAlloc-4, 300000); // cover wider range of blocks store.commit(); System.out.println("Final allocations: " + rw.getTotalAllocations() + ", allocated bytes: " + rw.getTotalAllocationsSize() + ", file length: " @@ -1106,7 +1139,7 @@ .getBufferStrategy(); RWStore rw = bs.getRWStore(); - int freeAddr[] = new int[2048]; + int freeAddr[] = new int[512]; int freeCurs = 0; for (int i = 0; i < grp; i++) { int alloc = min + r.nextInt(sze-min); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
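The revised PSOutputStream/RWStore code above replaces the BlobAllocator with a blob header stored in an ordinary fixed allocation: an int count of blocks followed by one int address per block, serialized with a DataOutputStream and decoded again (as freeBlob does) with a DataInputStream. Below is a minimal stand-alone sketch of that header layout only, assuming hypothetical names and ignoring checksums and the store itself.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

public class BlobHeaderSketch {

    /** Encode the blob header: an int block count, then one int address per block. */
    static byte[] encode(List<Integer> blockAddrs) throws IOException {
        final ByteArrayOutputStream bos =
                new ByteArrayOutputStream(4 * (blockAddrs.size() + 1));
        final DataOutputStream out = new DataOutputStream(bos);
        out.writeInt(blockAddrs.size());   // #of fixed allocations in the blob
        for (int addr : blockAddrs) {
            out.writeInt(addr);            // address of each fixed allocation
        }
        out.flush();
        return bos.toByteArray();          // stored in a single fixed allocation
    }

    /** Decode the header back into the per-block addresses (as freeBlob must). */
    static int[] decode(byte[] hdr) throws IOException {
        final DataInputStream in =
                new DataInputStream(new ByteArrayInputStream(hdr));
        final int nblocks = in.readInt();
        final int[] addrs = new int[nblocks];
        for (int i = 0; i < nblocks; i++) {
            addrs[i] = in.readInt();
        }
        return addrs;
    }

    public static void main(String[] args) throws IOException {
        final int[] addrs = decode(encode(Arrays.asList(101, 102, 103)));
        System.out.println(Arrays.toString(addrs)); // [101, 102, 103]
    }
}

Because the header is itself written into a single fixed allocation, the number of blocks per blob is bounded by the slot size, which is why the patch insists the header "MUST NOT allow header to be a BLOB" rather than letting the header spill into another blob.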
From: <tho...@us...> - 2010-11-12 02:08:14
Revision: 3939 http://bigdata.svn.sourceforge.net/bigdata/?rev=3939&view=rev Author: thompsonbry Date: 2010-11-12 00:58:16 +0000 (Fri, 12 Nov 2010) Log Message: ----------- Missed an update to BOpUtility which broke the build. Added some comments on JoinGraph. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-11-12 00:18:05 UTC (rev 3938) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-11-12 00:58:16 UTC (rev 3939) @@ -912,4 +912,39 @@ } + /** + * Inject (or replace) an {@link Integer} "rowId" column. This does not have + * a side-effect on the source {@link IBindingSet}s. + * + * @param var + * The name of the column. + * @param start + * The starting value for the identifier. + * @param in + * The source {@link IBindingSet}s. + * + * @return The modified {@link IBindingSet}s. + */ + public static IBindingSet[] injectRowIdColumn(final IVariable var, + final int start, final IBindingSet[] in) { + + if (in == null) + throw new IllegalArgumentException(); + + final IBindingSet[] out = new IBindingSet[in.length]; + + for (int i = 0; i < out.length; i++) { + + final IBindingSet bset = in[i].clone(); + + bset.set(var, new Constant<Integer>(Integer.valueOf(start + i))); + + out[i] = bset; + + } + + return out; + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 00:18:05 UTC (rev 3938) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 00:58:16 UTC (rev 3939) @@ -880,6 +880,11 @@ * will be falsely high by whatever ratio the chosen vertex * cardinality exceeds the one having the minimum cardinality which * is connected via an edge to the target vertex). + * + * FIXME I am not convinced that this approach is quite right. I am + * also not convinced that this approach will correctly carry the + * additional metadata on the EdgeSample (exact, estimate overflow + * and underflow, etc). */ final VertexSample moreSelectiveVertexSample = vSource.sample.rangeCount < vTarget.sample.rangeCount ? vSource.sample : vTarget.sample; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
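injectRowIdColumn exists to support cutoff sampling: each source solution is cloned and tagged with a monotonically increasing row identifier, so when the cutoff join stops, the row id carried by the last output seen tells the sampler how many inputs were consumed. A minimal sketch of the same pattern using plain Maps in place of IBindingSet (the class, helper and variable names here are hypothetical):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class RowIdSketch {

    /** Copy each source solution, adding (or replacing) a 1-based "__rowid" column. */
    static List<Map<String, Object>> injectRowId(
            final List<Map<String, Object>> in, final int start) {
        final List<Map<String, Object>> out = new ArrayList<>();
        int id = start;
        for (Map<String, Object> row : in) {
            // clone so there is no side-effect on the source solutions
            final Map<String, Object> copy = new LinkedHashMap<>(row);
            copy.put("__rowid", id++);
            out.add(copy);
        }
        return out;
    }

    public static void main(String[] args) {
        final List<Map<String, Object>> source = new ArrayList<>();
        for (int i = 0; i < 5; i++) {
            final Map<String, Object> row = new LinkedHashMap<>();
            row.put("x", i);
            source.add(row);
        }

        final List<Map<String, Object>> tagged = injectRowId(source, 1);

        // If the cutoff join stops after emitting an output derived from the
        // input tagged __rowid == 3, then exactly 3 inputs were consumed.
        final int inputCount = (Integer) tagged.get(2).get("__rowid");
        System.out.println("inputCount=" + inputCount); // 3
    }
}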
From: <tho...@us...> - 2010-11-12 00:18:16
Revision: 3938 http://bigdata.svn.sourceforge.net/bigdata/?rev=3938&view=rev Author: thompsonbry Date: 2010-11-12 00:18:05 +0000 (Fri, 12 Nov 2010) Log Message: ----------- Checkpoint of some work on runtime query optimization. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestJoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/AdaptiveQueryOptimization.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-11-11 22:00:41 UTC (rev 3937) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/SampleIndex.java 2010-11-12 00:18:05 UTC (rev 3938) @@ -68,7 +68,8 @@ * sampling, including: uniform distribution, randomly distribution, tuple * at a time versus clustered (sampling with leaves), adaptive sampling * until the sample reflects some statistical property of the underlying - * population, etc. + * population, etc. Support for different kinds of sampling could be added + * using appropriate annotations. */ public class SampleIndex<E> extends AbstractAccessPathOp<E> { @@ -308,6 +309,10 @@ * Taking a clustered sample really requires knowing where the * leaf boundaries are in the index, e.g., using * {@link ILeafCursor}. + * <p> + * Taking all tuples from a few leaves in each sample might + * produce a faster estimation of the correlation when sampling + * join paths. * * @todo Rather than evenly spaced samples, we should be taking a random * sample. 
This could be achieved using a random initial offset Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-11 22:00:41 UTC (rev 3937) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-11-12 00:18:05 UTC (rev 3938) @@ -28,9 +28,12 @@ package com.bigdata.bop.controller; import java.io.Serializable; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Formatter; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -46,7 +49,7 @@ import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpContextBase; import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.Constant; +import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IElement; import com.bigdata.bop.IPredicate; @@ -66,6 +69,7 @@ import com.bigdata.relation.accesspath.ThickAsynchronousIterator; import com.bigdata.relation.rule.Rule; import com.bigdata.striterator.Dechunkerator; +import com.bigdata.striterator.IChunkedIterator; /** * A join graph with annotations for estimated cardinality and other details in @@ -81,7 +85,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * - * @todo Some edges can be eliminated by transitivity. For example, given + * TODO Some edges can be eliminated by transitivity. For example, given * * <pre> * query: @@ -103,7 +107,7 @@ * It is necessary to execute e1 and either e2 or e3, but not both e2 and e3. * </pre> * - * @todo In order to combine pipelining with runtime query optimization we need + * TODO In order to combine pipelining with runtime query optimization we need * to sample based on the first chunk(s) delivered by the pipeline. If * necessary, we can buffer multiple chunks for semi-selective queries. * However, for unselective queries we would accept as many buffers worth @@ -127,11 +131,11 @@ String VERTICES = JoinGraph.class.getName() + ".vertices"; /** - * The initial sample size (default {@value #DEFAULT_SAMPLE_SIZE}). + * The initial limit for cutoff sampling (default {@value #DEFAULT_LIMIT}). */ - String SAMPLE_SIZE = JoinGraph.class.getName() + ".sampleSize"; + String LIMIT = JoinGraph.class.getName() + ".limit"; - int DEFAULT_SAMPLE_SIZE = 100; + int DEFAULT_LIMIT = 100; } /** @@ -144,11 +148,11 @@ } /** - * @see Annotations#SAMPLE_SIZE + * @see Annotations#LIMIT */ - public int getSampleSize() { + public int getLimit() { - return getProperty(Annotations.SAMPLE_SIZE, Annotations.DEFAULT_SAMPLE_SIZE); + return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT); } @@ -160,17 +164,17 @@ /** * - * @todo We can derive the vertices from the join operators or the join + * TODO We can derive the vertices from the join operators or the join * operators from the vertices. However, if a specific kind of join * operator is required then the question is whether we have better * information to make that choice when the join graph is evaluated or * before it is constructed. * - * @todo How we will handle optional joins? Presumably they are outside of + * TODO How we will handle optional joins? Presumably they are outside of * the code join graph as part of the tail attached to that join * graph. 
* - * @todo How can join constraints be moved around? Just attach them where + * TODO How can join constraints be moved around? Just attach them where * ever a variable becomes bound? And when do we filter out variables * which are not required downstream? Once we decide on a join path * and execute it fully (rather than sampling that join path). @@ -196,10 +200,90 @@ } + /** + * Used to assign row identifiers. + */ + static private final IVariable<Integer> ROWID = Var.var("__rowid"); + + /** + * A sample of a {@link Vertex} (an access path). + */ + public static class VertexSample { + + /** + * Fast range count. This will be the same for each sample taken + * (assuming a read historical view or even a time scale of query which + * is significantly faster than update). + */ + public final long rangeCount; + + /** + * The limit used to produce the {@link #sample}. + */ + public final int limit; + + /** + * When <code>true</code>, the result is not a sample but the + * materialized access path. + * + * @todo When <code>true</code>, we could run the join against the + * sample rather than the disk. This would require wrapping the + * sample as an access path. Since all exact samples will be + * pretty small, this is not likely to have any great performance + * benefit. + */ + public final boolean exact; + + /** + * Sample. + */ + final Object[] sample; + + /** + * + * @param rangeCount + * @param limit + * @param exact + * @param sample + */ + public VertexSample(final long rangeCount, final int limit, final boolean exact, final Object[] sample) { + + if (rangeCount < 0L) + throw new IllegalArgumentException(); + + if (limit <= 0) + throw new IllegalArgumentException(); + + if (sample == null) + throw new IllegalArgumentException(); + + this.rangeCount = rangeCount; + + this.limit = limit; + + this.exact = exact; + + this.sample = sample; + + } + + public String toString() { + return "VertexSample{rangeCount=" + rangeCount + ",limit=" + limit + + ",exact=" + exact + ", sampleSize=" + sample.length + "}"; + } + + } + /** * A vertex of the join graph is an annotated relation (this corresponds to * an {@link IPredicate} with additional annotations to support the adaptive * query optimization algorithm). + * <p> + * The unique identifier for a {@link Vertex} (within a given join graph) is + * the {@link BOp.Annotations#BOP_ID} decorating its {@link IPredicate}. + * {@link #hashCode()} is defined in terms of this unique identifier so we + * can readily detect when a {@link Set} already contains a given + * {@link Vertex}. */ public static class Vertex implements Serializable { @@ -208,23 +292,13 @@ */ private static final long serialVersionUID = 1L; - final IPredicate<?> pred; + public final IPredicate<?> pred; /** - * The limit used to produce the {@link #sample}. + * The most recently taken sample of the {@link Vertex}. */ - int limit; + VertexSample sample = null; - /** - * Fast range count and <code>null</code> until initialized. - */ - Long rangeCount; - - /** - * Sample (when not-null). - */ - Object[] sample; - Vertex(final IPredicate<?> pred) { if (pred == null) @@ -236,45 +310,270 @@ public String toString() { - return "\nVertex{pred=" + pred + ",rangeCount=" + rangeCount - + ",sampleSize=" + (sample == null ? "N/A" : sample.length) - + "}"; + return "Vertex{pred=" + pred + ",sample=" + sample + "}"; } - public void sample(final BOpContextBase context,final int limit) { + /** + * Equals is based on a reference test. 
+ */ + public boolean equals(Object o) { + return this == o; + } + /** + * The hash code is just the {@link BOp.Annotations#BOP_ID} of the + * associated {@link IPredicate}. + */ + public int hashCode() { + return pred.getId(); + } + + /** + * Take a sample of the vertex. If the sample is already exact, then + * this is a NOP. + * + * @param context + * @param limit + * The sample cutoff. + */ + public void sample(final BOpContextBase context, final int limit) { + + if (context == null) + throw new IllegalArgumentException(); + + if (limit <= 0) + throw new IllegalArgumentException(); + + final VertexSample oldSample = this.sample; + + if(oldSample != null && oldSample.exact) { + + /* + * The old sample is already the full materialization of the + * vertex. + */ + + return; + + } + final IRelation r = context.getRelation(pred); final IAccessPath ap = context.getAccessPath(r, pred); - if (rangeCount == null) { + final long rangeCount = oldSample == null ? ap + .rangeCount(false/* exact */) : oldSample.rangeCount; - rangeCount = ap.rangeCount(false/* exact */); + if (rangeCount <= limit) { - } + /* + * Materialize the access path. + * + * @todo This could be more efficient if we raised it onto the + * AP or if we overrode CHUNK_CAPACITY and the fully buffered + * iterator threshold such that everything was materialized as a + * single chunk. + */ + + final List<Object> tmp = new ArrayList<Object>((int) rangeCount); - if (sample == null) { // @todo new sample each time? + final IChunkedIterator<Object> itr = ap.iterator(); - final SampleIndex sampleOp = new SampleIndex(new BOp[] {}, // - NV.asMap(// - new NV(SampleIndex.Annotations.PREDICATE, pred),// - new NV(SampleIndex.Annotations.LIMIT, limit))); + try { + + while (itr.hasNext()) { + + tmp.add(itr.next()); + + } + + } finally { + + itr.close(); + } + + sample = new VertexSample(rangeCount, limit, true/* exact */, + tmp.toArray(new Object[0])); - sample = sampleOp.eval(context); - - this.limit = limit; - + return; + } + + /* + * Materialize a random sample from the access path. + */ + final SampleIndex sampleOp = new SampleIndex(new BOp[] {}, // + NV.asMap(// + new NV(SampleIndex.Annotations.PREDICATE, pred),// + new NV(SampleIndex.Annotations.LIMIT, limit))); + + sample = new VertexSample(rangeCount, limit, false/*exact*/, sampleOp + .eval(context)); + } } /** + * A sample of an {@link Edge} (a join). + */ + public static class EdgeSample { + + /** + * The fast range count (aka cardinality) for the source vertex of the + * edge (whichever vertex has the lower cardinality). + */ + public final long rangeCount; + + /** + * The limit used to sample the edge (this is the limit on the #of + * solutions generated by the cutoff join used when this sample was + * taken). + */ + public final int limit; + + /** + * The #of binding sets out of the source sample vertex sample which + * were consumed. + */ + public final int inputCount; + + /** + * The #of binding sets generated before the join was cutoff. + * <p> + * Note: If the outputCount is zero then this is a good indicator that + * there is an error in the query such that the join will not select + * anything. This is not 100%, merely indicative. + */ + public final int outputCount; + + /** + * The ratio of the #of input samples consumed to the #of output samples + * generated (the join hit ratio or scale factor). + */ + public final double f; + + /** + * The estimated cardinality of the join. 
+ */ + public final long estimatedCardinality; + + /** + * Flag is set when the estimate is likely to be a lower bound for the + * cardinality of the edge. + * <p> + * If the {@link #inputCount} is ONE (1) and the {@link #outputCount} is + * the {@link #limit} then the {@link #estimatedCardinality} is a lower + * bound as more than {@link #outputCount} solutions could have been + * produced by the join against a single input solution. + */ + public final boolean estimateIsLowerBound; + + /** + * Flag indicates that the {@link #estimatedCardinality} underflowed. + * <p> + * Note: When the source vertex sample was not exact, then it is + * possible for the cardinality estimate to underflow. When, in + * addition, {@link #outputCount} is LT {@link #limit}, then feeding the + * sample of source tuples in is not sufficient to generated the desired + * #of output tuples. In this case, {@link #f join hit ratio} will be + * low. It may even be that zero output tuples were generated, in which + * case the join hit ratio will appear to be zero. However, the join hit + * ratio actually underflowed and an apparent join hit ratio of zero + * does not imply that the join will be empty unless the source vertex + * sample is actually the fully materialized access path - see + * {@link VertexSample#exact} and {@link #exact}. + */ + public final boolean estimateIsUpperBound; + + /** + * <code>true</code> if the sample is the exact solution for the join + * path. + * <p> + * Note: If the entire source vertex is being feed into the sample, + * {@link VertexSample#exact} flags this condition, and outputCount is + * also LT the limit, then the edge sample is the actual result of the + * join. That is, feeding all source tuples into the join gives fewer + * than the desired number of output tuples. + * + * @todo This field marks this condition and should be used to avoid + * needless recomputation of a join whose exact solution is + * already known. + */ + public final boolean exact; + + /** + * The sample of the solutions for the join path. + */ + private final IBindingSet[] sample; + + /** + * Create an object which encapsulates a sample of an edge. + * + * @param limit + * The limit used to sample the edge (this is the limit on + * the #of solutions generated by the cutoff join used when + * this sample was taken). + * @param sourceVertexSample + * The sample for source vertex of the edge (whichever vertex + * has the lower cardinality). + * @param inputCount + * The #of binding sets out of the source sample vertex + * sample which were consumed. + * @param outputCount + * The #of binding sets generated before the join was cutoff. + */ + EdgeSample(final VertexSample sourceVertexSample, final int limit, + final int inputCount, final int outputCount, + final IBindingSet[] sample) { + + if(sample == null) + throw new IllegalArgumentException(); + + this.rangeCount = sourceVertexSample.rangeCount; + + this.limit = limit; + + this.inputCount = inputCount; + + this.outputCount = outputCount; + + f = outputCount == 0 ? 
0 : (outputCount / (double) inputCount); + + estimatedCardinality = (long) (rangeCount * f); + + estimateIsLowerBound = inputCount == 1 && outputCount == limit; + + estimateIsUpperBound = !sourceVertexSample.exact + && outputCount < limit; + + this.exact = sourceVertexSample.exact && outputCount < limit; + + this.sample = sample; + } + + public String toString() { + return getClass().getName() + "{inputRangeCount=" + rangeCount + + ", limit=" + limit + ", inputCount=" + inputCount + + ", outputCount=" + outputCount + ", f=" + f + + ", estimatedCardinality=" + estimatedCardinality + + ", estimateIsLowerBound=" + estimateIsLowerBound + + ", estimateIsUpperBound=" + estimateIsUpperBound + + ", sampleIsExactSolution=" + exact + + "}"; + } + + }; + + /** * An edge of the join graph is an annotated join operator. The edges of the * join graph are undirected. Edges exist when the vertices share at least * one variable. + * <p> + * {@link #hashCode()} is defined in terms of the unordered hash codes of + * the individual vertices. */ public static class Edge implements Serializable { @@ -286,97 +585,18 @@ /** * The vertices connected by that edge. */ - final Vertex v1, v2; + public final Vertex v1, v2; /** * The set of shared variables. */ - final Set<IVariable<?>> shared; + public final Set<IVariable<?>> shared; - class EdgeSample { - - /** - * The fast range count (aka cardinality) for the source vertex of - * the edge (whichever vertex has the lower cardinality). - */ - final long inputRangeCount; - /** - * The limit used to sample the edge (this is the limit on the #of - * solutions generated by the cutoff join used when this sample was - * taken). - */ - final int limit; - /** - * The #of binding sets out of the source sample vertex sample which - * were consumed. - */ - final int inputCount; - /** - * The #of binding sets generated before the join was cutoff. - */ - final int outputCount; - /** - * The ratio of the #of input samples consumed to the #of output - * samples generated. - */ - final double f; - /** - * The estimated cardinality of the join. - */ - final long estimatedCardinality; - - /** - * @param limit - * The limit used to sample the edge (this is the limit - * on the #of solutions generated by the cutoff join used - * when this sample was taken). - * @param inputRangeCount - * The fast range count (aka cardinality) for the source - * vertex of the edge (whichever vertex has the lower - * cardinality). - * @param inputCount - * The #of binding sets out of the source sample vertex - * sample which were consumed. - * @param outputCount - * The #of binding sets generated before the join was - * cutoff. - * - * @todo If the outputCount is zero then this is a good indicator - * that there is an error in the query such that the join will - * not select anything. This is not 100%, merely indicative. - */ - EdgeSample(final long inputRangeCount, final int limit, final int inputCount, - final int outputCount) { - - this.inputRangeCount = inputRangeCount; - - this.limit = limit; - - this.inputCount = inputCount; - - this.outputCount = outputCount; - - f = outputCount == 0 ? 
0 : (outputCount / (double) inputCount); - - estimatedCardinality = (long) (inputRangeCount * f); - - } - - public String toString() { - return "EdgeSample" + "{inputRangeCount=" + inputRangeCount - + ", limit=" + limit + ", inputCount=" + inputCount - + ", outputCount=" + outputCount + ", f=" + f - + ", estimatedCardinality=" + estimatedCardinality - + "}"; - } - - }; - /** * The last sample for this edge and <code>null</code> if the edge has * not been sampled. */ - EdgeSample sample = null; + public EdgeSample sample = null; public Edge(final Vertex v1, final Vertex v2, final Set<IVariable<?>> shared) { if (v1 == null) @@ -391,21 +611,108 @@ this.v2 = v2; this.shared = shared; } - + + /** + * Note: The vertices of the edge are labeled using the + * {@link BOp.Annotations#BOP_ID} associated with the {@link IPredicate} + * for each vertex. + */ public String toString() { - return "\nEdge{v1=" + v1.pred.getId() + ",v2=" + v2.pred.getId() - + ",shared=" + shared.toString() + ", sample=" + sample + "}"; + return "Edge{ (V" + v1.pred.getId() + ",V" + v2.pred.getId() + ")" + + ", shared=" + shared.toString() + ", sample=" + sample + + "}"; } /** + * Equality is determined by reference testing. + */ + public boolean equals(final Object o) { + + return this == o; + + } + + /** + * The hash code of an edge is the hash code of the vertex with the + * smaller hash code X 31 plus the hash code of the vertex with the + * larger hash code. This definition compensates for the arbitrary order + * in which the vertices may be expressed and also recognizes that the + * vertex hash codes are based on the bop ids, which are often small + * integers. + */ + public int hashCode() { + + if (hash == 0) { + + final int h1 = v1.hashCode(); + final int h2 = v2.hashCode(); + + final int h; + if (h1 < h2) { + + h = h1 * 31 + h2; + + } else { + + h = h2 * 31 + h1; + + } + + hash = h; + + } + return hash; + + } + private int hash; + + /** + * Return the vertex with the smaller estimated cardinality. + * + * @throws IllegalStateException + * if either vertex has not been sampled. + */ + public Vertex getMinimumCardinalityVertex() { + + if (v1.sample == null) // vertex not sampled. + throw new IllegalStateException(); + + if (v2.sample == null) // vertex not sampled. + throw new IllegalStateException(); + + return (v1.sample.rangeCount < v2.sample.rangeCount) ? v1 : v2; + + } + + /** + * Return the vertex with the larger estimated cardinality (the vertex + * not returned by {@link #getMinimumCardinalityVertex()}). + * + * @throws IllegalStateException + * if either vertex has not been sampled. + */ + public Vertex getMaximumCardinalityVertex() { + + // The vertex with the minimum cardinality. + final Vertex o = getMinimumCardinalityVertex(); + + // Return the other vertex. + return (v1 == o) ? v2 : v1; + + } + + /** * Estimate the cardinality of the edge. * * @param context + * + * @return The estimated cardinality of the edge. + * * @throws Exception */ - public void estimateCardinality(final QueryEngine queryEngine, + public long estimateCardinality(final QueryEngine queryEngine, final int limit) throws Exception { if (limit <= 0) @@ -418,7 +725,11 @@ */ // vertex v, vprime final Vertex v, vp; - if (v1.rangeCount < v2.rangeCount) { + if (v1.sample == null) // vertex not sampled. + throw new IllegalStateException(); + if (v2.sample == null) // vertex not sampled. 
+ throw new IllegalStateException(); + if (v1.sample.rangeCount < v2.sample.rangeCount) { v = v1; vp = v2; } else { @@ -427,7 +738,7 @@ } /* - * @todo This is difficult to setup because we do not have a concept + * TODO This is difficult to setup because we do not have a concept * (or class) corresponding to a fly weight relation and we do not * have a general purpose relation, just arrays or sequences of * IBindingSets. Also, all relations are persistent. Temporary @@ -446,28 +757,66 @@ * both the input and the output of the cutoff evaluation of the * edge rather than rows of the materialized relation. * - * @todo On subsequent iterations we would probably re-sample [v] + * TODO On subsequent iterations we would probably re-sample [v] * and we would run against the materialized intermediate result for * [v']. */ /* - * Convert the source sample into an IBindingSet[], injecting a - * rowid column. + * Convert the source sample into an IBindingSet[]. + * + * @todo We might as well do this when we sample the vertex. */ - final IVariable<Integer> ROWID = Var.var("__rowid"); - final IBindingSet[] sample = new IBindingSet[v.sample.length]; + final IBindingSet[] sourceSample = new IBindingSet[v.sample.sample.length]; { - for (int i = 0; i < sample.length; i++) { + for (int i = 0; i < sourceSample.length; i++) { final IBindingSet bset = new HashBindingSet(); - BOpContext.copyValues((IElement) v.sample[i], v.pred, bset); - bset.set(ROWID, new Constant<Integer>(Integer.valueOf(i))); - sample[i] = bset; + BOpContext.copyValues((IElement) v.sample.sample[i], v.pred, bset); + sourceSample[i] = bset; } } + // Sample the edge and save the sample on the edge as a side-effect. + this.sample = estimateCardinality(queryEngine, limit, v, vp, sourceSample); + + return sample.estimatedCardinality; + + } + + /** + * Estimate the cardinality of the edge. + * + * @param queryEngine + * @param limit + * @param vSource + * The source vertex. + * @param vTarget + * The target vertex + * @param sourceSample + * The sample for the source vertex. When this is a one-step + * estimation of the cardinality of the edge, then this + * sample is taken from the {@link VertexSample}. When the + * edge (vSource,vTarget) extends some {@link Path}, then + * this is taken from the {@link EdgeSample} for that + * {@link Path}. + * + * @return The result of sampling that edge. + * + * @throws Exception + */ + public EdgeSample estimateCardinality(final QueryEngine queryEngine, + final int limit, final Vertex vSource, final Vertex vTarget, + IBindingSet[] sourceSample) throws Exception { + + if (limit <= 0) + throw new IllegalArgumentException(); + + // Inject a rowId column. + sourceSample = BOpUtility.injectRowIdColumn(ROWID, 1/* start */, + sourceSample); + /* - * @todo Any constraints on the edge (other than those implied by + * TODO Any constraints on the edge (other than those implied by * shared variables) need to be annotated on the join. 
Constraints * (other than range constraints which are directly coded by the * predicate) will not reduce the effort to compute the join, but @@ -476,7 +825,7 @@ */ final PipelineJoin joinOp = new PipelineJoin(new BOp[] {}, // new NV(BOp.Annotations.BOP_ID, 1),// - new NV(PipelineJoin.Annotations.PREDICATE,vp.pred.setBOpId(3)) + new NV(PipelineJoin.Annotations.PREDICATE,vTarget.pred.setBOpId(3)) ); final SliceOp sliceOp = new SliceOp(new BOp[] { joinOp },// @@ -494,12 +843,13 @@ queryId, joinOp.getId()/* startId */, -1 /* partitionId */, new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { sample }))); + new IBindingSet[][] { sourceSample }))); // #of source samples consumed. int inputCount = 0; // #of output samples generated. int outputCount = 0; + final List<IBindingSet> result = new LinkedList<IBindingSet>(); try { try { IBindingSet bset = null; @@ -508,11 +858,12 @@ runningQuery.iterator()); while (itr.hasNext()) { bset = itr.next(); + result.add(bset); outputCount++; } - // #of input rows consumed. Note: +1 since origin ZERO. + // #of input rows consumed. inputCount = bset == null ? 0 : ((Integer) bset.get(ROWID) - .get()) + 1; + .get()); } finally { // verify no problems. FIXME Restore test of the query. // runningQuery.get(); @@ -521,18 +872,365 @@ runningQuery.cancel(true/* mayInterruptIfRunning */); } - this.sample = new EdgeSample(v.rangeCount, limit, inputCount, - outputCount); + /* + * Note: This needs to be based on the source vertex having the + * minimum cardinality for the Path which is being extended which + * connects via some edge defined in the join graph. If a different + * vertex is chosen as the source then the estimated cardinality + * will be falsely high by whatever ratio the chosen vertex + * cardinality exceeds the one having the minimum cardinality which + * is connected via an edge to the target vertex). + */ + final VertexSample moreSelectiveVertexSample = vSource.sample.rangeCount < vTarget.sample.rangeCount ? vSource.sample + : vTarget.sample; + final EdgeSample edgeSample = new EdgeSample( + moreSelectiveVertexSample/* vSource.sample */, limit, + inputCount, outputCount, result + .toArray(new IBindingSet[result.size()])); + if (log.isInfoEnabled()) - log.info("edge=" + this + sample); + log.info("edge=" + this + ", sample=" + edgeSample); + return edgeSample; + } } +// /** +// * A path sample includes the materialized binding sets from the as-executed +// * join path. +// * +// * @todo The sample {@link IBindingSet}[] could be saved with the +// * {@link EdgeSample}. However, when we are sampling a join path we +// * want to associate the net sample with the path, not each edge in +// * that path, because we need to be able to generate join paths in +// * which the path is extended from any vertex already part of the path +// * to any vertex which has not yet incorporated in the path and has +// * not yet been executed. To do this we need to intermediate results +// * for the path, which includes all variables bound by each join for +// * each edge in the path, not just on an edge by edge basis. +// */ +// public static class PathSample extends EdgeSample { +// +// /** +// * <code>true</code> if the sample is the exact solution for the join path. +// */ +// private final boolean exact; +// +// /** +// * The sample of the solutions for the join path. 
+// */ +// private final IBindingSet[] sample; +// +// PathSample(final long inputRangeCount, final int limit, +// final int inputCount, final int outputCount, +// final boolean exact, final IBindingSet[] sample) { +// +// super(inputRangeCount, limit, inputCount, outputCount); +// +// if(sample == null) +// throw new IllegalArgumentException(); +// +// this.exact = exact; +// +// this.sample = sample; +// +// } +// +// public String toString() { +// +// return super.toString() + ":{exact=" + exact + ", sampleSize=" +// + sample.length + "}"; +// +// } +// +// } + /** + * A sequence of {@link Edge}s (aka join steps). + */ + public static class Path { + + public final List<Edge> edges; + + /* + * These fields carry state used by chainSample. It would be better to + * have that state on a data structure which is purely local to + * chainSample, but perhaps Path is that data structure. + */ + + public EdgeSample sample = null; + +// /** +// * Input to the next round of sampling. +// */ +// private VertexSample inputSample; + + /** + * The vertex at which the path from which this path was derived + * stopped. This is initialized to the source vertex when entering the + * chainSample() method. + */ + private Vertex stopVertex; + + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("Path{"); + boolean first = true; + for (Edge e : edges) { + if (!first) + sb.append(","); + sb.append("(" + e.v1.pred.getId() + "," + e.v2.pred.getId() + ")"); + first = false; + } + sb.append(",sample=" + sample + "}"); + return sb.toString(); + } + + /** + * Create an empty path. + */ + public Path() { + this.edges = new LinkedList<Edge>(); + } + + /** + * Create a path from a single edge. + * + * @param e + * The edge. + */ + public Path(final Edge e) { + if (e == null) + throw new IllegalArgumentException(); + this.edges = new LinkedList<Edge>(); + this.edges.add(e); + this.sample = e.sample; + } + + /** + * Return <code>true</code> iff the {@link Path} contains at least one + * {@link Edge} for that {@link Vertex}. + * + * @param v + * The vertex + * + * @return true if the vertex is already part of the path. + */ + public boolean contains(final Vertex v) { + + if (v == null) + throw new IllegalArgumentException(); + + for (Edge e : edges) { + + if (e.v1 == v || e.v2 == v) + return true; + + } + + return false; + } + + /** + * Add an edge to a path, computing the estimated cardinality of the new + * path, and returning the new path. + * + * @param queryEngine + * @param limit + * @param e + * The edge. + * + * @return The new path. + * + * @throws Exception + */ + public Path addEdge(final QueryEngine queryEngine, final int limit, + final Edge e) throws Exception { + + if (e == null) + throw new IllegalArgumentException(); + + // Figure out which vertices are already part of this path. + final boolean v1Found = contains(e.v1); + final boolean v2Found = contains(e.v2); + + if (!v1Found && !v2Found) + throw new IllegalArgumentException( + "Edge does not extend path: edge=" + e + ", path=" + + this); + + if (v1Found && v2Found) + throw new IllegalArgumentException( + "Edge already present in path: edge=" + e + ", path=" + + this); + + // The vertex which is already part of this path. + final Vertex sourceVertex = v1Found ? e.v1 : e.v2; + + // The new vertex, which is not part of this path. + final Vertex targetVertex = v1Found ? e.v2 : e.v1; + + // Extend the path. + final Path tmp = new Path(); + + tmp.edges.addAll(edges); + + tmp.edges.add(e); + + /* + * Chain sample the edge. 
+ * + * Note: ROX uses the intermediate result I(p) for the existing path + * as the input when sampling the edge. The corresponding concept + * for us is the sample for this Path, which will have all variable + * bindings produced so far. In order to estimate the cardinality of + * the new join path we have to do a one step cutoff evaluation of + * the new Edge, given the sample available on the current Path. + * + * TODO It is possible for the path sample to be empty. Unless the + * sample also happens to be exact, this is an indication that the + * estimated cardinality has underflowed. How are we going to deal + * with this situation?!? What would appear to matter is the amount + * of work being performed by the join in achieving that low + * cardinality. If we have to do a lot of work to get a small + * cardinality then we should prefer join paths which achieve the + * same reduction in cardinality with less 'intermediate + * cardinality' - that is, by examining fewer possible solutions. + */ + +// final IBindingSet[] sample = BOpUtility.injectRowIdColumn(ROWID, +// 0/* start */, this.sample.sample); + + final EdgeSample edgeSample = e.estimateCardinality(queryEngine, + limit, sourceVertex, targetVertex, this.sample.sample); + + tmp.sample = edgeSample; + +// tmp.stopVertex = e.getMaximumCardinalityVertex(); + + return tmp; + + } + +// /** +// * Equality is defined by comparison of the unordered set of edges. +// */ +// public boolean equals(final Object o) { +// if (this == o) +// return true; +// if (!(o instanceof Path)) +// return false; +// final Path t = (Path) o; +// if (edges.length != t.edges.length) +// return false; +// for (Edge e : edges) { +// boolean found = false; +// for (Edge x : t.edges) { +// if (x.equals(e)) { +// found = true; +// break; +// } +// } +// if (!found) +// return false; +// } +// return true; +// } +// +// /** +// * The hash code of path is defined as the bit-wise XOR of the hash +// * codes of the edges in that path. +// */ +// public int hashCode() { +// +// if (hash == 0) { +// +// int result = 0; +// +// for(Edge e : edges) { +// +// result ^= e.hashCode(); +// +// } +// +// hash = result; +// +// } +// return hash; +// +// } +// private int hash; + + } + + /** + * Comma delimited table showing the estimated join hit ratio, the estimated + * cardinality, and the set of vertices for each of the specified join + * paths. + * + * @param a + * An array of join paths. + * + * @return A table with that data. + */ + static public String showTable(final Path[] a) { + final StringBuilder sb = new StringBuilder(); + final Formatter f = new Formatter(sb); + for (Path x : a) { + if (x.sample == null) { + f.format("%7s, %10s", "N/A", "N/A"); + } else { + f.format("% 7.2f, % 10d", x.sample.f, + x.sample.estimatedCardinality); + } + sb.append(","); + for (Edge e : x.edges) + sb.append(" (" + e.v1.pred.getId() + " " + e.v2.pred.getId() + + ")"); + sb.append("\n"); + } + return sb.toString(); + } + + /** * A join graph (data structure and methods only). + * + * Note: ROX was stated in terms of materialization of intermediate results. + * Bigdata was originally designed to support pipelined join evaluation in + * which the zero investment property is true (there exists an index for the + * join). While support is being developed for operator-at-once joins (e.g., + * hash joins), that support is aimed at more efficient evaluation of high + * cardinality joins using multi-block IO. 
Therefore, unlike ROX, the + * runtime query optimizer does not materialize the intermediate results + * when chain sampling. Instead, it feeds a sample into a cutoff pipeline + * evaluation for the join path. Since some join paths can eliminate a lot + * of intermediate solutions and hence take a long time to satisfy the + * cutoff, we also specify a timeout for the cutoff evaluation of a join + * path. Given the zero investment property (an index exists for the join), + * if the cutoff is not satisfied within the timeout, then the join has a + * low correlation. If no solutions are generated within the timeout, then + * the estimate of the correlation "underflows". + * + * Note: timeouts are a bit tricky when you are not running on a real-time + * platform. In particular, heavy swapping or heavy GC workloads could both + * cause a timeout to expire because no work was done on sampling the join + * path rather than because there was a lot of work to be done. Therefore, + * the timeout should be used to protect against join paths which take a + * long time to materialize <i>cutoff</i> solutions rather than to fine tune + * the running time of the query optimizer. + * + * TODO Runtime query optimization is probably useless (or else should rely + * on materialization of intermediate results) when the cardinality of the + * vertices and edges for the query is small. This would let us balance the + * design characteristics of MonetDB and bigdata. For this purpose, we need + * to flag when a {@link VertexSample} is complete (e.g., the cutoff is GTE + * the actual range count). This also needs to be done for each join path so + * we can decide when the sample for the path is in fact the exact solution + * rather than an estimate of the cardinality of the solution together with + * a sample of the solution. */ public static class JGraph { @@ -546,6 +1244,9 @@ */ private final Edge[] E; + // The set of vertices which have been consumed by the query. + private final Set<Vertex> executedVertices = new LinkedHashSet<Vertex>(); + public List<Vertex> getVertices() { return Collections.unmodifiableList(Arrays.asList(V)); } @@ -555,8 +1256,25 @@ } public String toString() { - return super.toString() + "{V=" + Arrays.toString(V) + ",E=" - + Arrays.toString(E) + "}"; + final StringBuilder sb = new StringBuilder(); + sb.append("JoinGraph"); + sb.append("{V=["); + for(Vertex v : V) { + sb.append("\nV["+v.pred.getId()+"]="+v); + } + sb.append("],E=["); + for(Edge e : E) { + sb.append("\n"+e); + } + sb.append("\n],ExecutedVertices=["); + for(Vertex v : executedVertices) { + sb.append("\nV["+v.pred.getId()+"]="+v); + } + sb.append("\n]}"); + return sb.toString(); + +// return super.toString() + "{V=" + Arrays.toString(V) + ",E=" +// + Arrays.toString(E) + ", executedVertices="+executedVertices+"}"; } public JGraph(final IPredicate[] v) { @@ -611,6 +1329,45 @@ } /** + * Return the {@link Vertex} whose {@link IPredicate} is associated with + * the given {@link BOp.Annotations#BOP_ID}. + * + * @param bopId + * The bop identifier. + * @return The {@link Vertex} -or- <code>null</code> if there is no such + * vertex in the join graph. + */ + public Vertex getVertex(int bopId) { + for(Vertex v : V) { + if(v.pred.getId()==bopId) + return v; + } + return null; + } + + /** + * Return the {@link Edge} associated with the given vertices. The + * vertices may appear in any order. + * + * @param v1 + * One vertex. + * @param v2 + * Another vertex. 
+ * + * @return The edge -or- <code>null</code> if there is no such edge in + * the join graph. + */ + public Edge getEdge(Vertex v1, Vertex v2) { + for(Edge e : E) { + if (e.v1 == v1 && e.v2 == v2) + return e; + if (e.v1 == v2 && e.v2 == v1) + return e; + } + return null; + } + + /** * Obtain a sample and estimated cardinality (fast range count) for each vertex. * * @param context @@ -632,28 +1389,601 @@ * * @param context * - * @throws Exception + * @throws Exception */ - public void estimateEdgeWeights(final QueryEngine queryEngine, final int limit) throws Exception { - - for(Edge e : E) { - + public void estimateEdgeWeights(final QueryEngine queryEngine, + final int limit) throws Exception { + + for (Edge e : E) { + if (e.v1.sample == null || e.v2.sample == null) { - + /* * We can only estimate the cardinality of edges connecting * vertices for which samples were obtained. */ continue; + + } + + e.estimateCardinality( + queryEngine, limit); + + } + + } + + /** + * Return the {@link Edge} having the minimum estimated cardinality out + * of those edges whose cardinality has been estimated. + * + * @param A + * set of vertices to be excluded from consideration + * (optional). + * + * @return The minimum cardinality edge -or- <code>null</code> if there + * are no {@link Edge}s having an estimated cardinality. + */ + public Edge getMinimumCardinalityEdge(final Set<Vertex> visited) { + + long minCard = Long.MIN_VALUE; + Edge minEdge = null; + + for (Edge e : E) { + + if (e.sample == null) { + + // Edge has not been sampled. + continue; + + } + + if (visited != null + && (visited.contains(e.v1) || visited.contains(e.v2))) { + // A vertex of that edge has already been consumed. + continue; + } - e.estimateCardinality(queryEngine, limit); + final long estimatedCardinality = e.sample.estimatedCardinality; + + if (minEdge == null || estimatedCardinality < minCard) { + + minEdge = e; + + minCard = estimatedCardinality; + + } + + } + + return minEdge; + + } + +// /** +// * Return the {@link Edge} having the minimum estimated cardinality out +// * of those edges whose cardinality has been estimated. +// * +// * @return The minimum cardinality edge -or- <code>null</code> if there +// * are no {@link Edge}s having an estimated cardinality. +// */ +// public Edge getMinimumCardinalityEdge() { +// +// return getMinimumCardinalityEdge(null); +// +// } + + /** + * Return the #of edges in which the given vertex appears where the + * other vertex of the edge does not appear in the set of visited + * vertices. + * + * @param v + * The vertex. + * @param visited + * A set of vertices to be excluded from consideration. + * + * @return The #of such edges. + */ + public int getEdgeCount(final Vertex v, final Set<Vertex> visited) { + + return getEdges(v, visited).size(); + + } + + /** + * Return the edges in which the given vertex appears where the other + * vertex of the edge does not appear in the set of visited vertices. + * + * @param v + * The vertex. + * @param visited + * A set of vertices to be excluded from consideration + * (optional). + * + * @return Those edges. 
+ */ + public List<Edge> getEdges(final Vertex v, final Set<Vertex> visited) { + + if (v == null) + throw new IllegalArgumentException(); + + if (visited != null && visited.contains(v)) + return Collections.emptyList(); + + final List<Edge> tmp = new LinkedList<Edge>(); + + for (Edge e : E) { + + if (v.equals(e.v1) || v.equals(e.v2)) { + + if (visited != null) { + + if (visited.contains(e.v1)) + continue; + + if (visited.contains(e.v2)) + continue; + + } + + tmp.add(e); + + } } + return tmp; + } - + + /** + * + * @param queryEngine + * @param limit + * The limit for sampling a vertex and the initial limit for + * cutoff join evaluation. A reasonable value is + * <code>100</code>. + * @param timeout + * The timeout for cutoff join path evaluation + * (milliseconds). A reasonable value is <code>100</code>ms. + * @throws Exception + * + * FIXME This must either return the query plan or copy the + * results as they are materialized to the sink for the join + * graph operator. + * + * + * @todo We do not need the [timeout] as long as we evaluate each cutoff + * join separately. The limited number of input solutions to the + * join automatically limits the amount of work the join can do. + * However, if we do cutoff evaluation of a series of edges then + * it is possible to do a lot of work in order to find [limit] + * solutions. In this case, a [timeout] protects us against join + * paths which have poor correlations and large cardinality for + * their vertices (a lot of solutions are considered to produce + * very few results). + */ + public void runtimeOptimizer(final QueryEngine queryEngine, + final int limit, final long timeout) throws Exception { + + final BOpContextBase context = new BOpContextBase(queryEngine); + + if (log.isInfoEnabled()) + log.info("limit=" + limit); + + /* + * Sample the vertices. + * + * TODO Sampling for scale-out not yet finished. + * + * FIXME Re-sampling will always produce the same sample depending + * on the sample operator impl (it should be random, but it is not). + */ + sampleVertices(context, limit); + + if(log.isDebugEnabled()) + log.debug("joinGraph=" + toString()); + + /* + * Estimate the cardinality and weights for each edge, obtaining the + * Edge with the minimum estimated cardinality. This will be the + * starting point for the join graph evaluation. + * + * @todo It would be very interesting to see the variety and/or + * distribution of the values bound when the edge is sampled. This + * can be easily done using a hash map with a counter. That could + * tell us a lot about the cardinality of the next join path + * (sampling the join path also tells us a lot, but it does not + * explain it as much as seeing the histogram of the bound values). + * I believe that there are some interesting online algorithms for + * computing the N most frequent observations and the like which + * could be used here. + * + * TODO ROX is choosing the starting edge based on the minimum + * estimated cardinality. However, it is possible for there to be + * more than one edge with an estimated cardinality which is + * substantially to the minimum estimated cardinality. It would be + * best to start from multiple vertices so we can explore join paths + * which begin with those alternative starting vertices as well. + * (LUBM Q2 is an example of such a query). + */ + estimateEdgeWeights(queryEngine, limit); + + while(moreEdgesToVisit(executedVertices)) { + + // Decide on the next join path to execute. 
+ final Path p = chainSample(queryEngine, limit, timeout); + + for(Edge e : p.edges) { + + /* + * FIXME Finish the algorithm. + * + * Execute the edge. We have two choices here. If join path + * is currently materialized and the expected cardinality of + * the edge is small to moderate (LTE limit * 10) then we + * can simply materialize the result of evaluating the edge. + * + * In this case, we replace the sample for the vertex with + * the actual result of evaluating the edge. [This concept + * pre-supposes that a vertex sample is the set of matching + * elements and that we do not store the binding sets which + * satisfy the join path. I think that this is perhaps the + * primary point of difference for MonetDB/ROX and bigdata. + * bigdata is working with IBindingSet[]s and should + * associate the set of intermediate solutions which + * represent the materialized intermediate result with the + * join path, not the vertex or the edge.] + * + * Otherwise, either the join path is already only a sample + * or the expected cardinality of this edge is too large so + * we do the cutoff evaluation of the edge in order to + * propagate a sample. + * + * 1. exec(e,T1(v1),T2(v2)) + */ + + executedVertices.add(e.v1); + executedVertices.add(e.v2); + + } + + /* + * Re-sample edges branching from any point in the path which we + * just executed. The purpose of this is to improve the + * detection of correlations using a materialized sample of the + * intermediate results (which will be correlated) rather than + * independent samples of the vertices (which are not + * correlated). + * + * Also, note that ROX only samples vertices which satisfy the + * zero investment property and therefore there could be + * vertices which have not yet been sampled if some vertices are + * not associated with an index. + * + * @todo This could just be another call to sampleVertices() and + * estimateEdgeWeights() if those methods accepted the set of + * already executed vertices so they could make the proper + * exclusions (or if we had a method which returned the + * un-executed vertices and/or edges). + */ +// e.v1.sample(context, limit); +// e.v2.sample(context, limit); + + } + + } + + /** + * Return <code>true</code> iff there exists at least one {@link Edge} + * branching from a vertex NOT found in the set of vertices which have + * visited. + * + * @param visited + * A set of vertices. + * + * @return <code>true</code> if there are more edges to explore. + */ + private boolean moreEdgesToVisit(final Set<Vertex> visited) { + + // Consider all edges. + for(Edge e : E) { + + if (visited.contains(e.v1) && visited.contains(e.v2)) { + /* + * Since both vertices for this edge have been executed the + * edge is now redundant. Either it was explicitly executed + * or another join path was used which implies the edge by + * transitivity in the join graph. + */ + continue; + } + + /* + * We found a counter example (an edge which has not been + * explored). + */ + if (log.isTraceEnabled()) + log.trace("Edge has not been explored: " + e); + + return true; + + } + + // No more edges to explore. + return false; + + } + + /** + * E + * + * @param limit + * @return + * + * TODO How to indicate the set of edges which remain to be + * explored? + * + * @throws Exception + */ + public Path chainSample(final QueryEngine queryEngine, final int limit, + final long timeout) throws Exception { + + final Vertex source; + { + /* + * Find the edge having the minimum estimated cardinality. 
+ */ + final Edge e = getMinimumCardinalityEdge(executedVertices); + + if (e == null) + throw new RuntimeException("No weighted edges."); + + /* + * Decide which vertex of that edge will be the starting point + * for chain sampling (if any). + */ + if (getEdgeCount(e.v1, executedVertices) > 1 + || getEdgeCount(e.v2, executedVertices) > 1) { + /* + * There is at least one vertex of that edge which branches. + * Chain sampling will begin with the vertex of that edge + * which has the lower estimated cardinality. + * + * TODO It could be that the minimum cardinality vertex does + * not branch. What happens for that code path? Do we just + * execute that edge and then reenter chain sampling? If so, + * it would be cleared to test for this condition explicitly + * up front. + */ + source = e.getMinimumCardinalityVertex(); + } else { + /* + * There is no vertex which branches for that edge. This is + * a stopping condition for chain sampling. The path + * consisting of just that edge is returned and should be + * executed by the caller. + */ + return new Path(e); + } + + } + + /* + * Setup some data structures for one or more breadth first + * expansions of the set of path(s) which are being sampled. This + * iteration will continue until we reach a stopping condition. + */ + + // The set of paths being considered. + final List<Path> paths = new LinkedList<Path>(); + + { + // The current path. + final Path p = new Path(); + + p.stopVertex = source; +// p.inputSample = source.sample; + paths.add(p); + } + + // initialize the cutoff to the limit used to sample the vertices. + int cutoff = limit; + long cutoffMillis = timeout; + + final Set<Vertex> unsampled = new LinkedHashSet<Vertex>( + executedVertices); + + /* + * One breadth first expansion of the join paths. + * + * Note: This expands each join path one vertex in each iteration. + * However, different join paths can expand from different vertices. + * + * For ROX, each join path is expanded from the last vertex which + * was added to that join path so the initial edge for each join + * path strongly determines the edges in the join graph along which + * that join path can grow. + * + * For bigdata, we can grow the path from any vertex already in the + * path to any vertex which (a) is not yet in the path; and (b) has + * not yet been evaluated. + * + * This suggests that this loop must consider each of the paths to + * decide whether that path can be extended. + */ + while (moreEdgesToVisit(unsampled)) { + + // increment the cutoff. + cutoff += limit; + cutoffMillis += timeout; + + // Consider each path. + for(Path p : paths) { + + /* + * The vertex at which we stopped expanding that path the + * last time. + * + * TODO ROX might have to traverse vertex to vertex along + * edges, but we can execute any edge whose preconditions + * have been satisfied. + */ + final Vertex v = p.stopVertex; + + // TODO depends on the notion of the paths remaining. + if (getEdgeCount(v, null/*executed+sampled(p)*/) > 0) { + /* + * This path branches at this vertex, so remove the old + * path 1st. + */ + paths.remove(p); + } + + // For each edge which is a neighbor of the vertex [v]. + final List<Edge> neighbors = null; + for(Edge e : neighbors) { + // 1. append the edge to the path + final Path p1 = p.addEdge(queryEngine, cutoff, e); + // 3. add the path to paths. 
+ paths.add(p1); + } + + } + + final Path p = getSelectedJoinPath(paths.toArray(new Path[paths.size()])); + + if(p != null) { + + return p; + + } + + } // while(moreEdgesToSample) + + final Path p = getBestAlternativeJoinPath(paths.toArray(new Path[paths.size()])); + + if(p != null) { + + return p; + + } + + // TODO ROX as given can return null here, which looks like a bug. + return null; + + } // chainSample() + + /** + * Return the path which is selected by the termination criteria + * (looking for a path which dominates the alternatives). + * + * @param a + * An array of {@link Path}s to be tested. + * + * @return The selected path -or- <code>null</code> if none of the paths + * is selected. + * + * @todo Should we only explore beneath the diagonal? + * + * @todo What is the basis for comparing the expected cardinality of + * join paths? Where one path is not simply the one step extension + * of the other. + * <p> + * This rule might only be able to compare the costs for paths in + * which one path directly extends another. + * <p> + * It is not clear that this code is comparing all paths which + * need to be compared. + */ + public Path getSelectedJoinPath(final Path[] a) { + final StringBuilder sb = new StringBuilder(); + final Formatter f = new Formatter(sb); + for (int i = 0; i < a.length; i++) { + Path p = null; + final Path Pi = a[i]; + if (Pi.sample == null) + throw new RuntimeException("Not sampled: " + Pi); + for (int j = 0; j < a.length; j++) { + if (i == j) + continue; + final Path Pj = a[j]; + if (Pj.sample == null) + throw new RuntimeException("Not sampled: " + Pj); + final long costPi = Pi.sample.estimatedCardinality; + final double sfPi = Pi.sample.f; + final long costPj = Pj.sample.estimatedCardinality; + final long expectedCombinedCost = costPi + + (long) (sfPi * costPj); + final boolean lt = expectedCombinedCost < costPj; + { + f + .format( + "Comparing: P[% 2d] with P[% 2d] : % 10d + (% 7.2f * % 10d) %2s %10d", + i, j, costPi, sfPi, costPj, (lt ? "<" + : ">="), costPj); + System.err.println(sb); + sb.setLength(0); + } + if (lt) { + p = Pi; + } else { + p = null; + break; + } + } // Pj + if (p != null) + return p; + } // Pi + /* + * None of the paths is a winner according to the selection + * criteria. + */ + return null; + } + + /** + * Termination condition if no more edges to sample. This + * breaks the deadlock by preferring the path whose .... + */ + public Path getBestAlternativeJoinPath(final Path[] a) { + for (int i = 0; i < a.length; i++) { + Path p = null; + final Pat... [truncated message content] |
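The cutoff-join arithmetic introduced by this revision (the EdgeSample fields shown above) reduces to a few lines: the join hit ratio f is the number of output solutions per consumed input, and the estimated cardinality is the source vertex range count scaled by f. The sketch below mirrors only that arithmetic; the class name CutoffJoinEstimate, the constructor arguments, and the numbers in main() are illustrative assumptions and are not part of the bigdata source.

// Minimal sketch of the cutoff-join cardinality estimate described above.
// Only the arithmetic mirrors EdgeSample; all names here are illustrative.
public class CutoffJoinEstimate {

    final double f;                     // join hit ratio: outputs per consumed input
    final long estimatedCardinality;    // source range count scaled by the hit ratio
    final boolean estimateIsLowerBound; // join was cut off after consuming a single input
    final boolean estimateIsUpperBound; // input was itself an inexact sample and the join was not cut off
    final boolean exact;                // exact input sample and the join was not cut off

    CutoffJoinEstimate(final long rangeCount, // fast range count of the more selective (source) vertex
            final boolean sourceSampleExact,  // whether the source sample was the exact solution
            final int limit,                  // cutoff on the #of solutions generated
            final int inputCount,             // #of source binding sets consumed
            final int outputCount) {          // #of solutions generated before the cutoff

        f = outputCount == 0 ? 0 : (outputCount / (double) inputCount);

        estimatedCardinality = (long) (rangeCount * f);

        estimateIsLowerBound = inputCount == 1 && outputCount == limit;

        estimateIsUpperBound = !sourceSampleExact && outputCount < limit;

        exact = sourceSampleExact && outputCount < limit;
    }

    public static void main(final String[] args) {
        // Hypothetical numbers: a source vertex with a range count of 1,000,000,
        // sampled with limit=100; the cutoff join consumed 20 inputs and produced 100 outputs.
        final CutoffJoinEstimate e = new CutoffJoinEstimate(1000000L, false, 100, 20, 100);
        // Prints f=5.0 and estimatedCardinality=5000000 for these inputs.
        System.out.println("f=" + e.f + ", estimatedCardinality=" + e.estimatedCardinality
                + ", lowerBound=" + e.estimateIsLowerBound
                + ", upperBound=" + e.estimateIsUpperBound + ", exact=" + e.exact);
    }
}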
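The path selection rule in getSelectedJoinPath() treats a candidate path Pi as the winner when, for every other candidate Pj, the cost of running Pi and then extending it into Pj's remaining work (costPi + f(Pi) * costPj) is still below the cost of running Pj directly. Below is a minimal sketch of that pairwise domination test under hypothetical estimates; PathEstimate and selectDominantPath are made-up names, and the real method additionally logs a comparison table and falls back to getBestAlternativeJoinPath() when no path dominates.

// Minimal sketch of the pairwise domination test used to pick a winning join path.
// Names and numbers are illustrative; only the comparison mirrors getSelectedJoinPath().
public class PathSelectionSketch {

    static class PathEstimate {
        final long estimatedCardinality; // estimated output cardinality of the path
        final double f;                  // join hit ratio observed for the path sample
        PathEstimate(final long estimatedCardinality, final double f) {
            this.estimatedCardinality = estimatedCardinality;
            this.f = f;
        }
    }

    /**
     * Return the index of a path which dominates every other path, or -1 if no
     * path is dominant (the caller must then break the tie some other way).
     */
    static int selectDominantPath(final PathEstimate[] a) {
        for (int i = 0; i < a.length; i++) {
            boolean dominatesAll = true;
            for (int j = 0; j < a.length && dominatesAll; j++) {
                if (i == j)
                    continue;
                final long costPi = a[i].estimatedCardinality;
                final long costPj = a[j].estimatedCardinality;
                // Cost of executing Pi first and then extending it into Pj's work.
                final long expectedCombinedCost = costPi + (long) (a[i].f * costPj);
                if (expectedCombinedCost >= costPj)
                    dominatesAll = false;
            }
            if (dominatesAll)
                return i;
        }
        return -1;
    }

    public static void main(final String[] args) {
        // Hypothetical estimates for three candidate paths; the first (cheap path
        // with a low hit ratio) dominates the others, so this prints "selected=0".
        final PathEstimate[] paths = new PathEstimate[] {
                new PathEstimate(500L, 0.10),
                new PathEstimate(20000L, 0.75),
                new PathEstimate(80000L, 1.50) };
        System.out.println("selected=" + selectDominantPath(paths));
    }
}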
From: <btm...@us...> - 2010-11-11 22:00:49
Revision: 3937 http://bigdata.svn.sourceforge.net/bigdata/?rev=3937&view=rev Author: btmurphy Date: 2010-11-11 22:00:41 +0000 (Thu, 11 Nov 2010) Log Message: ----------- [branch dev-btm]: CHECKPOINT - completed phase 1 of callable executor (client) service smart proxy work. Made changes to allow smart proxy and/or remote service implementations to be started by ServicesManagerService, service re-ordering in RestartPersistentServices because of dependency on txn service and load balancer, added DeferredInitTask to EmbeddedShardService, added the necessary jar files to classpath of shard locator and callable executor services in boot-processes.xml Modified Paths: -------------- branches/dev-btm/bigdata/src/java/com/bigdata/resources/StoreManager.java branches/dev-btm/bigdata/src/resources/logging/log4j.properties branches/dev-btm/bigdata-jini/src/java/com/bigdata/boot/config/boot-processes.xml branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/executor.config branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/logging.properties branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/RestartPersistentServices.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/ServiceConfigurationZNodeMonitorTask.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ServiceConfiguration.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/logging.properties branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/shardlocator.config branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/EmbeddedShardService.java branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/logging.properties branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/shard.config branches/dev-btm/bigdata-jini/src/test/com/bigdata/service/jini/master/TestMappedRDFDataLoadMaster.config branches/dev-btm/src/resources/config/bigdataCluster.config Modified: branches/dev-btm/bigdata/src/java/com/bigdata/resources/StoreManager.java =================================================================== --- branches/dev-btm/bigdata/src/java/com/bigdata/resources/StoreManager.java 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata/src/java/com/bigdata/resources/StoreManager.java 2010-11-11 22:00:41 UTC (rev 3937) @@ -1523,6 +1523,10 @@ // Wait no more than N seconds for discovery int nWait = 120; boolean discoveredTxnSrvc = false; + if (log.isDebugEnabled()) { + log.debug("waiting for transaction " + +"service discovery ..."); + } for(int i=0; i<nWait; i++) { if (discoveryMgr.getTransactionService() != null) { discoveredTxnSrvc = true; @@ -1531,24 +1535,19 @@ try { Thread.sleep(1000L); } catch(InterruptedException ie) { } + } + if(discoveredTxnSrvc) { if (log.isDebugEnabled()) { - log.debug("waiting for transaction " - +"service discovery"); + log.debug("StoreManager - discovered " + +"transaction service"); } - if(discoveredTxnSrvc) { - if (log.isDebugEnabled()) { - log.debug("discovered transaction service"); - } - } else { - log.warn("transaction service unreachable"); - }//endif(discoveredTxnSrvc) - }//endloop(nWait) -//BTM if(discoveredTxnSrvc) { -//BTM com.bigdata.util.Util.printStr("TestBigdataClientRemote.txt","\nStoreManager.start >>> TRANSACTION SERVICE DISCOVERED"); -//BTM }else{ -//BTM com.bigdata.util.Util.printStr("TestBigdataClientRemote.txt","\nStoreManager.start >>> TRANSACTION SERVICE UNREACHABLE\n"); -//BTM } +log.warn("DISCOVERED TRANSACTION SERVICE"); + } else { + log.warn("StoreManager - 
transaction " + +"service unreachable"); + } + } catch (UnsupportedOperationException ex) { //BTM com.bigdata.util.Util.printStr("TestBigdataClientRemote.txt","\nStoreManager.start >>> FEDERATION UNAVAILABLE - test case?\n"); log.warn("Federation not available - running in test case?"); Modified: branches/dev-btm/bigdata/src/resources/logging/log4j.properties =================================================================== --- branches/dev-btm/bigdata/src/resources/logging/log4j.properties 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata/src/resources/logging/log4j.properties 2010-11-11 22:00:41 UTC (rev 3937) @@ -215,3 +215,4 @@ log4j.logger.com.bigdata.transaction=DEBUG log4j.logger.com.bigdata.metadata=DEBUG log4j.logger.com.bigdata.shard=DEBUG +log4j.logger.com.bigdata.executor=DEBUG Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/boot/config/boot-processes.xml =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/boot/config/boot-processes.xml 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/boot/config/boot-processes.xml 2010-11-11 22:00:41 UTC (rev 3937) @@ -77,7 +77,7 @@ <javaprop name="java.util.logging.config.file" value="${bigdata.configDir}/logging/logging.properties"/> - <property name="java.classpath" value="${bootLauncherClasspath}${:}lib/fastutil.jar${:}lib/dsiutils.jar${:}lib/cweb-extser.jar"/> + <property name="java.classpath" value="${bootLauncherClasspath}${:}lib/fastutil.jar${:}lib/dsiutils.jar${:}lib/cweb-extser.jar${:}lib/ctc_utils.jar"/> <property name="java.app.mainclass" value="com.bigdata.boot.starter.SingleNonActivatableServiceStarter"/> <arg value="${bigdata.configDir}/policy/service.policy"/> @@ -113,7 +113,7 @@ <javaprop name="java.util.logging.config.file" value="${bigdata.configDir}/logging/logging.properties"/> - <property name="java.classpath" value="${bootLauncherClasspath}"/> + <property name="java.classpath" value="${bootLauncherClasspath}${:}lib/fastutil.jar${:}lib/dsiutils.jar${:}lib/cweb-extser.jar${:}lib/ctc_utils.jar"/> <property name="java.app.mainclass" value="com.bigdata.boot.starter.SingleNonActivatableServiceStarter"/> <arg value="${bigdata.configDir}/policy/service.policy"/> Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/executor.config =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/executor.config 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/executor.config 2010-11-11 22:00:41 UTC (rev 3937) @@ -11,6 +11,10 @@ import net.jini.core.discovery.LookupLocator; import net.jini.discovery.LookupDiscoveryManager; +import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Id; + import com.bigdata.util.config.NicUtil; import com.bigdata.util.config.ConfigDeployUtil; @@ -106,3 +110,15 @@ (com.bigdata.executor.serverExporterTcpServerEndpoint, com.bigdata.executor.serverILFactory, false, false); } + +//NOTE: remove once dynamic discovery of zookeeper is added +org.apache.zookeeper.ZooKeeper { + + zroot = ConfigDeployUtil.getString("federation.name"); + + servers = com.bigdata.executor.serverExporterIpAddr+":2888:3888"; + + acl = new ACL[] { + new ACL(ZooDefs.Perms.ALL, new Id("world", "anyone")) + }; +} Modified: 
branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/logging.properties =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/logging.properties 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/executor/config/logging.properties 2010-11-11 22:00:41 UTC (rev 3937) @@ -37,5 +37,4 @@ #log4j.logger.com.bigdata.executor=DEBUG #log4j.logger.com.bigdata.executor.EmbeddedCallableExecutor=DEBUG -#log4j.logger.com.bigdata.executor.EmbeddedClientIndexStore=DEBUG Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/RestartPersistentServices.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/RestartPersistentServices.java 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/RestartPersistentServices.java 2010-11-11 22:00:41 UTC (rev 3937) @@ -19,6 +19,9 @@ import com.bigdata.service.jini.JiniFederation; import com.bigdata.util.InnerCause; +//BTM - FOR_CLIENT_SERVICE +import java.util.ArrayList; + /** * Task restarts persistent physical services that should be running on this * host but which are not discoverable using jini (not found when we query for @@ -154,7 +157,8 @@ + BigdataZooDefs.CONFIG; // these are the ServiceConfigurations. - final List<String> serviceConfigZNodes; +//BTM - PRE_CLIENT_SERVICE final List<String> serviceConfigZNodes; + List<String> serviceConfigZNodes; try { serviceConfigZNodes = zookeeper.getChildren(zconfig, false); @@ -171,8 +175,109 @@ if (log.isInfoEnabled()) log.info("Considering " + serviceConfigZNodes.size() + " service configurations"); -System.out.println("\n*********************************************************"); -System.out.println("*** RestartPersistentServices.runOnce: Considering " + serviceConfigZNodes.size()+ " service configurations"); +System.out.println("\n*** RestartPersistentServices - BEGIN *********************************************************\n"); +for (String serviceConfigZNode : serviceConfigZNodes) { + System.out.println("*** RestartPersistentServices.runOnce: serviceConfigZNode = "+ serviceConfigZNode); +} +System.out.println("\n*** RestartPersistentServices.runOnce: RE-ORDER - DATA SERVICE LAST\n"); +//BTM - FOR_CLIENT_SERVICE - BEGIN --------------------------------------- + // re-order because shard (data) service waits for transaction + // service and load balancer service + List<String> tmpList = new ArrayList<String>(); + + //1. transaction service(s) + for (String serviceConfigZNode : serviceConfigZNodes) { + if ( (serviceConfigZNode.equals + ("com.bigdata.transaction.ServiceImpl")) || + (serviceConfigZNode.equals + ("com.bigdata.service.jini.TransactionServer")) ) + { + tmpList.add(serviceConfigZNode); + } + } + //2. load balancer service(s) + for (String serviceConfigZNode : serviceConfigZNodes) { + if ( serviceConfigZNode.equals + ("com.bigdata.loadbalancer.ServiceImpl") || + (serviceConfigZNode.equals + ("com.bigdata.service.jini.LoadBalancerServer")) ) + { + tmpList.add(serviceConfigZNode); + } + } + //3. shard locator (metadata) service(s) + for (String serviceConfigZNode : serviceConfigZNodes) { + if ( serviceConfigZNode.equals + ("com.bigdata.metadata.ServiceImpl") || + (serviceConfigZNode.equals + ("com.bigdata.service.jini.MetadataServer")) ) + { + tmpList.add(serviceConfigZNode); + } + } + //4. 
callable executor (client) service(s) + for (String serviceConfigZNode : serviceConfigZNodes) { + if ( serviceConfigZNode.equals + ("com.bigdata.executor.ServiceImpl") || + (serviceConfigZNode.equals + ("com.bigdata.service.jini.ClientServer")) ) + { + tmpList.add(serviceConfigZNode); + } + } + //5. shard (data) service(s) + for (String serviceConfigZNode : serviceConfigZNodes) { + if ( serviceConfigZNode.equals + ("com.bigdata.shard.ServiceImpl") || + (serviceConfigZNode.equals + ("com.bigdata.service.jini.DataServer")) ) + { + tmpList.add(serviceConfigZNode); + } + } + //6. add anything that's none of the above + for (String serviceConfigZNode : serviceConfigZNodes) { + + if ( !(serviceConfigZNode.equals + ("com.bigdata.transaction.ServiceImpl")) && + !(serviceConfigZNode.equals + ("com.bigdata.service.jini.TransactionServer")) && + + !(serviceConfigZNode.equals + ("com.bigdata.loadbalancer.ServiceImpl")) && + !(serviceConfigZNode.equals + ("com.bigdata.service.jini.LoadBalancerServer")) && + + !(serviceConfigZNode.equals + ("com.bigdata.metadata.ServiceImpl")) && + !(serviceConfigZNode.equals + ("com.bigdata.service.jini.MetadataServer")) && + + !(serviceConfigZNode.equals + ("com.bigdata.executor.ServiceImpl")) && + !(serviceConfigZNode.equals + ("com.bigdata.service.jini.ClientServer")) && + + !(serviceConfigZNode.equals + ("com.bigdata.shard.ServiceImpl")) && + !(serviceConfigZNode.equals + ("com.bigdata.service.jini.DataServer")) ) + { + tmpList.add(serviceConfigZNode); + } + } + if (tmpList.size() == serviceConfigZNodes.size()) { + serviceConfigZNodes = tmpList; + } else { + log.warn("reordered list size ["+tmpList.size() + +"] != serviceConfigZNodes size [" + +serviceConfigZNodes.size()+"]"); + } +System.out.println("*** RestartPersistentServices.runOnce: Considering " + serviceConfigZNodes.size()+ " service configurations\n"); +for (String serviceConfigZNode : serviceConfigZNodes) { + System.out.println("*** RestartPersistentServices.runOnce AFTER RE-ORDER: serviceConfigZNode = "+ serviceConfigZNode); +} +//BTM - FOR_CLIENT_SERVICE - END ------------------------------------- for (String serviceConfigZNode : serviceConfigZNodes) { @@ -253,7 +358,7 @@ } } -System.out.println("*********************************************************\n"); +System.out.println("\n*** RestartPersistentServices - END *********************************************************\n"); // Success. return true; Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/ServiceConfigurationZNodeMonitorTask.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/ServiceConfigurationZNodeMonitorTask.java 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/ServiceConfigurationZNodeMonitorTask.java 2010-11-11 22:00:41 UTC (rev 3937) @@ -323,7 +323,7 @@ if (config.serviceCount != children.size()) { // adjust the #of logical service instances (blocks). 
-System.out.println("GGGG ServiceConfigurationZNodeMonitorTask.runWithLock: *** NEW LOGICAL SERVICE TASK"); +System.out.println("GGGG ServiceConfigurationZNodeMonitorTask.runWithLock: *** NEW LOGICAL SERVICE TASK >>> "+serviceConfigZPath); config.newLogicalServiceTask(fed, listener, serviceConfigZPath, children).call(); Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ServiceConfiguration.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ServiceConfiguration.java 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/jini/start/config/ServiceConfiguration.java 2010-11-11 22:00:41 UTC (rev 3937) @@ -494,7 +494,6 @@ * if the service detectably did not start. */ public V call() throws Exception { -System.out.println("*** ServiceConfiguration: call() [className="+className+"]"); if (log.isInfoEnabled()) log.info("config: " + this); @@ -532,7 +531,9 @@ TimeUnit.MILLISECONDS); // attempt to detect a service start failure. +System.out.println("\n*** ServiceConfiguration: call() [className="+className+"] >>> awaitServiceStart [timeout="+timeout+" ms] - BEGIN\n"); awaitServiceStart(processHelper, timeout, TimeUnit.MILLISECONDS); +System.out.println("\n*** ServiceConfiguration: call() [className="+className+"] >>> awaitServiceStart [timeout="+timeout+" ms] - END\n"); } catch (InterruptedException ex) { Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/logging.properties =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/logging.properties 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/logging.properties 2010-11-11 22:00:41 UTC (rev 3937) @@ -37,5 +37,4 @@ #log4j.logger.com.bigdata.metadata=DEBUG #log4j.logger.com.bigdata.metadata.EmbeddedShardLocator=DEBUG -#log4j.logger.com.bigdata.journal.EmbeddedIndexStore=DEBUG Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/shardlocator.config =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/shardlocator.config 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/metadata/config/shardlocator.config 2010-11-11 22:00:41 UTC (rev 3937) @@ -11,6 +11,10 @@ import net.jini.core.discovery.LookupLocator; import net.jini.discovery.LookupDiscoveryManager; +import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Id; + import com.bigdata.util.config.NicUtil; import com.bigdata.util.config.ConfigDeployUtil; @@ -89,3 +93,15 @@ (com.bigdata.metadata.serverExporterTcpServerEndpoint, com.bigdata.metadata.serverILFactory, false, false); } + +//NOTE: remove once dynamic discovery of zookeeper is added +org.apache.zookeeper.ZooKeeper { + + zroot = ConfigDeployUtil.getString("federation.name"); + + servers = com.bigdata.metadata.serverExporterIpAddr+":2888:3888"; + + acl = new ACL[] { + new ACL(ZooDefs.Perms.ALL, new Id("world", "anyone")) + }; +} Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/EmbeddedShardService.java =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/EmbeddedShardService.java 2010-11-11 15:02:03 UTC (rev 3936) +++ 
branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/EmbeddedShardService.java 2010-11-11 22:00:41 UTC (rev 3937) @@ -156,12 +156,14 @@ //BTM - BEGIN - fields from AbstractFederation ------------------------------- private ScheduledFuture<?> eventTaskFuture; private ScheduledFuture<?> lbsReportingTaskFuture; + private long lbsReportingPeriod; private AbstractHTTPD httpServer; private String httpServerUrl;//URL used to access the httpServer private int httpdPort; //BTM - END - fields from AbstractFederation ------------------------------- -String dbgFlnm="EmbeddedShardService.out"; + private ScheduledFuture deferredInitTaskFuture; + private boolean deferredInitDone = false; protected EmbeddedShardService (final UUID serviceUUID, @@ -236,116 +238,45 @@ maxStaleLocatorRetries, logger, this.properties); - String httpServerPath = - (this.localResources).getServiceCounterPathPrefix(); - try { - this.httpServerUrl = - "http://" - +AbstractStatisticsCollector.fullyQualifiedHostName - +":"+this.httpdPort+"/?path=" - +URLEncoder.encode(httpServerPath, "UTF-8"); - } catch(java.io.UnsupportedEncodingException e) { - logger.warn("failed to initialize httpServerUrl", e); - } -System.out.println("\nEmbeddedShardService >>> NEW StoreManager - BEGIN"); - this.resourceMgr = - new ShardResourceManager(this, - this.discoveryMgr, - this.localResources, - this.indexMgr, - this.properties); -System.out.println("\nEmbeddedShardService >>> NEW StoreManager - END"); + this.lbsReportingPeriod = lbsReportingPeriod; - this.localTransactionMgr = new LocalTransactionManager(discoveryMgr); - this.concurrencyMgr = - new ConcurrencyManager(this.properties, - this.localTransactionMgr, - this.resourceMgr); - //WARN: circular refs - (this.resourceMgr).setConcurrencyManager(this.concurrencyMgr); - (this.indexMgr).setConcurrencyManager(this.concurrencyMgr); + // Note that this service employs a StoreManager (in the + // ResourceManager) whose creation depends on the existence + // of a transaction service. Additionally, this service + // also employs counters dependent on the existence of + // a load balancer to which the counters send events. + // Unfortunately, when the ServicesManagerService is used + // to start this service, these dependencies can cause + // problems for the ServicesManagerService. This is because + // the order the services are started by the ServicesManagerService + // can be random, and if this service is the first service + // the ServicesManagerService attempts to start (or whose + // starting is attempted before the transaction service + // and/or the load balancer), then unless this service + // returns an indication to the ServicesManagerService that + // it has successfully started within the time period + // expected, the ServicesManagerService will declare that + // this service is faulty and kill the process in which + // this service was started. To address this issue, this + // service executes an instance of DeferredInitTask to + // create the ResourceManager and set up the counters and + // events asynchronously; which allows the transaction + // service and load balancer to be started and discovered + // after this service has been started by the + // ServicesManagerService. 
-//BTM - from AbstractFederation constructor and addScheduledTask - - //start event queue/sender task (sends events every 2 secs) - - long sendEventsDelay = 100L;//one-time initial delay - long sendEventsPeriod = 2000L; - this.eventTaskFuture = - (localResources.getScheduledExecutor()).scheduleWithFixedDelay - (localResources.getEventQueueSender(), - sendEventsDelay, - sendEventsPeriod, - TimeUnit.MILLISECONDS); - -//BTM - from AbstractFederation - start deferred tasks - - //start task to report counters to the load balancer - - LoadBalancerReportingTask lbsReportingTask = - new LoadBalancerReportingTask(this.resourceMgr, - this.concurrencyMgr, - this.localResources, - this.discoveryMgr, - logger); - this.lbsReportingTaskFuture = - ((this.localResources).getScheduledExecutor()) + deferredInitDone = deferredInit(); + if (!deferredInitDone) { + DeferredInitTask deferredInitTask = new DeferredInitTask(); + this.deferredInitTaskFuture = + ((this.localResources).getScheduledExecutor()) .scheduleWithFixedDelay - (lbsReportingTask, - lbsReportingPeriod,//initial delay - lbsReportingPeriod, + (deferredInitTask, + 20L*1000L,//initial delay + 30L*1000L,//period TimeUnit.MILLISECONDS); - - //start an http daemon from which interested parties can query - //counter and/or statistics information with http get commands - - try { - httpServer = - new HttpReportingServer(this.httpdPort, - this.resourceMgr, - this.concurrencyMgr, - this.localResources, - logger); - } catch (IOException e) { - logger.error("failed to start http server " - +"[port="+this.httpdPort - +", path="+httpServerPath+"]", e); - return; } - if(httpServer != null) { - if( logger.isDebugEnabled() ) { - logger.debug("started http daemon " - +"[access URL="+this.httpServerUrl+"]"); - } - // add counter reporting the access url to load balancer - ((this.localResources).getServiceCounterSet()) - .addCounter - (IServiceCounters.LOCAL_HTTPD, - new OneShotInstrument<String>(this.httpServerUrl)); - } - -//BTM - BEGIN ScaleOutIndexManager -//BTM - The call to embeddedIndexStore.didStart was previously -//BTM - commented out during the data service conversion. But -//BTM - the method didStart() on the original EmbeddedIndexStore and -//BTM - AbstractFederation calls the private method setupCounters; -//BTM - which seems to be important for at least the shard (data) -//BTM - service. The tests still passed without calling that method, -//BTM - but we should consider calling it at this point (the problem -//BTM - is that it waits on the resource manager to finish -//BTM - initializing, which waits on the transaction service to be -//BTM - discovered). Setting up these counters seem to be important -//BTM - only for the shard (data) service rather than the other -//BTM - services. So we should consider adding setupCounters to this -//BTM - class, and calling it here instead of calling -//BTM - embeddedIndexStore.didStart() or AbstractFederation.didStart(). 
-//BTM - -//BTM embeddedIndexStore.didStart(); - - setupCounters(); - -//BTM - END ScaleOutIndexManager } // Required by Service interface @@ -394,6 +325,11 @@ logger.warn("SSSS SHARD SERVICE EmbeddedShardService.shutdown"); if (!isOpen()) return; + //false ==> allow in-progress tasks to complete + if (deferredInitTaskFuture != null) { + deferredInitTaskFuture.cancel(false); + } + if (concurrencyMgr != null) { concurrencyMgr.shutdown(); } @@ -404,9 +340,12 @@ resourceMgr.shutdown(); } - //false ==> allow in-progress tasks to complete - lbsReportingTaskFuture.cancel(false); - eventTaskFuture.cancel(false); + if (lbsReportingTaskFuture != null) { + lbsReportingTaskFuture.cancel(false); + } + if (eventTaskFuture != null) { + eventTaskFuture.cancel(false); + } if (indexMgr != null) indexMgr.destroy(); if (localResources != null) { @@ -1026,6 +965,119 @@ return new File(resourceMgr.getDataDir(), "httpd.url"); } + private boolean deferredInit() { + + // StoreManager depends on the transaction service + if (discoveryMgr.getTransactionService() == null) return false; + + if (this.resourceMgr == null) { +System.out.println("\nEmbeddedShardService >>> NEW StoreManager - BEGIN"); + this.resourceMgr = + new ShardResourceManager(this, + this.discoveryMgr, + this.localResources, + this.indexMgr, + this.properties); +System.out.println("\nEmbeddedShardService >>> NEW StoreManager - END"); + + this.localTransactionMgr = + new LocalTransactionManager(discoveryMgr); + this.concurrencyMgr = + new ConcurrencyManager(this.properties, + this.localTransactionMgr, + this.resourceMgr); + //WARN: circular refs + (this.resourceMgr).setConcurrencyManager(this.concurrencyMgr); + (this.indexMgr).setConcurrencyManager(this.concurrencyMgr); + } + + // Events and counters depend on the load balancer + if (discoveryMgr.getLoadBalancerService() == null) return false; + +//BTM - from AbstractFederation - start deferred tasks + + //start task to report counters to the load balancer + + LoadBalancerReportingTask lbsReportingTask = + new LoadBalancerReportingTask(this.resourceMgr, + this.concurrencyMgr, + this.localResources, + this.discoveryMgr, + logger); + this.lbsReportingTaskFuture = + ((this.localResources).getScheduledExecutor()) + .scheduleWithFixedDelay + (lbsReportingTask, + lbsReportingPeriod,//initial delay + lbsReportingPeriod, + TimeUnit.MILLISECONDS); + + //start an http daemon from which interested parties can query + //counter and/or statistics information with http get commands + + String httpServerPath = + (this.localResources).getServiceCounterPathPrefix(); + try { + this.httpServerUrl = + "http://" + +AbstractStatisticsCollector.fullyQualifiedHostName + +":"+this.httpdPort+"/?path=" + +URLEncoder.encode(httpServerPath, "UTF-8"); + } catch(java.io.UnsupportedEncodingException e) { + logger.warn("failed to initialize httpServerUrl", e); + } + + + try { + httpServer = + new HttpReportingServer(this.httpdPort, + this.resourceMgr, + this.concurrencyMgr, + this.localResources, + logger); + } catch (IOException e) { + logger.error("failed to start http server " + +"[port="+this.httpdPort + +", path="+httpServerPath+"]", e); + return false; + } + if(httpServer != null) { + if( logger.isDebugEnabled() ) { + logger.debug("started http daemon " + +"[access URL="+this.httpServerUrl+"]"); + } + // add counter reporting the access url to load balancer + ((this.localResources).getServiceCounterSet()) + .addCounter + (IServiceCounters.LOCAL_HTTPD, + new OneShotInstrument<String>(this.httpServerUrl)); + } + +//BTM - BEGIN 
ScaleOutIndexManager Note +//BTM - The call to embeddedIndexStore.didStart was previously +//BTM - commented out during the data service conversion. But +//BTM - the method didStart() on the original EmbeddedIndexStore and +//BTM - AbstractFederation calls the private method setupCounters; +//BTM - which seems to be important for at least the shard (data) +//BTM - service. The tests still passed without calling that method, +//BTM - but we should consider calling it at this point (the problem +//BTM - is that it waits on the resource manager to finish +//BTM - initializing, which waits on the transaction service to be +//BTM - discovered). Setting up these counters seem to be important +//BTM - only for the shard (data) service rather than the other +//BTM - services. So we should consider adding setupCounters to this +//BTM - class, and calling it here instead of calling +//BTM - embeddedIndexStore.didStart() or AbstractFederation.didStart(). +//BTM - +//BTM embeddedIndexStore.didStart(); + + setupCounters(); + +//BTM - END ScaleOutIndexManager Note + + return true; + } + private void setupLoggingContext() { try { @@ -1120,8 +1172,37 @@ } } -//BTM - see the note at the end of this class' constructor + class DeferredInitTask implements Runnable { + public DeferredInitTask() { } + + public void run() { + try { + if (!deferredInitDone) { +System.out.println("\n*** EmbededShardService#DeferredInitTask: DO DEFERRED INIT \n"); + deferredInitDone = deferredInit(); + } else { +System.out.println("\n*** EmbededShardService#DeferredInitTask: DEFERRED INIT DONE >>> CANCELLING TASK\n"); + deferredInitDone = true; + if (deferredInitTaskFuture != null) { + deferredInitTaskFuture.cancel(false); + deferredInitTaskFuture = null; + } + } + } catch (Throwable t) { +System.out.println("\n*** EmbededShardService#DeferredInitTask: EXCEPTION >>> "+t+"\n"); + logger.error("deferred initialization failure", t); + deferredInitDone = true; + if (deferredInitTaskFuture != null) { + deferredInitTaskFuture.cancel(false); + deferredInitTaskFuture = null; + } + } + } + } + +//BTM - see the note at the end of the deferredInit method + /** * Sets up shard service specific counters. 
* <p> Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/logging.properties =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/logging.properties 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/logging.properties 2010-11-11 22:00:41 UTC (rev 3937) @@ -37,5 +37,4 @@ #log4j.logger.com.bigdata.shard=DEBUG #log4j.logger.com.bigdata.shard.EmbeddedShardService=DEBUG -#log4j.logger.com.bigdata.journal.EmbeddedIndexStore=DEBUG Modified: branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/shard.config =================================================================== --- branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/shard.config 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/java/com/bigdata/shard/config/shard.config 2010-11-11 22:00:41 UTC (rev 3937) @@ -11,6 +11,10 @@ import net.jini.core.discovery.LookupLocator; import net.jini.discovery.LookupDiscoveryManager; +import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Id; + import com.bigdata.util.config.NicUtil; import com.bigdata.util.config.ConfigDeployUtil; @@ -89,3 +93,15 @@ (com.bigdata.shard.serverExporterTcpServerEndpoint, com.bigdata.shard.serverILFactory, false, false); } + +//NOTE: remove once dynamic discovery of zookeeper is added +org.apache.zookeeper.ZooKeeper { + + zroot = ConfigDeployUtil.getString("federation.name"); + + servers = com.bigdata.shard.serverExporterIpAddr+":2888:3888"; + + acl = new ACL[] { + new ACL(ZooDefs.Perms.ALL, new Id("world", "anyone")) + }; +} Modified: branches/dev-btm/bigdata-jini/src/test/com/bigdata/service/jini/master/TestMappedRDFDataLoadMaster.config =================================================================== --- branches/dev-btm/bigdata-jini/src/test/com/bigdata/service/jini/master/TestMappedRDFDataLoadMaster.config 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/bigdata-jini/src/test/com/bigdata/service/jini/master/TestMappedRDFDataLoadMaster.config 2010-11-11 22:00:41 UTC (rev 3937) @@ -137,7 +137,7 @@ /* Template for matching the services to which the clients will be * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. While it is + * CallableExecutor as the interface to be discovered. 
While it is * possible to run tasks on an IDataService or even an * IMetadataService since they both implement IRemoteExecutor, it * is generally discouraged unless the tasks require explicit @@ -148,7 +148,8 @@ new ServiceTemplate( null, //serviceID new Class[]{ - com.bigdata.service.IClientService.class +//BTM com.bigdata.service.IClientService.class + com.bigdata.service.CallableExecutor.class }, null // attributes ), Modified: branches/dev-btm/src/resources/config/bigdataCluster.config =================================================================== --- branches/dev-btm/src/resources/config/bigdataCluster.config 2010-11-11 15:02:03 UTC (rev 3936) +++ branches/dev-btm/src/resources/config/bigdataCluster.config 2010-11-11 22:00:41 UTC (rev 3937) @@ -630,14 +630,15 @@ "org.apache.zookeeper.server.quorum.QuorumPeerMain", //BTM "com.bigdata.service.jini.TransactionServer", //BTM "com.bigdata.service.jini.MetadataServer", - "com.bigdata.service.jini.DataServer", +//BTM "com.bigdata.service.jini.DataServer", //BTM "com.bigdata.service.jini.LoadBalancerServer", - "com.bigdata.service.jini.ClientServer", +//BTM "com.bigdata.service.jini.ClientServer", "com.bigdata.transaction.ServiceImpl", "com.bigdata.metadata.ServiceImpl", -//BTM "com.bigdata.shard.ServiceImpl", -"com.bigdata.loadbalancer.ServiceImpl" +"com.bigdata.shard.ServiceImpl", +"com.bigdata.loadbalancer.ServiceImpl", +"com.bigdata.executor.ServiceImpl" }; @@ -1340,6 +1341,36 @@ // log4j = "file:@NAS@/dist/bigdata/var/config/logging/loadbalancer-logging.properties"; } + +com.bigdata.executor.ServiceImpl { + + constraints = new IServiceConstraint[] { + new JiniRunningConstraint(), + new ZookeeperRunningConstraint(), + new HostAllowConstraint(bigdata.cs), + new MaxClientServicesPerHostConstraint(bigdata.maxClientServicePerHost) + }; + + args = new String[]{ + "-Xmx1600m", // was 800m + //"-XX:-UseGCOverheadLimit",//keeps VM alive even when memory starved + "-XX:+UseParallelOldGC", + //"-XX:ParallelGCThreads=8", + + "-Djava.util.logging.config.file=@NAS@/dist/bigdata/var/config/logging/executor-logging.properties", + "-Dlog4j.configuration=@NAS@/dist/bigdata/var/config/logging/executor-logging.properties", + "-Dlog4j.primary.configuration=@NAS@/dist/bigdata/var/config/logging/executor-logging.properties", + "-DusingServiceConfiguration=true", + "-Dbigdata.logDir=@NAS@/dist/bigdata/var/log", + "-DappHome=@APP_HOME@", + "-Dconfig=@NAS@/dist/bigdata/var/config/jini/executor.config" + }; + + serviceCount = bigdata.clientServiceCount; + + properties = new NV[] { + }; +} //BTM - END /** @@ -1959,7 +1990,7 @@ /* Template for matching the services to which the clients will be * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. While it is + * CallableExecutor as the interface to be discovered. While it is * possible to run tasks on a shard service or even a shard * locator service since they both implement ShardManagement, it * is generally discouraged unless the tasks require explicit @@ -1970,7 +2001,8 @@ new ServiceTemplate( null, //serviceID new Class[]{ - com.bigdata.service.IClientService.class +//BTM com.bigdata.service.IClientService.class + com.bigdata.service.CallableExecutor.class }, null // attributes ), @@ -2065,7 +2097,7 @@ /* Template for matching the services to which the clients will be * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. While it is + * CallableExecutor as the interface to be discovered. 
While it is * possible to run tasks on a shard service or even a shard * locator service since they both implement ShardManagement, it * is generally discouraged unless the tasks require explicit @@ -2076,7 +2108,8 @@ new ServiceTemplate( null, //serviceID new Class[]{ - com.bigdata.service.IClientService.class +//BTM com.bigdata.service.IClientService.class + com.bigdata.service.CallableExecutor.class }, null // attributes ), @@ -2261,7 +2294,7 @@ /* Template for matching the services to which the clients will be * distributed for execution. Normally you will specify - * IClientService as the interface to be discovered. While it is + * CallableExecutor as the interface to be discovered. While it is * possible to run tasks on a shard service or even a shard * locator service since they both implement ShardManagement, it * is generally discouraged unless the tasks require explicit @@ -2272,7 +2305,8 @@ new ServiceTemplate( null, //serviceID new Class[]{ - com.bigdata.service.IClientService.class +//BTM com.bigdata.service.IClientService.class + com.bigdata.service.CallableExecutor.class }, null), // attributes null // filter This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
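Editorial note: the EmbeddedShardService change above replaces eager wiring in the constructor with a deferredInit() method that is retried by a scheduled DeferredInitTask until the transaction service and load balancer have been discovered, after which the task cancels itself. The sketch below is a minimal, generic rendering of that self-cancelling pattern; the class name DeferredInitializer, the BooleanSupplier hook and the delay parameters are illustrative assumptions, not code taken from the patch.

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;

/**
 * Minimal sketch (not the patched class): a scheduled task polls until its
 * dependencies are available, performs the one-time initialization, and then
 * cancels itself.
 */
public class DeferredInitializer {

    private final ScheduledExecutorService scheduler =
            Executors.newSingleThreadScheduledExecutor();

    /** Attempts initialization; returns true once all dependencies are found. */
    private final BooleanSupplier tryInit;

    private volatile boolean done = false;
    private volatile ScheduledFuture<?> future;

    public DeferredInitializer(final BooleanSupplier tryInit) {
        this.tryInit = tryInit;
    }

    public void start(final long initialDelayMs, final long periodMs) {
        // First attempt inline; only schedule the retry task if it fails.
        done = tryInit.getAsBoolean();
        if (done) return;
        future = scheduler.scheduleWithFixedDelay(() -> {
            try {
                if (!done) {
                    done = tryInit.getAsBoolean();
                }
                if (done && future != null) {
                    future.cancel(false); // allow the current run to complete
                }
            } catch (Throwable t) {
                // Give up rather than retry forever on an unexpected error.
                done = true;
                if (future != null) future.cancel(false);
            }
        }, initialDelayMs, periodMs, TimeUnit.MILLISECONDS);
    }

    public void shutdown() {
        if (future != null) future.cancel(false);
        scheduler.shutdown();
    }
}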
From: <mar...@us...> - 2010-11-11 15:02:10
Revision: 3936 http://bigdata.svn.sourceforge.net/bigdata/?rev=3936&view=rev Author: martyncutcher Date: 2010-11-11 15:02:03 +0000 (Thu, 11 Nov 2010) Log Message: ----------- add reserved bytes to header and check for extent in establishExtent Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-11 14:39:43 UTC (rev 3935) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-11 15:02:03 UTC (rev 3936) @@ -762,6 +762,12 @@ cDefaultMetaBitsSize = strBuf.readInt(); final int allocBlocks = strBuf.readInt(); + strBuf.readInt(); // reserved5 + strBuf.readInt(); // reserved6 + strBuf.readInt(); // reserved7 + strBuf.readInt(); // reserved8 + strBuf.readInt(); // reserved9 + m_allocSizes = new int[allocBlocks]; for (int i = 0; i < allocBlocks; i++) { m_allocSizes[i] = strBuf.readInt(); @@ -1763,9 +1769,15 @@ try { str.writeInt(cVersion); str.writeLong(m_lastDeferredReleaseTime); - str.writeInt(cDefaultMetaBitsSize); + str.writeInt(cDefaultMetaBitsSize); + str.writeInt(m_allocSizes.length); - str.writeInt(m_allocSizes.length); + str.writeInt(0); // reserved5 + str.writeInt(0); // reserved6 + str.writeInt(0); // reserved7 + str.writeInt(0); // reserved8 + str.writeInt(0); // reserved9 + for (int i = 0; i < m_allocSizes.length; i++) { str.writeInt(m_allocSizes[i]); } @@ -1973,16 +1985,21 @@ * 0x0200 == 2.00 * 0x0320 == 3.20 */ - final private int cVersion = 0x0200; + final private int cVersion = 0x0300; /** * MetaBits Header - * int version - * long deferredFree - * int defaultMetaBitsSize - * int length of allocation sizes + * 0 int version + * 1-2 int[2] long deferredFree + * 3 int defaultMetaBitsSize + * 4 int length of allocation sizes + * 5 int reserved + * 6 int reserved + * 7 int reserved + * 8 int reserved + * 9 int reserved */ - final private int cMetaHdrFields = 5; + final private int cMetaHdrFields = 10; /** * @see Options#META_BITS_SIZE */ @@ -2965,10 +2982,12 @@ final long currentExtent = convertAddr(m_fileSize); - if (extent != currentExtent) { + if (extent > currentExtent) { extendFile(convertFromAddr(extent - currentExtent)); + } else if (extent < currentExtent) { + throw new IllegalArgumentException("Cannot shrink RWStore extent"); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
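Editorial note: the r3936 patch widens the metabits header from 5 to 10 ints, appending five reserved slots after the allocation-size count so that later layout changes (the version is bumped to 0x0300) need not shift the allocation-size table. The round-trip sketch below simply illustrates that 10-field layout using plain DataOutputStream/DataInputStream; the class and method names are hypothetical and this is not the RWStore serialization code itself.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

/**
 * Illustrative round-trip of the 10-int metabits header described in r3936:
 * version, deferred-release time (long), default metabits size, allocation
 * size count, five reserved slots, then the allocation-size table.
 */
public class MetaBitsHeaderSketch {

    static final int VERSION = 0x0300;

    static byte[] write(final long deferredReleaseTime,
            final int defaultMetaBitsSize, final int[] allocSizes)
            throws IOException {
        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
        final DataOutputStream out = new DataOutputStream(bos);
        out.writeInt(VERSION);              // field 0
        out.writeLong(deferredReleaseTime); // fields 1-2
        out.writeInt(defaultMetaBitsSize);  // field 3
        out.writeInt(allocSizes.length);    // field 4
        for (int i = 0; i < 5; i++) {
            out.writeInt(0);                // fields 5-9: reserved
        }
        for (int sz : allocSizes) {
            out.writeInt(sz);               // allocation size table
        }
        out.flush();
        return bos.toByteArray();
    }

    static int[] readAllocSizes(final byte[] buf) throws IOException {
        final DataInputStream in = new DataInputStream(
                new ByteArrayInputStream(buf));
        final int version = in.readInt();
        if (version != VERSION)
            throw new IOException("Unexpected version: 0x"
                    + Integer.toHexString(version));
        in.readLong();                      // deferred release time
        in.readInt();                       // default metabits size
        final int nsizes = in.readInt();
        for (int i = 0; i < 5; i++) {
            in.readInt();                   // skip reserved fields
        }
        final int[] sizes = new int[nsizes];
        for (int i = 0; i < nsizes; i++) {
            sizes[i] = in.readInt();
        }
        return sizes;
    }
}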
From: <tho...@us...> - 2010-11-11 14:39:50
Revision: 3935 http://bigdata.svn.sourceforge.net/bigdata/?rev=3935&view=rev Author: thompsonbry Date: 2010-11-11 14:39:43 +0000 (Thu, 11 Nov 2010) Log Message: ----------- Removed three files which were not in use (Config, ICommitCallback, DirectOutputStream). Removed several methods from IStore which were not in use. Updated RWStore to remove the concept of a persistent session and updated FixedAllocator to always act without reference to the concept of a persistent session. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/IStore.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Removed Paths: ------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/Config.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/DirectOutputStream.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/ICommitCallback.java Deleted: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/Config.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/Config.java 2010-11-11 14:07:22 UTC (rev 3934) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/Config.java 2010-11-11 14:39:43 UTC (rev 3935) @@ -1,41 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rwstore; - -/************************************************************************************************ - * This is a bit of a cludge, but somehow we need to let the stores know whether they need - * a locak file in this context - typically one is not needed in a web-application context. - **/ -public final class Config { - static boolean m_lockFileNeeded = true; - - public static boolean isLockFileNeeded() { - return m_lockFileNeeded; - } - - public static void setLockFileNeeded(boolean isNeeded) { - m_lockFileNeeded = isNeeded; - } -} Deleted: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/DirectOutputStream.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/DirectOutputStream.java 2010-11-11 14:07:22 UTC (rev 3934) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/DirectOutputStream.java 2010-11-11 14:39:43 UTC (rev 3935) @@ -1,57 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... 
- -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rwstore; - -import java.io.*; - -public class DirectOutputStream extends ByteArrayOutputStream { - - public DirectOutputStream(int size) { - super(size); - } - - public DirectOutputStream() { - } - - public void directWrite(RandomAccessFile file, int size) throws IOException { - file.write(buf, 0, size); - } - - public void directWrite(RandomAccessFile outfile) throws java.io.IOException { - outfile.write(buf, 0, size()); - } - - //------------------------------------------------------------- - - public void directWrite(java.io.OutputStream outstr) throws java.io.IOException { - outstr.write(buf, 0, size()); - } - - //------------------------------------------------------------- - - public void directWrite(java.io.OutputStream outstr, int outSize) throws java.io.IOException { - outstr.write(buf, 0, outSize); - } -} \ No newline at end of file Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2010-11-11 14:07:22 UTC (rev 3934) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2010-11-11 14:39:43 UTC (rev 3935) @@ -175,9 +175,9 @@ str.writeInt(block.m_bits[i]); } - if (!m_store.isSessionPreserved()) { +// if (!m_store.isSessionPreserved()) { block.m_transients = block.m_bits.clone(); - } +// } /** * If this allocator is shadowed then copy the new committed @@ -187,8 +187,8 @@ assert block.m_saveCommit != null; block.m_saveCommit = block.m_bits.clone(); - } else if (m_store.isSessionPreserved()) { - block.m_commit = block.m_transients.clone(); +// } else if (m_store.isSessionPreserved()) { +// block.m_commit = block.m_transients.clone(); } else { block.m_commit = block.m_bits.clone(); } @@ -201,19 +201,19 @@ str.close(); } - if (!m_store.isSessionPreserved()) { - m_freeBits += m_freeTransients; +// if (!m_store.isSessionPreserved()) { + m_freeBits += m_freeTransients; - // Handle re-addition to free list once transient frees are - // added back - if ((m_freeTransients == m_freeBits) && (m_freeTransients != 0)) { - m_freeList.add(this); - m_freeWaiting = false; - } - - m_freeTransients = 0; + // Handle re-addition to free list once transient frees are + // added back + if ((m_freeTransients == m_freeBits) && (m_freeTransients != 0)) { + m_freeList.add(this); + m_freeWaiting = false; } + m_freeTransients = 0; +// } + return buf; } catch (IOException e) { throw new StorageTerminalError("Error on write", e); Deleted: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/ICommitCallback.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/ICommitCallback.java 2010-11-11 14:07:22 UTC (rev 3934) +++ 
branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/ICommitCallback.java 2010-11-11 14:39:43 UTC (rev 3935) @@ -1,29 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rwstore; - -public interface ICommitCallback { - public void CommitCallback(); -} Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/IStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/IStore.java 2010-11-11 14:07:22 UTC (rev 3934) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/IStore.java 2010-11-11 14:39:43 UTC (rev 3935) @@ -26,258 +26,109 @@ import java.io.File; - -/************************************************************************************************ - * The IStore interface provides persistent file-backed storage. - * It can be used as a standalone utility, but has been primarily designed - * to support the Generic Persistent Object model. - **/ +/** + * The IStore interface provides persistent file-backed storage. It can be used + * as a standalone utility, but has been primarily designed to support the + * Generic Persistent Object model. + */ public interface IStore { - -// /********************************************************************* -// * Provides a link to an object to carryout any additional data updates -// * before the physical commit - used by the GPO object managers for example -// **/ -// public static interface ICommitCallback { -// public void commitCallback(); -// public void commitComplete(); -// } - public boolean isLongAddress(); - // /************************************************************** -// * Registers a commitCallback object. 
-// * -// * <p>This method may be called more than once, there maybe several -// * such objects.</p> -// * -// * <p>It is used by the GPO object managers to allow them to store -// * index information and other updated data after a commit -// * cycle.</p> +// * called when used as a server, returns whether facility is enabled // **/ -// public void setCommitCallback(ICommitCallback callback); - - /************************************************************** - * called when used as a server, returns whether facility is enabled - **/ - public boolean preserveSessionData(); +// public boolean preserveSessionData(); -// /************************************************************** -// * the filestore may be explicitly limited -// * - useful when testing, it is all too easy to fill a disk -// * -// * <p>the default is 1GB</p> -// * -// * @param size the new max filesize >> 8 -// **/ -// public void setMaxFileSize(int size); - - /************************************************************** - * the lowest level interface should normally not be used directly. + /** + * Writes data on the store. * * @return the allocated address **/ public long alloc(byte buf[], int size, IAllocationContext context); - - /************************************************************** - * frees allocated storage - * - * @param addr the storage address to be freed - **/ + + /** + * Frees allocated storage + * + * @param addr + * the storage address to be freed + */ public void free(long addr, int size); // /************************************************************** -// * Reallocates storage +// * Odd method needed by PSInputStream to fetch data of unknown +// * size into a buffer // * -// * @param oldAddr is the existing address to be freed -// * @return a stream to write to the store +// * <p>Both RWStore and WOStore store data in either explicit or +// * implicit block sizes.</p> +// * +// * @param addr the address of the data in the IStore +// * buf the buffer to store the data in +// * +// * @returns the size of the data copied // **/ -// public PSOutputStream realloc(long oldAddr, int size); -// -// public PSInputStream getData(long value); +// public int getDataSize(long addr, byte buf[]); - /************************************************************** - * Odd method needed by PSInputStream to fetch data of unknown - * size into a buffer - * - * <p>Both RWStore and WOStore store data in either explicit or - * implicit block sizes.</p> - * - * @param addr the address of the data in the IStore - * buf the buffer to store the data in - * - * &returns the size of the data copied - **/ - public int getDataSize(long addr, byte buf[]); - + /** + * Read data of a known size from the store. + * + * @param l + * the address of the data + * @param buf + * the buffer of the size required! + */ + public void getData(long l, byte buf[]); + // /************************************************************** -// * if the caller can be sure of the size, then a more efficient allocation can be made, -// * but the corresponding getData call must also be made with an explicit size. +// * Given a physical address (byte offset on the store), return true +// * if that address could be managed by an allocated block. 
// * -// * <p>this should not generally be used - but specific objects can exploit this -// * interface for storing special purpose fixed size structures.</p> -// * -// * <p>Note that the Write Once Store will not automatically preserve historical -// * address information if explicit buffers are used.</p> +// * @param a the storage address to be tested // **/ -// public long realloc(long oldaddr, int oldsze, byte buf[]); - - /************************************************************** - * Used to retrieve data of a known size, typically after having - * been allocated using fixed size reallocation. - * - * @param l the address of the data - * @param buf the buffer of the size required! - **/ - public void getData(long l, byte buf[]); - - /************************************************************** - * a debug method that verifies a storage address as active - * - * @param a the storage address to be tested - **/ - public boolean verify(long a); +// public boolean verify(long a); - /*************************************************************************************** - * this supports the core functionality of a WormStore, other stores should return - * zero, indicating no previous versions available - **/ - public long getPreviousAddress(long addr); - - /*************************************************************************************** - * @return whether the address given is a native IStore address - **/ - public boolean isNativeAddress(long value); - -// /*************************************************************************************** -// * the root address enables the store to be self contained! -// * Along with the allocation information to manage the data, the store by default -// * can store and provide a root address to data needed to initialize the system. -// * -// * @param addr the address to be stored as "root" -// **/ -// public void setRootAddr(long addr); -// -// /*************************************************************************************** -// * @return the root address previously set -// **/ -// public long getRootAddr(); +// /** +// * The {@link RWStore} always generates negative address values. +// * +// * @return whether the address given is a native IStore address +// */ +// public boolean isNativeAddress(long value); -// /*************************************************************************************** -// * A utility equivalent to : store.getData(store.getRootAddr()); +// /** +// * useful in debug situations // * -// * @return an InputStream for any data stored at the root address -// **/ -// public PSInputStream getRoot(); +// * @return store allocation and usage statistics +// */ +// public String getStats(boolean full); -// /*************************************************************************************** -// * clears all data from the store. -// **/ -// public void clear(); - - -// /*************************************************************************************** -// * increments the current nested transaction level -// **/ -// public void startTransaction(); -// -// /*************************************************************************************** -// * decrements the current nested transaction level, if the value is reduced to zero then -// * a physical commit is carried out, if the level is already zero, a runtime exception -// * is thrown. 
-// **/ -// public void commitTransaction(); -// -// /*************************************************************************************** -// * if the transaction level is greater than one, all modifcations are undone, and the -// * transaction level set to zero. -// **/ -// public void rollbackTransaction(); - - /*************************************************************************************** - * does what it says - **/ - public String getVersionString(); - - /*************************************************************************************** - * useful in debug situations - * - * @return store allocation and usage statistics - **/ - public String getStats(boolean full); - + /** + * Close the file. + */ public void close(); - /*************************************************************************************** - * Needed by PSOutputStream for BLOB buffer chaining. - **/ - public int bufferChainOffset(); - -// public void absoluteWriteLong(long addr, int threshold, long value); -// -// /*************************************************************************************** +// /** // * Needed by PSOutputStream for BLOB buffer chaining. -// **/ -// public void absoluteWriteInt(int addr, int offset, int value); -// -// /*************************************************************************************** -// * Needed to free Blob chains. -// **/ -// public int absoluteReadInt(int addr, int offset); -// -// /*************************************************************************************** -// * Needed to free Blob chains. -// **/ -// public int absoluteReadLong(long addr, int offset); - -// /*************************************************************************************** -// * copies the store to a new file, this is not necessarily a byte for byte copy -// * since the store could write only consolidated data - particulalry relevant for the -// * Write Once store. -// * -// * @param filename specifies the file to be copied to. -// **/ -// public void backup(String filename) throws FileNotFoundException, IOException; -// -// /*************************************************************************************** -// * copies the store to a new file, this is not necessarily a byte for byte copy -// * since the store could write only consolidated data - particulalry relevant for the -// * Write Once store. -// * -// * @param outstr specifies stream to be copied to. -// **/ -// public void backup(OutputStream outstr) throws IOException; -// -// /*************************************************************************************** -// * useful in deployed web services to be able to restore a previously backed-up -// * store. Can also be useful to copy databases, for example, when running -// * a test system that can be simply restored to a backup extracted from a live system. -// * -// * @param instr specifies stream to be restored from. -// **/ -// public void restore(InputStream instr) throws IOException; +// */ +// public int bufferChainOffset(); - /********************************************************************************************* - * Retrieves store file. - * Can be used to delete the store after the IStore has been released - * @return the File object - **/ + /** + * Retrieves store file. 
Can be used to delete the store after the IStore + * has been released + * + * @return the File object + */ public File getStoreFile(); -// public void absoluteWriteAddress(long addr, int threshold, long addr2); - - public int getAddressSize(); - /** - * Called by the PSOutputStream to register the header bloc of a blob. The store - * must return a new address that is used to retrieve the blob header. This double - * indirection is required to be able to manage the blobs, since the blob header - * itself is of variable size and is handled by the standard FixedAllocators in the - * RWStore. For a WORM implementation the address of the blob header can be returned - * directly + * Called by the PSOutputStream to register the header block of a blob. The + * store must return a new address that is used to retrieve the blob header. + * This double indirection is required to be able to manage the blobs, since + * the blob header itself is of variable size and is handled by the standard + * FixedAllocators in the RWStore. * * @param addr - * @return + * The address of the header block of the blob. + * + * @return The */ public int registerBlob(int addr); + } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-11 14:07:22 UTC (rev 3934) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-11 14:39:43 UTC (rev 3935) @@ -36,10 +36,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -300,24 +298,20 @@ // protected int m_transactionCount; // private boolean m_committing; - /** - * When <code>true</code> the allocations will not actually be recycled - * until after a store restart. When <code>false</code>, the allocations are - * recycled once they satisfy the history retention requirement. - * - * FIXME Should this go away or be raised as an option for unlimited - * retention until restart? - */ - private boolean m_preserveSession = false; +// /** +// * When <code>true</code> the allocations will not actually be recycled +// * until after a store restart. When <code>false</code>, the allocations are +// * recycled once they satisfy the history retention requirement. +// */ +// private boolean m_preserveSession = false; // private boolean m_readOnly; - /** - * lists of total alloc blocks. - * - * @todo examine concurrency and lock usage for {@link #m_alloc}, which is - * used by {@link #getStats(boolean)}, and the rest of these lists as - * well. - */ + /** + * lists of total alloc blocks. + * + * @todo examine concurrency and lock usage for {@link #m_alloc} and the + * rest of these lists. + */ private final ArrayList<Allocator> m_allocs; /** lists of free alloc blocks. 
*/ @@ -1295,46 +1289,46 @@ return out.toString(); } - /** - * FIXME: This method is not currently used with BigData, if needed then - * the address mangling needs re-working - */ - public int getDataSize(long addr, byte buf[]) { - throw new UnsupportedOperationException(); - -// synchronized (this) { -// m_writes.flush(); -// -// if (addr == 0) { -// return 0; -// } -// -// try { -// int size = addr2Size((int) addr); -// synchronized (m_raf) { -//// m_raf.seek(physicalAddress((int) addr)); -//// m_raf.readFully(buf, 0, size); -// m_raf.getChannel().read(ByteBuffer.wrap(buf, 0, size), physicalAddress((int) addr)); -// } -// -// return size; -// } catch (IOException e) { -// throw new StorageTerminalError("Unable to read data", e); -// } -// } - } +// /** +// * FIXME: This method is not currently used with BigData, if needed then +// * the address mangling needs re-working +// */ +// public int getDataSize(long addr, byte buf[]) { +// throw new UnsupportedOperationException(); +// +//// synchronized (this) { +//// m_writes.flush(); +//// +//// if (addr == 0) { +//// return 0; +//// } +//// +//// try { +//// int size = addr2Size((int) addr); +//// synchronized (m_raf) { +////// m_raf.seek(physicalAddress((int) addr)); +////// m_raf.readFully(buf, 0, size); +//// m_raf.getChannel().read(ByteBuffer.wrap(buf, 0, size), physicalAddress((int) addr)); +//// } +//// +//// return size; +//// } catch (IOException e) { +//// throw new StorageTerminalError("Unable to read data", e); +//// } +//// } +// } - /** - * Always returns ZERO (0L). - * <p> - * This is intended to support the core functionality of a WormStore, other - * stores should return zero, indicating no previous versions available - */ - public long getPreviousAddress(final long laddr) { - - return 0; - - } +// /** +// * Always returns ZERO (0L). 
+// * <p> +// * This is intended to support the core functionality of a WormStore, other +// * stores should return zero, indicating no previous versions available +// */ +// public long getPreviousAddress(final long laddr) { +// +// return 0; +// +// } public void free(final long laddr, final int sze) { @@ -1795,12 +1789,12 @@ } } - static final float s_version = 3.0f; +// static final float s_version = 3.0f; +// +// public String getVersionString() { +// return "RWStore " + s_version; +// } - public String getVersionString() { - return "RWStore " + s_version; - } - public void commitChanges(final Journal journal) { assertOpen(); checkCoreAllocations(); @@ -2361,35 +2355,35 @@ return -1; } - // -------------------------------------------------------------------------------------- - private String allocListStats(final List<Allocator> list, final AtomicLong counter) { - final StringBuffer stats = new StringBuffer(); - final Iterator<Allocator> iter = list.iterator(); - while (iter.hasNext()) { - stats.append(iter.next().getStats(counter)); - } - - return stats.toString(); - } - - public String getStats(final boolean full) { - - final AtomicLong counter = new AtomicLong(); - - final StringBuilder sb = new StringBuilder("FileSize : " + m_fileSize - + " allocated : " + m_nextAllocation + "\r\n"); - - if (full) { - - sb.append(allocListStats(m_allocs, counter)); - - sb.append("Allocated : " + counter); - - } - - return sb.toString(); - - } +// // -------------------------------------------------------------------------------------- +// private String allocListStats(final List<Allocator> list, final AtomicLong counter) { +// final StringBuffer stats = new StringBuffer(); +// final Iterator<Allocator> iter = list.iterator(); +// while (iter.hasNext()) { +// stats.append(iter.next().getStats(counter)); +// } +// +// return stats.toString(); +// } +// +// public String getStats(final boolean full) { +// +// final AtomicLong counter = new AtomicLong(); +// +// final StringBuilder sb = new StringBuilder("FileSize : " + m_fileSize +// + " allocated : " + m_nextAllocation + "\r\n"); +// +// if (full) { +// +// sb.append(allocListStats(m_allocs, counter)); +// +// sb.append("Allocated : " + counter); +// +// } +// +// return sb.toString(); +// +// } public static class AllocationStats { public AllocationStats(final int i) { @@ -2510,6 +2504,13 @@ // -------------------------------------------------------------------------------------- + /** + * Given a physical address (byte offset on the store), return true if that + * address could be managed by an allocated block. + * + * @param a + * the storage address to be tested. + */ public boolean verify(final long laddr) { final int addr = (int) laddr; @@ -2594,28 +2595,33 @@ // } // } + /** + * The {@link RWStore} always generates negative address values. 
+ * + * @return whether the address given is a native IStore address + */ public boolean isNativeAddress(final long addr) { return addr <= 0; } - /******************************************************************************* - * called when used as a server, returns whether facility is enabled, this - * is the whole point of the wormStore - so the answer is true - **/ - public boolean preserveSessionData() { - m_preserveSession = true; +// /******************************************************************************* +// * called when used as a server, returns whether facility is enabled, this +// * is the whole point of the wormStore - so the answer is true +// **/ +// public boolean preserveSessionData() { +// m_preserveSession = true; +// +// return true; +// } +// +// /******************************************************************************* +// * called by allocation blocks to determine whether they can re-allocate +// * data within this session. +// **/ +// protected boolean isSessionPreserved() { +// return m_preserveSession || m_contexts.size() > 0; +// } - return true; - } - - /******************************************************************************* - * called by allocation blocks to determine whether they can re-allocate - * data within this session. - **/ - protected boolean isSessionPreserved() { - return m_preserveSession || m_contexts.size() > 0; - } - // /********************************************************************* // * create backup file, copy data to it, and close it. // **/ @@ -2702,21 +2708,21 @@ // } // } - /*************************************************************************************** - * Needed by PSOutputStream for BLOB buffer chaining. - **/ - public int bufferChainOffset() { - return m_maxFixedAlloc - 4; - } +// /*************************************************************************************** +// * Needed by PSOutputStream for BLOB buffer chaining. +// **/ +// public int bufferChainOffset() { +// return m_maxFixedAlloc - 4; +// } public File getStoreFile() { return m_fd; } - public boolean isLongAddress() { - // always ints - return false; - } +// public boolean isLongAddress() { +// // always ints +// return false; +// } // public int absoluteReadLong(long addr, int offset) { // throw new UnsupportedOperationException(); @@ -2730,9 +2736,9 @@ // absoluteWriteInt((int) addr, threshold, (int) addr2); // } - public int getAddressSize() { - return 4; - } +// public int getAddressSize() { +// return 4; +// } // public RandomAccessFile getRandomAccessFile() { // return m_raf; @@ -2838,7 +2844,7 @@ /** * A Blob Allocator maintains a list of Blob headers. The allocator stores - * upto 255 blob headers plus a checksum. When a request is made to read the + * up to 255 blob headers plus a checksum. When a request is made to read the * blob data, the blob allocator retrieves the blob header and reads the * data from that into the passed byte array. */ @@ -2851,8 +2857,8 @@ } if (ba == null) { final Allocator lalloc = (Allocator) m_allocs.get(m_allocs.size() - 1); - final int psa = lalloc.getRawStartAddr(); // previous block - // start address + // previous block start address + final int psa = lalloc.getRawStartAddr(); assert (psa - 1) > m_nextAllocation; ba = new BlobAllocator(this, psa - 1); ba.setFreeList(m_freeBlobs); // will add itself to the free list This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
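Editorial note: after the r3935 cleanup, the IStore surface that remains in active use is essentially alloc(byte[], int, IAllocationContext), getData(long, byte[]) with a buffer of exactly the written size, free(long, int), close() and getStoreFile(). A minimal caller against that trimmed contract might look like the sketch below; passing a null IAllocationContext for an unscoped allocation and the helper name are assumptions made for illustration, not something asserted by the patch.

import java.nio.charset.StandardCharsets;

import com.bigdata.rwstore.IStore;

/**
 * Usage sketch against the trimmed IStore surface (alloc / getData / free).
 * Not a test from the patch.
 */
public class IStoreRoundTripSketch {

    static void roundTrip(final IStore store) {
        final byte[] data = "hello".getBytes(StandardCharsets.UTF_8);

        // Write the record and remember its address and exact size.
        final long addr = store.alloc(data, data.length, null /* context: assumed acceptable */);

        // getData() requires a buffer of exactly the size that was written.
        final byte[] read = new byte[data.length];
        store.getData(addr, read);

        // The caller is responsible for handing back both address and size.
        store.free(addr, data.length);
    }
}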
From: <tho...@us...> - 2010-11-11 14:07:29
Revision: 3934 http://bigdata.svn.sourceforge.net/bigdata/?rev=3934&view=rev Author: thompsonbry Date: 2010-11-11 14:07:22 +0000 (Thu, 11 Nov 2010) Log Message: ----------- Made a few things in AllocBlock private or final. Removed the WriteCacheServiceReference from AllocBlock and FixedAllocator as it was not used. Updated RWStore to reflect the change to the FixedAllocator ctor. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java 2010-11-11 13:46:38 UTC (rev 3933) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java 2010-11-11 14:07:22 UTC (rev 3934) @@ -26,7 +26,6 @@ import java.util.ArrayList; -import com.bigdata.io.writecache.WriteCacheService; import com.bigdata.rwstore.RWStore.AllocationStats; /** @@ -37,6 +36,10 @@ * to use {@link System#arraycopy(Object, int, Object, int, int)} to copy * the data rather than cloning it. * + * @todo Review the locks held during reads against {@link AllocBlock}. Is it + * possible that we could have updates which are not being made visible to + * readers? + * * @todo change to use long[]s. */ public class AllocBlock { @@ -67,20 +70,20 @@ * Just the newly allocated bits. This will be copied onto {@link #m_commit} * when the current native transaction commits. */ - int m_bits[]; + final int m_bits[]; /** * All of the bits from the commit point on entry to the current native * transaction plus any newly allocated bits. */ int m_transients[]; - /** - * Used to clear an address on the {@link WriteCacheService} if it has been - * freed. - */ - private final RWWriteCacheService m_writeCache; +// /** +// * Used to clear an address on the {@link WriteCacheService} if it has been +// * freed. +// */ +// private final RWWriteCacheService m_writeCache; - AllocBlock(final int addrIsUnused, final int bitSize, final RWWriteCacheService cache) { - m_writeCache = cache; + AllocBlock(final int addrIsUnused, final int bitSize) {//, final RWWriteCacheService cache) { +// m_writeCache = cache; m_ints = bitSize; m_commit = new int[bitSize]; m_bits = new int[bitSize]; @@ -116,16 +119,16 @@ if (!RWStore.tstBit(m_bits, bit)) { throw new IllegalArgumentException("Freeing bit not set"); } - - // Allocation optimization - if bit NOT set in committed memory then - // clear - // the transient bit to permit reallocation within this transaction. - // - // Note that with buffered IO there is also an opportunity to avoid - // output to - // the file by removing any pending write to the now freed address. On - // large - // transaction scopes this may be significant. + + /* + * Allocation optimization - if bit NOT set in committed memory then + * clear the transient bit to permit reallocation within this + * transaction. + * + * Note that with buffered IO there is also an opportunity to avoid + * output to the file by removing any pending write to the now freed + * address. On large transaction scopes this may be significant. 
+ */ RWStore.clrBit(m_bits, bit); if (!RWStore.tstBit(m_commit, bit)) { @@ -190,7 +193,7 @@ return allocBits; } - public String getStats(AllocationStats stats) { + public String getStats(final AllocationStats stats) { final int total = m_ints * 32; final int allocBits = getAllocBits(); Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2010-11-11 13:46:38 UTC (rev 3933) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2010-11-11 14:07:22 UTC (rev 3934) @@ -42,7 +42,7 @@ private static final Logger log = Logger.getLogger(FixedAllocator.class); - final private RWWriteCacheService m_writeCache; +// final private RWWriteCacheService m_writeCache; volatile private int m_freeBits; volatile private int m_freeTransients; @@ -298,7 +298,7 @@ * @param preserveSessionData * @param cache */ - FixedAllocator(final RWStore store, final int size, final RWWriteCacheService cache) { + FixedAllocator(final RWStore store, final int size) {//, final RWWriteCacheService cache) { m_diskAddr = 0; m_store = store; @@ -323,7 +323,7 @@ m_bitSize = 32; } - m_writeCache = cache; +// m_writeCache = cache; // number of blocks in this allocator, bitSize plus 1 for start address final int numBlocks = 255 / (m_bitSize + 1); @@ -335,7 +335,7 @@ */ m_allocBlocks = new ArrayList<AllocBlock>(numBlocks); for (int i = 0; i < numBlocks; i++) { - m_allocBlocks.add(new AllocBlock(0, m_bitSize, m_writeCache)); + m_allocBlocks.add(new AllocBlock(0, m_bitSize));//, cache)); } m_freeTransients = 0; Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-11 13:46:38 UTC (rev 3933) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-11 14:07:22 UTC (rev 3934) @@ -915,7 +915,7 @@ while (fixedSize < allocSize) fixedSize = 64 * m_allocSizes[++index]; - allocator = new FixedAllocator(this, allocSize, m_writeCache); + allocator = new FixedAllocator(this, allocSize);//, m_writeCache); freeList = m_freeFixed[index]; } else { @@ -969,7 +969,7 @@ final int allocSize = 64 * m_allocSizes[block]; final FixedAllocator allocator = new FixedAllocator(this, - allocSize, m_writeCache); + allocSize);//, m_writeCache); allocator.setIndex(m_allocs.size()); @@ -1480,7 +1480,7 @@ final ArrayList<FixedAllocator> list = m_freeFixed[i]; if (list.size() == 0) { - allocator = new FixedAllocator(this, block, m_writeCache); + allocator = new FixedAllocator(this, block);//, m_writeCache); allocator.setFreeList(list); allocator.setIndex(m_allocs.size()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
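Editorial note: the r3934 change also tightens the comment in AllocBlock.freeBit() describing the allocation optimization: a freed bit that was never committed can have its transient bit cleared too, so the slot becomes reusable within the same native transaction. The sketch below restates that bitmap logic in isolation; the helper names mirror RWStore.tstBit/setBit/clrBit, but the class is an illustration rather than the production allocator.

/**
 * Minimal sketch of the int[]-backed bitmaps AllocBlock manipulates and of
 * the freeBit optimization discussed in r3934.
 */
public class AllocBitsSketch {

    static boolean tstBit(final int[] bits, final int bit) {
        return (bits[bit / 32] & (1 << (bit % 32))) != 0;
    }

    static void setBit(final int[] bits, final int bit) {
        bits[bit / 32] |= (1 << (bit % 32));
    }

    static void clrBit(final int[] bits, final int bit) {
        bits[bit / 32] &= ~(1 << (bit % 32));
    }

    /** Bits live (allocated) in the current native transaction. */
    final int[] bits;
    /** Bits as of the last commit point. */
    final int[] commit;
    /** Committed bits plus anything newly allocated this transaction. */
    final int[] transients;

    AllocBitsSketch(final int nints) {
        bits = new int[nints];
        commit = new int[nints];
        transients = new int[nints];
    }

    void allocBit(final int bit) {
        setBit(bits, bit);
        setBit(transients, bit);
    }

    void freeBit(final int bit) {
        if (!tstBit(bits, bit))
            throw new IllegalArgumentException("Freeing bit not set");
        clrBit(bits, bit);
        if (!tstBit(commit, bit)) {
            // Never committed: safe to make the slot immediately reusable
            // within this transaction.
            clrBit(transients, bit);
        }
    }
}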
From: <tho...@us...> - 2010-11-11 13:46:44
Revision: 3933 http://bigdata.svn.sourceforge.net/bigdata/?rev=3933&view=rev Author: thompsonbry Date: 2010-11-11 13:46:38 +0000 (Thu, 11 Nov 2010) Log Message: ----------- Removed unused import. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWWriteCacheService.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWWriteCacheService.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWWriteCacheService.java 2010-11-11 13:46:20 UTC (rev 3932) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWWriteCacheService.java 2010-11-11 13:46:38 UTC (rev 3933) @@ -32,7 +32,6 @@ import org.apache.log4j.Logger; import com.bigdata.io.IReopenChannel; -import com.bigdata.io.writecache.BufferedWrite; import com.bigdata.io.writecache.WriteCache; import com.bigdata.io.writecache.WriteCacheService; import com.bigdata.io.writecache.WriteCache.FileChannelScatteredWriteCache; @@ -40,8 +39,8 @@ /** * Defines the WriteCacheService to be used by the RWStore. + * * @author mgc - * */ public class RWWriteCacheService extends WriteCacheService { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-11-11 13:46:26
Revision: 3932 http://bigdata.svn.sourceforge.net/bigdata/?rev=3932&view=rev Author: thompsonbry Date: 2010-11-11 13:46:20 +0000 (Thu, 11 Nov 2010) Log Message: ----------- Made a bunch of stuff private. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedOutputStream.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedOutputStream.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedOutputStream.java 2010-11-11 13:46:03 UTC (rev 3931) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedOutputStream.java 2010-11-11 13:46:20 UTC (rev 3932) @@ -27,34 +27,34 @@ import java.io.*; public class FixedOutputStream extends OutputStream { - byte m_buf[]; - int m_count = 0; + private final byte m_buf[]; + private int m_count = 0; - public FixedOutputStream(byte buf[]) { + public FixedOutputStream(final byte buf[]) { m_buf = buf; } /**************************************************************** * write a single 4 byte integer **/ - public void writeInt(int b) { + public void writeInt(final int b) { m_buf[m_count++] = (byte) ((b >>> 24) & 0xFF); m_buf[m_count++] = (byte) ((b >>> 16) & 0xFF); m_buf[m_count++] = (byte) ((b >>> 8) & 0xFF); m_buf[m_count++] = (byte) ((b >>> 0) & 0xFF); } - public void write(int b) throws IOException { + public void write(final int b) throws IOException { m_buf[m_count++] = (byte) b; } - public void write(byte b[], int off, int len) throws IOException { + public void write(final byte b[], final int off, final int len) throws IOException { System.arraycopy(b, off, m_buf, m_count, len); m_count += len; } - public void writeLong(long txReleaseTime) { + public void writeLong(final long txReleaseTime) { writeInt((int) (txReleaseTime >> 32)); writeInt((int) txReleaseTime & 0xFFFFFFFF); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
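Editorial note: FixedOutputStream.writeInt()/writeLong() in r3932 encode values big-endian (most significant byte first) into a caller-supplied byte[]. For reference, a matching decoder would unpack the same layout as shown below; this reader is a hypothetical companion written for illustration and is not part of the patch.

/**
 * Hypothetical big-endian reader matching FixedOutputStream's writeInt()
 * and writeLong() byte layout.
 */
public class FixedInputSketch {

    private final byte[] m_buf;
    private int m_count = 0;

    public FixedInputSketch(final byte[] buf) {
        m_buf = buf;
    }

    public int readInt() {
        int v = (m_buf[m_count++] & 0xFF) << 24;
        v |= (m_buf[m_count++] & 0xFF) << 16;
        v |= (m_buf[m_count++] & 0xFF) << 8;
        v |= (m_buf[m_count++] & 0xFF);
        return v;
    }

    public long readLong() {
        // High word first, matching writeLong() in the patch.
        final long hi = readInt() & 0xFFFFFFFFL;
        final long lo = readInt() & 0xFFFFFFFFL;
        return (hi << 32) | lo;
    }
}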
From: <tho...@us...> - 2010-11-11 13:46:09
Revision: 3931 http://bigdata.svn.sourceforge.net/bigdata/?rev=3931&view=rev Author: thompsonbry Date: 2010-11-11 13:46:03 +0000 (Thu, 11 Nov 2010) Log Message: ----------- Modified to use log4j, to declare a serial version id, etc. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/StorageTerminalError.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/StorageTerminalError.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/StorageTerminalError.java 2010-11-11 13:45:29 UTC (rev 3930) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/StorageTerminalError.java 2010-11-11 13:46:03 UTC (rev 3931) @@ -24,16 +24,25 @@ package com.bigdata.rwstore; +import org.apache.log4j.Logger; + public class StorageTerminalError extends Error { - protected Throwable m_cause; - protected static java.util.logging.Logger cat = java.util.logging.Logger.getLogger(StorageTerminalError.class.getName()); - public StorageTerminalError(String message, Throwable cause) { + /** + * + */ + private static final long serialVersionUID = 1L; + + private final Throwable m_cause; + + private static final transient Logger cat = Logger.getLogger(StorageTerminalError.class); + + public StorageTerminalError(final String message, final Throwable cause) { super(message); m_cause = cause; - cat.severe(message); + cat.fatal(message); } public String getMessage() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-11-11 13:45:35
Revision: 3930 http://bigdata.svn.sourceforge.net/bigdata/?rev=3930&view=rev Author: thompsonbry Date: 2010-11-11 13:45:29 +0000 (Thu, 11 Nov 2010) Log Message: ----------- Removed several files which are not used in the current implementation. Removed Paths: ------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/LockFile.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSInputStream.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/WriteBlock.java Deleted: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/LockFile.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/LockFile.java 2010-11-11 13:41:06 UTC (rev 3929) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/LockFile.java 2010-11-11 13:45:29 UTC (rev 3930) @@ -1,108 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -package com.bigdata.rwstore; - -import java.io.File; - -import org.apache.log4j.Logger; - -import com.bigdata.journal.IJournal; - -public class LockFile { - /** - * Logger. - */ - protected static final Logger log = Logger.getLogger(LockFile.class); - - protected File m_lockfd = null; - private Thread m_lockThread = null; - - public static LockFile create(String lckname) { - LockFile lf = new LockFile(lckname); - - return (lf.m_lockfd != null) ? 
lf : null; - } - - public LockFile(String lckname) { - try { - log.info("** LockFile request **"); - - m_lockfd = new File(lckname); - if (m_lockfd.exists()) { - log.info("** LockFile exists **"); - if (m_lockfd.lastModified() > (System.currentTimeMillis() - (40 * 1000))) { - log.warn("** CONFLICT - STILL IN USE **"); - - m_lockfd = null; - - return; - } else { - log.info("** Deleting current Lock File **"); - m_lockfd.delete(); - } - } - - File pfile = m_lockfd.getParentFile(); - if (pfile != null) { - pfile.mkdirs(); - } - - m_lockfd.createNewFile(); - - m_lockfd.deleteOnExit(); - - m_lockThread = new Thread() { - public void run() { - while (m_lockfd != null) { - m_lockfd.setLastModified(System.currentTimeMillis()); - - try { - sleep(10 * 1000); - } catch (Throwable e) { - return; - } - } - } - }; - m_lockThread.setDaemon(true); - - m_lockThread.start(); - } catch (Throwable e) { - log.error("LockFile Error", e); - - m_lockfd = null; - } - } - - public void clear() { - if (m_lockfd != null) { - File fd = m_lockfd; - - m_lockfd = null; - - fd.delete(); - } - } -} Deleted: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSInputStream.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSInputStream.java 2010-11-11 13:41:06 UTC (rev 3929) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/PSInputStream.java 2010-11-11 13:45:29 UTC (rev 3930) @@ -1,298 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -package com.bigdata.rwstore; - -import java.io.*; - -/************************************************************************ - * PSOutputStream - * - * Provides stream interface direct to the low-level store. - * - * Retrieved from an IObjectStore to enable output to the store. - * - * The key idea here is that rather than a call like : - * store.realloc(oldAddr, byteOutputStream)=> newAddress - * - * instead : - * store.allocStream(oldAddr)=>PSOutputStream - * - * and then : - * stream.save()=> newAddress - * - * This will enable large data formats to be streamed to the data store, - * where the previous interface would have required that the entire - * resource was loaded into a memory structure first, before being - * written out in a single block to the store. - * - * This new approach will also enable the removal of BLOB allocation - * strategy. Instead, "BLOBS" will be served by linked fixed allocation - * blocks, flushed out by the stream. - * - * A big advantage of this is that BLOB reallocation is now a lot simpler, - * since BLOB storage is simply a potentially large number of 8K blocks. - * - * This also opens up the possibility of a Stream oriented data type, that - * could be used to serve up a variety of data streams. 
By providing - * relevant interfaces with the client/server system, a server can then - * provide multiple streams to a high number of clients. - * - * To this end, the output stream has a fixed buffer size, and they are recycled - * from a pool of output streams. - * - *@deprecated Unused as of 7/2/2010 - **/ -public class PSInputStream extends InputStream { - - static PSInputStream m_poolHead = null; - static PSInputStream m_poolTail = null; - static Integer m_lock = new Integer(42); - static int m_streamCount = 0; - - static int s_allocStreams = 0; - static int s_returnStreams = 0; - - public static PSInputStream getNew(IStore store, int size) { - synchronized (m_lock) { - s_allocStreams++; - - PSInputStream ret = m_poolHead; - if (ret != null) { - m_streamCount--; - - m_poolHead = ret.next(); - if (m_poolHead == null) { - m_poolTail = null; - } - } else { - ret = new PSInputStream(); - } - - ret.init(store, size); - - return ret; - } - } - - /******************************************************************* - * maintains pool of streams - in a normal situation there will only - * me a single stream continually re-used, but with some patterns - * there could be many streams. For this reason it is worth checking - * that the pool is not maintained at an unnecessaily large value, so - * maximum of 10 streams are maintained - adding upto 80K to the - * garbage collect copy. - **/ - static public void returnStream(PSInputStream stream) { - synchronized (m_lock) { - s_returnStreams++; - - if (m_streamCount > 10) { - return; - } - - if (m_poolTail != null) { - m_poolTail.setNext(stream); - } else { - m_poolHead = stream; - } - - m_poolTail = stream; - m_streamCount++; - } - } - - final int cBufsize = 16 * 1024; - int m_blobThreshold = 0; - byte[] m_buf = new byte[cBufsize]; - int m_headAddr = 0; - int m_count = 0; - int m_cursor = 0; - int m_totalBytes = -1; - int m_totalRead = 0; - IStore m_store; - - private PSInputStream m_next = null; - - private PSInputStream next() { - return m_next; - } - - private void setNext(PSInputStream str) { - m_next = str; - } - - public void close() { - returnStream(this); - } - - /**************************************************************** - * resets private state variables for reuse of stream - **/ - void init(IStore store, int size) { - m_headAddr = 0; - m_count = size; - m_store = store; - m_cursor = 0; - m_blobThreshold = m_store.bufferChainOffset(); - m_totalBytes = -1; - m_totalRead = 0; - } - - - /**************************************************************** - * Returns buffer for initial read - FIX PROTOCOL LATER - **/ - public void setTotalBytes(int totalBytes) { - m_totalBytes = totalBytes; - } - - /**************************************************************** - * Returns buffer for initial read - FIX PROTOCOL LATER - **/ - public byte[] getBuffer() { - return m_buf; - } - - /************************************************************ - * util to ensure negatives don't screw things - **/ - private int makeInt(byte val) { - int ret = val; - - return ret & 0xFF; - } - - /**************************************************************** - * Reads next byte - throws EOFException if none more available - **/ - public int read() throws IOException { - - if (m_totalBytes >=0) { - if (m_totalRead >= m_totalBytes) { - return -1; - } - } - - m_totalRead++; - - if (m_cursor == m_blobThreshold) { - int nextAddr = makeInt(m_buf[m_cursor++]) << 24; - nextAddr += makeInt(m_buf[m_cursor++]) << 16; - nextAddr += makeInt(m_buf[m_cursor++]) << 8; - 
nextAddr += makeInt(m_buf[m_cursor]); - - m_count = m_store.getDataSize(nextAddr, m_buf); - - m_cursor = 0; - } - - if (m_cursor >= m_count) { - return -1; - } - - int ret = m_buf[m_cursor++]; - return ret & 0xFF; - } - - /**************************************************************** - * Reads next 4 byte integer value - **/ - public int readInt() throws IOException { - int value = read() << 24; - value += read() << 16; - value += read() << 8; - value += read(); - - return value; - } - - public long readLong() throws IOException { - long value = readInt(); - value <<= 32; - - value += readInt(); - - return value; - } - - public synchronized int read(byte b[], int off, int len) throws IOException { - if (len == 0) { - return 0; - } - - if (len <= available()) { - System.arraycopy(m_buf, m_cursor, b, off, len); - m_cursor += len; - m_totalRead += len; - } else { - for (int i = 0; i < len; i++) { - int r = read(); - if (r != -1) { - b[off + i] = (byte) r; - } else { - return i == 0 ? -1 : i; - } - } - } - - return len; - } - - /**************************************************************** - * Space left - until buffer overrun - **/ - public int available() throws IOException { - if (m_count < m_blobThreshold) { - return m_count - m_cursor; - } else { - return m_blobThreshold - m_cursor; - } - } - - /**************************************************************** - * utility method that extracts all data from this stream and - * writes to the output stream - **/ - public int read(OutputStream outstr) throws IOException { - byte b[] = new byte[512]; - - int retval = 0; - - int r = read(b); - while (r == 512) { - outstr.write(b, 0, r); - retval += r; - - r = read(b); - } - - if (r != -1) { - outstr.write(b, 0, r); - retval += r; - } - - return retval; - } - } \ No newline at end of file Deleted: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/WriteBlock.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/WriteBlock.java 2010-11-11 13:41:06 UTC (rev 3929) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/WriteBlock.java 2010-11-11 13:45:29 UTC (rev 3930) @@ -1,223 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -package com.bigdata.rwstore; - -import java.io.*; - -public class WriteBlock { - protected static java.util.logging.Logger cat = java.util.logging.Logger.getLogger(WriteBlock.class.getName()); - - int m_total = 0; - RandomAccessFile m_file = null; - - WriteEntry m_head = null; - WriteEntry m_tail = null; // search back from end if better bet in general - - public WriteBlock(RandomAccessFile file) { - m_file = file; - } - - boolean m_directWrite = true; - - public void addWrite(long diskAddr, byte[] buf, int size) { - if (size == 0) { // nowt to do - return; - } - - if (diskAddr < 0) { - throw new Error("addWrite called to negative address! - " + diskAddr); - } - - if (m_directWrite && diskAddr > 100 * 1000 * 1000) { - // only start buffering writes once filesize > 100Mb ? - m_directWrite = false; - } - - if (m_directWrite) { - try { - m_file.seek(diskAddr); - m_file.write(buf, 0, size); - // cat.info("at: " + diskAddr + ", length: " + size); - } catch (Exception e) { - throw new Error("WriteBlock.doWrite : " + m_file + "-" + diskAddr + "-" + buf + "-" + e); - } - } else { - WriteEntry block = new WriteEntry(diskAddr, buf, size); - m_total += size; - - placeEntry(block); - - if (m_total > 50 * 1024) { // should be configured? - flush(); - } - } - } - - /********************************************************** - * find first block whose address is greater than this, and insert before - * - * if block has same address, then update with this buffer/size - **/ - void placeEntry(WriteEntry entry) { - if (m_head == null) { - m_head = entry; - m_tail = entry; - } else { - WriteEntry tail = m_tail; - - while (tail != null) { - if (tail.m_diskAddr < entry.m_diskAddr) { - entry.m_next = tail.m_next; - entry.m_prev = tail; - tail.m_next = entry; - - break; - } - - tail = tail.m_prev; - } - - if (tail == null) { - entry.m_next = m_head; - m_head = entry; - } else if (tail.m_diskAddr == entry.m_diskAddr) { - tail.m_buf = entry.m_buf; // use updated buffer - - return; - } - - if (entry.m_next != null) { - entry.m_next.m_prev = entry; - } else { - m_tail = entry; - } - } - } - - public void flush() { - WriteEntry entry = m_head; - long addr = 0; - while (entry != null) { - entry.doWrite(m_file); - - if (addr == entry.m_diskAddr) { - throw new RuntimeException("WriteBlock.flush : *** DUPLICATE WRITE *** " + addr); - } - - addr = entry.m_diskAddr; - - entry = entry.m_next; - } - - clear(); - } - - public void clear() { - m_head = null; - m_tail = null; - m_total = 0; - } - - public boolean removeWriteToAddr(long addr) { - WriteEntry entry = m_head; - - while (entry != null) { - if (entry.m_diskAddr == addr) { - if (entry.m_prev == null) { - m_head = entry.m_next; - } else { - entry.m_prev.m_next = entry.m_next; - } - - if (entry.m_next == null) { - m_tail = entry.m_prev; - } else { - entry.m_next.m_prev = entry.m_prev; - } - - return true; - } - - entry = entry.m_next; - } - - return false; - } - - static class WriteEntry { - - WriteEntry m_prev = null; - WriteEntry m_next = null; - - long m_diskAddr; - byte[] m_buf = null; - - int m_writeCount = 0; - - WriteEntry(long diskAddr, byte[] buf, int size) { - m_diskAddr = diskAddr; - - if (size > 0) { - m_buf = new byte[size]; - System.arraycopy(buf, 0, m_buf, 0, size); - } - } - - void doWrite(RandomAccessFile file) { - if (m_buf == null) { - 
cat.warning("WriteEntry:doWrite - with null buffer"); - - return; - } - - if (m_writeCount++ > 0) { - throw new Error("Write Block written more than once: " + m_writeCount); - } - - try { - file.seek(m_diskAddr); - file.write(m_buf); - // cat.warning("at: " + m_diskAddr + ", length: " + m_buf.length); - } catch (Exception e) { - throw new RuntimeException("WriteBlock.doWrite : " + file + "-" + m_diskAddr + "-" + m_buf + "-" + e); - } - } - - public boolean equals(Object obj) { - return m_diskAddr == ((WriteEntry) obj).m_diskAddr; - } - - public int compareTo(Object obj) { - long diskAddr = ((WriteEntry) obj).m_diskAddr; - if (m_diskAddr < diskAddr) { - return -1; - } else if (m_diskAddr > diskAddr) { - return 1; - } else { - return 0; - } - } - } -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-11-11 13:41:14
|
Revision: 3929 http://bigdata.svn.sourceforge.net/bigdata/?rev=3929&view=rev Author: thompsonbry Date: 2010-11-11 13:41:06 +0000 (Thu, 11 Nov 2010) Log Message: ----------- Modified to do nothing is the backing buffer is an RWStore. The RWStore can not be shrunk on the disk since it stores the meta-allocation information at the end of the file. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2010-11-11 09:48:06 UTC (rev 3928) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2010-11-11 13:41:06 UTC (rev 3929) @@ -59,7 +59,6 @@ import com.bigdata.btree.Checkpoint; import com.bigdata.btree.IIndex; import com.bigdata.btree.IndexMetadata; -import com.bigdata.btree.IndexSegment; import com.bigdata.btree.ReadOnlyIndex; import com.bigdata.cache.ConcurrentWeakValueCache; import com.bigdata.cache.ConcurrentWeakValueCacheWithTimeout; @@ -1390,6 +1389,11 @@ * Note: The caller MUST have exclusive write access to the journal. When * the {@link ConcurrencyManager} is used, that means that the caller MUST * have an exclusive lock on the {@link WriteExecutorService}. + * <p> + * Note: The {@link BufferMode#DiskRW} does NOT support this operation. This + * is because it stores meta-allocation information at the end of the file, + * which makes it impossible to shrink the file. Therefore this method will + * return without causing the file on disk to be shrunk for the RWStore. */ public void truncate() { @@ -1400,6 +1404,16 @@ final IBufferStrategy backingBuffer = getBufferStrategy(); + switch (backingBuffer.getBufferMode()) { + case DiskRW: + /* + * Operation is not supported for the RWStore. + */ + return; + default: + break; + } + final long oldExtent = backingBuffer.getExtent(); final long newExtent = backingBuffer.getHeaderSize() + backingBuffer.getNextOffset(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
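The guard added to truncate() is simply an early return keyed off the buffer mode, since shrinking the file would cut off the RWStore's meta-allocation region at the end of the file. A stand-alone sketch of the same shape, with a hypothetical BufferMode enum standing in for the real IBufferStrategy API:

import java.io.IOException;
import java.io.RandomAccessFile;

public class TruncateGuardExample {

    enum BufferMode { Disk, DiskRW } // hypothetical stand-in for the journal's buffer modes

    /**
     * Shrinks the file to the bytes actually in use, unless the store keeps its
     * meta-allocation data at the end of the file (the RWStore case), in which
     * case truncation would destroy that data and the call is a no-op.
     */
    static void truncate(final RandomAccessFile file, final BufferMode mode,
            final long headerSize, final long nextOffset) throws IOException {
        if (mode == BufferMode.DiskRW) {
            return; // not supported: the metabits live at the end of the file
        }
        final long newExtent = headerSize + nextOffset;
        if (newExtent < file.length()) {
            file.setLength(newExtent);
        }
    }
}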
From: <mar...@us...> - 2010-11-11 09:48:12
|
Revision: 3928 http://bigdata.svn.sourceforge.net/bigdata/?rev=3928&view=rev Author: martyncutcher Date: 2010-11-11 09:48:06 +0000 (Thu, 11 Nov 2010) Log Message: ----------- add BLOB allocator stats, rationalise metaBitsSize calculation and ensure allocationLock held across BLOB registration Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java 2010-11-10 21:11:53 UTC (rev 3927) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/BlobAllocator.java 2010-11-11 09:48:06 UTC (rev 3928) @@ -40,7 +40,10 @@ private int m_sortAddr; private ArrayList m_freeList; private long m_startAddr; - // @todo javadoc. why 254? + /** + * There are 256 ints in a BlobAllocator, the first is used to provide the + * sortAddr, and the last for the checksum, leaving 254 BlobHdr addresses + */ private int m_freeSpots = 254; public BlobAllocator(final RWStore store, final int sortAddr) { @@ -60,14 +63,14 @@ return false; } - // @todo javadoc. Why is this method a NOP (other than the assert). + /** + * Should not be called directly since the PSOutputStream + * manages the blob allocations. + */ public int alloc(final RWStore store, final int size, final IAllocationContext context) { - assert size > (m_store.m_maxFixedAlloc-4); - - return 0; + throw new UnsupportedOperationException("Blob allocators do not allocate addresses directly"); } - // @todo why does this return false on all code paths? public boolean free(final int addr, final int sze) { if (sze < (m_store.m_maxFixedAlloc-4)) throw new IllegalArgumentException("Unexpected address size"); @@ -102,11 +105,10 @@ m_freeList.add(this); } + return true; } catch (IOException ioe) { throw new RuntimeException(ioe); } - - return false; } public int getFirstFixedForBlob(final int addr, final int sze) { @@ -292,7 +294,7 @@ } } - return 0; + throw new IllegalStateException("BlobAllocator unable to find free slot"); } public int getRawStartAddr() { @@ -308,7 +310,12 @@ } public void appendShortStats(final StringBuilder str, final AllocationStats[] stats) { - str.append("Index: " + m_index + ", address: " + getStartAddr() + ", BLOB\n"); + if (stats == null) { + str.append("Index: " + m_index + ", address: " + getStartAddr() + ", BLOB\n"); + } else { + stats[stats.length-1].m_filledSlots += 254 - m_freeSpots; + stats[stats.length-1].m_reservedSlots += 254; + } } public boolean isAllocated(final int offset) { Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-10 21:11:53 UTC (rev 3927) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-11-11 09:48:06 UTC (rev 3928) @@ -1945,17 +1945,15 @@ * number of metabits. 
*/ private int getRequiredMetaBitsStorage() { - int ints = 1 + m_allocSizes.length; // length prefixed alloc sizes - ints += m_metaBits.length; + int ints = cMetaHdrFields; + ints += m_allocSizes.length + m_metaBits.length; - // need to handle number of modified blocks + // add the maximum number of new metaBits storage that may be + // needed to save the current committed objects final int commitInts = ((32 + m_commitList.size()) / 32); - final int allocBlocks = (8 + commitInts)/8; - ints += 9 * allocBlocks; + final int allocBlocks = (cDefaultMetaBitsSize - 1 + commitInts)/(cDefaultMetaBitsSize-1); + ints += cDefaultMetaBitsSize * allocBlocks; - ints += 2; // for deferredFreeListAddr and size - ints += 1; // for version - return ints*4; // return as bytes } @@ -1983,7 +1981,14 @@ */ final private int cVersion = 0x0200; - final private int cMetaHdrFields = 5; // version, deferredFree(long), + /** + * MetaBits Header + * int version + * long deferredFree + * int defaultMetaBitsSize + * int length of allocation sizes + */ + final private int cMetaHdrFields = 5; /** * @see Options#META_BITS_SIZE */ @@ -2403,10 +2408,13 @@ * number of filled slots | store used */ public void showAllocators(final StringBuilder str) { - final AllocationStats[] stats = new AllocationStats[m_allocSizes.length]; - for (int i = 0; i < stats.length; i++) { + final AllocationStats[] stats = new AllocationStats[m_allocSizes.length+1]; + for (int i = 0; i < stats.length-1; i++) { stats[i] = new AllocationStats(m_allocSizes[i]*64); } + // for BLOBs + stats[stats.length-1] = new AllocationStats(0); + final Iterator<Allocator> allocs = m_allocs.iterator(); while (allocs.hasNext()) { Allocator alloc = (Allocator) allocs.next(); @@ -2437,10 +2445,9 @@ tfilled += filled; tfilledSlots += stats[i].m_filledSlots; } - for (int i = 0; i < stats.length; i++) { + for (int i = 0; i < stats.length-1; i++) { final long reserved = stats[i].m_reservedSlots * stats[i].m_blockSize; final long filled = stats[i].m_filledSlots * stats[i].m_blockSize; - str.append(padRight("" + stats[i].m_blockSize, 10)); str.append(padLeft("" + stats[i].m_filledSlots, 12) + padLeft("" + stats[i].m_reservedSlots, 12)); str.append(padLeft("" + filled, 14) + padLeft("" + reserved, 14)); @@ -2448,7 +2455,12 @@ str.append(padLeft("" + (treserved==0?0:(reserved * 100 / treserved)) + "%", 8)); str.append("\n"); } - str.append(padRight("Totals", 10)); + // lastly some BLOB stats - only interested in used/reserved slots + str.append(padRight("BLOB", 10)); + str.append(padLeft("" + stats[stats.length-1].m_filledSlots, 12) + padLeft("" + stats[stats.length-1].m_reservedSlots, 12)); + str.append("\n"); + + str.append(padRight("Totals", 10)); str.append(padLeft("" + tfilledSlots, 12)); str.append(padLeft("" + treservedSlots, 12)); str.append(padLeft("" + tfilled, 14)); @@ -2831,25 +2843,31 @@ * data from that into the passed byte array. 
*/ public int registerBlob(final int addr) { - BlobAllocator ba = null; - if (m_freeBlobs.size() > 0) { - ba = (BlobAllocator) m_freeBlobs.get(0); + m_allocationLock.lock(); + try { + BlobAllocator ba = null; + if (m_freeBlobs.size() > 0) { + ba = (BlobAllocator) m_freeBlobs.get(0); + } + if (ba == null) { + final Allocator lalloc = (Allocator) m_allocs.get(m_allocs.size() - 1); + final int psa = lalloc.getRawStartAddr(); // previous block + // start address + assert (psa - 1) > m_nextAllocation; + ba = new BlobAllocator(this, psa - 1); + ba.setFreeList(m_freeBlobs); // will add itself to the free list + ba.setIndex(m_allocs.size()); + m_allocs.add(ba); + } + + if (!m_commitList.contains(ba)) { + m_commitList.add(ba); + } + + return ba.register(addr); + } finally { + m_allocationLock.unlock(); } - if (ba == null) { - final Allocator lalloc = (Allocator) m_allocs.get(m_allocs.size()-1); - final int psa = lalloc.getRawStartAddr(); // previous block start address - assert (psa-1) > m_nextAllocation; - ba = new BlobAllocator(this, psa-1); - ba.setFreeList(m_freeBlobs); // will add itself to the free list - ba.setIndex(m_allocs.size()); - m_allocs.add(ba); - } - - if (!m_commitList.contains(ba)) { - m_commitList.add(ba); - } - - return ba.register(addr); } public void addToCommit(final Allocator allocator) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
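The registerBlob() change above wraps allocator lookup, BlobAllocator creation, and the commit-list update in the allocation lock, so the whole registration is atomic with respect to concurrent allocations. A minimal sketch of that lock-guarded pattern with placeholder types (these are not the RWStore classes):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.ReentrantLock;

public class BlobRegistrationExample {

    // Placeholder for an allocator with 254 usable blob header slots (256 ints
    // minus one for the sort address and one for the checksum).
    static class SimpleBlobAllocator {
        final List<Integer> slots = new ArrayList<Integer>();
        boolean hasFreeSlot() { return slots.size() < 254; }
        int register(final int addr) { slots.add(addr); return slots.size() - 1; }
    }

    private final ReentrantLock allocationLock = new ReentrantLock();
    private final List<SimpleBlobAllocator> allocators = new ArrayList<SimpleBlobAllocator>();
    private final List<SimpleBlobAllocator> commitList = new ArrayList<SimpleBlobAllocator>();

    // Allocator creation, registration and the commit-list update all happen
    // while the allocation lock is held, so concurrent callers cannot interleave.
    public int registerBlob(final int addr) {
        allocationLock.lock();
        try {
            SimpleBlobAllocator ba =
                    allocators.isEmpty() ? null : allocators.get(allocators.size() - 1);
            if (ba == null || !ba.hasFreeSlot()) {
                ba = new SimpleBlobAllocator();
                allocators.add(ba);
            }
            if (!commitList.contains(ba)) {
                commitList.add(ba);
            }
            return ba.register(addr);
        } finally {
            allocationLock.unlock();
        }
    }
}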
From: <mrp...@us...> - 2010-11-10 21:11:59
|
Revision: 3927 http://bigdata.svn.sourceforge.net/bigdata/?rev=3927&view=rev Author: mrpersonick Date: 2010-11-10 21:11:53 +0000 (Wed, 10 Nov 2010) Log Message: ----------- fixed the LUBM closure test to add the ontology (no bnodes version) Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java 2010-11-10 21:10:16 UTC (rev 3926) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java 2010-11-10 21:11:53 UTC (rev 3927) @@ -411,7 +411,7 @@ // And add in the ontology. final List<String> tmp = new LinkedList<String>(); -// tmp.add("bigdata-rdf/src/resources/data/lehigh/univ-bench.owl"); FIXME Ontology not loaded?!? + tmp.add("bigdata-rdf/src/resources/data/lehigh/univ-bench-modified-no-bnodes.owl"); tmp.addAll(Arrays.asList(dataFiles)); final String[] resources = tmp.toArray(new String[tmp.size()]); @@ -467,7 +467,7 @@ // And add in the ontology. final List<String> tmp = new LinkedList<String>(); -// tmp.add("bigdata-rdf/src/resources/data/lehigh/univ-bench.owl"); FIXME Ontology not loaded?!? + tmp.add("bigdata-rdf/src/resources/data/lehigh/univ-bench-modified-no-bnodes.owl"); tmp.addAll(Arrays.asList(dataFiles)); final String[] resources = tmp.toArray(new String[tmp.size()]); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
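The fix simply prepends the ontology resource to the data files discovered for the U1 data set before handing the array to the loader. The same assembly pattern shown in isolation, with illustrative paths and a plain File.list() in place of the test harness helper:

import java.io.File;
import java.io.FilenameFilter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class ResourceListExample {

    public static void main(final String[] args) {
        // discover the generated LUBM U1 data files
        final File dir = new File("bigdata-rdf/src/resources/data/lehigh/U1");
        final String[] dataFiles = dir.list(new FilenameFilter() {
            public boolean accept(final File d, final String name) {
                return name.endsWith(".owl");
            }
        });
        if (dataFiles == null) {
            throw new IllegalStateException("Missing directory: " + dir);
        }

        // prepend the ontology so the TBox is present when the closure is computed
        final List<String> tmp = new ArrayList<String>();
        tmp.add("bigdata-rdf/src/resources/data/lehigh/univ-bench-modified-no-bnodes.owl");
        tmp.addAll(Arrays.asList(dataFiles));

        final String[] resources = tmp.toArray(new String[tmp.size()]);
        System.out.println(resources.length + " resources to load");
    }
}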
From: <mrp...@us...> - 2010-11-10 21:10:22
|
Revision: 3926 http://bigdata.svn.sourceforge.net/bigdata/?rev=3926&view=rev Author: mrpersonick Date: 2010-11-10 21:10:16 +0000 (Wed, 10 Nov 2010) Log Message: ----------- modified version to eliminate bnode generation Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/resources/data/lehigh/univ-bench-modified-no-bnodes.owl Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/resources/data/lehigh/univ-bench-modified-no-bnodes.owl =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/resources/data/lehigh/univ-bench-modified-no-bnodes.owl (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/resources/data/lehigh/univ-bench-modified-no-bnodes.owl 2010-11-10 21:10:16 UTC (rev 3926) @@ -0,0 +1,488 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<rdf:RDF + xmlns = "http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#" + xml:base = "http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl" + xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" + xmlns:owl="http://www.w3.org/2002/07/owl#" +> + +<owl:Ontology rdf:about=""> + <rdfs:comment>An university ontology for benchmark tests</rdfs:comment> + <rdfs:label>Univ-bench Ontology</rdfs:label> + <owl:versionInfo>univ-bench-ontology-owl, ver April 1, 2004</owl:versionInfo> +</owl:Ontology> + +<owl:Class rdf:ID="AdministrativeStaff"> + <rdfs:label>administrative staff worker</rdfs:label> + <rdfs:subClassOf rdf:resource="#Employee" /> +</owl:Class> + +<owl:Class rdf:ID="Article"> + <rdfs:label>article</rdfs:label> + <rdfs:subClassOf rdf:resource="#Publication" /> +</owl:Class> + +<owl:Class rdf:ID="AssistantProfessor"> + <rdfs:label>assistant professor</rdfs:label> + <rdfs:subClassOf rdf:resource="#Professor" /> +</owl:Class> + +<owl:Class rdf:ID="AssociateProfessor"> + <rdfs:label>associate professor</rdfs:label> + <rdfs:subClassOf rdf:resource="#Professor" /> +</owl:Class> + +<owl:Class rdf:ID="Book"> + <rdfs:label>book</rdfs:label> + <rdfs:subClassOf rdf:resource="#Publication" /> +</owl:Class> + +<owl:Class rdf:ID="Chair"> + <rdfs:label>chair</rdfs:label> +<!-- + <owl:intersectionOf rdf:parseType="Collection"> + <owl:Class rdf:about="#Person" /> + <owl:Restriction> + <owl:onProperty rdf:resource="#headOf" /> + <owl:someValuesFrom> + <owl:Class rdf:about="#Department" /> + </owl:someValuesFrom> + </owl:Restriction> + </owl:intersectionOf> +--> + <rdfs:subClassOf rdf:resource="#Professor" /> +</owl:Class> + +<owl:Class rdf:ID="ClericalStaff"> + <rdfs:label>clerical staff worker</rdfs:label> + <rdfs:subClassOf rdf:resource="#AdministrativeStaff" /> +</owl:Class> + +<owl:Class rdf:ID="College"> + <rdfs:label>school</rdfs:label> + <rdfs:subClassOf rdf:resource="#Organization" /> +</owl:Class> + +<owl:Class rdf:ID="ConferencePaper"> + <rdfs:label>conference paper</rdfs:label> + <rdfs:subClassOf rdf:resource="#Article" /> +</owl:Class> + +<owl:Class rdf:ID="Course"> + <rdfs:label>teaching course</rdfs:label> + <rdfs:subClassOf rdf:resource="#Work" /> +</owl:Class> + +<owl:Class rdf:ID="Dean"> + <rdfs:label>dean</rdfs:label> +<!-- + <owl:intersectionOf rdf:parseType="Collection"> + <owl:Class rdf:about="#Person" /> + <owl:Restriction> + <owl:onProperty rdf:resource="#headOf" /> + <owl:someValuesFrom> + <owl:Class rdf:about="#College" /> + </owl:someValuesFrom> + </owl:Restriction> + </owl:intersectionOf> +--> + <rdfs:subClassOf rdf:resource="#Professor" /> +</owl:Class> + +<owl:Class rdf:ID="Department"> + 
<rdfs:label>university department</rdfs:label> + <rdfs:subClassOf rdf:resource="#Organization" /> +</owl:Class> + +<owl:Class rdf:ID="Director"> + <rdfs:label>director</rdfs:label> +<!-- + <owl:intersectionOf rdf:parseType="Collection"> + <owl:Class rdf:about="#Person" /> + <owl:Restriction> + <owl:onProperty rdf:resource="#headOf" /> + <owl:someValuesFrom> + <owl:Class rdf:about="#Program" /> + </owl:someValuesFrom> + </owl:Restriction> + </owl:intersectionOf> +--> +</owl:Class> + +<owl:Class rdf:ID="Employee"> + <rdfs:label>Employee</rdfs:label> +<!-- + <owl:intersectionOf rdf:parseType="Collection"> + <owl:Class rdf:about="#Person" /> + <owl:Restriction> + <owl:onProperty rdf:resource="#worksFor" /> + <owl:someValuesFrom> + <owl:Class rdf:about="#Organization" /> + </owl:someValuesFrom> + </owl:Restriction> + </owl:intersectionOf> +--> +</owl:Class> + +<owl:Class rdf:ID="Faculty"> + <rdfs:label>faculty member</rdfs:label> + <rdfs:subClassOf rdf:resource="#Employee" /> +</owl:Class> + +<owl:Class rdf:ID="FullProfessor"> + <rdfs:label>full professor</rdfs:label> + <rdfs:subClassOf rdf:resource="#Professor" /> +</owl:Class> + +<owl:Class rdf:ID="GraduateCourse"> + <rdfs:label>Graduate Level Courses</rdfs:label> + <rdfs:subClassOf rdf:resource="#Course" /> +</owl:Class> + +<owl:Class rdf:ID="GraduateStudent"> + <rdfs:label>graduate student</rdfs:label> + <rdfs:subClassOf rdf:resource="#Person" /> +<!-- + <rdfs:subClassOf> + <owl:Restriction> + <owl:onProperty rdf:resource="#takesCourse" /> + <owl:someValuesFrom> + <owl:Class rdf:about="#GraduateCourse" /> + </owl:someValuesFrom> + </owl:Restriction> + </rdfs:subClassOf> +--> +</owl:Class> + +<owl:Class rdf:ID="Institute"> + <rdfs:label>institute</rdfs:label> + <rdfs:subClassOf rdf:resource="#Organization" /> +</owl:Class> + +<owl:Class rdf:ID="JournalArticle"> + <rdfs:label>journal article</rdfs:label> + <rdfs:subClassOf rdf:resource="#Article" /> +</owl:Class> + +<owl:Class rdf:ID="Lecturer"> + <rdfs:label>lecturer</rdfs:label> + <rdfs:subClassOf rdf:resource="#Faculty" /> +</owl:Class> + +<owl:Class rdf:ID="Manual"> + <rdfs:label>manual</rdfs:label> + <rdfs:subClassOf rdf:resource="#Publication" /> +</owl:Class> + +<owl:Class rdf:ID="Organization"> + <rdfs:label>organization</rdfs:label> +</owl:Class> + +<owl:Class rdf:ID="Person"> + <rdfs:label>person</rdfs:label> +</owl:Class> + +<owl:Class rdf:ID="PostDoc"> + <rdfs:label>post doctorate</rdfs:label> + <rdfs:subClassOf rdf:resource="#Faculty" /> +</owl:Class> + +<owl:Class rdf:ID="Professor"> + <rdfs:label>professor</rdfs:label> + <rdfs:subClassOf rdf:resource="#Faculty" /> +</owl:Class> + +<owl:Class rdf:ID="Program"> + <rdfs:label>program</rdfs:label> + <rdfs:subClassOf rdf:resource="#Organization" /> +</owl:Class> + +<owl:Class rdf:ID="Publication"> + <rdfs:label>publication</rdfs:label> +</owl:Class> + +<owl:Class rdf:ID="Research"> + <rdfs:label>research work</rdfs:label> + <rdfs:subClassOf rdf:resource="#Work" /> +</owl:Class> + +<owl:Class rdf:ID="ResearchAssistant"> + <rdfs:label>university research assistant</rdfs:label> + <rdfs:subClassOf rdf:resource="#Student" /> +<!-- + <rdfs:subClassOf> + <owl:Restriction> + <owl:onProperty rdf:resource="#worksFor" /> + <owl:someValuesFrom> + <owl:Class rdf:about="#ResearchGroup" /> + </owl:someValuesFrom> + </owl:Restriction> + </rdfs:subClassOf> +--> +</owl:Class> + +<owl:Class rdf:ID="ResearchGroup"> + <rdfs:label>research group</rdfs:label> + <rdfs:subClassOf rdf:resource="#Organization" /> +</owl:Class> + +<owl:Class rdf:ID="Schedule"> 
+ <rdfs:label>schedule</rdfs:label> +</owl:Class> + +<owl:Class rdf:ID="Software"> + <rdfs:label>software program</rdfs:label> + <rdfs:subClassOf rdf:resource="#Publication" /> +</owl:Class> + +<owl:Class rdf:ID="Specification"> + <rdfs:label>published specification</rdfs:label> + <rdfs:subClassOf rdf:resource="#Publication" /> +</owl:Class> + +<owl:Class rdf:ID="Student"> + <rdfs:label>student</rdfs:label> +<!-- + <owl:intersectionOf rdf:parseType="Collection"> + <owl:Class rdf:about="#Person" /> + <owl:Restriction> + <owl:onProperty rdf:resource="#takesCourse" /> + <owl:someValuesFrom> + <owl:Class rdf:about="#Course" /> + </owl:someValuesFrom> + </owl:Restriction> + </owl:intersectionOf> +--> +</owl:Class> + +<owl:Class rdf:ID="SystemsStaff"> + <rdfs:label>systems staff worker</rdfs:label> + <rdfs:subClassOf rdf:resource="#AdministrativeStaff" /> +</owl:Class> + +<owl:Class rdf:ID="TeachingAssistant"> + <rdfs:label>university teaching assistant</rdfs:label> +<!-- + <owl:intersectionOf rdf:parseType="Collection"> + <owl:Class rdf:about="#Person" /> + <owl:Restriction> + <owl:onProperty rdf:resource="#teachingAssistantOf" /> + <owl:someValuesFrom> + <owl:Class rdf:about="#Course" /> + </owl:someValuesFrom> + </owl:Restriction> + </owl:intersectionOf> +--> +</owl:Class> + +<owl:Class rdf:ID="TechnicalReport"> + <rdfs:label>technical report</rdfs:label> + <rdfs:subClassOf rdf:resource="#Article" /> +</owl:Class> + +<owl:Class rdf:ID="UndergraduateStudent"> + <rdfs:label>undergraduate student</rdfs:label> + <rdfs:subClassOf rdf:resource="#Student" /> +</owl:Class> + +<owl:Class rdf:ID="University"> + <rdfs:label>university</rdfs:label> + <rdfs:subClassOf rdf:resource="#Organization" /> +</owl:Class> + +<owl:Class rdf:ID="UnofficialPublication"> + <rdfs:label>unnoficial publication</rdfs:label> + <rdfs:subClassOf rdf:resource="#Publication" /> +</owl:Class> + +<owl:Class rdf:ID="VisitingProfessor"> + <rdfs:label>visiting professor</rdfs:label> + <rdfs:subClassOf rdf:resource="#Professor" /> +</owl:Class> + +<owl:Class rdf:ID="Work"> + <rdfs:label>Work</rdfs:label> +</owl:Class> + +<owl:ObjectProperty rdf:ID="advisor"> + <rdfs:label>is being advised by</rdfs:label> + <rdfs:domain rdf:resource="#Person" /> + <rdfs:range rdf:resource="#Professor" /> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="affiliatedOrganizationOf"> + <rdfs:label>is affiliated with</rdfs:label> + <rdfs:domain rdf:resource="#Organization" /> + <rdfs:range rdf:resource="#Organization" /> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="affiliateOf"> + <rdfs:label>is affiliated with</rdfs:label> + <rdfs:domain rdf:resource="#Organization" /> + <rdfs:range rdf:resource="#Person" /> +</owl:ObjectProperty> + +<owl:DatatypeProperty rdf:ID="age"> + <rdfs:label>is age</rdfs:label> + <rdfs:domain rdf:resource="#Person" /> +</owl:DatatypeProperty> + +<owl:ObjectProperty rdf:ID="degreeFrom"> + <rdfs:label>has a degree from</rdfs:label> + <rdfs:domain rdf:resource="#Person" /> + <rdfs:range rdf:resource="#University" /> + <owl:inverseOf rdf:resource="#hasAlumnus"/> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="doctoralDegreeFrom"> + <rdfs:label>has a doctoral degree from</rdfs:label> + <rdfs:domain rdf:resource="#Person" /> + <rdfs:range rdf:resource="#University" /> + <rdfs:subPropertyOf rdf:resource="#degreeFrom" /> +</owl:ObjectProperty> + +<owl:DatatypeProperty rdf:ID="emailAddress"> + <rdfs:label>can be reached at</rdfs:label> + <rdfs:domain rdf:resource="#Person" /> +</owl:DatatypeProperty> + 
+<owl:ObjectProperty rdf:ID="hasAlumnus"> + <rdfs:label>has as an alumnus</rdfs:label> + <rdfs:domain rdf:resource="#University" /> + <rdfs:range rdf:resource="#Person" /> + <owl:inverseOf rdf:resource="#degreeFrom"/> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="headOf"> + <rdfs:label>is the head of</rdfs:label> + <rdfs:subPropertyOf rdf:resource="#worksFor"/> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="listedCourse"> + <rdfs:label>lists as a course</rdfs:label> + <rdfs:domain rdf:resource="#Schedule" /> + <rdfs:range rdf:resource="#Course" /> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="mastersDegreeFrom"> + <rdfs:label>has a masters degree from</rdfs:label> + <rdfs:domain rdf:resource="#Person" /> + <rdfs:range rdf:resource="#University" /> + <rdfs:subPropertyOf rdf:resource="#degreeFrom"/> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="member"> + <rdfs:label>has as a member</rdfs:label> + <rdfs:domain rdf:resource="#Organization" /> + <rdfs:range rdf:resource="#Person" /> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="memberOf"> +<rdfs:label>member of</rdfs:label> +<owl:inverseOf rdf:resource="#member" /> +</owl:ObjectProperty> + +<owl:DatatypeProperty rdf:ID="name"> +<rdfs:label>name</rdfs:label> +</owl:DatatypeProperty> + +<owl:DatatypeProperty rdf:ID="officeNumber"> + <rdfs:label>office room No.</rdfs:label> +</owl:DatatypeProperty> + +<owl:ObjectProperty rdf:ID="orgPublication"> + <rdfs:label>publishes</rdfs:label> + <rdfs:domain rdf:resource="#Organization" /> + <rdfs:range rdf:resource="#Publication" /> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="publicationAuthor"> + <rdfs:label>was written by</rdfs:label> + <rdfs:domain rdf:resource="#Publication" /> + <rdfs:range rdf:resource="#Person" /> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="publicationDate"> + <rdfs:label>was written on</rdfs:label> + <rdfs:domain rdf:resource="#Publication" /> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="publicationResearch"> + <rdfs:label>is about</rdfs:label> + <rdfs:domain rdf:resource="#Publication" /> + <rdfs:range rdf:resource="#Research" /> +</owl:ObjectProperty> + +<owl:DatatypeProperty rdf:ID="researchInterest"> + <rdfs:label>is researching</rdfs:label> +</owl:DatatypeProperty> + +<owl:ObjectProperty rdf:ID="researchProject"> + <rdfs:label>has as a research project</rdfs:label> + <rdfs:domain rdf:resource="#ResearchGroup" /> + <rdfs:range rdf:resource="#Research" /> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="softwareDocumentation"> + <rdfs:label>is documented in</rdfs:label> + <rdfs:domain rdf:resource="#Software" /> + <rdfs:range rdf:resource="#Publication" /> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="softwareVersion"> + <rdfs:label>is version</rdfs:label> + <rdfs:domain rdf:resource="#Software" /> +</owl:ObjectProperty> + +<owl:TransitiveProperty rdf:ID="subOrganizationOf"> + <rdfs:label>is part of</rdfs:label> + <rdfs:domain rdf:resource="#Organization" /> + <rdfs:range rdf:resource="#Organization" /> +</owl:TransitiveProperty> + +<owl:ObjectProperty rdf:ID="takesCourse"> + <rdfs:label>is taking</rdfs:label> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="teacherOf"> + <rdfs:label>teaches</rdfs:label> + <rdfs:domain rdf:resource="#Faculty" /> + <rdfs:range rdf:resource="#Course" /> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="teachingAssistantOf"> + <rdfs:label>is a teaching assistant for</rdfs:label> + <rdfs:domain rdf:resource="#TeachingAssistant" /> + <rdfs:range 
rdf:resource="#Course" /> +</owl:ObjectProperty> + +<owl:DatatypeProperty rdf:ID="telephone"> + <rdfs:label>telephone number</rdfs:label> + <rdfs:domain rdf:resource="#Person" /> +</owl:DatatypeProperty> + +<owl:ObjectProperty rdf:ID="tenured"> + <rdfs:label>is tenured:</rdfs:label> + <rdfs:domain rdf:resource="#Professor" /> +</owl:ObjectProperty> + +<owl:DatatypeProperty rdf:ID="title"> + <rdfs:label>title</rdfs:label> + <rdfs:domain rdf:resource="#Person" /> +</owl:DatatypeProperty> + +<owl:ObjectProperty rdf:ID="undergraduateDegreeFrom"> + <rdfs:label>has an undergraduate degree from</rdfs:label> + <rdfs:domain rdf:resource="#Person" /> + <rdfs:range rdf:resource="#University" /> + <rdfs:subPropertyOf rdf:resource="#degreeFrom"/> +</owl:ObjectProperty> + +<owl:ObjectProperty rdf:ID="worksFor"> + <rdfs:label>Works For</rdfs:label> + <rdfs:subPropertyOf rdf:resource="#memberOf" /> +</owl:ObjectProperty> + +</rdf:RDF> + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-11-10 19:41:12
|
Revision: 3925 http://bigdata.svn.sourceforge.net/bigdata/?rev=3925&view=rev Author: thompsonbry Date: 2010-11-10 19:41:05 +0000 (Wed, 10 Nov 2010) Log Message: ----------- It appears that somewhere along the line we modified things such that the LUBM ontology is not being loaded during the two unit tests which are supposed to load the LUBM U1 data set. I've modified the test harness to load the ontology as well, but now an exception is getting thrown out of the logic which verifies that the inferences were computed correctly. I suspect that one of the chunk converters is not properly triming a chunk such that it has some null elements in the array. I've commented out the lines which actually cause the ontology file to be loaded for the moment. Look for "FIXME Ontology not loaded?!?" in the source file. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java 2010-11-10 14:01:26 UTC (rev 3924) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestDatabaseAtOnceClosure.java 2010-11-10 19:41:05 UTC (rev 3925) @@ -10,6 +10,9 @@ import java.io.FilenameFilter; import java.io.InputStream; import java.net.URLEncoder; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; import java.util.Properties; import org.openrdf.model.Statement; @@ -24,6 +27,7 @@ import com.bigdata.rdf.axioms.NoAxioms; import com.bigdata.rdf.axioms.RdfsAxioms; +import com.bigdata.rdf.inf.ClosureStats; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.rio.StatementBuffer; @@ -397,14 +401,20 @@ public void test_fixedPoint_LUBM_U1_As_Full_PipelineJoins() throws Exception { - final String[] resources = readFiles(new File( - "bigdata-rdf/src/resources/data/lehigh/U1"), - new FilenameFilter() { - public boolean accept(File dir, String name) { - return name.endsWith(".owl"); - } - }); + final String[] dataFiles = readFiles(new File( + "bigdata-rdf/src/resources/data/lehigh/U1"), + new FilenameFilter() { + public boolean accept(File dir, String name) { + return name.endsWith(".owl"); + } + }); + // And add in the ontology. + final List<String> tmp = new LinkedList<String>(); +// tmp.add("bigdata-rdf/src/resources/data/lehigh/univ-bench.owl"); FIXME Ontology not loaded?!? + tmp.addAll(Arrays.asList(dataFiles)); + final String[] resources = tmp.toArray(new String[tmp.size()]); + final Properties properties = getProperties(FullClosure.class, false/* nestedSubquery */); final AbstractTripleStore store = getStore(properties); @@ -448,13 +458,19 @@ public void test_fixedPoint_LUBM_U1_As_Fast_PipelineJoins() throws Exception { - final String[] resources = readFiles(new File("bigdata-rdf/src/resources/data/lehigh/U1"), + final String[] dataFiles = readFiles(new File("bigdata-rdf/src/resources/data/lehigh/U1"), new FilenameFilter() { public boolean accept(File dir, String name) { return name.endsWith(".owl"); } }); + // And add in the ontology. + final List<String> tmp = new LinkedList<String>(); +// tmp.add("bigdata-rdf/src/resources/data/lehigh/univ-bench.owl"); FIXME Ontology not loaded?!? 
+ tmp.addAll(Arrays.asList(dataFiles)); + final String[] resources = tmp.toArray(new String[tmp.size()]); + final Properties properties = getProperties(FastClosure.class, false/* nestedSubquery */); final AbstractTripleStore store = getStore(properties); @@ -770,9 +786,13 @@ * by the proxy test case otherwise which does not give you much * control). */ - closureStore.getInferenceEngine() + final ClosureStats closureStats = closureStore.getInferenceEngine() .computeClosure(null/* focusStore */); + if(log.isInfoEnabled()) + log.info(closureStats.toString()); +// System.err.println("*** "+closureStats.toString()); + if (log.isDebugEnabled()) { log.debug("\nclosure:\n" + closureStore.dumpStore()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
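The suspected bug described in this commit, a chunk array handed downstream with trailing null elements rather than being trimmed to its fill level, is easy to illustrate. A hedged sketch of the trimming step (the method and its caller are hypothetical, not the actual chunk-converter API):

import java.util.Arrays;

public class ChunkTrimExample {

    /**
     * Returns a chunk containing exactly the first {@code used} elements.
     * Passing the raw, partially filled buffer downstream would expose
     * trailing nulls to consumers that iterate over the full array length.
     */
    static <E> E[] trimChunk(final E[] buffer, final int used) {
        if (used == buffer.length) {
            return buffer; // already exact, avoid the copy
        }
        return Arrays.copyOf(buffer, used);
    }

    public static void main(final String[] args) {
        final String[] buffer = new String[4];
        buffer[0] = "stmt1";
        buffer[1] = "stmt2"; // only two slots are filled
        final String[] chunk = trimChunk(buffer, 2);
        System.out.println(chunk.length); // prints 2; no trailing nulls
    }
}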