From: <mrp...@us...> - 2010-07-30 21:46:09
|
Revision: 3378 http://bigdata.svn.sourceforge.net/bigdata/?rev=3378&view=rev Author: mrpersonick Date: 2010-07-30 21:46:03 +0000 (Fri, 30 Jul 2010) Log Message: ----------- adding a goto for optional predicates Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOStarJoin.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/IPredicate.java 2010-07-30 20:41:35 UTC (rev 3377) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/IPredicate.java 2010-07-30 21:46:03 UTC (rev 3378) @@ -157,6 +157,69 @@ * evaluating a JOIN. */ public boolean isOptional(); + + /** + * Handling nested optional joins can be reduced to a goto logic. Tails + * that are grouped inside an "optional join" are all marked as optional, + * and if any one of them fails then join execution should resume with the + * tail immediately following the optional join in the plan order. + * <p> + * For example, the following query: + * <code> + * where { + * A . + * OPTIONAL { + * B . + * C . + * } + * D . + * } + * </code> + * <p> + * Would yield the following plan: + * <code> + * 0: A, optional=false + * 1: B, optional=true, goto=3 + * 2: C, optional=true, goto=3 + * 3: D, optional=false + * </code> + * <p> + * Double nested optionals would just continue on in the first optional + * join. For example, the following query: + * <code> + * where { + * A . + * OPTIONAL { + * B . + * OPTIONAL { C } . + * D . 
+ * } + * E . + * } + * </code> + * <p> + * Would yield the following plan: + * <code> + * 0: A, optional=false + * 1: B, optional=true, goto=4 + * 2: C, optional=true, goto=3 + * 3: D, optional=true, goto=4 + * 4: E, optional=false + * </code> + * <p> + * Alternatively, the above plan could be reordered as follows for more + * efficient execution: + * <code> + * 0: A, optional=false + * 1: B, optional=true, goto=4 + * 2: D, optional=true, goto=4 + * 3: C, optional=true, goto=4 + * 4: E, optional=false + * </code> + * + * @return + */ + public int getOptionalGoto(); /** * Returns the object that may be used to selectively override the Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/Predicate.java 2010-07-30 20:41:35 UTC (rev 3377) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/Predicate.java 2010-07-30 21:46:03 UTC (rev 3378) @@ -64,7 +64,7 @@ private final IVariableOrConstant[] values; - private final boolean optional; + private final int optionalGoto; private final IElementFilter<E> constraint; @@ -90,7 +90,7 @@ this.values = src.values.clone(); - this.optional = src.optional; + this.optionalGoto = src.optionalGoto; this.constraint = src.constraint; @@ -161,7 +161,7 @@ this.values = src.values; - this.optional = src.optional; + this.optionalGoto = src.optionalGoto; this.constraint = src.constraint; @@ -209,7 +209,7 @@ this.values = src.values; - this.optional = src.optional; + this.optionalGoto = src.optionalGoto; this.constraint = src.constraint; @@ -228,7 +228,7 @@ public Predicate(String relationName, IVariableOrConstant[] values) { this(new String[] { relationName }, -1/* partitionId */, values, - false/* optional */, null/* constraint */, null/* expander */); + -1/* optional=false */, null/* constraint */, null/* expander */); } @@ -250,7 
+250,7 @@ * Allows selective override of the predicate evaluation. */ public Predicate(String[] relationName, int partitionId, - IVariableOrConstant[] values, boolean optional, + IVariableOrConstant[] values, int optionalGoto, IElementFilter<E> constraint, ISolutionExpander<E> expander) { if (relationName == null) @@ -286,7 +286,7 @@ this.values = values; - this.optional = optional; + this.optionalGoto = optionalGoto; this.constraint = constraint; @@ -345,10 +345,16 @@ final public boolean isOptional() { - return optional; + return optionalGoto >= 0; } + final public int getOptionalGoto() { + + return optionalGoto; + + } + final public IElementFilter<E> getConstraint() { return constraint; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicPredicate.java 2010-07-30 20:41:35 UTC (rev 3377) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicPredicate.java 2010-07-30 21:46:03 UTC (rev 3378) @@ -479,6 +479,12 @@ } + final public int getOptionalGoto() { + + return -1; + + } + final public IElementFilter getConstraint() { return constraint; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2010-07-30 20:41:35 UTC (rev 3377) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2010-07-30 21:46:03 UTC (rev 3378) @@ -76,7 +76,7 @@ /** The context position MAY be <code>null</code>. 
*/ protected final IVariableOrConstant<IV> c; - protected final boolean optional; + protected final int optionalGoto; protected final IElementFilter<ISPO> constraint; @@ -134,7 +134,7 @@ final IVariableOrConstant<IV> p, final IVariableOrConstant<IV> o) { this(new String[] { relationName }, -1/* partitionId */, s, p, o, - null/* c */, false/* optional */, null/* constraint */, null/* expander */); + null/* c */, -1/* optional */, null/* constraint */, null/* expander */); } @@ -154,7 +154,7 @@ final IVariableOrConstant<IV> o, final IVariableOrConstant<IV> c) { this(new String[] { relationName }, -1/* partitionId */, s, p, o, c, - false/* optional */, null/* constraint */, null/* expander */); + -1/* optional */, null/* constraint */, null/* expander */); } @@ -173,7 +173,7 @@ final IVariableOrConstant<IV> p, final IVariableOrConstant<IV> o) { this(relationName, -1/* partitionId */, s, p, o, - null/* c */, false/* optional */, null/* constraint */, null/* expander */); + null/* c */, -1/* optional */, null/* constraint */, null/* expander */); } @@ -190,10 +190,10 @@ public SPOPredicate(final String relationName, final IVariableOrConstant<IV> s, final IVariableOrConstant<IV> p, - final IVariableOrConstant<IV> o, final boolean optional) { + final IVariableOrConstant<IV> o, final int optionalGoto) { this(new String[] { relationName }, -1/* partitionId */, s, p, o, - null/* c */, optional, null/* constraint */, null/* expander */); + null/* c */, optionalGoto, null/* constraint */, null/* expander */); } @@ -215,7 +215,7 @@ final ISolutionExpander<ISPO> expander) { this(new String[] { relationName }, -1/* partitionId */, s, p, o, - null/* c */, false/* optional */, null/* constraint */, + null/* c */, -1/* optional */, null/* constraint */, expander); } @@ -235,11 +235,11 @@ public SPOPredicate(final String relationName, final IVariableOrConstant<IV> s, final IVariableOrConstant<IV> p, - final IVariableOrConstant<IV> o, final boolean optional, + final 
IVariableOrConstant<IV> o, final int optionalGoto, final ISolutionExpander<ISPO> expander) { this(new String[] { relationName }, -1/* partitionId */, s, p, o, - null/* c */, optional, null/* constraint */, expander); + null/* c */, optionalGoto, null/* constraint */, expander); } @@ -264,7 +264,7 @@ final IVariableOrConstant<IV> p,// final IVariableOrConstant<IV> o,// final IVariableOrConstant<IV> c,// - final boolean optional, // + final int optionalGoto, // final IElementFilter<ISPO> constraint,// final ISolutionExpander<ISPO> expander// ) { @@ -303,7 +303,7 @@ this.o = o; this.c = c; // MAY be null. - this.optional = optional; + this.optionalGoto = optionalGoto; this.constraint = constraint; /// MAY be null. @@ -341,7 +341,7 @@ this.relationName = relationName; // override. - this.optional = src.optional; + this.optionalGoto = src.optionalGoto; this.constraint = src.constraint; @@ -384,7 +384,7 @@ this.o = src.o; this.c = src.c; - this.optional = src.optional; + this.optionalGoto = src.optionalGoto; this.constraint = src.constraint; @@ -406,7 +406,7 @@ this.o = src.o; this.c = src.c; - this.optional = src.optional; + this.optionalGoto = src.optionalGoto; this.constraint = src.constraint; @@ -439,7 +439,7 @@ p,// o,// c, // override. - optional, // + optionalGoto, // constraint,// expander// ); @@ -465,7 +465,7 @@ p,// o,// c, // - optional, // + optionalGoto, // tmp,// override. 
expander// ); @@ -795,7 +795,7 @@ } return new SPOPredicate(relationName, partitionId, s, p, o, c, - optional, constraint, expander); + optionalGoto, constraint, expander); } @@ -861,7 +861,7 @@ sb.append(")"); - if (optional || constraint != null || expander != null + if (isOptional() || constraint != null || expander != null || partitionId != -1) { /* @@ -872,9 +872,9 @@ sb.append("["); - if(optional) { + if(isOptional()) { if(!first) sb.append(", "); - sb.append("optional"); + sb.append("optionalGoto="+optionalGoto); first = false; } @@ -906,10 +906,16 @@ final public boolean isOptional() { - return optional; + return optionalGoto >= -1; } + final public int getOptionalGoto() { + + return optionalGoto; + + } + final public IElementFilter<ISPO> getConstraint() { return constraint; @@ -984,7 +990,7 @@ final IVariableOrConstant<IV> c) { return new SPOPredicate(relationName, partitionId, s, p, o, c, - optional, constraint, expander); + optionalGoto, constraint, expander); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOStarJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOStarJoin.java 2010-07-30 20:41:35 UTC (rev 3377) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOStarJoin.java 2010-07-30 21:46:03 UTC (rev 3378) @@ -71,7 +71,7 @@ public SPOStarJoin(final SPOPredicate pred) { this(pred.relationName, pred.partitionId, pred.s(), Var.var(), - Var.var(), pred.c(), pred.isOptional(), + Var.var(), pred.c(), pred.getOptionalGoto(), pred.getConstraint(), pred.getSolutionExpander()); } @@ -89,7 +89,7 @@ this(new String[] { relationName }, -1/* partitionId */, s, Var.var(), Var.var(), - null/* c */, false/* optional */, null/* constraint */, + null/* c */, -1/* optional */, null/* constraint */, null/* expander */); } @@ -116,12 +116,12 @@ final IVariableOrConstant<IV> p,// final IVariableOrConstant<IV> 
o,// final IVariableOrConstant<IV> c,// - final boolean optional, // + final int optionalGoto, // final IElementFilter<ISPO> constraint,// final ISolutionExpander<ISPO> expander// ) { - super(relationName, partitionId, s, p, o, c, optional, constraint, + super(relationName, partitionId, s, p, o, c, optionalGoto, constraint, expander); this.starConstraints = new LinkedList<IStarConstraint<ISPO>>(); @@ -188,7 +188,7 @@ SPOPredicate pred = super.asBound(bindingSet); SPOStarJoin starJoin = new SPOStarJoin(pred.relationName, - pred.partitionId, pred.s, pred.p, pred.o, pred.c, pred.optional, + pred.partitionId, pred.s, pred.p, pred.o, pred.c, pred.optionalGoto, pred.constraint, pred.expander); for (IStarConstraint starConstraint : starConstraints) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-08-19 21:07:28
|
Revision: 3452 http://bigdata.svn.sourceforge.net/bigdata/?rev=3452&view=rev Author: thompsonbry Date: 2010-08-19 21:07:19 +0000 (Thu, 19 Aug 2010) Log Message: ----------- I raised the getFromKey() and getToKey() methods onto IKeyOrder, defined an AbstractKeyOrder, made the various existing implementations derive from that, and modified SPOKeyOrder to override a protected inner method which handles the encoding of a single component of the key so it can do the right thing with the IVs. I moved the SPOAccessPath#init() logic into AbstractAccessPath, which makes the fromKey and toKey final; AbstractAccessPath is no longer abstract and could be renamed at some point. I have set up some unit tests in com.bigdata.bop.ap where I am starting to work through testing bop evaluation in the context of a simple relation "R" and relation element type "E" which are defined in that package in the test suite. See TestPredicateAccessPath in that package in the test suite. I would like to get this to the point where we are using the annotated bops as we plan to use them for scale up/out query. I would like to test the ability to use bops to describe filters (IElementFilter) and constraints (IConstraint) on IPredicates. I plan to add IConstraints to IPredicate. We will be renaming IPredicate#getConstraint():IElementFilter as getElementFilter() and adding an IPredicate#getConstraint():IConstraint method. We can use the IConstraint on the IPredicate to verify what used to be "rule" level constraints, and this will let us customize which constraints we verify where (if we need to do that). I also want to use an optional "variablesToKeep" annotation to do things like stripping off undesired variables (when it is not specified we pass along all variables). 
Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Union.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bop-notes.txt branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/SameVariableConstraint.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/pipeline/LocalJoinTask.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/pipeline/UnsyncLocalOutputBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/pipeline/UnsynchronizedOutputBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/IKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/eval/TestDefaultEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconKeyOrder.java 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/ISPO.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPO.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IElement.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/DistinctElementFilter.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/AbstractSampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/SampleLocalBTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/SampleLocalShard.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestDistinctBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestSortBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/E.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/EComparator.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestDistinctElementFilter.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicateAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ndx/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ndx/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ndx/TestSampleLocalBTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ndx/TestSampleLocalShard.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractSampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Distinct.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/SampleLocalBTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/SampleLocalShard.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -175,7 +175,7 @@ } - public int arity() { + final public int arity() { return args.length; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -57,8 +57,11 @@ * @param args * 
@param annotations */ - protected AbstractChunkedOrderedIteratorOp(BOp[] args, Map<String, Object> annotations) { + protected AbstractChunkedOrderedIteratorOp(final BOp[] args, + final Map<String, Object> annotations) { + super(args, annotations); + } protected int getChunkCapacity() { Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractSampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractSampleIndex.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractSampleIndex.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -1,101 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 16, 2010 - */ - -package com.bigdata.bop; - - -import com.bigdata.btree.IIndex; -import com.bigdata.relation.accesspath.IAccessPath; - -/** - * Abstract base class for sampling operator for an {@link IIndex}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param <E> - * The generic type of the elements materialized from that index. - * - * @todo Implement sample operator. E.g., sampleRange(fromKey,toKey,limit). 
This - * could be on {@link IIndex} or on {@link IAccessPath}. For a shard view, - * it must proportionally select from among the ordered components of the - * view. For a hash table it would be sample(limit) since range based - * operations are not efficient. - * <p> - * This should accept an index, not a predicate (for RDF we determine the - * index an analysis of the bound and unbound arguments on the predicate - * and always have a good index, but this is not true in the general - * case). When the index is remote, it should be executed at the remote - * index. - * - * @todo This needs to operation on element chunks, not {@link IBindingSet} - * chunks. It also may not require pipelining. - */ -abstract public class AbstractSampleIndex<E> extends AbstractPipelineOp<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * Known annotations. - */ - public interface Annotations extends BOp.Annotations { - /** - * The sample limit. - */ - String LIMIT = "limit"; - } - - protected AbstractSampleIndex(final IPredicate<E> pred, final int limit) { - - super(new BOp[] { pred }, NV.asMap(new NV[] {// - new NV(Annotations.LIMIT, Integer.valueOf(limit)) // - })); - - if (pred == null) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - } - - @SuppressWarnings("unchecked") - public IPredicate<E> pred() { - - return (IPredicate<E>) args[0]; - - } - - public int limit() { - - return (Integer) annotations.get(Annotations.LIMIT); - - } - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -31,64 +31,25 @@ import java.util.List; import java.util.Map; - /** - * An operator, such as a 
constant, variable, join, sort, filter, etc. Operators - * are organized in a tree of operators. The arity of an operator is the number - * of children declared by that operator class. The children of an operator are - * themselves operators and traversal is supported between a parent and its - * children. In addition to their arguments, operators may have a variety of - * annotations, including those specific to an operator (such as the maximum - * number of iterators for a closure operator), those shared by many operators - * (such as set of variables which are selected by a join or distributed hash - * table), or those shared by all operators (such as a cost model). + * An operator, such as a constant, variable, join, sort, filter, etc. * <p> - * Operators are mutable, thread-safe, {@link Serializable} to facilitate - * distributed computing, and {@link Cloneable} to facilitate non-destructive - * tree rewrites. + * Operators are organized in a tree of operators. The <i>arity</i> of an + * operator is the number of child operands declared by that operator class. The + * children of an operator are themselves operators. Parents reference their + * children, but back references to the parents are not maintained. * <p> - * What follows is a summary of some of the more important kinds of operations. - * For each type of operation, there may be several implementations. One common - * way in which implementations of the same operator may differ is whether they - * are designed for low-volume selective queries or high volume unselective - * queries. 
- * <dl> - * <dt>JOINs</dt> - * <dd></dd> - * <dt>Mapping binding sets across shards (key-range partitions) or nodes (hash - * partitioned)</dt> - * <dd></dd> - * <dt>Predicates and access paths</dt> - * <dd></dd> - * <dt>SORT</dt> - * <dd></dd> - * <dt>DISTINCT</dt> - * <dd></dd> - * <dt>Element filters</dt> - * <dd></dd> - * <dt>Rule constraints</dt> - * <dd></dd> - * <dt>Binding set filters (removing binding sets which are not required outside - * of some context)</dt> - * <dd></dd> - * <dt>Identifiers for sinks to which binding sets can be written and - * conditional routing of binding sets, for example based on variable value or - * type or join success or failure</dt> - * <dd></dd> - * <dt>Sequential or iterative programs.</dt> - * <dd></dd> - * <dt>Creating or destroying transient or persistent resources (graphs, tables, - * DHTs, etc). Such life cycle operators dominate the subtree within which the - * resource will be utilized.</dt> - * <dd></dd> - * <dt>Export of proxy objects, especially for query or mutation buffers.</dt> - * <dd></dd> - * </dl> + * In addition to their arguments, operators may have a variety of annotations, + * including those specific to an operator (such as the maximum number of + * iterators for a closure operator), those shared by many operators (such as + * set of variables which are selected by a join or distributed hash table), or + * those shared by all operators (such as a cost model). + * <p> + * Operators are immutable, {@link Serializable} to facilitate distributed + * computing, and {@link Cloneable} to facilitate non-destructive tree rewrites. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ - * - * @todo javadoc review and reconcile with notes. */ public interface BOp extends Cloneable, Serializable { @@ -106,79 +67,7 @@ * @return The argument. */ BOp get(int index); - -// /** -// * Bind an argument of the operation. -// * -// * @param index -// * The argument index in [0:{@link #arity()}-1]. 
-// * @param newValue -// * The bound value. -// * -// * @return A copy of the operation having the bound value for the argument. -// */ -// BOp<?> setArg(int index, BOp<?> newValue); - -// /** -// * Return the value of the named operator annotation. -// * -// * @param name -// * The annotation name. -// * -// * @return The value of the named operator annotation. -// */ -// Object getProperty(String name); - -// /** -// * Set the value of the named operator annotation. -// * -// * @param name -// * The annotation name. -// * @param newValue -// * The new value for the named annotation, -// * -// * @return The old value of the named operator annotation. -// */ -// Object setProperty(String name,Object newValue); -// /** -// * Return the type constraint on the specified argument. -// * -// * @param index -// * The argument index in [0:{@link #arity()}-1]. -// * -// * @return The type constraint on that argument. -// */ -// Class<?> getArgType(int index); - -// /** -// * The type of the values produced by the operation (Constant or variable, -// * primitive?, relation, triple store, index, file, bat, ...). -// */ -// Class<T> getResultType(); - -// /** -// * @TODO There needs to be some simple evaluation path for things such as -// * native SPARQL operations. This is currently -// * {@link IConstraint#accept(IBindingSet)}, which returns a truth -// * value. This seems quite adequate. -// */ -// boolean accept(IBindingSet bset); - -// /** -// * The #of arguments to this operation which are variables. This method does -// * not report on variables in child nodes nor on variables in attached -// * {@link IConstraint}, etc. -// */ -// int getVariableCount(); - -// /** -// * Return an iterator visiting those arguments of this operator which are -// * variables. This method does not report on variables in child nodes nor on -// * variables in attached {@link IConstraint}, etc. -// */ -// Iterator<IVariable<?>> getVariables(); - /** * The operator's arguments. 
*/ Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Distinct.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Distinct.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Distinct.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -1,150 +0,0 @@ -package com.bigdata.bop; - -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; - -import com.bigdata.btree.keys.KeyBuilder; -import com.bigdata.rdf.relation.rule.BindingSetSortKeyBuilder; -import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.relation.rule.eval.IJoinNexus; -import com.bigdata.striterator.DistinctFilter; -import com.bigdata.striterator.IChunkConverter; - -/** - * A DISTINCT operator based on a hash table. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param <E> - * - * @todo could have an implementation backed by a persistent hash map using an - * extensible hash function to automatically grow the persistence store. - * This could be a general purpose persistent hash functionality, but it - * could also operate against a temporary file when used in the context of - * a query (the backing file can be destroyed afterwards or the data can - * be simply written onto the current temporary store). - * - * @todo Consider the use of lock amortization (batching) to reduce contention - * for the backing map. Alternatively, we could accept entire blocks of - * elements from a single source at a time, which would single thread us - * through the map. Or bound the #of threads hitting the map at once, - * increase the map concurrency level, etc. - * - * @todo Reconcile with {@link IChunkConverter} and {@link DistinctFilter}. 
- */ -public class Distinct<E> -extends AbstractBOp -//extends AbstractChunkedIteratorOp<E> -//implements IElementFilter<E>, -// implements IConstraint, -// implements ChunkedIteratorOp<E> -{ - - /** - * - */ - private static final long serialVersionUID = 1L; - - public interface Annotations extends BOp.Annotations { - - String INITIAL_CAPACITY = "initialCapacity"; - - String LOAD_FACTOR = "loadFactor"; - - String CONCURRENCY_LEVEL = "concurrencyLevel"; - - } - - public Distinct(final IVariable<?>[] distinctList, final UUID masterUUID) { - - super(distinctList, NV.asMap(new NV[] { new NV(Annotations.QUERY_ID, - masterUUID), - // new NV(Annotations.BOP_ID, bopId) - })); - - if (masterUUID == null) - throw new IllegalArgumentException(); - - } - -// public Future<Void> eval(final IBigdataFederation<?> fed, -// final IJoinNexus joinNexus, final IBlockingBuffer<E[]> buffer) { -// -// final FutureTask<Void> ft = new FutureTask<Void>(new DHTTask(joinNexus, -// buffer)); -// -// joinNexus.getIndexManager().getExecutorService().execute(ft); -// -// return ft; -// -// } - - /** - * Task executing on the node. - */ - private class DHTTask implements Callable<Void> { - - private final IJoinNexus joinNexus; - - private final IBlockingBuffer<E[]> buffer; - - private final ConcurrentHashMap<byte[], Void> map; - - /* Note: This is NOT thread safe! */ - private final BindingSetSortKeyBuilder sortKeyBuilder; - - DHTTask(final IJoinNexus joinNexus, - final IBlockingBuffer<E[]> buffer) { - - this.joinNexus = joinNexus; - - this.buffer = buffer; - - final IVariable<?>[] vars = ((BOpList) args[0/* distinctList */]) - .toArray(new IVariable[0]); - - this.sortKeyBuilder = new BindingSetSortKeyBuilder(KeyBuilder - .newInstance(), vars); - - this.map = new ConcurrentHashMap<byte[], Void>(/* - * @todo initialCapacity using annotations - * @todo loadFactor ... - * @todo concurrencyLevel ... 
- */); - } - - private boolean accept(final IBindingSet bset) { - - return map.putIfAbsent(sortKeyBuilder.getSortKey(bset), null) == null; - - } - - public Void call() throws Exception { - - /* - * FIXME Setup to drain binding sets from the source. Note that the - * sort key builder is not thread safe, so a pool of key builders - * with a non-default initial capacity (LT 1024) might be used to - * allow higher concurrency for key building. - * - * Alternatively, the caller could generate the keys (SOUNDS GOOD) - * and just ship the byte[] keys to the DHTFilter. - * - * The DHTFilter needs to send back its boolean[] responses bit - * coded or run length coded. See AbstractArrayIndexProcedure which - * already does some of that. Those responses should move through - * NIO Buffers just like everything else, but the response will be - * much smaller than the incoming byte[][] (aka IRaba). - */ - throw new UnsupportedOperationException(); - - } - - } - - // public ResultBitBuffer bulkFilter(final K[] elements) { - // - // } - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -155,7 +155,7 @@ public IBindingSet clone(); /** - * Return a shallow copy of the binding set, eliminating unecessary + * Return a shallow copy of the binding set, eliminating unnecessary * variables. 
*/ public IBindingSet copy(IVariable[] variablesToKeep); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IElement.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IElement.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IElement.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -0,0 +1,59 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 19, 2010 + */ + +package com.bigdata.bop; + +import com.bigdata.relation.rule.eval.IJoinNexus; + +/** + * An interface for exposing the data in an object view of a tuple by index + * position. This facilitates binding values elements read from an access path + * onto binding sets during join processing. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * + * @see IPredicate#get(Object, int) + * @see IJoinNexus#bind(IPredicate, IConstraint, Object, IBindingSet) + */ +public interface IElement { + + /** + * Return the value at the specified index. + * + * @param index + * The index. + * + * @return The value at the specified index. 
+ * + * @throws IllegalArgumentException + * if the index is less than zero or GTE the #of fields defined + * for the element. + */ + public Object get(int index); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IElement.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -298,7 +298,7 @@ * this method in the context of the "schema" imposed by the predicate. * * @param e - * The element. + * The element, which must implement {@link IElement}. * @param index * The index. * @@ -307,9 +307,12 @@ * @throws UnsupportedOperationException * If this operation is not supported by the {@link IPredicate} * implementation or for the given element type. + * + * @deprecated by {@link IElement#get(int)} which does exactly what this + * method is trying to do. */ public IConstant<?> get(E e, int index); - + /** * A copy of this {@link IPredicate} in which zero or more variables have * been bound to constants using the given {@link IBindingSet}. @@ -317,6 +320,27 @@ public IPredicate<E> asBound(IBindingSet bindingSet); /** + * Extract the as bound value from the predicate. When the predicate is not + * bound at that index, the value of the variable is taken from the binding + * set. + * + * @param index + * The index into that predicate. + * @param bindingSet + * The binding set. 
+ * + * @return The bound value -or- <code>null</code> if no binding is available + * (the predicate is not bound at that index and the variable at + * that index in the predicate is not bound in the binding set). + * + * @throws IndexOutOfBoundsException + * unless the <i>index</i> is in [0:{@link #arity()-1], inclusive. + * @throws IllegalArgumentException + * if the <i>bindingSet</i> is <code>null</code>. + */ + public Object asBound(int index, IBindingSet bindingSet); + + /** * A copy of this {@link IPredicate} in which the <i>relationName</i>(s) * replace the existing set of relation name(s). * Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -129,7 +129,7 @@ * the {@link IJoinNexus#getIndexManager()} returns the * {@link IBigdataFederation} since each read would use RMI. This * condition should be checked by the operator implementation. - * @param buffer + * @param sink * Where to write the output of the operator. * * @return The {@link Future} for the operator's evaluation. @@ -137,6 +137,6 @@ * @todo return the execution statistics here? Return Void? 
*/ Future<Void> eval(IBigdataFederation<?> fed, IJoinNexus joinNexus, - IBlockingBuffer<E[]> buffer); + IBlockingBuffer<E[]> sink); } Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/SampleLocalBTree.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/SampleLocalBTree.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/SampleLocalBTree.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -1,103 +0,0 @@ -package com.bigdata.bop; - -import java.util.concurrent.Callable; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; - -import com.bigdata.btree.AbstractBTree; -import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.relation.rule.eval.IJoinNexus; -import com.bigdata.service.IBigdataFederation; - -/** - * Sampling operator for an {@link AbstractBTree}. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ -public class SampleLocalBTree<E> extends AbstractSampleIndex<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public SampleLocalBTree(final IPredicate<E> pred, final int limit) { - - super(pred, limit); - - } - - public Future<Void> eval(final IBigdataFederation<?> fed, - final IJoinNexus joinNexus, final IBlockingBuffer<E[]> buffer) { - - if (pred().getPartitionId() != -1) { - // Must not be specific to a shard. - throw new UnsupportedOperationException(); - } - - final FutureTask<Void> ft = new FutureTask<Void>(new LocalBTreeSampleTask( - joinNexus, buffer)); - - joinNexus.getIndexManager().getExecutorService().execute(ft); - - return ft; - - } - - /** - * Sample an {@link AbstractBTree}. 
- */ - private class LocalBTreeSampleTask implements - Callable<Void> { - - private final IJoinNexus joinNexus; - - private final IBlockingBuffer<E[]> buffer; - - LocalBTreeSampleTask(final IJoinNexus joinNexus, - final IBlockingBuffer<E[]> buffer) { - - this.joinNexus = joinNexus; - - this.buffer = buffer; - - } - - public Void call() throws Exception { - - /* - * FIXME Decide how we are going to resolve the appropriate index - * for the predicate. This could go through - * IJoinNexus.getTailRelationView() and - * IJoinNexus.getTailAccessPath(). Those are just going through the - * locator. Review how the actual access path is selected versus the - * IKeyOrder specified on the IPredicate. If the IKeyOrder of - * interest is on the IPredicate, then why not just use that? - */ - -// final IPredicate<E> pred = pred(); -// -// final String relationName = pred.getOnlyRelationName(); -// -// final IRelation<E> rel = (IRelation<E>) joinNexus.getIndexManager() -// .getResourceLocator().locate(relationName, -// joinNexus.getReadTimestamp()); -// -// final IAccessPath<E> accessPath = rel.getAccessPath(pred); - - /* - * FIXME Sample N randomly chosen indices or evenly selected? - * - * Note: If there are only 100 leaves and we sample evenly, that - * could result in reading all the leaves. However, when the - * B+Tree is large we will only touch a few leaves even with - * uniform sampling. 
- */ - throw new UnsupportedOperationException(); - - } - - } // class LocalBTreeSampleTask - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/SampleLocalShard.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/SampleLocalShard.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/SampleLocalShard.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -1,94 +0,0 @@ -package com.bigdata.bop; - -import java.util.concurrent.Callable; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; - -import com.bigdata.btree.AbstractBTree; -import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.IAccessPath; -import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.relation.rule.eval.IJoinNexus; -import com.bigdata.service.IBigdataFederation; - -/** - * Sampling operator for a shard view. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ -public class SampleLocalShard<E> extends AbstractSampleIndex<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public SampleLocalShard(final IPredicate<E> pred, final int limit) { - - super(pred,limit); - - } - - /* - * Note: This is done at evaluation time, local to the data. - */ - public Future<Void> eval(final IBigdataFederation<?> fed, - final IJoinNexus joinNexus, final IBlockingBuffer<E[]> buffer) { - - if (pred().getPartitionId() == -1) { - // Must be specific to a shard. - throw new UnsupportedOperationException(); - } - - final FutureTask<Void> ft = new FutureTask<Void>(new LocalShardSampleTask( - joinNexus, buffer)); - - joinNexus.getIndexManager().getExecutorService().execute(ft); - - return ft; - - } - - /** - * Sample an {@link AbstractBTree}. 
- */ - private class LocalShardSampleTask implements Callable<Void> { - - private final IJoinNexus joinNexus; - private final IBlockingBuffer<E[]> buffer; - - LocalShardSampleTask(final IJoinNexus joinNexus, - final IBlockingBuffer<E[]> buffer) { - - this.joinNexus = joinNexus; - - this.buffer = buffer; - - } - - public Void call() throws Exception { - - final IPredicate<E> pred = pred(); - - final IRelation<E> view = joinNexus.getTailRelationView(pred); - - final IAccessPath<E> accessPath = view.getAccessPath(pred); - - /* - * FIXME Sample N tuples based on a uniform offset distribution, - * discarding duplicates or tuples which are deleted in their - * most recent revision. - * - * Note: If there are only 100 leaves and we sample evenly, that - * could result in reading all the leaves. However, when the - * B+Tree is large we will only touch a few leaves even with - * uniform sampling. - */ - throw new UnsupportedOperationException(); - - } - - } // class LocalShardSampleTask - -} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html 2010-08-19 21:07:19 UTC (rev 3452) @@ -0,0 +1,17 @@ +<html> +<head> +<title>distinct, sort, and aggregation operators</title> +</head> +<body> + +<p> + + This package provides distinct, sort, and aggregation operators. All of + these are potentially high volume hash partitioned operations against a + clustered database. Both in memory and disk based versions of the each + operator should be implemented. 
+ +</p> + +</body> +</html> \ No newline at end of file Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/DistinctElementFilter.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/DistinctElementFilter.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/DistinctElementFilter.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -0,0 +1,159 @@ +package com.bigdata.bop.ap; + +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; + +import com.bigdata.bop.AbstractBOp; +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpList; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.NV; +import com.bigdata.btree.keys.KeyBuilder; +import com.bigdata.rdf.relation.rule.BindingSetSortKeyBuilder; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.relation.rule.eval.IJoinNexus; +import com.bigdata.relation.rule.eval.ISolution; +import com.bigdata.striterator.DistinctFilter; +import com.bigdata.striterator.IChunkConverter; +import com.bigdata.striterator.MergeFilter; + +/** + * A DISTINCT operator based on a hash table. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * @param <E> + * + * @todo could have an implementation backed by a persistent hash map using an + * extensible hash function to automatically grow the persistence store. 
+ * This could be a general purpose persistent hash functionality, but it + * could also operate against a temporary file when used in the context of + * a query (the backing file can be destroyed afterwards or the data can + * be simply written onto the current temporary store). + * + * @todo Consider the use of lock amortization (batching) to reduce contention + * for the backing map. Alternatively, we could accept entire blocks of + * elements from a single source at a time, which would single thread us + * through the map. Or bound the #of threads hitting the map at once, + * increase the map concurrency level, etc. + * + * @todo Reconcile with {@link IChunkConverter}, {@link DistinctFilter} (handles + * solutions) and {@link MergeFilter} (handles comparables). + */ +public class DistinctElementFilter<E> +extends AbstractBOp +//extends AbstractChunkedIteratorOp<E> +//implements IElementFilter<E>, +// implements IConstraint, +// implements ChunkedIteratorOp<E> +{ + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends BOp.Annotations { + + String INITIAL_CAPACITY = "initialCapacity"; + + String LOAD_FACTOR = "loadFactor"; + + String CONCURRENCY_LEVEL = "concurrencyLevel"; + + } + + public DistinctElementFilter(final IVariable<?>[] distinctList, final UUID masterUUID) { + + super(distinctList, NV.asMap(new NV[] { new NV(Annotations.QUERY_ID, + masterUUID), + // new NV(Annotations.BOP_ID, bopId) + })); + + if (masterUUID == null) + throw new IllegalArgumentException(); + + } + +// public Future<Void> eval(final IBigdataFederation<?> fed, +// final IJoinNexus joinNexus, final IBlockingBuffer<E[]> buffer) { +// +// final FutureTask<Void> ft = new FutureTask<Void>(new DHTTask(joinNexus, +// buffer)); +// +// joinNexus.getIndexManager().getExecutorService().execute(ft); +// +// return ft; +// +// } + + /** + * Task executing on the node. 
+ */ + private class DHTTask implements Callable<Void> { + + private final IJoinNexus joinNexus; + + private final IBlockingBuffer<E[]> buffer; + + private final ConcurrentHashMap<byte[], Void> map; + + /* Note: This is NOT thread safe! */ + private final BindingSetSortKeyBuilder sortKeyBuilder; + + DHTTask(final IJoinNexus joinNexus, + final IBlockingBuffer<E[]> buffer) { + + this.joinNexus = joinNexus; + + this.buffer = buffer; + + final IVariable<?>[] vars = ((BOpList) args[0/* distinctList */]) + .toArray(new IVariable[0]); + + this.sortKeyBuilder = new BindingSetSortKeyBuilder(KeyBuilder + .newInstance(), vars); + + this.map = new ConcurrentHashMap<byte[], Void>(/* + * @todo initialCapacity using annotations + * @todo loadFactor ... + * @todo concurrencyLevel ... + */); + } + + private boolean accept(final IBindingSet bset) { + + return map.putIfAbsent(sortKeyBuilder.getSortKey(bset), null) == null; + + } + + public Void call() throws Exception { + + /* + * FIXME Setup to drain binding sets from the source. Note that the + * sort key builder is not thread safe, so a pool of key builders + * with a non-default initial capacity (LT 1024) might be used to + * allow higher concurrency for key building. + * + * Alternatively, the caller could generate the keys (SOUNDS GOOD) + * and just ship the byte[] keys to the DHTFilter. + * + * The DHTFilter needs to send back its boolean[] responses bit + * coded or run length coded. See AbstractArrayIndexProcedure which + * already does some of that. Those responses should move through + * NIO Buffers just like everything else, but the response will be + * much smaller than the incoming byte[][] (aka IRaba). 
+ */ + throw new UnsupportedOperationException(); + + } + + } + + // public ResultBitBuffer bulkFilter(final K[] elements) { + // + // } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/DistinctElementFilter.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -28,9 +28,13 @@ package com.bigdata.bop.ap; +import java.util.Map; + import com.bigdata.bop.AbstractChunkedOrderedIteratorOp; +import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; +import com.bigdata.bop.IElement; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; @@ -148,19 +152,17 @@ } + @SuppressWarnings("unchecked") public IVariableOrConstant get(final int index) { return (IVariableOrConstant<?>) args[index]; } - /** - * @todo there is no general means available to implement this method of an - * awareness of the internal structure of the element type. 
- */ + @SuppressWarnings("unchecked") public IConstant<?> get(final E e, final int index) { - throw new UnsupportedOperationException(); + return new Constant(((IElement) e).get(index)); } @@ -295,8 +297,30 @@ } + public Object asBound(final int index, final IBindingSet bindingSet) { + + if (bindingSet == null) + throw new IllegalArgumentException(); + + final IVariableOrConstant<?> t = get(index); + + final IConstant<?> c; + if (t.isVar()) { + + c = bindingSet.get((IVariable<?>) t); + + } else { + + c = (IConstant<?>) t; + + } + + return c == null ? null : c.get(); + + } + public Predicate<E> setRelationName(final String[] relationName) { - + throw new UnsupportedOperationException(); // return new Predicate<E>(this, relationName); @@ -348,65 +372,83 @@ sb.append("("); - sb.append(getOnlyRelationName()); - for (int i = 0; i < args.length; i++) { -// if (i > 0) + if (i > 0) sb.append(", "); final IVariableOrConstant<?> v = get(i); - sb.append(v.isConstant() || bindingSet == null - || !bindingSet.isBound((IVariable<?>) v) ? v.toString() - : bindingSet.get((IVariable<?>) v)); + sb.append(v.isConstant() ? v.toString() + : (v + "=" + (bindingSet == null ? null : bindingSet + .get((IVariable<?>) v)))); } sb.append(")"); - if (isOptional() || getConstraint() != null - || getSolutionExpander() != null || getPartitionId() != -1) { - - /* - * Something special, so do all this stuff. 
- */ - + if (!annotations.isEmpty()) { + sb.append("["); boolean first = true; - - sb.append("["); - - if (isOptional()) { + for (Map.Entry<String, Object> e : annotations.entrySet()) { if (!first) sb.append(", "); - sb.append("optional"); + sb.append(e.getKey() + "=" + e.getValue()); first = false; } - - if (getConstraint() != null) { - if (!first) - sb.append(", "); - sb.append(getConstraint().toString()); - first = false; - } - - if (getSolutionExpander() != null) { - if (!first) - sb.append(", "); - sb.append(getSolutionExpander().toString()); - first = false; - } - - if (getPartitionId() != -1) { - if (!first) - sb.append(", "); - sb.append("partitionId=" + getPartitionId()); - first = false; - } - sb.append("]"); - } + +// final String relationName = getOnlyRelationName(); +// final boolean optional = isOptional(); +// final IElementFilter<E> constraint = getConstraint(); +// final ISolutionExpander<E> solutionExpander = getSolutionExpander(); +// final int partitionId = getPartitionId(); +// +// if (optional || constraint != null || solutionExpander != null +// || partitionId != -1) { +// +// /* +// * Something special, so do all this stuff. 
+// */ +// +// boolean first = true; +// +// sb.append("["); +// +// sb.append(getOnlyRelationName()); +// +// if (isOptional()) { +// if (!first) +// sb.append(", "); +// sb.append("optional"); +// first = false; +// } +// +// if (getConstraint() != null) { +// if (!first) +// sb.append(", "); +// sb.append(getConstraint().toString()); +// first = false; +// } +// +// if (getSolutionExpander() != null) { +// if (!first) +// sb.append(", "); +// sb.append(getSolutionExpander().toString()); +// first = false; +// } +// +// if (getPartitionId() != -1) { +// if (!first) +// sb.append(", "); +// sb.append("partitionId=" + getPartitionId()); +// first = false; +// } +// +// sb.append("]"); +// +// } return sb.toString(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Union.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Union.java 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Union.java 2010-08-19 21:07:19 UTC (rev 3452) @@ -53,6 +53,24 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * + * @todo I have some basic questions about the ability to use a UNION of two + * predicates in scale-out. I think that this might be more accurately + * modeled as the UNION of two joins. That is, rather than: + * + * <pre> + * JOIN( ..., + * UNION( foo.spo(A,loves,B), + * bar.spo(A,loves,B) ) + * ) + * </pre> + * using + * <pre> + * UNION( JOIN( ..., foo.spo(A,loves,B) ), + * JOIN( ..., bar.spo(A,loves,B) ) + * ) + * </pre> + * which would be a binding set union rather than an element union. + * * @todo This was historically handled by {@link RelationFusedView} which should * be removed when this class is implemented. * @@ -68,7 +86,7 @@ * @todo A similar operator could be defined where child operands to execute * concurrently and the result is no longer strongly ordered. 
* - * FIXME Implement the send/receive pattern. + * @todo Implement the send/receive pattern. * <p> * This COULD be done using {@link IRemoteChunkedIterator} if the send and * receive operators are appropriately decorated in order to pass the Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bop-notes.txt =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bop-notes.txt 2010-08-19 20:56:32 UTC (rev 3451) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bop-notes.txt 2010-08-19 21:07:19 UTC (rev 3452) @@ -1,3 +1,11 @@ +- Add IElement interface with Object:get(int index) to pull off the + fields from the element by index position. Use this to make + RDFJoinNexus#bind() and copyValues() generic. We can just do a cast + to IElement, but we could also change the generic type constraint on + IRelation from <E> to IRelation<E extends IElement>. But we can + just force the cast for now and not have to update all of those + generic constraints. + - Review annotation names and defaults. Make sure that the annotation names are all in appropriate namespaces. The namespaces should probably be the interface or class of the operator which defines @@ -93,12 +101,174 @@ .IOpN ..IPredicate(name,arg...)[shardId:int;optional:boolean;constraint:IConstraint[],expander] +- Distributed query execution pattern: + + The historical pipeline join propagated evaluation from left to + right. This needs to be revisited in now that we are dealing with + operator trees. Operator trees lend themselves naturally to top-down + evaluation. While I think that we can do top-down evaluation of the + operator tree for scaleup, the distributed query execution logic is + more complicated and top-down evaluation is not compatible with + distributed evaluation because joins must run for different shards + based on the partition identifier associated with each bindingSet[] + they receive. 
+ + What we have been doing is pushing binding sets along propagating joins + at specific shards onto nodes together with those binding sets. + This was a left-to-right evaluation strategy when the IRule was just + an ordered array of tails on which we needed to read. However, now + that we have an operator tree it would correspond to a bottom up + evaluation of a left-deep tree where the right operands were always + access path reads. That makes my head hurt just a bit when I + consider that the bottom up evaluation would also be "partial" as + binding sets appear. + + For a given incoming IBindingSet[] chunk we will continue to do + exactly what we have been doing, but the surrounding logic needs to + be replaced. Starting a "join" (at least for pipelined scale-out) + needs to merely register a join task factory that will handle + binding sets as they arrive. + + I am thinking that the way to handle this is to send the query + identifier, join operation identifier, and partition identifier + along with the client's proxy object, the sender's operator + identifier, and the sender's service identifier. If the node (a + data service) which receives that RMI message has not seen the query + yet it uses RMI against the client's proxy to fetch the query and + then "starts" the query on that node. Starting a query would merely + allow execution of the operators described in the query plan on the + node once they had sufficient data to run. The current pipeline + join is a chunk-wise nested index join. It runs one pass each time + it has a chunk of binding sets for some shard. Query termination + would be controlled by the client. It would instruct all nodes + known to be running the query to halt execution for that query. + Even if a node misses that message, it will find out when it tries + to send intermediate results to another node that the query was + cancelled. + + Per shard locks, tasks, and threads. 
+ + When running within a single Journal, the query plan is executed by + one task which holds all of the necessary locks. Those locks are + acquired up front by an inspection of the query plan to determine + which indices are needed [actually, it may just be using the + unisolated read/write index and otherwise historical views w/o + locks.] + + There are issues relating to execution of the joins under the + concurrency manager, both in order to have access to the correct + IIndexManager and in order to manage reads and writes against the + unisolated indices by acquiring the appropriate locks. The way + things work right now the upstream join tests a cache for the + downstream join task for a given shard. If there is a cache miss, + it sends a factory task which uses a singleton pattern to start a + join task executing with the appropriate locks under the concurrency + manager and then returns the proxy for that join task to the caller. + This guarantees that each join task has the correct locks, but it + does so at the expense of running one thread per join task. It will + be difficult to get around this one task per shard per join + constraint without going directly to the lock manager with the shard + lock requests. [Or just submitting a per binding set chunk task to + the ConcurrencyManager, which might not be that bad if the chunks + are chunky.] + + ==> Given this, maybe it would be easiest to adapt the current join + execution to allow optional gotos by paying close attention to the + termination condition for the query? We could then refactor to + support BOPs within the same general control logic. A DISTINCT + filter could be yet another custom RMI thing layered directly into + the join logic. + + Rather than defining an eval() method for each operator, we have + standoff interpretation of the pipeline operators (whether for + binding sets, elements, or solutions). 
The query plan could + encapsulate the local versus distributed execution with annotations + on the operators rather than interposing operators and those + annotations would be used to wrap the sink with one which marshals + the outputs onto NIO buffers. + + - Pipelined chunk-wise nested index join. This is the existing + join algorithm. For each binding set chunk received on a node to + be joined with a given shard, we execute that chunk wise join and + emit the intermediate results. [When the join is optional, we + have an optional target and we send the binding sets which do not + join to that optional target.] + + - multi-block io pipelined join. This is a version of the pipelined + chunk-wise nested index join which accumulates much larger chunks + (mega chunks) of binding sets (potentially all intermediate + results) and then computes the join of that using the + operator-at-a-time approach for that mega chunk. The tradeoff + between this join and the pure operator at a time join is that we + can keep the intermediate results off the disk using this + approach but we may have to read the shard multiple times. + + - operator-at-a-time shard wise multi-block-io join. This join + proceeds an operator at a time. Once the producer is done, it + computes the entire join using the intermediate results from the + prior join and a single multi-block IO pass over the shard view. + + A tuple read from the shard joins if there exists a binding set + which is consistent with that tuple. For example, given: + + :- ..., POS(A loves B), SPO(B loves C). + + and the following intermediate results from the POS shard: + + B0:[A=John, B=Mary, ...] + B1:[A=Mary, B=Paul, ...] + B2:[A=Paul, B=Leon, ...] + B3:[A=Leon, B=Paul, ...] + + and the following tuples read from the SPO shard: + + T0:(John loves Mary) + T1:(Mary loves Paul) + T2:(Paul loves Leon) + T3:(Leon loves Paul) + + then we have the following joins: + + (T2, B3) // T2:(Paul loves Leon) with B3:[A=Leon, B=Paul, ...]. 
+ (T3, B2) // T3:(Leon loves Paul) with B2:[A=Paul, B=Leon, ...]. + + There are several ways to manipulate the intermediate results to + setup the join: + + Merge join: Merge sort the binding sets based on the relevant + bound values (A,B) and do an ordered scan of the binding sets and + the shard, emitting results which join. + + Hash join:... [truncated message content] |
From: <tho...@us...> - 2010-08-20 19:00:53
|
Revision: 3454 http://bigdata.svn.sourceforge.net/bigdata/?rev=3454&view=rev Author: thompsonbry Date: 2010-08-20 19:00:43 +0000 (Fri, 20 Aug 2010) Log Message: ----------- more simplification of access paths. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ChunkedOrderedIteratorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin2.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/view/FusedView.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPathFusedView.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/NestedSubqueryWithJoinThreadsTask.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/RuleState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/pipeline/JoinTask.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/E.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicateAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/locator/TestDefaultResourceLocator.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/eval/TestDefaultEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/TempMagicStore.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/relation/rule/BindingSetSortKeyBuilder.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexusFactory.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/magic/SimpleClosure.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/magic/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/magic/TestIRIS.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/magic/TestMagicKeyOrderStrategy.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/magic/TestMagicStore.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPOAccessPath.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexusFactory.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/MockJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/MockJoinNexusFactory.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicAccessPath.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ChunkedOrderedIteratorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ChunkedOrderedIteratorOp.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ChunkedOrderedIteratorOp.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -3,7 +3,7 @@ import com.bigdata.btree.ILocalBTreeView; import com.bigdata.journal.IIndexManager; import com.bigdata.rawstore.Bytes; -import com.bigdata.relation.accesspath.AbstractAccessPath; +import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IBuffer; @@ -74,12 +74,12 @@ /** * If the estimated rangeCount for an - * {@link AbstractAccessPath#iterator()} is LTE this threshold then use + * {@link AccessPath#iterator()} is LTE this threshold then use * a fully buffered (synchronous) iterator. Otherwise use an * asynchronous iterator whose capacity is governed by * {@link #CHUNK_OF_CHUNKS_CAPACITY}. 
*/ - String FULLY_BUFFERED_READ_THRESHOLD = AbstractAccessPath.class + String FULLY_BUFFERED_READ_THRESHOLD = AccessPath.class .getName() + ".fullyBufferedReadThreadshold"; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -34,7 +34,7 @@ import com.bigdata.mdi.PartitionLocator; import com.bigdata.relation.IMutableRelation; import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.AbstractAccessPath; +import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.rule.IRule; @@ -146,7 +146,7 @@ * <p> * Note: The ability to specify an index partition identifier for a * predicate is provided in support of scale-out JOIN strategies. The - * {@link AbstractAccessPath} and the {@link JoinMasterTask} are both aware + * {@link AccessPath} and the {@link JoinMasterTask} are both aware * of this property. The {@link JoinMasterTask} sets the partition * identifier in order to request an access path backed by the name of the * local index object on a {@link DataService} rather than the name of the @@ -167,7 +167,7 @@ * predicate is not locked to a specific index partition. 
* * @see PartitionLocator - * @see AbstractAccessPath + * @see AccessPath * @see JoinMasterTask */ public int getPartitionId(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -527,6 +527,9 @@ final IRelation<E> relation = joinNexus .getTailRelationView(this/* predicate */); + if (relation == null) + throw new RuntimeException("Not found: " + getOnlyRelationName()); + return joinNexus.getTailAccessPath(relation, this/* predicate */) .iterator(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -70,7 +70,7 @@ import com.bigdata.journal.ITx; import com.bigdata.rdf.spo.SPOKeyOrder; import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.AbstractAccessPath; +import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.BufferClosedException; @@ -1327,7 +1327,7 @@ protected void reorderTasks(final AccessPathTask[] tasks) { // @todo layered access paths do not expose a fromKey. - if (tasks[0].accessPath instanceof AbstractAccessPath<?>) { + if (tasks[0].accessPath instanceof AccessPath<?>) { // reorder the tasks. 
Arrays.sort(tasks); @@ -1477,7 +1477,7 @@ */ protected byte[] getFromKey() { - return ((AbstractAccessPath<?>) accessPath).getFromKey(); + return ((AccessPath<?>) accessPath).getFromKey(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin2.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin2.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin2.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -65,7 +65,7 @@ import com.bigdata.journal.IJournal; import com.bigdata.rdf.spo.SPOKeyOrder; import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.AbstractAccessPath; +import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.BufferClosedException; @@ -621,6 +621,7 @@ this.variablesToKeep = joinOp.variablesToKeep(); this.joinNexus = joinNexus; this.relation = joinNexus.getTailRelationView(right); + this.sink = sink; this.partitionId = -1; /* * FIXME The partition identifier probably * needs to be passed in at evaluation time @@ -975,7 +976,7 @@ try { - sinkFuture.get(); +// sinkFuture.get(); FIXME control logic } catch (Throwable t) { @@ -995,7 +996,7 @@ sink.reset(); - sinkFuture.cancel(true/* mayInterruptIfRunning */); +// sinkFuture.cancel(true/* mayInterruptIfRunning */); FIXME Control logic. } @@ -1302,7 +1303,7 @@ protected void reorderTasks(final AccessPathTask[] tasks) { // @todo layered access paths do not expose a fromKey. - if (tasks[0].accessPath instanceof AbstractAccessPath<?>) { + if (tasks[0].accessPath instanceof AccessPath<?>) { // reorder the tasks. 
Arrays.sort(tasks); @@ -1453,7 +1454,7 @@ */ protected byte[] getFromKey() { - return ((AbstractAccessPath<?>) accessPath).getFromKey(); + return ((AccessPath<?>) accessPath).getFromKey(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/view/FusedView.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/view/FusedView.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/view/FusedView.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -64,7 +64,7 @@ import com.bigdata.counters.ICounterSet; import com.bigdata.mdi.IResourceMetadata; import com.bigdata.mdi.LocalPartitionMetadata; -import com.bigdata.relation.accesspath.AbstractAccessPath; +import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.service.MetadataService; import com.bigdata.service.Split; @@ -1457,7 +1457,7 @@ * index (if it exists). Normally false positives will be reported * directly to the specific bloom filter instance by the contains() or * lookup() method for that index. However, the - * {@link AbstractAccessPath} also tests the bloom filter and needs a + * {@link AccessPath} also tests the bloom filter and needs a * means to report false positives. It should be the only one that calls * this method on this implementation class. */ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -83,7 +83,7 @@ return getNamespace() + "." + keyOrder.getIndexName(); } - + /** * Return the index for the {@link IKeyOrder} the timestamp for this view of * the relation. 
@@ -91,15 +91,15 @@ * @param keyOrder * The natural index order. * - * @return The index -or- <code>null</code> iff the index does not exist - * as of the timestamp for this view of the relation. + * @return The index -or- <code>null</code> iff the index does not exist as + * of the timestamp for this view of the relation. * * @see #getIndex(String) * - * FIXME For efficiency the concrete implementations need to override this - * saving a hard reference to the index and then using a switch like - * construct to return the correct hard reference. This behavior should be - * encapsulated. + * @todo For efficiency the concrete implementations need to override this + * saving a hard reference to the index and then use a switch like + * construct to return the correct hard reference. This behavior + * should be encapsulated. */ public IIndex getIndex(final IKeyOrder<? extends E> keyOrder) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractResource.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -39,6 +39,7 @@ import org.apache.log4j.Logger; +import com.bigdata.bop.BOp; import com.bigdata.config.Configuration; import com.bigdata.config.IValidator; import com.bigdata.config.IntegerValidator; @@ -52,7 +53,7 @@ import com.bigdata.rdf.rules.RuleFastClosure5; import com.bigdata.rdf.rules.RuleFastClosure6; import com.bigdata.rdf.store.AbstractTripleStore; -import com.bigdata.relation.accesspath.AbstractAccessPath; +import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IBuffer; @@ -102,6 +103,8 @@ * The capacity of the buffers 
accumulating chunks from concurrent producers. * * @see Options#CHUNK_OF_CHUNKS_CAPACITY + * + * @deprecated by {@link BOp} annotations. */ final public int getChunkOfChunksCapacity() { @@ -113,6 +116,8 @@ * The target chunk size. * * @see Options#CHUNK_CAPACITY + * + * @deprecated by {@link BOp} annotations. */ final public int getChunkCapacity() { @@ -126,6 +131,8 @@ * current chunk. This may be ZERO (0) to disable the chunk combiner. * * @see Options#CHUNK_TIMEOUT + * + * @deprecated by {@link BOp} annotations. */ public final long getChunkTimeout() { @@ -140,6 +147,8 @@ * read. * * @see Options#FULLY_BUFFERED_READ_THRESHOLD + * + * @deprecated by {@link BOp} annotations. */ public int getFullyBufferedReadThreshold() { @@ -152,6 +161,8 @@ * sequentially even when they are not flagged as a sequential program. * * @see Options#FORCE_SERIAL_EXECUTION + * + * @deprecated by {@link BOp} annotations. */ public boolean isForceSerialExecution() { @@ -166,6 +177,8 @@ * time to the {@link ExecutorService}. * * @see Options#MAX_PARALLEL_SUBQUERIES + * + * @deprecated by {@link BOp} annotations. */ public int getMaxParallelSubqueries() { @@ -178,6 +191,10 @@ * applied. Otherwise the {@link JoinMasterTask} is applied. * * @see Options#NESTED_SUBQUERY + * + * @deprecated by {@link BOp} annotations and the pipeline join, which + * always does better than the older nested subquery evaluation + * logic. */ public boolean isNestedSubquery() { @@ -215,12 +232,14 @@ * producers if the producers are generating small chunks, e.g., because * there are few solutions for a join subquery. * </p> + * @deprecated by {@link BOp} annotations. */ String CHUNK_OF_CHUNKS_CAPACITY = BlockingBuffer.class.getName() + ".chunkOfChunksCapacity"; /** - * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} + * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} + * @deprecated by {@link BOp} annotations. 
*/ String DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = "1000"; @@ -233,6 +252,7 @@ * </p> * * @see #CHUNK_OF_CHUNKS_CAPACITY + * @deprecated by {@link BOp} annotations. */ String CHUNK_CAPACITY = IBuffer.class.getName() + ".chunkCapacity"; @@ -241,6 +261,8 @@ * <p> * Note: This used to be 20k, but chunks of chunks works better than * just a large chunk. + * + * @deprecated by {@link BOp} annotations. */ String DEFAULT_CHUNK_CAPACITY = "100"; @@ -249,6 +271,8 @@ * for another chunk to combine with the current chunk before returning * the current chunk (default {@link #DEFAULT_CHUNK_TIMEOUT}). This may * be ZERO (0) to disable the chunk combiner. + * + * @deprecated by {@link BOp} annotations. */ String CHUNK_TIMEOUT = BlockingBuffer.class.getName() + ".chunkTimeout"; @@ -256,24 +280,28 @@ * The default for {@link #CHUNK_TIMEOUT}. * * @todo this is probably much larger than we want. Try 10ms. + * @deprecated by {@link BOp} annotations. */ String DEFAULT_CHUNK_TIMEOUT = "1000"; - + /** * If the estimated rangeCount for an - * {@link AbstractAccessPath#iterator()} is LTE this threshold then use + * {@link AccessPath#iterator()} is LTE this threshold then use * a fully buffered (synchronous) iterator. Otherwise use an * asynchronous iterator whose capacity is governed by * {@link #CHUNK_OF_CHUNKS_CAPACITY}. + * + * @deprecated by {@link BOp} annotations. */ - String FULLY_BUFFERED_READ_THRESHOLD = AbstractAccessPath.class + String FULLY_BUFFERED_READ_THRESHOLD = AccessPath.class .getName() + ".fullyBufferedReadThreadshold"; /** - * Default for {@link #FULLY_BUFFERED_READ_THRESHOLD} + * Default for {@link #FULLY_BUFFERED_READ_THRESHOLD} * * @todo figure out how good this value is. + * @deprecated by {@link BOp} annotations. */ String DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = ""+20*Bytes.kilobyte32; @@ -286,18 +314,17 @@ * {@link AbstractTripleStore}. and should be relocated. 
* <P> * The {@link #CLOSURE_CLASS} option defaults to - * {@link FastClosure}, which has very little possible - * parallelism (it is mostly a sequential program by nature). For - * that reason, {@link #FORCE_SERIAL_EXECUTION} defaults to - * <code>false</code> since the overhead of parallel execution - * is more likely to lower the observed performance with such - * limited possible parallelism. However, when using - * {@link FullClosure} the benefits of parallelism MAY justify its - * overhead. + * {@link FastClosure}, which has very little possible parallelism + * (it is mostly a sequential program by nature). For that reason, + * {@link #FORCE_SERIAL_EXECUTION} defaults to <code>false</code> + * since the overhead of parallel execution is more likely to + * lower the observed performance with such limited possible + * parallelism. However, when using {@link FullClosure} the + * benefits of parallelism MAY justify its overhead. * <p> * The following data are for LUBM datasets. * - * <pre> + * <pre> * U1 Fast Serial : closure = 2250ms; 2765, 2499, 2530 * U1 Fast Parallel : closure = 2579ms; 2514, 2594 * U1 Full Serial : closure = 10437ms. @@ -309,18 +336,19 @@ * U10 Full Parallel : closure = 248550ms. * </pre> * - * Note that the only rules in the fast closure program that have - * potential parallelism are {@link RuleFastClosure5} and - * {@link RuleFastClosure6} and these rules are not being triggered by - * these datasets, so there is in fact NO potential parallelism (in the - * data) for these datasets. - * <p> - * It is possible that a machine with more cores would perform better - * under the "full" closure program with parallel rule execution (these - * data were collected on a laptop with 2 cores) since performance tends - * to be CPU bound for small data sets. However, the benefit of the - * "fast" closure program is so large that there is little reason to - * consider parallel rule execution for the "full" closure program. 
+ * Note that the only rules in the fast closure program that have + * potential parallelism are {@link RuleFastClosure5} and + * {@link RuleFastClosure6} and these rules are not being + * triggered by these datasets, so there is in fact NO potential + * parallelism (in the data) for these datasets. + * <p> + * It is possible that a machine with more cores would perform + * better under the "full" closure program with parallel rule + * execution (these data were collected on a laptop with 2 cores) + * since performance tends to be CPU bound for small data sets. + * However, the benefit of the "fast" closure program is so large + * that there is little reason to consider parallel rule execution + * for the "full" closure program. * * @todo collect new timings for this option. The LUBM performance has * basically doubled since these data were collected. Look further @@ -328,10 +356,14 @@ * parallelism and also for when rule parallelism is not enabled. * * @todo rename as parallel_rule_execution. + * @deprecated by {@link BOp} annotations. */ String FORCE_SERIAL_EXECUTION = ProgramTask.class.getName() + ".forceSerialExecution"; - + + /** + * @deprecated by {@link BOp} annotations. + */ String DEFAULT_FORCE_SERIAL_EXECUTION = "true"; /** @@ -348,10 +380,14 @@ * currently imposed by a per {@link JoinTask} * {@link ExecutorService}, which must be explicitly enabled in * the code). + * @deprecated by {@link BOp} annotations. */ String MAX_PARALLEL_SUBQUERIES = ProgramTask.class.getName() + ".maxParallelSubqueries"; - + + /** + * @deprecated by {@link BOp} annotations. 
+ */ String DEFAULT_MAX_PARALLEL_SUBQUERIES = "5"; /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -28,7 +28,6 @@ package com.bigdata.relation; -import java.util.Iterator; import java.util.Set; import java.util.concurrent.ExecutorService; @@ -63,22 +62,12 @@ * The {@link IIndexManager} for the {@link IRelation}. */ public IIndexManager getIndexManager(); - + /** - * The service used to run asynchronous or parallel tasks for the {@link IRelation}. + * The service used to run asynchronous or parallel tasks for the + * {@link IRelation}. */ public ExecutorService getExecutorService(); - -// /** -// * The #of elements in the relation. -// * -// * @param exact -// * When <code>true</code> an exact count is reported. An exact -// * count will require a key-range scan if delete markers are in -// * use, in which case it will be more expensive. See -// * {@link IRangeQuery}. -// */ -// long getElementCount(boolean exact); /** * Return the best {@link IAccessPath} for a relation given a predicate with @@ -103,8 +92,8 @@ * with it to be evaluated local to the data. * <p> * Note: Filters should be specified when the {@link IAccessPath} is - * constructed so that they will be evaluated on the data service rather than - * materializing the elements and then filtering then. This can be + * constructed so that they will be evaluated on the data service rather + * than materializing the elements and then filtering then. This can be * accomplished by adding the filter as a constraint on the predicate when * specifying the access path. 
* @@ -119,6 +108,30 @@ // IAccessPath<E> getAccessPathForIndexPartition(IIndexManager indexManager, IPredicate<E> predicate); /** + * The fully qualified name of the index. + * + * @param keyOrder + * The natural index order. + * + * @return The index name. + */ + String getFQN(IKeyOrder<? extends E> keyOrder); + + /** + * Return the index for the {@link IKeyOrder} the timestamp for this view of + * the relation. + * + * @param keyOrder + * The natural index order. + * + * @return The index -or- <code>null</code> iff the index does not exist as + * of the timestamp for this view of the relation. + * + * @see #getIndex(String) + */ + IIndex getIndex(IKeyOrder<? extends E> keyOrder); + + /** * Return the fully qualified name of each index maintained by this * relation. * @@ -132,15 +145,20 @@ * New methods. */ + /** + * Return the {@link IKeyOrder} for the primary index for the relation. + */ + IKeyOrder<E> getPrimaryKeyOrder(); + // /** // * Return the {@link IKeyOrder}s corresponding to the registered indices for -// * this relation. +// * this relation. [rather than getIndexNames?] // */ // Iterator<IKeyOrder<E>> getKeyOrders(); // // /** // * Return the {@link IKeyOrder} for the predicate corresponding to the -// * perfect access path. A perfect access path is one where the bound values +// * perfect (best?) access path. A perfect access path is one where the bound values // * in the predicate form a prefix in the key space of the corresponding // * index. // * @@ -151,19 +169,7 @@ // * access path for that predicate. // */ // IKeyOrder<E> getKeyOrder(IPredicate<E> p); -// -// /** -// * Return the {@link IKeyOrder} for the primary index for the relation. -// */ -// IKeyOrder<E> getPrimaryKeyOrder(); -// -// /** -// * Return the primary index for the relation. -// * -// * @todo how about getIndex(IKeyOrder) instead? -// */ -// IIndex getPrimaryIndex(); - + /* * End new methods. 
*/ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -4,20 +4,27 @@ import java.util.Set; import java.util.concurrent.ExecutorService; +import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; +import com.bigdata.btree.IIndex; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.TemporaryStore; -import com.bigdata.relation.accesspath.AbstractAccessPath; +import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.AccessPathFusedView; import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.striterator.IKeyOrder; /** * A factory for fused views reading from both of the source {@link IRelation}s. - * + * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * @param <E> + * + * @deprecated by {@link BOp}s using the UNION of JOINs. However, also note that + * this is only used for TM and that the focus store is always local + * for TM. */ public class RelationFusedView<E> implements IRelation<E> { @@ -66,11 +73,11 @@ } - public IAccessPath<E> getAccessPath(IPredicate<E> predicate) { + public IAccessPath<E> getAccessPath(final IPredicate<E> predicate) { return new AccessPathFusedView<E>(// - (AbstractAccessPath<E>)relation1.getAccessPath(predicate),// - (AbstractAccessPath<E>)relation2.getAccessPath(predicate)// + (AccessPath<E>)relation1.getAccessPath(predicate),// + (AccessPath<E>)relation2.getAccessPath(predicate)// ); } @@ -138,6 +145,15 @@ } + /** + * The value for the first relation in the view. 
+ */ + public IKeyOrder<E> getPrimaryKeyOrder() { + + return relation1.getPrimaryKeyOrder(); + + } + /* * Note: These methods can not be implemented for the fused view. */ @@ -180,4 +196,12 @@ } + public String getFQN(IKeyOrder<? extends E> keyOrder) { + throw new UnsupportedOperationException(); + } + + public IIndex getIndex(IKeyOrder<? extends E> keyOrder) { + throw new UnsupportedOperationException(); + } + } Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractAccessPath.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractAccessPath.java 2010-08-20 16:14:13 UTC (rev 3453) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractAccessPath.java 2010-08-20 19:00:43 UTC (rev 3454) @@ -1,1313 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -/* - * Created on Jun 19, 2008 - */ - -package com.bigdata.relation.accesspath; - -import java.util.Iterator; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.RejectedExecutionException; - -import org.apache.log4j.Logger; - -import com.bigdata.bop.IPredicate; -import com.bigdata.btree.BytesUtil; -import com.bigdata.btree.IBloomFilter; -import com.bigdata.btree.IIndex; -import com.bigdata.btree.ILocalBTreeView; -import com.bigdata.btree.IRangeQuery; -import com.bigdata.btree.ITuple; -import com.bigdata.btree.ITupleIterator; -import com.bigdata.btree.Tuple; -import com.bigdata.btree.filter.FilterConstructor; -import com.bigdata.btree.filter.IFilterConstructor; -import com.bigdata.btree.filter.ITupleFilter; -import com.bigdata.btree.filter.TupleFilter; -import com.bigdata.btree.keys.IKeyBuilder; -import com.bigdata.journal.IIndexManager; -import com.bigdata.journal.TimestampUtility; -import com.bigdata.mdi.LocalPartitionMetadata; -import com.bigdata.relation.AbstractResource; -import com.bigdata.relation.IRelation; -import com.bigdata.service.IDataService; -import com.bigdata.striterator.ChunkedArrayIterator; -import com.bigdata.striterator.ChunkedWrappedIterator; -import com.bigdata.striterator.EmptyChunkedIterator; -import com.bigdata.striterator.IChunkedIterator; -import com.bigdata.striterator.IChunkedOrderedIterator; -import com.bigdata.striterator.IKeyOrder; - -import cutthecrap.utils.striterators.Striterator; - -/** - * Abstract base class for type-specific {@link IAccessPath} implementations. 
- * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param R - * The generic type of the [R]elation elements of the - * {@link IRelation}. - * - * @todo This needs to be more generalized so that you can use a index that is - * best without being optimal by specifying a low-level filter to be - * applied to the index. That requires a means to dynamically filter out - * the elements we do not want from the key-range scan - the filtering - * should of course be done at the {@link IDataService}. - * - * FIXME Rename since no longer abstract! - */ -public class AbstractAccessPath<R> implements IAccessPath<R> { - - static final protected Logger log = Logger.getLogger(IAccessPath.class); - - /** Access to the index, resource locator, executor service, etc. */ - protected final IIndexManager indexManager; - - /** Timestamp of the view. */ - protected final long timestamp; - - /** Predicate (the resource name on the predicate is the relation namespace). */ - protected final IPredicate<R> predicate; - - /** - * The description of the index partition iff the {@link #predicate} is - * constrained to an index partition and <code>null</code> otherwise. - */ - final LocalPartitionMetadata pmd; - - /** - * Index order (the relation namespace plus the index order and the option - * partitionId constraint on the predicate identify the index). - */ - protected final IKeyOrder<R> keyOrder; - - /** The index. */ - protected final IIndex ndx; - - /** Iterator flags. */ - protected final int flags; - protected final int chunkOfChunksCapacity; - protected final int chunkCapacity; - protected final int fullyBufferedReadThreshold; - - private final boolean isFullyBoundForKey; - - /** - * <code>true</code> iff all elements in the predicate which are required - * to generate the key are bound to constants. 
- */ - public boolean isFullyBoundForKey() { - - return isFullyBoundForKey; - - } - - /** - * @see AbstractResource#getChunkCapacity() - */ - public int getChunkCapacity() { - - return chunkCapacity; - - } - - /** - * @see AbstractResource#getChunkOfChunksCapacity() - */ - public int getChunkOfChunksCapacity() { - - return chunkOfChunksCapacity; - - } - - /** - * The maximum <em>limit</em> that is allowed for a fully-buffered read. - * The {@link #asynchronousIterator(Iterator)} will always be used above - * this limit. - */ - protected static final int MAX_FULLY_BUFFERED_READ_LIMIT = 250000; - - /** - * We cache some stuff for historical reads. - * <p> - * Note: We cache results on a per-{@link IAccessPath} basis rather than a - * per-{@link IIndex} basis since range counts and range iterators are both - * constrained to a specific key range of interest for an - * {@link IAccessPath} while they would span the entire {@link IIndex} - * otherwise. - * - * @todo cache the {@link IAccessPath}s themselves so that we benefit from - * reuse of the cached data. - * - * @todo we could also cache small iterator result sets. - */ - private final boolean historicalRead; - - /** - * For {@link #historicalRead}s only, the range count is cached once it is - * computed. It is also set if we discover using {@link #isEmpty()} or - * {@link #iterator(long, long, int)} that the {@link IAccessPath} is empty. - * Likewise, those methods test this flag to see if we have proven the - * {@link IAccessPath} to be empty. - */ - private long rangeCount = -1L; - - /** - * The filter derived from the {@link IElementFilter}. - */ - final protected FilterConstructor<R> filter; - -// /** -// * A copy of the filter derived from the {@link IElementFilter}. -// */ -// public FilterConstructor<R> getFilter() { -// -// if (filter == null) -// return null; -// -// return filter.clone(); -// -// } - - /** - * Used to detect failure to call {@link #init()}. 
- */ - private boolean didInit = false; - - private final byte[] fromKey; - - private final byte[] toKey; - - /** - * The key corresponding to the inclusive lower bound for the - * {@link IAccessPath} <code>null</code> if there is no lower bound. - * <p> - * <strong>This MUST be set by the concrete subclass using - * {@link #setFromKey(byte[])} BEFORE calling - * {@link AbstractAccessPath#init()} - it MAY be set to a <code>null</code> - * value</strong>. - */ - public byte[] getFromKey() { - - return fromKey; - - } - - /** - * The key corresponding to the exclusive upper bound for the - * {@link IAccessPath} -or- <code>null</code> if there is no upper bound. - * <p> - * <strong>This MUST be set by the concrete subclass using - * {@link #setFromKey(byte[])} BEFORE calling - * {@link AbstractAccessPath#init()} - it MAY be set to a <code>null</code> - * value.</strong> - */ - public byte[] getToKey() { - - return toKey; - - } - -// protected void setFromKey(final byte[] fromKey) { -// -// assertNotInitialized(); -// -// if (pmd != null) { -// -// /* -// * The predicate is constrained to an index partition, so constrain -// * the fromKey so that it lies within that index partition. -// */ -// -// this.fromKey = AbstractKeyRangeIndexProcedure.constrainFromKey(fromKey, -// pmd); -// -// } else { -// -// this.fromKey = fromKey; -// -// } -// -// } -// -// protected void setToKey(final byte[] toKey) { -// -// assertNotInitialized(); -// -// if (pmd != null) { -// -// /* -// * The predicate is constrained to an index partition, so constrain -// * the toKey so that it lies within that index partition. -// */ -// -// this.toKey = AbstractKeyRangeIndexProcedure.constrainToKey(toKey, pmd); -// -// } else { -// -// this.toKey = toKey; -// -// } -// -// } - - public IKeyOrder<R> getKeyOrder() { - - return keyOrder; - - } - - /** - * @param indexManager - * Access to the indices, resource locators, executor service, - * etc. 
- * @param timestamp - * The timestamp of the index view. - * @param predicate - * The constraints on the access path. - * @param keyOrder - * The order in which the elements would be visited for this - * access path. - * @param ndx - * The index on which the access path is reading. - * @param flags - * The default {@link IRangeQuery} flags. - * @param chunkOfChunksCapacity - * The #of chunks that can be held by an {@link IBuffer} that is - * the target or one or more producers. This is generally a small - * number on the order of the #of parallel producers that might - * be writing on the {@link IBuffer} since the capacity of the - * {@link UnsynchronizedArrayBuffer}s is already quite large - * (10k or better elements, defining a single "chunk" from a - * single producer). - * @param chunkCapacity - * The maximum size for a single chunk (generally 10k or better). - * @param fullyBufferedReadThreshold - * If the estimated remaining rangeCount for an - * {@link #iterator(long, long, int)} is LTE this threshold then - * we will do a fully buffered (synchronous) read. Otherwise we - * will do an asynchronous read. - */ - protected AbstractAccessPath(// - final IIndexManager indexManager, // - final long timestamp,// - final IPredicate<R> predicate,// - final IKeyOrder<R> keyOrder, // - final IIndex ndx,// - final int flags, // - final int chunkOfChunksCapacity, - final int chunkCapacity, - final int fullyBufferedReadThreshold - ) { - - if (indexManager == null) - throw new IllegalArgumentException(); - - if (predicate == null) - throw new IllegalArgumentException(); - - if (keyOrder == null) - throw new IllegalArgumentException(); - - if (ndx == null) - throw new IllegalArgumentException(); - - final int partitionId = predicate.getPartitionId(); - - if (partitionId != -1) { - - /* - * An index partition constraint was specified, so verify that we - * were given a local index object and that the index object is for - * the correct index partition. 
- */ - - pmd = ndx.getIndexMetadata().getPartitionMetadata(); - - if (pmd == null) - throw new IllegalArgumentException("Not an index partition"); - - if (pmd.getPartitionId() != partitionId) { - - throw new IllegalArgumentException("Expecting partitionId=" - + partitionId + ", but have " + pmd.getPartitionId()); - - } - - } else { - - // The predicate is not constrained to an index partition. - - pmd = null; - - } - - this.indexManager = indexManager; - - this.timestamp = timestamp; - - this.predicate = predicate; - - this.keyOrder = keyOrder; - - this.ndx = ndx; - - this.flags = flags; - - this.chunkOfChunksCapacity = chunkOfChunksCapacity; - - this.chunkCapacity = chunkCapacity; - -// this.fullyBufferedReadThreshold = 100000; - this.fullyBufferedReadThreshold = fullyBufferedReadThreshold; - - this.historicalRead = TimestampUtility.isReadOnly(timestamp); - - this.isFullyBoundForKey = predicate.isFullyBound(keyOrder); - - final IElementFilter<R> constraint = predicate.getConstraint(); - - /* - * Optional constraint enforces the "same variable" constraint. The - * constraint will be null unless at least one variable appears in more - * than one position in the predicate. 
- */ - final SameVariableConstraint<R> sameVarConstraint = SameVariableConstraint - .newInstance(predicate); - - if (constraint == null && sameVarConstraint == null) { - - filter = null; - - } else { - - filter = new FilterConstructor<R>(); - - if (constraint != null) { - - filter.addFilter(new ElementFilter<R>(constraint)); - - } - - if (sameVarConstraint != null) { - - filter.addFilter(new ElementFilter<R>(sameVarConstraint)); - - } - - } - - final IKeyBuilder keyBuilder = ndx.getIndexMetadata() - .getTupleSerializer().getKeyBuilder(); - - fromKey = keyOrder.getFromKey(keyBuilder, predicate); - - toKey = keyOrder.getToKey(keyBuilder, predicate); - - } - - /** - * Align the predicate's {@link IElementFilter} constraint with - * {@link ITupleFilter} so that the {@link IElementFilter} can be evaluated - * close to the data by an {@link ITupleIterator}. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * @param <R> - * The generic type of the elements presented to the filter. - */ - public static class ElementFilter<R> extends TupleFilter<R> { - - private static final long serialVersionUID = 1L; - - private final IElementFilter<R> constraint; - - public ElementFilter(final IElementFilter<R> constraint) { - - if (constraint == null) - throw new IllegalArgumentException(); - - this.constraint = constraint; - - } - - public boolean isValid(final ITuple<R> tuple) { - - final R obj = (R) tuple.getObject(); - - return constraint.accept( obj ); - - } - - } - - public String toString() { - - return getClass().getName() + "{predicate=" + predicate + ", keyOrder=" - + keyOrder + ", flags=" + Tuple.flagString(flags) - + ", fromKey=" - + (fromKey == null ? "n/a" : BytesUtil.toString(fromKey)) - + ", toKey=" - + (toKey == null ? "n/a" : BytesUtil.toString(toKey) + "}"); - - } - - /** - * @throws IllegalStateException - * unless {@link #init()} has been invoked. 
- */ - final private void assertNotInitialized() { - - if (didInit) - throw new IllegalStateException(); - - } - - /** - * @throws IllegalStateException - * unless {@link #init()} has been invoked. - */ - final protected void assertInitialized() { - - if (!didInit) - throw new IllegalStateException(); - - } - - /** - * Required post-ctor initialization. - * - * @return <i>this</i> - */ - public AbstractAccessPath<R> init() { - - if (didInit) - throw new IllegalStateException(); - - didInit = true; - - if(log.isDebugEnabled()) { - - if (fromKey != null && toKey != null) { - - if (BytesUtil.compareBytes(fromKey, toKey) >= 0) { - - throw new AssertionError("keys are out of order: " + toString()); - - } - - } - - log.debug(toString()); - - } - - return this; - - } - - public IIndexManager getIndexManager() { - - return indexManager; - - } - - public long getTimestamp() { - - return timestamp; - - } - - public IPredicate<R> getPredicate() { - -// assertInitialized(); - - return predicate; - - } - - public IIndex getIndex() { - - return ndx; - - } - - /** - * @todo for scale-out, it may be better to implement {@link #isEmpty()} - * without specifying a capacity of ONE (1) and then caching the - * returned iterator. This could avoid an expensive RMI test if we - * invoke {@link #iterator()} shortly after {@link #isEmpty()} returns - * <code>false</code>. - */ - public boolean isEmpty() { - - assertInitialized(); - - if (historicalRead && rangeCount != -1) { - - /* - * Optimization for a historical read in which we have already - * proven that the access path is empty. - */ - - return rangeCount == 0L; - - } - - if(log.isDebugEnabled()) { - - log.debug(toString()); - - } - - final IChunkedIterator<R> itr = iterator(1,1); - - try { - - final boolean empty = ! itr.hasNext(); - - if (empty && historicalRead) { - - // the access path is known to be empty. 
- - rangeCount = 0L; - - } - - return empty; - - } finally { - - itr.close(); - - } - - } - - final public IChunkedOrderedIterator<R> iterator() { - - return iterator(0L/* offset */, 0L/* limit */, 0); - - } - - final public IChunkedOrderedIterator<R> iterator(final int limit, - final int capacity) { - - return iterator(0L/* offset */, limit, capacity); - - } - - /** - * @throws RejectedExecutionException - * if the iterator is run asynchronously and the - * {@link ExecutorService} is shutdown or has a maximum capacity - * and is saturated. - * - * FIXME Support both offset and limit for asynchronous - * iterators. right now this will force the use of the - * {@link #synchronousIterator(long, long, Iterator)} when the - * offset or limit are non-zero, but that is only permitted up - * to a limit of {@link #MAX_FULLY_BUFFERED_READ_LIMIT}. - * - * FIXME in order to support large limits we need to verify that - * the asynchronous iterator can correctly handle REMOVEALL and - * that incremental materialization up to the [limit] will not - * effect the semantics for REMOVEALL or the other iterator - * flags (per above). (In fact, the asynchronous iterator does - * not support either [offset] or [limit] at this time). - * - * FIXME write unit tests for slice handling by this method and - * modify the SAIL integration to use it for SLICE on an - * {@link IAccessPath} scan. Note that there are several - * {@link IAccessPath} implementations and they all need to be - * tested with SLICE. - * - * Those tests should be located in - * {@link com.bigdata.rdf.spo.TestSPOAccessPath}. - * - * FIXME The offset and limit should probably be rolled into the - * predicate and removed from the {@link IAccessPath}. 
This way - * they will be correctly applied when {@link #isEmpty()} is - * implemented using the {@link #iterator()} to determine if any - */ - @SuppressWarnings("unchecked") - final public IChunkedOrderedIterator<R> iterator(final long offset, - long limit, int capacity) { - - if (offset < 0) - throw new IllegalArgumentException(); - - if (limit < 0) - throw new IllegalArgumentException(); - - if (limit == Long.MAX_VALUE) { - - // treat MAX_VALUE as meaning NO limit. - limit = 0L; - - } - - if (limit > MAX_FULLY_BUFFERED_READ_LIMIT) { - - // Note: remove constraint when async itr supports SLICE. - throw new UnsupportedOperationException("limit=" + limit - + " exceeds maximum fully buffered read limit: " - + MAX_FULLY_BUFFERED_READ_LIMIT); - - } - - if (historicalRead && rangeCount >= 0L && ((rangeCount - offset) <= 0L)) { - - /* - * The access path has already been proven to be empty. - */ - - if (log.isDebugEnabled()) - log.debug("Proven empty by historical range count"); - - return new EmptyChunkedIterator<R>(keyOrder); - - } - - if (log.isDebugEnabled()) - log.debug("offset=" + offset + ", limit=" + limit + ", capacity=" - + capacity + ", accessPath=" + this); - - final boolean fullyBufferedRead; - - // true iff a point test is a hit on the bloom filter. - boolean bloomHit = false; - - if(isFullyBoundForKey) { - - if (log.isDebugEnabled()) - log.debug("Predicate is fully bound for the key."); - - /* - * If the predicate is fully bound then there can be at most one - * element matched so we constrain the limit and capacity - * accordingly. - */ - - if (offset > 0L) { - - // the iterator will be empty if the offset is GT zero. - return new EmptyChunkedIterator<R>(keyOrder); - - } - - capacity = 1; - - limit = 1L; - - fullyBufferedRead = true; - - /* - * Note: Since this is a point test, we apply the bloom filter for - * fast rejection. 
However, we can only apply the bloom filter if - * (a) you are using the local index object (either a BTree or a - * FusedView); and (b) the bloom filter exists (and is enabled). - * - * Note: The scale-out case is dealt with by pipelining the - * intermediate binding sets to the data service on which the index - * partition resides, at which point we again can apply the local - * bloom filter efficiently. - */ - - if(ndx instanceof ILocalBTreeView) { - - final IBloomFilter filter = ((ILocalBTreeView)ndx).getBloomFilter(); - - if (filter != null) { - - if(!filter.contains(fromKey)) { - - // proven to not exist. - return new EmptyChunkedIterator<R>(keyOrder); - - } - - bloomHit = true; - - // fall through - - } - - // fall through - - } - - // fall through - - } else if (limit > 0L) { - - /* - * A [limit] was specified. - * - * NOTE: When the [limit] is (GT ZERO) we MUST NOT let the - * DataService layer iterator read more than [limit] elements at a - * time. - * - * This is part of the contract for REMOVEALL - when you set the - * [limit] and specify REMOVEALL you are only removing the 1st - * [limit] elements in the traversal order. - * - * This is also part of the atomic queue operations contract - the - * head and tail queue operations function by specifying [limit := - * 1] (tail also specifies the REVERSE traversal option). - * - * Note: When the [limit] is specified we always do a fully buffered - * (aka synchronous) read. This simplifies the behavior of the - * iterator and limits are generally quite small. - */ - - capacity = (int) limit; - - fullyBufferedRead = true; - - } else { - - /* - * No limit was specified. - * - * Range count the access path and use a synchronous read if the - * rangeCount is LTE the threshold. - * - * Note: the range count is corrected by the offset so that it gives - * the effective remaining range count. When the effective remaining - * range count is zero we know that the iterator will not visit - * anything. 
- * - * @todo this kind of rangeCount might be replaced by an estimated - * range count basic on historical data and NOT requiring RMI. - */ - - final long rangeCountRemaining = rangeCount(false/* exact */) - - offset; - - if (log.isDebugEnabled()) - log.debug("offset=" + offset + ", limit=" + limit - + ", rangeCountRemaining=" + rangeCountRemaining - + ", fullyBufferedReadThreashold=" - + fullyBufferedReadThreshold); - - if(rangeCountRemaining <= 0) { - - /* - * Since the range count is an upper bound we KNOW that the - * iterator would not visit anything. - */ - - if (log.isDebugEnabled()) - log.debug("No elements based on range count."); - - return new EmptyChunkedIterator<R>(keyOrder); - - } - - if(rangeCountRemaining < fullyBufferedReadThreshold) { - - // adjust limit to no more than the #of remaining elements. - if (limit == 0L) { - limit = rangeCountRemaining; - } else { - limit... [truncated message content] |
From: <tho...@us...> - 2010-08-20 20:43:51
|
Revision: 3455 http://bigdata.svn.sourceforge.net/bigdata/?rev=3455&view=rev Author: thompsonbry Date: 2010-08-20 20:43:44 +0000 (Fri, 20 Aug 2010) Log Message: ----------- Identified some problems with using variable arity for SPOPredicate and have reverted to using arity/4 at all times but allowing null for [c]. This decision should be reviewed, but I wanted to get as many of the unit tests running as possible right now. For the same reason, I have modified Predicate to allow more than one relation name (for RelationViews). The truth maintenance code currently depends on this, so by allowing this again we can validate more of the test suite. There is a problem in AbstractBOp#clone(). The Java clone() semantics provide field-by-field copying. However, bops need deep copy clone() semantics. I've introduced an AbstractBOp(AbstractBOp) constructor which does the deep copy for args[] and the annotations, but it still relies on AbstractBOp#clone() to handle recursive deep copying. I think that the right way to handle this is to implement clone() at those levels where new fields are defined. If the only place where we define fields is on AbstractBOp, then we could just implement clone() there and have it construct the appropriate object using a copy constructor for the concrete bop class whose signature is FooBop(FooBop). 
Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java 2010-08-20 19:00:43 UTC (rev 3454) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java 2010-08-20 20:43:44 UTC (rev 3455) @@ -27,7 +27,6 @@ package com.bigdata.bop; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; @@ -95,6 +94,15 @@ * * @todo This will deep copy {@link BOp} structures but does not do a deep * copy of other kinds of embedded structures. + * + * FIXME Object#clone() is copying the reference to the {@link #args} + * [] rather than allocating a new array. Likewise, it is copying the + * reference to the {@link #annotations} {@link Map} references. I am + * working on a deep copy constructor (below). clone() will have to be + * modified to use the deep copy constructor, which means resolving + * the right constructor by reflection given the specific {@link BOp} + * class -or- implementing clone() on each concrete Bop class and + * having it apply the deep copy constructor for itself. */ public AbstractBOp clone() { try { @@ -125,6 +133,34 @@ } /** + * Deep copy constructor. + * + * @param op + * + * @todo This will deep copy {@link BOp} structures (both operands and + * annotations) but does not do a deep copy of other kinds of embedded + * structures. 
+ */ + protected AbstractBOp(final AbstractBOp op) { + args = new BOp[op.args.length]; + for (int i = 0; i < args.length; i++) { + args[i] = op.args[i].clone(); + } + annotations = new LinkedHashMap<String, Object>(op.annotations.size()); + // deep copy the annotations. + { + final Iterator<Map.Entry<String, Object>> itr = op.annotations + .entrySet().iterator(); + while (itr.hasNext()) { + final Map.Entry<String, Object> e = itr.next(); + if (e.getValue() instanceof BOp) { + annotations.put(e.getKey(), ((BOp) e.getValue()).clone()); + } + } + } + } + + /** * @param args * The arguments to the operator. */ @@ -148,14 +184,6 @@ checkArgs(args); - final ArrayList<BOp> tmp = new ArrayList<BOp>(args.length); - - for (int i = 0; i < args.length; i++) { - - tmp.add(args[i]); - - } - this.args = args; this.annotations = (annotations == null ? new LinkedHashMap<String, Object>() @@ -175,7 +203,7 @@ } - final public int arity() { + public int arity() { return args.length; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-08-20 19:00:43 UTC (rev 3454) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-08-20 20:43:44 UTC (rev 3455) @@ -66,6 +66,9 @@ /** * The name of the relation on which the predicate will read. + * + * FIXME Change this to be a scalar value. It is currently an array for + * backwards compatibility. 
*/ String RELATION_NAME = "relationName"; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-08-20 19:00:43 UTC (rev 3454) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-08-20 20:43:44 UTC (rev 3455) @@ -105,7 +105,7 @@ final ISolutionExpander<E> expander) { super(values, NV.asMap(new NV[] {// - new NV(Annotations.RELATION_NAME,relationName),// + new NV(Annotations.RELATION_NAME,new String[]{relationName}),// new NV(Annotations.PARTITION_ID,partitionId),// new NV(Annotations.OPTIONAL,optional),// new NV(Annotations.CONSTRAINT,constraint),// @@ -125,33 +125,50 @@ public String getOnlyRelationName() { -// if (relationName.length != 1) -// throw new IllegalStateException(); + final String[] relationName = (String[]) annotations.get(Annotations.RELATION_NAME); + + if (relationName.length != 1) + throw new IllegalStateException(); - return (String) annotations.get(Annotations.RELATION_NAME); + return relationName[0]; } public String getRelationName(final int index) { + + final String[] relationName = (String[]) annotations.get(Annotations.RELATION_NAME); + + return relationName[index]; -// return relationName[index]; +// throw new UnsupportedOperationException(); - throw new UnsupportedOperationException(); + } + + public int getRelationCount() { + final String[] relationName = (String[]) annotations.get(Annotations.RELATION_NAME); + + return relationName.length; + } + public Predicate<E> setRelationName(final String[] relationName) { + +// throw new UnsupportedOperationException(); + final Predicate<E> tmp = this.clone(); + + tmp.annotations.put(Annotations.RELATION_NAME, relationName); + + return tmp; + + } + public int getPartitionId() { return (Integer)annotations.get(Annotations.PARTITION_ID); } - public int getRelationCount() { - - 
return 1;//relationName.length; - - } - @SuppressWarnings("unchecked") public IVariableOrConstant get(final int index) { @@ -276,9 +293,19 @@ for (int i = 0; i < args.length; i++) { - if (((IVariableOrConstant<?>) args[i]).isConstant()) + final IVariableOrConstant<?> t = (IVariableOrConstant<?>) args[i]; + + if (t == null) { + /* + * Note: t != null handles the case where the [c] position of an + * SPO is allowed to be null. + */ continue; + } + if (t.isConstant()) + continue; + final IVariable<?> var = (IVariable<?>) args[i]; final IConstant<?> val = bindingSet.get(var); @@ -319,13 +346,6 @@ } - public Predicate<E> setRelationName(final String[] relationName) { - - throw new UnsupportedOperationException(); -// return new Predicate<E>(this, relationName); - - } - @SuppressWarnings("unchecked") public IKeyOrder<E> getKeyOrder() { @@ -379,9 +399,9 @@ final IVariableOrConstant<?> v = get(i); - sb.append(v.isConstant() ? v.toString() - : (v + "=" + (bindingSet == null ? null : bindingSet - .get((IVariable<?>) v)))); + sb.append(v == null ? null : v.isConstant() ? v.toString() : (v + + "=" + (bindingSet == null ? 
null : bindingSet + .get((IVariable<?>) v)))); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2010-08-20 19:00:43 UTC (rev 3454) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2010-08-20 20:43:44 UTC (rev 3455) @@ -23,7 +23,6 @@ */ package com.bigdata.rdf.spo; -import com.bigdata.bop.AbstractBOp; import com.bigdata.bop.ArrayBindingSet; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; @@ -106,15 +105,19 @@ */ private static final long serialVersionUID = 1L; -// /** -// * The arity is 3 unless the context position was given (as either a -// * variable or bound to a constant) in which case it is 4. -// */ -// public final int arity() { -// -// return args[3/*c*/] == null ? 3 : 4; -// -// } + /** + * The arity is 3 unless the context position was given (as either a + * variable or bound to a constant) in which case it is 4. + * + * @todo rather than having a conditional arity, modify the SPOPredicate + * constructor to pass on either args[3] or args[3] depending on + * whether we are using triples or quads. + */ + public final int arity() { + + return args[3/*c*/] == null ? 3 : 4; + + } /** * Partly specified ctor. The context will be <code>null</code>. The @@ -266,9 +269,13 @@ final ISolutionExpander<ISPO> expander// ) { - super((c == null ? new IVariableOrConstant[] { s, p, o } - : new IVariableOrConstant[] { s, p, o, c }), relationName[0], - partitionId, optional, constraint, expander); + super( +// (c == null ? 
new IVariableOrConstant[] { s, p, o } +// : new IVariableOrConstant[] { s, p, o, c }), + + new IVariableOrConstant[] { s, p, o, c }, + + relationName[0], partitionId, optional, constraint, expander); // if (relationName == null) // throw new IllegalArgumentException(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-08-21 01:16:21
|
Revision: 3456 http://bigdata.svn.sourceforge.net/bigdata/?rev=3456&view=rev Author: thompsonbry Date: 2010-08-21 01:16:12 +0000 (Sat, 21 Aug 2010) Log Message: ----------- Further cleanup of the IJoinNexus and IRelation implementations. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/MockJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicateAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/locator/TestDefaultResourceLocator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -31,7 +31,10 @@ import java.util.Properties; import java.util.UUID; +import com.bigdata.bop.IPredicate; import com.bigdata.btree.IIndex; +import com.bigdata.btree.ILocalBTreeView; +import com.bigdata.btree.IRangeQuery; import 
com.bigdata.btree.IndexMetadata; import com.bigdata.btree.UnisolatedReadWriteIndex; import com.bigdata.journal.ConcurrencyManager; @@ -40,6 +43,9 @@ import com.bigdata.journal.Journal; import com.bigdata.journal.TemporaryRawStore; import com.bigdata.journal.TemporaryStore; +import com.bigdata.relation.accesspath.AccessPath; +import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.service.DataService; import com.bigdata.service.IBigdataFederation; import com.bigdata.striterator.IKeyOrder; @@ -184,4 +190,166 @@ } + public IAccessPath<E> getAccessPath(final IPredicate<E> predicate) { + + // find the best key order. + final IKeyOrder<E> keyOrder = getKeyOrder(predicate); + + // get the corresponding index. + final IIndex ndx = getIndex(keyOrder); + + // default flags. + final int flags = IRangeQuery.DEFAULT; + + return new AccessPath<E>(this/* relation */, getIndexManager(), + getTimestamp(), predicate, keyOrder, ndx, flags, + getChunkOfChunksCapacity(), getChunkCapacity(), + getFullyBufferedReadThreshold()).init(); + + } + + /** + * This handles a request for an access path that is restricted to a + * specific index partition. + * <p> + * Note: This path is used with the scale-out JOIN strategy, which + * distributes join tasks onto each index partition from which it needs to + * read. Those tasks constrain the predicate to only read from the index + * partition which is being serviced by that join task. + * <p> + * Note: Since the relation may materialize the index views for its various + * access paths, and since we are restricted to a single index partition and + * (presumably) an index manager that only sees the index partitions local + * to a specific data service, we create an access path view for an index + * partition without forcing the relation to be materialized. + * <p> + * Note: Expanders ARE NOT applied in this code path. Expanders require a + * total view of the relation, which is not available during scale-out + * pipeline joins. 
+ * + * @param indexManager + * This MUST be the data service local index manager so that the + * returned access path will read against the local shard. + * @param predicate + * The predicate. {@link IPredicate#getPartitionId()} MUST return + * a valid index partition identifier. + * + * @throws IllegalArgumentException + * if either argument is <code>null</code>. + * @throws IllegalArgumentException + * unless the {@link IIndexManager} is a <em>local</em> index + * manager providing direct access to the specified shard. + * @throws IllegalArgumentException + * unless the predicate identifies a specific shard using + * {@link IPredicate#getPartitionId()}. + * + * @todo Raise this method into the {@link IRelation} interface. + */ + public IAccessPath<E> getAccessPathForIndexPartition( + final IIndexManager indexManager, // + final IPredicate<E> predicate// + ) { + + /* + * Note: getIndexManager() _always_ returns the federation's index + * manager because that is how we materialize an ILocatableResource when + * we locate it. However, the federation's index manager can not be used + * here because it addresses the scale-out indices. Instead, the caller + * must pass in the IIndexManager which has access to the local index + * objects so we can directly read on the shard. + */ +// final IIndexManager indexManager = getIndexManager(); + + if (indexManager == null) + throw new IllegalArgumentException(); + + if (indexManager instanceof IBigdataFederation<?>) { + + /* + * This will happen if you fail to re-create the JoinNexus within + * the target execution environment. + * + * This is disallowed because the predicate specifies an index + * partition and expects to have access to the local index objects + * for that index partition. However, the index partition is only + * available when running inside of the ConcurrencyManager and when + * using the IndexManager exposed by the ConcurrencyManager to its + * tasks. 
+ */ + + throw new IllegalArgumentException( + "Expecting a local index manager, not: " + + indexManager.getClass().toString()); + + } + + if (predicate == null) + throw new IllegalArgumentException(); + + final int partitionId = predicate.getPartitionId(); + + if (partitionId == -1) // must be a valid partition identifier. + throw new IllegalArgumentException(); + + /* + * @todo This condition should probably be an error since the expander + * will be ignored. + */ +// if (predicate.getSolutionExpander() != null) +// throw new IllegalArgumentException(); + + if (predicate.getRelationCount() != 1) { + + /* + * This is disallowed. The predicate must be reading on a single + * local index partition, not a view comprised of more than one + * index partition. + * + * @todo In fact, we could allow a view here as long as all parts of + * the view are local. That could be relevant when the other view + * component was a shard of a focusStore for parallel decomposition + * of RDFS closure, etc. The best way to handle such views when the + * components are not local is to use a UNION of the JOIN. When both + * parts are local we can do better using a UNION of the + * IAccessPath. + */ + + throw new IllegalStateException(); + + } + + final String namespace = getNamespace();//predicate.getOnlyRelationName(); + + /* + * Find the best access path for that predicate. + */ + final IKeyOrder<E> keyOrder = getKeyOrder(predicate); + + // The name of the desired index partition. + final String name = DataService.getIndexPartitionName(namespace + "." + + keyOrder.getIndexName(), predicate.getPartitionId()); + + /* + * Note: whether or not we need both keys and values depends on the + * specific index/predicate. + * + * Note: If the timestamp is a historical read, then the iterator will + * be read only regardless of whether we specify that flag here or not. + */ +// * Note: We can specify READ_ONLY here since the tail predicates are not +// * mutable for rule execution. 
+ final int flags = IRangeQuery.KEYS | IRangeQuery.VALS;// | IRangeQuery.READONLY; + + final long timestamp = getTimestamp();//getReadTimestamp(); + + // MUST be a local index view. + final ILocalBTreeView ndx = (ILocalBTreeView) indexManager + .getIndex(name, timestamp); + + return new AccessPath<E>(this/* relation */, indexManager, timestamp, + predicate, keyOrder, ndx, flags, getChunkOfChunksCapacity(), + getChunkCapacity(), getFullyBufferedReadThreshold()).init(); + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -61,15 +61,86 @@ /** * The {@link IIndexManager} for the {@link IRelation}. */ - public IIndexManager getIndexManager(); + IIndexManager getIndexManager(); /** * The service used to run asynchronous or parallel tasks for the * {@link IRelation}. */ - public ExecutorService getExecutorService(); + ExecutorService getExecutorService(); /** + * Return the class for the generic type of this relation. This information + * is used to dynamically create arrays of that generic type. + */ + Class<E> getElementClass(); + + /** + * Create and return a new element. The element is constructed from the + * predicate given the bindings. Typically, this is used when generating an + * {@link ISolution} for an {@link IRule} during either a query or mutation + * operations. The element is NOT inserted into the relation. + * + * @param predicate + * The predicate that is the head of some {@link IRule}. + * @param bindingSet + * A set of bindings for that {@link IRule}. + * + * @return The new element. + * + * @throws IllegalArgumentException + * if any parameter is <code>null</code>. 
+ * @throws IllegalStateException + * if the predicate is not fully bound given those bindings. + */ + E newElement(IPredicate<E> predicate, IBindingSet bindingSet); + + /** + * Return the {@link IKeyOrder} for the primary index for the relation. + */ + IKeyOrder<E> getPrimaryKeyOrder(); + + /** + * Return the fully qualified name of each index maintained by this + * relation. + * + * @return An immutable set of the index names for the relation. + * + * @deprecated Replace with getKeyOrders() (see below). + */ + Set<String> getIndexNames(); + +// /** +// * Return the {@link IKeyOrder}s corresponding to the registered indices for +// * this relation. [rather than getIndexNames?] +// */ +// Iterator<IKeyOrder<E>> getKeyOrders(); + + /** + * Return the {@link IKeyOrder} for the predicate corresponding to the + * perfect access path. A perfect access path is one where the bound values + * in the predicate form a prefix in the key space of the corresponding + * index. + * + * @param p + * The predicate. + * + * @return The {@link IKeyOrder} for the perfect access path -or- + * <code>null</code> if there is no index which provides a perfect + * access path for that predicate. + * + * @todo What about "best" versus "perfect"? Perfect is more a concept from + * RDF with covering indices. For other schemas we will often just + * have "best". If you only have one index then it is always "best". + * <p> + * Note that one of the main uses for this is query optimization. + * However, runtime query optimization can just work through the + * possible indices and join orders and get to a "best" query plan + * given the actual indices and foreign keys. + */ + IKeyOrder<E> getKeyOrder(IPredicate<E> p); + + /** * Return the best {@link IAccessPath} for a relation given a predicate with * zero or more unbound variables. * <p> @@ -104,7 +175,11 @@ */ IAccessPath<E> getAccessPath(IPredicate<E> predicate); - // @todo raise this method into this interface. 
+ /* + * @todo raise this method into this interface. it is currently implemented + * by AbstractRelation and overridden by SPORelation to handle the different + * index families for triples versus quads. + */ // IAccessPath<E> getAccessPathForIndexPartition(IIndexManager indexManager, IPredicate<E> predicate); /** @@ -131,73 +206,4 @@ */ IIndex getIndex(IKeyOrder<? extends E> keyOrder); - /** - * Return the fully qualified name of each index maintained by this - * relation. - * - * @return An immutable set of the index names for the relation. - * - * @todo replace with getKeyOrders()? - */ - Set<String> getIndexNames(); - - /* - * New methods. - */ - - /** - * Return the {@link IKeyOrder} for the primary index for the relation. - */ - IKeyOrder<E> getPrimaryKeyOrder(); - -// /** -// * Return the {@link IKeyOrder}s corresponding to the registered indices for -// * this relation. [rather than getIndexNames?] -// */ -// Iterator<IKeyOrder<E>> getKeyOrders(); -// -// /** -// * Return the {@link IKeyOrder} for the predicate corresponding to the -// * perfect (best?) access path. A perfect access path is one where the bound values -// * in the predicate form a prefix in the key space of the corresponding -// * index. -// * -// * @param p -// * The predicate. -// * @return The {@link IKeyOrder} for the perfect access path -or- -// * <code>null</code> if there is no index which provides a perfect -// * access path for that predicate. -// */ -// IKeyOrder<E> getKeyOrder(IPredicate<E> p); - - /* - * End new methods. - */ - - /** - * Create and return a new element. The element is constructed from the - * predicate given the bindings. Typically, this is used when generating an - * {@link ISolution} for an {@link IRule} during either a query or mutation - * operations. The element is NOT inserted into the relation. - * - * @param predicate - * The predicate that is the head of some {@link IRule}. - * @param bindingSet - * A set of bindings for that {@link IRule}. 
- * - * @return The new element. - * - * @throws IllegalArgumentException - * if any parameter is <code>null</code>. - * @throws IllegalStateException - * if the predicate is not fully bound given those bindings. - */ - E newElement(IPredicate<E> predicate, IBindingSet bindingSet); - - /** - * Return the class for the generic type of this relation. This information - * is used to dynamically create arrays of that generic type. - */ - Class<E> getElementClass(); - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -204,4 +204,8 @@ throw new UnsupportedOperationException(); } + public IKeyOrder<E> getKeyOrder(IPredicate<E> p) { + throw new UnsupportedOperationException(); + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexus.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexus.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexus.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -42,6 +42,8 @@ import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.btree.keys.DelegateSortKeyBuilder; +import com.bigdata.btree.keys.ISortKeyBuilder; import com.bigdata.config.Configuration; import com.bigdata.config.IValidator; import com.bigdata.config.IntegerValidator; @@ -52,9 +54,11 @@ import com.bigdata.journal.Journal; import com.bigdata.journal.TemporaryStore; import 
com.bigdata.mdi.PartitionLocator; +import com.bigdata.relation.AbstractRelation; import com.bigdata.relation.AbstractResource; import com.bigdata.relation.IMutableRelation; import com.bigdata.relation.IRelation; +import com.bigdata.relation.RelationFusedView; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAccessPath; @@ -70,6 +74,8 @@ import com.bigdata.service.DataService; import com.bigdata.service.IBigdataFederation; import com.bigdata.service.ndx.IClientIndex; +import com.bigdata.striterator.ChunkedConvertingIterator; +import com.bigdata.striterator.DistinctFilter; import com.bigdata.striterator.IChunkedOrderedIterator; /** @@ -305,9 +311,6 @@ */ public IRelation getHeadRelationView(final IPredicate pred) { -// if (pred == null) -// throw new IllegalArgumentException(); - if (pred.getRelationCount() != 1) throw new IllegalArgumentException(); @@ -316,78 +319,38 @@ final long timestamp = (getAction().isMutation() ? getWriteTimestamp() : getReadTimestamp(/*relationName*/)); - final IRelation relation = (IRelation) resourceLocator.locate( - relationName, timestamp); - - if(log.isDebugEnabled()) { - - log.debug("predicate: "+pred+", head relation: "+relation); - - } - - return relation; - + return (IRelation<?>) resourceLocator.locate(relationName, timestamp); + } -// /** -// * The tail relations are the views from which we read. This method depends -// * solely on the name(s) of the relation(s) and the timestamp of interest -// * for the view. -// * -// * @todo we can probably get rid of the cache used by this method now that -// * calling this method has been factored out of the join loops. 
-// */ -// @SuppressWarnings("unchecked") -// public IRelation getTailRelationView(final IPredicate pred) { -// -//// if (pred == null) -//// throw new IllegalArgumentException(); -// -// final int nsources = pred.getRelationCount(); -// -// final IRelation relation; -// -// if (nsources == 1) { -// -// final String relationName = pred.getOnlyRelationName(); -// -// relation = (IRelation) resourceLocator.locate(relationName, -// readTimestamp); -// -// } else if (nsources == 2) { -// -// final String relationName0 = pred.getRelationName(0); -// -// final String relationName1 = pred.getRelationName(1); -// -//// final long timestamp0 = getReadTimestamp(/*relationName0*/); -//// -//// final long timestamp1 = getReadTimestamp(/*relationName1*/); -// -// final IRelation relation0 = (IRelation) resourceLocator.locate( -// relationName0, readTimestamp);//timestamp0); -// -// final IRelation relation1 = (IRelation) resourceLocator.locate( -// relationName1, readTimestamp);//timestamp1); -// -// relation = new RelationFusedView(relation0, relation1).init(); -// -// } else { -// -// throw new UnsupportedOperationException(); -// -// } -// -// if(log.isDebugEnabled()) { -// -// log.debug("predicate: "+pred+", tail relation: "+relation); -// -// } -// -// return relation; -// -// } + @SuppressWarnings("unchecked") + public IRelation getTailRelationView(final IPredicate pred) { + final int nsources = pred.getRelationCount(); + + if (nsources == 1) { + + return (IRelation) resourceLocator.locate(pred + .getOnlyRelationName(), getReadTimestamp()); + + } else if (nsources == 2) { + + final IRelation<?> relation0 = (IRelation) resourceLocator.locate( + pred.getRelationName(0), readTimestamp); + + final IRelation<?> relation1 = (IRelation) resourceLocator.locate( + pred.getRelationName(1), readTimestamp); + + return new RelationFusedView(relation0, relation1).init(); + + } else { + + throw new UnsupportedOperationException(); + + } + + } + /** * @deprecated by {@link 
#getTailAccessPath(IRelation, IPredicate)} * @@ -402,94 +365,43 @@ } -// /** -// * When {@link #backchain} is <code>true</code> and the tail predicate is -// * reading on the {@link SPORelation}, then the {@link IAccessPath} is -// * wrapped so that the iterator will visit the backchained inferences as -// * well. On the other hand, if {@link IPredicate#getPartitionId()} is -// * defined (not <code>-1</code>) then the returned access path will be for -// * the specified shard using the data service local index manager ( -// * {@link #indexManager} MUST be the data service local index manager for -// * this case) and expanders WILL NOT be applied (they require a view of the -// * total relation, not just a shard). -// * -// * @see InferenceEngine -// * @see BackchainAccessPath -// * -// * @todo consider encapsulating the {@link IRangeCountFactory} in the -// * returned access path for non-exact range count requests. this will -// * make it slightly harder to write the unit tests for the -// * {@link IEvaluationPlanFactory} -// */ -// public IAccessPath getTailAccessPath(final IRelation relation, -// final IPredicate predicate) { -// -// if (predicate.getPartitionId() != -1) { -// -// /* -// * Note: This handles a read against a local index partition. For -// * scale-out, the [indexManager] will be the data service's local -// * index manager. -// * -// * Note: Expanders ARE NOT applied in this code path. Expanders -// * require a total view of the relation, which is not available -// * during scale-out pipeline joins. Likewise, the [backchain] -// * property will be ignored since it is handled by an expander. -// * -// * @todo If getAccessPathForIndexPartition() is raised into the -// * IRelation interface, then we can get rid of the cast to the -// * SPORelation implementation. 
-// */ -// -// return ((SPORelation) relation).getAccessPathForIndexPartition( -// indexManager, predicate); -// -// } -// -//// // Find the best access path for the predicate for that relation. -// IAccessPath accessPath = relation.getAccessPath(predicate); -//// -//// if (predicate.getPartitionId() != -1) { -//// -//// /* -//// * Note: The expander can not run against a shard since it assumes -//// * access to the full key range of the index. Expanders are -//// * convenient and work well for stand alone indices, but they should -//// * be replaced by rule rewrites for scale-out. -//// */ -//// -//// return accessPath; -//// -//// } -// + public IAccessPath getTailAccessPath(final IRelation relation, + final IPredicate predicate) { + + if (predicate.getPartitionId() != -1) { + + /* + * Note: This handles a read against a local index partition. For + * scale-out, the [indexManager] will be the data service's local + * index manager. + * + * Note: Expanders ARE NOT applied in this code path. Expanders + * require a total view of the relation, which is not available + * during scale-out pipeline joins. Likewise, the [backchain] + * property will be ignored since it is handled by an expander. + */ + + return ((AbstractRelation<?>) relation) + .getAccessPathForIndexPartition(indexManager, predicate); + + } + + // Find the best access path for the predicate for that relation. + final IAccessPath<?> accessPath = relation.getAccessPath(predicate); + + // Note: No expanders for bops, at least not right now. // final ISolutionExpander expander = predicate.getSolutionExpander(); // // if (expander != null) { // -// // allow the predicate to wrap the access path : @todo caching on AP? 
+// // allow the predicate to wrap the access path // accessPath = expander.getAccessPath(accessPath); // // } -// -// if(backchain && relation instanceof SPORelation) { -// -// if (expander == null || expander.backchain()) { -// -// final SPORelation spoRelation = (SPORelation)relation; -// -// accessPath = new BackchainAccessPath( -// spoRelation.getContainer(), accessPath, -// joinNexusFactory.isOwlSameAsUsed ? Boolean.TRUE -// : Boolean.FALSE); -// -// } -// -// } -// -// // return that access path. -// return accessPath; -// -// } + + // return that access path. + return accessPath; + } public Iterator<PartitionLocator> locatorScan( final AbstractScaleOutFederation<?> fed, @@ -505,15 +417,8 @@ * Find the best access path for the predicate for that relation. * * Note: All we really want is the [fromKey] and [toKey] for that - * predicate and index. In general, that information is available from - * IKeyOrder#getFromKey() and IKeyOrder#getToKey(). However, we also - * need to know whether quads or triples are being used for RDF and that - * information is carried by the AbstractTripleStore container or the - * SPORelation. - * - * Note: This MUST NOT layer on expander or backchain access path - * overlays. Those add overhead during construction and the layering - * also hides the [fromKey] and [toKey]. + * predicate and index. This MUST NOT layer on expanders since the + * layering also hides the [fromKey] and [toKey]. */ @SuppressWarnings("unchecked") final AccessPath<?> accessPath = (AccessPath<?>) relation @@ -614,54 +519,6 @@ } -// /** -// * FIXME unit tests for DISTINCT with a head and ELEMENT, with bindings and -// * a head, with bindings but no head, and with a head but no bindings -// * (error). See {@link #runQuery(IStep)} -// * -// * FIXME unit tests for SORT with and without DISTINCT and with the various -// * combinations used in the unit tests for DISTINCT. 
Note that SORT, unlike -// * DISTINCT, requires that all solutions are materialized before any -// * solutions can be returned to the caller. A lot of optimization can be -// * done for SORT implementations, including merge sort of large blocks (ala -// * map/reduce), using compressed sort keys or word sort keys with 2nd stage -// * disambiguation, etc. -// * -// * FIXME Add property for sort {ascending,descending,none} to {@link IRule}. -// * The sort order can also be specified in terms of a sequence of variables. -// * The choice of the variable order should be applied here. -// * -// * FIXME The properties that govern the Unicode collator for the generated -// * sort keys should be configured by the {@link RDFJoinNexusFactory}. In -// * particular, Unicode should be handled however it is handled for the -// * {@link LexiconRelation}. -// */ -// public ISortKeyBuilder<IBindingSet> newBindingSetSortKeyBuilder(final IRule rule) { -// -// final IKeyBuilder keyBuilder = KeyBuilder.newUnicodeInstance(); -// -// final int nvars = rule.getVariableCount(); -// -// final IVariable[] vars = new IVariable[nvars]; -// -// { -// -// final Iterator<IVariable> itr = rule.getVariables(); -// -// int i = 0; -// -// while (itr.hasNext()) { -// -// vars[i++] = itr.next(); -// -// } -// -// } -// -// return new BindingSetSortKeyBuilder(keyBuilder, vars); -// -// } - /** * FIXME Custom serialization for solution sets, especially since there * tends to be a lot of redundancy in the data arising from how bindings are @@ -807,201 +664,38 @@ } -// /** -// * Buffer writes on {@link IMutableRelation#insert(IChunkedIterator)} when it is -// * {@link #flush() flushed}. 
-// * -// * @author <a href="mailto:tho...@us...">Bryan Thompson</a> -// * @version $Id$ -// * @param <E> -// */ -// public static class InsertSPOAndJustificationBuffer<E> extends AbstractSolutionBuffer<E> { -// -// /** -// * @param capacity -// * @param relation -// */ -// public InsertSPOAndJustificationBuffer(final int capacity, -// final IMutableRelation<E> relation) { -// -// super(capacity, relation); -// -// } -// -// @Override -// protected long flush(final IChunkedOrderedIterator<ISolution<E>> itr) { -// -// try { -// -// /* -// * The mutation count is the #of SPOs written (there is one -// * justification written per solution generated, but the -// * mutation count does not reflect duplicate justifications - -// * only duplicate statements). -// * -// * Note: the optional filter for the ctor was already applied. -// * If an element/solution was rejected, then it is not in the -// * buffer and we will never see it during flush(). -// */ -// -// long mutationCount = 0; -// -// while (itr.hasNext()) { -// -// final ISolution<E>[] chunk = itr.nextChunk(); -// -// mutationCount += writeChunk(chunk); -// -// } -// -// return mutationCount; -// -// } finally { -// -// itr.close(); -// -// } -// -// } -// -// private long writeChunk(final ISolution<E>[] chunk) { -// -// final int n = chunk.length; -// -// if(log.isDebugEnabled()) -// log.debug("chunkSize="+n); -// -// final long begin = System.currentTimeMillis(); -// -// final SPO[] a = new SPO[ n ]; -// -// final Justification[] b = new Justification[ n ]; -// -// for(int i=0; i<chunk.length; i++) { -// -// if(log.isDebugEnabled()) { -// -// log.debug("chunk["+i+"] = "+chunk[i]); -// -// } -// -// final ISolution<SPO> solution = (ISolution<SPO>) chunk[i]; -// -// a[i] = solution.get(); -// -// b[i] = new Justification(solution); -// -// } -// -// final SPORelation r = (SPORelation) (IMutableRelation) getRelation(); -// -// /* -// * Use a thread pool to write out the statement and the -// * justifications 
concurrently. This drammatically reduces the -// * latency when also writing justifications. -// */ -// -// final List<Callable<Long>> tasks = new ArrayList<Callable<Long>>(2); -// -// /* -// * Note: we reject using the filter before stmts or justifications -// * make it into the buffer so we do not need to apply the filter -// * again here. -// */ -// -// tasks.add(new Callable<Long>(){ -// public Long call() { -// return r.insert(a,a.length,null/*filter*/); -// } -// }); -// -// tasks.add(new Callable<Long>(){ -// public Long call() { -// return r -// .addJustifications(new ChunkedArrayIterator<Justification>( -// b.length, b, null/* keyOrder */)); -// } -// }); -// -// final List<Future<Long>> futures; -// -// /* -// * @todo The timings for the tasks that we run here are not being -// * reported up to this point. -// */ -// final long mutationCount; -// try { -// -// futures = r.getExecutorService().invokeAll(tasks); -// -// mutationCount = futures.get(0).get(); -// -// futures.get(1).get(); -// -// } catch (InterruptedException ex) { -// -// throw new RuntimeException(ex); -// -// } catch (ExecutionException ex) { -// -// throw new RuntimeException(ex); -// -// } -// -// final long elapsed = System.currentTimeMillis() - begin; -// -// if (log.isInfoEnabled()) -// log.info("Wrote " + mutationCount -// + " statements and justifications in " -// + elapsed + "ms"); -// -// return mutationCount; -// -// } -// -// } - -// /** -// * Note: {@link #getSolutionFilter()} is applied by -// * {@link #newUnsynchronizedBuffer(IBuffer, int)} and NOT by the buffer -// * returned by this method. 
-// */ -// @SuppressWarnings("unchecked") -// public IBuffer<ISolution[]> newInsertBuffer(final IMutableRelation relation) { -// -// if (getAction() != ActionEnum.Insert) -// throw new IllegalStateException(); -// -// if (log.isDebugEnabled()) { -// -// log.debug("relation=" + relation); -// -// } -// -// if(justify) { -// -// /* -// * Buffer knows how to write the computed elements on the statement -// * indices and the computed binding sets on the justifications -// * indices. -// */ -// -// return new InsertSPOAndJustificationBuffer(chunkOfChunksCapacity, -// relation); -// -// } -// -// /* -// * Buffer resolves the computed elements and writes them on the -// * statement indices. -// */ -// -// return new AbstractSolutionBuffer.InsertSolutionBuffer( -// chunkOfChunksCapacity, relation); -// -// } + /** + * {@inheritDoc} + * <p> + * Note: {@link #getSolutionFilter()} is applied by + * {@link #newUnsynchronizedBuffer(IBuffer, int)} and NOT by the buffer + * returned by this method. + */ + @SuppressWarnings("unchecked") + public IBuffer<ISolution[]> newInsertBuffer(final IMutableRelation relation) { + if (getAction() != ActionEnum.Insert) + throw new IllegalStateException(); + + if (log.isDebugEnabled()) { + + log.debug("relation=" + relation); + + } + + /* + * Buffer resolves the computed elements and writes them on the + * statement indices. + */ + + return new AbstractSolutionBuffer.InsertSolutionBuffer( + chunkOfChunksCapacity, relation); + + } + /** + * {@inheritDoc} + * <p> * Note: {@link #getSolutionFilter()} is applied by * {@link #newUnsynchronizedBuffer(IBuffer, int)} and NOT by the buffer * returned by this method. 
@@ -1023,117 +717,125 @@ } -// @SuppressWarnings("unchecked") -// public IChunkedOrderedIterator<ISolution> runQuery(final IStep step) -// throws Exception { -// -// if (step == null) -// throw new IllegalArgumentException(); -// -// if(log.isInfoEnabled()) -// log.info("program="+step.getName()); -// -// if(isEmptyProgram(step)) { -// -// log.warn("Empty program"); -// -// return (IChunkedOrderedIterator<ISolution>) new EmptyProgramTask( -// ActionEnum.Query, step).call(); -// -// } -// -// final IChunkedOrderedIterator<ISolution> itr = (IChunkedOrderedIterator<ISolution>) runProgram( -// ActionEnum.Query, step); -// -// if (step.isRule() && ((IRule) step).getQueryOptions().isDistinct()) { -// -// /* -// * Impose a DISTINCT constraint based on the variable bindings -// * selected by the head of the rule. The DistinctFilter will be -// * backed by a TemporaryStore if more than one chunk of solutions is -// * generated. That TemporaryStore will exist on the client where -// * this method (runQuery) was executed. The TemporaryStore will be -// * finalized and deleted when it is no longer referenced. -// */ -// -// final ISortKeyBuilder<ISolution> sortKeyBuilder; -// -// if (((IRule) step).getHead() != null -// && (solutionFlags & ELEMENT) != 0) { -// -// /* -// * Head exists and elements are requested, so impose DISTINCT -// * based on the materialized elements. -// * -// * FIXME The SPOSortKeyBuilder should be obtained from the head -// * relation. Of course there is one sort key for each access -// * path, but for the purposes of DISTINCT we want the sort key -// * to correspond to the notion of a "primary key" (the -// * distinctions that matter) and it does not matter which sort -// * order but the SPO sort order probably has the least factor of -// * "surprise". 
-// */ -// -// final int arity = ((IRule)step).getHead().arity(); -// -// sortKeyBuilder = new DelegateSortKeyBuilder<ISolution, ISPO>( -// new SPOSortKeyBuilder(arity)) { -// -// protected ISPO resolve(ISolution solution) { -// -// return (ISPO) solution.get(); -// -// } -// -// }; -// -// } else { -// -// if ((solutionFlags & BINDINGS) != 0) { -// -// /* -// * Bindings were requested so impose DISTINCT based on those -// * bindings. -// */ -// -// sortKeyBuilder = new DelegateSortKeyBuilder<ISolution, IBindingSet>( -// newBindingSetSortKeyBuilder((IRule) step)) { -// -// protected IBindingSet resolve(ISolution solution) { -// -// return solution.getBindingSet(); -// -// } -// -// }; -// -// } else { -// -// throw new UnsupportedOperationException( -// "You must specify BINDINGS since the rule does not have a head: " -// + step); -// -// } -// -// } -// -// return new ChunkedConvertingIterator<ISolution, ISolution>(itr, -// new DistinctFilter<ISolution>(indexManager) { -// -// protected byte[] getSortKey(ISolution e) { -// -// return sortKeyBuilder.getSortKey(e); -// -// } -// -// }); -// -// } -// -// return itr; -// -// } + /** + * Return the {@link ISortKeyBuilder} used to impose DISTINCT on the + * solutions generated by a query. + * + * @param head + * The head of the rule. + * + * @return The {@link ISortKeyBuilder}. + * + * @todo This should be based on bop annotations and a hash table for + * distinct unless it is very high volume and you can wait for the + * first result, in which case a SORT should be selected. For high + * volume with low latency to the first result, use a persistent hash + * table on a temporary store. 
+ */ + abstract protected ISortKeyBuilder<?> newSortKeyBuilder( + final IPredicate<?> head); + @SuppressWarnings("unchecked") + public IChunkedOrderedIterator<ISolution> runQuery(final IStep step) + throws Exception { + + if (step == null) + throw new IllegalArgumentException(); + + if(log.isInfoEnabled()) + log.info("program="+step.getName()); + + if(isEmptyProgram(step)) { + + log.warn("Empty program"); + + return (IChunkedOrderedIterator<ISolution>) new EmptyProgramTask( + ActionEnum.Query, step).call(); + + } + + final IChunkedOrderedIterator<ISolution> itr = (IChunkedOrderedIterator<ISolution>) runProgram( + ActionEnum.Query, step); + + if (step.isRule() && ((IRule) step).getQueryOptions().isDistinct()) { + + /* + * Impose a DISTINCT constraint based on the variable bindings + * selected by the head of the rule. The DistinctFilter will be + * backed by a TemporaryStore if more than one chunk of solutions is + * generated. That TemporaryStore will exist on the client where + * this method (runQuery) was executed. The TemporaryStore will be + * finalized and deleted when it is no longer referenced. + */ + + final ISortKeyBuilder<ISolution> sortKeyBuilder; + + if (((IRule) step).getHead() != null + && (solutionFlags & ELEMENT) != 0) { + + /* + * Head exists and elements are requested, so impose DISTINCT + * based on the materialized elements. + */ + + sortKeyBuilder = new DelegateSortKeyBuilder( + newSortKeyBuilder(((IRule) step).getHead())) { + + protected Object resolve(Object solution) { + + return ((ISolution) solution).get(); + + } + + }; + + } else { + + if ((solutionFlags & BINDINGS) != 0) { + + /* + * Bindings were requested so impose DISTINCT based on those + * bindings. 
+ */ + + sortKeyBuilder = new DelegateSortKeyBuilder<ISolution, IBindingSet>( + newBindingSetSortKeyBuilder((IRule) step)) { + + protected IBindingSet resolve(ISolution solution) { + + return solution.getBindingSet(); + + } + + }; + + } else { + + throw new UnsupportedOperationException( + "You must specify BINDINGS since the rule does not have a head: " + + step); + + } + + } + + return new ChunkedConvertingIterator<ISolution, ISolution>(itr, + new DistinctFilter<ISolution>(indexManager) { + + protected byte[] getSortKey(ISolution e) { + + return sortKeyBuilder.getSortKey(e); + + } + + }); + + } + + return itr; + + } + final public long runMutation(final IStep step) throws Exception { if (step == null) @@ -1284,4 +986,101 @@ // // } +// /** +// * Return <code>true</code> if the <i>relationName</i> is on a +// * {@link TempTripleStore} +// * +// * @todo Rather than parsing the relation name, it would be better to have +// * the temporary store UUIDs explicitly declared. +// */ +// protected boolean isTempStore(String relationName) { +// +// /* This is a typical UUID-based temporary store relation name. +// * +// * 1 2 3 +// * 01234567890123456789012345678901234567 +// * 81ad63b9-2172-45dc-bd97-03b63dfe0ba0kb.spo +// */ +// +// if (relationName.length() > 37) { +// +// /* +// * Could be a relation on a temporary store. +// */ +// if ( relationName.charAt( 8) == '-' // +// && relationName.charAt(13) == '-' // +// && relationName.charAt(18) == '-' // +// && relationName.charAt(23) == '-' // +// && relationName.charAt(38) == '.' // +// ) { +// +// /* +// * Pretty certain to be a relation on a temporary store. +// */ +// +// return true; +// +// } +// +// } +// +// return false; +// +// } + +//// /** +//// * A per-relation reentrant read-write lock allows either concurrent readers +//// * or an writer on the unisolated view of a relation. 
When we use this lock +//// * we also use {@link ITx#UNISOLATED} reads and writes and +//// * {@link #makeWriteSetsVisible()} is a NOP. +//// */ +//// final private static boolean useReentrantReadWriteLockAndUnisolatedReads = true; +// +// public long getReadTimestamp(String relationName) { +// +//// if (useReentrantReadWriteLockAndUnisolatedReads) { +// +//// if (action.isMutation()) { +//// +//// assert readTimestamp == ITx.UNISOLATED : "readTimestamp="+readTimestamp; +//// +//// } +// +// return readTimestamp; +// +//// } else { +//// +//// /* +//// * When the relation is the focusStore choose {@link ITx#UNISOLATED}. +//// * Otherwise choose whatever was specified to the +//// * {@link RDFJoinNexusFactory}. This is because we avoid doing a +//// * commit on the focusStore and instead just use its UNISOLATED +//// * indices. This is more efficient since they are already buffered +//// * and since we can avoid touching disk at all for small data sets. +//// */ +//// +//// if (isTempStore(relationName)) { +//// +//// return ITx.UNISOLATED; +//// +//// } +//// +//// if (lastCommitTime != 0L && action.isMutation()) { +//// +//// /* +//// * Note: This advances the read-behind timestamp for a local +//// * Journal configuration without the ConcurrencyManager (the +//// * only scenario where we do an explicit commit).
+//// */ +//// +//// return TimestampUtility.asHistoricalRead(lastCommitTime); +//// +//// } +//// +//// return readTimestamp; +//// +//// } +// +// } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -253,16 +253,6 @@ final protected static transient Logger log = Logger .getLogger(FullTextIndex.class); -// /** -// * True iff the {@link #log} level is INFO or less. -// */ -// final protected static boolean INFO = log.isInfoEnabled(); -// -// /** -// * True iff the {@link #log} level is DEBUG or less. -// */ -// final protected static boolean DEBUG = log.isDebugEnabled(); - /** * The backing index. */ @@ -1376,5 +1366,9 @@ public IKeyOrder getPrimaryKeyOrder() { throw new UnsupportedOperationException(); } - + + public IKeyOrder getKeyOrder(IPredicate p) { + throw new UnsupportedOperationException(); + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/MockJoinNexus.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/MockJoinNexus.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/MockJoinNexus.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -33,17 +33,10 @@ import com.bigdata.bop.Var; import com.bigdata.btree.keys.ISortKeyBuilder; import com.bigdata.journal.IIndexManager; -import com.bigdata.relation.IMutableRelation; -import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.IAccessPath; -import com.bigdata.relation.accesspath.IBuffer; import com.bigdata.relation.rule.IRule; -import com.bigdata.relation.rule.IStep; import 
com.bigdata.relation.rule.eval.AbstractJoinNexus; import com.bigdata.relation.rule.eval.IJoinNexus; import com.bigdata.relation.rule.eval.IJoinNexusFactory; -import com.bigdata.relation.rule.eval.ISolution; -import com.bigdata.striterator.IChunkedOrderedIterator; /** * Mock object. @@ -53,9 +46,11 @@ */ class MockJoinNexus extends AbstractJoinNexus implements IJoinNexus { - protected MockJoinNexus(IJoinNexusFactory joinNexusFactory, - IIndexManager indexManager) { + protected MockJoinNexus(final IJoinNexusFactory joinNexusFactory, + final IIndexManager indexManager) { + super(joinNexusFactory, indexManager); + } public IConstant fakeBinding(IPredicate predicate, Var var) { @@ -63,30 +58,15 @@ return null; } - public IAccessPath getTailAccessPath(IRelation relation, IPredicate pred) { - // TODO Auto-generated method stub - return null; - } - - public IRelation getTailRelationView(IPredicate pred) { - // TODO Auto-generated method stub - return null; - } - public ISortKeyBuilder<IBindingSet> newBindingSetSortKeyBuilder(IRule rule) { // TODO Auto-generated method stub return null; } - public IBuffer<ISolution[]> newInsertBuffer(IMutableRelation relation) { + @Override + protected ISortKeyBuilder<?> newSortKeyBuilder(IPredicate<?> head) { // TODO Auto-generated method stub return null; } - - public IChunkedOrderedIterator<ISolution> runQuery(IStep step) - throws Exception { - // TODO Auto-generated method stub - return null; - } - + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -36,13 +36,10 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; import com.bigdata.btree.IIndex; -import com.bigdata.btree.IRangeQuery; import 
com.bigdata.btree.IndexMetadata; import com.bigdata.journal.IIndexManager; import com.bigdata.relation.AbstractRelation; import com.bigdata.relation.IMutableRelation; -import com.bigdata.relation.accesspath.AccessPath; -import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.locator.ILocatableResource; import com.bigdata.striterator.AbstractKeyOrder; import com.bigdata.striterator.IChunkedOrderedIterator; @@ -161,6 +158,12 @@ } + public IKeyOrder<E> getKeyOrder(final IPredicate<E> p) { + + return primaryKeyOrder; + + } + /** * Simple insert procedure works fine for a local journal. */ @@ -219,27 +222,4 @@ } - public IAccessPath<E> getAccessPath(final IPredicate<E> predicate) { - - // assume the key order (there is only one) vs looking @ predicate. - final IKeyOrder<E> keyOrder = primaryKeyOrder; - - // get the corresponding index. - final IIndex ndx = getIndex(keyOrder); - - // default flags. - final int flags = IRangeQuery.DEFAULT; - - final AccessPath<E> accessPath = new AccessPath<E>( - this/* relation */, getIndexManager(), getTimestamp(), - predicate, keyOrder, ndx, flags, getChunkOfChunksCapacity(), - getChunkCapacity(), getFullyBufferedReadThreshold()) { - }; - - accessPath.init(); - - return accessPath; - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicateAccessPath.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicateAccessPath.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicateAccessPath.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -224,10 +224,6 @@ * (we can join with an incoming binding set easily enough using * only a single primary index), distincts, selecting only * certain columns, etc. - * - * @todo This is failing because the MockJoinNexus does not have - * the necessary stuff to resolve the relation. 
Is it time to - * clean IJoinNexus up? */ { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/locator/TestDefaultResourceLocator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/locator/TestDefaultResourceLocator.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/locator/TestDefaultResourceLocator.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -339,6 +339,11 @@ // TODO Auto-generated method stub return null; } + + public IKeyOrder getKeyOrder(IPredicate p) { + // TODO Auto-generated method stub + return null; + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -2707,5 +2707,14 @@ return lexiconConfiguration; } - + + public IKeyOrder<BigdataValue> getKeyOrder(final IPredicate<BigdataValue> p) { + if (p.get(0/* term */).isConstant()) { + return LexiconKeyOrder.TERM2ID; + } else if (p.get(1/* id */).isConstant()) { + return LexiconKeyOrder.ID2TERM; + } + return null; + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java 2010-08-20 20:43:44 UTC (rev 3455) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java 2010-08-21 01:16:12 UTC (rev 3456) @@ -44,7 +44,6 @@ import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import 
com.bigdata.bop.Var; -import com.bigdata.btree.keys.DelegateSortKeyBuilder; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.ISortKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; @@ -56,14 +55,12 @@ import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.relation.rule.BindingSetSortKeyBuilder; -import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; import com.bigdata.rdf.spo.SPORelation; import com.bigdata.rdf.spo.SPOSortKeyBuilder; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.relation.IMutableRelation; import com.bigdata.relation.IRelation; -import com.bigdata.relation.RelationFusedView; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBuffer; @@ -75,7 +72,6 @@ import com.bigdata.relation.rule.eval.AbstractJoinNexus; import com.bigdata.relation.rule.eval.AbstractSolutionBuffer; import com.bigdata.relation.rule.eval.ActionEnum; -import com.bigdata.relation.rule.eval.EmptyProgramTask; import com.bigdata.relation.rule.eval.IEvaluationPlanFactory; import com.bigdata.relation.rule.eval.IJoinNexus; import com.bigdata.relation.rule.eval.IRangeCountFactory; @@ -83,10 +79,7 @@ import com.bigdata.relation.rule.eval.IRuleStatisticsFactory; import com.bigdata.relation.rule.eval.ISolution; import com.bigdata.relation.rule.eval.RuleStats; -import com.bigdata.service.IBigdataFederation; import com.bigdata.striterator.ChunkedArrayIterator; -import com.bigdata.striterator.ChunkedConvertingIterator; -import com.bigdata.striterator.DistinctFilter; import com.bigdata.striterator.IChunkedIterator; import com.bigdata.striterator.IChunkedOrderedIterator; @@ -126,9 +119,6 @@ * thread-safe and that is designed to store a single chunk of elements, e.g., * in an array E[N]). * - * @todo add an {@link IBinding... [truncated message content] |
From: <tho...@us...> - 2010-08-27 14:28:13
|
Revision: 3466 http://bigdata.svn.sourceforge.net/bigdata/?rev=3466&view=rev Author: thompsonbry Date: 2010-08-27 14:28:04 +0000 (Fri, 27 Aug 2010) Log Message: ----------- Working through evaluation control for joins against the query engine. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpList.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/EmptyBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/HashBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IConstraint.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/QuoteOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/DistinctElementFilter.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Union.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bop-notes.txt branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQ.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/IN.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/NE.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/NEConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/OR.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/eval/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/AbstractSampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/SampleLocalBTree.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/SampleLocalShard.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/AbstractJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/IJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/ProgramTask.java branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/E.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/EComparator.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicateAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQ.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/AbstractRuleTestCase.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/eval/TestDefaultEvaluationPlan.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/OwlSameAsPropertiesExpandingIterator.java 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AbstractInlineConstraint.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineEQ.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineGE.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineGT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineLE.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineLT.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/InlineNE.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/IRISUtils.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RejectAnythingSameAsItself.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOStarJoin.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/spo/TestSPORelation.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/bench/NanoSparqlServer.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadBOpIdTypeException.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/DuplicateBOpIdException.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/NoSuchBOpException.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineStartOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/UnsyncLocalOutputBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BOpStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BufferService.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/Haltable.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/IQueryClient.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/IQueryPeer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/ManagedBufferService.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MapBindingSetsOverShards.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/PipelineDelayOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/QueryEngine.properties branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/ReceiveBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestCase3.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestHaltable.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestReceiveBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestReceiveFile.java Removed Paths: 
------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/IJoinMaster.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/JoinStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin2.java Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java 2010-08-27 13:54:57 UTC (rev 3465) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java 2010-08-27 14:28:04 UTC (rev 3466) @@ -1,240 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 16, 2010 - */ - -package com.bigdata.bop; - -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -import com.bigdata.bop.constraint.EQ; - -/** - * Abstract base class for {@link BOp}s. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -abstract public class AbstractBOp implements BOp { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * The argument values. - * <p> - * Note: This field is reported out as a {@link List} so we can make it - * thread safe and, if desired, immutable. However, it is internally a - * simple array and exposed to subclasses so they can benefit from fast - * positional access to the arguments in operations which would otherwise - * become hot, such as {@link EQ#accept(IBindingSet)}. - * <p> - * If we allow mutation of the arguments then caching of the arguments (or - * annotations) by classes such as {@link EQ} will cause {@link #clone()} to - * fail because (a) it will do a field-by-field copy on the concrete - * implementation class; and (b) it will not consistently update the cached - * references. In order to "fix" this problem, any classes which cache - * arguments or annotations would have to explicitly overrides - * {@link #clone()} in order to set those fields based on the arguments on - * the cloned {@link AbstractBOp} class. - */ - protected final BOp[] args; - - /** - * The operator annotations. - */ - protected final Map<String,Object> annotations; - - /** - * Check the operator argument. - * - * @param args - * The arguments. - * - * @throws IllegalArgumentException - * if the arguments are not valid for the operator. 
- */ - protected void checkArgs(final Object[] args) { - - } - - /** - * Deep copy clone semantics for {@link #args} and {@link #annotations}. - * <p> - * {@inheritDoc} - * - * @todo This will deep copy {@link BOp} structures but does not do a deep - * copy of other kinds of embedded structures. - * - * FIXME Object#clone() is copying the reference to the {@link #args} - * [] rather than allocating a new array. Likewise, it is copying the - * reference to the {@link #annotations} {@link Map} references. I am - * working on a deep copy constructor (below). clone() will have to be - * modified to use the deep copy constructor, which means resolving - * the right constructor by reflection given the specific {@link BOp} - * class -or- implementing clone() on each concrete Bop class and - * having it apply the deep copy constructor for itself. - */ - public AbstractBOp clone() { - try { - final AbstractBOp tmp = (AbstractBOp) super.clone(); - // deep copy the arguments. - { - final int arity = arity(); - for (int i = 0; i < arity; i++) { - tmp.args[i] = (BOp) (args[i].clone()); - } - } - // deep copy the annotations. - { - final Iterator<Map.Entry<String, Object>> itr = annotations - .entrySet().iterator(); - while (itr.hasNext()) { - final Map.Entry<String, Object> e = itr.next(); - if (e.getValue() instanceof BOp) { - tmp.annotations.put(e.getKey(), ((BOp) e.getValue()) - .clone()); - } - } - } - return tmp; - } catch (CloneNotSupportedException e) { - throw new AssertionError(e); - } - } - - /** - * Deep copy constructor. - * - * @param op - * - * @todo This will deep copy {@link BOp} structures (both operands and - * annotations) but does not do a deep copy of other kinds of embedded - * structures. - */ - protected AbstractBOp(final AbstractBOp op) { - args = new BOp[op.args.length]; - for (int i = 0; i < args.length; i++) { - args[i] = op.args[i].clone(); - } - annotations = new LinkedHashMap<String, Object>(op.annotations.size()); - // deep copy the annotations. 
- { - final Iterator<Map.Entry<String, Object>> itr = op.annotations - .entrySet().iterator(); - while (itr.hasNext()) { - final Map.Entry<String, Object> e = itr.next(); - if (e.getValue() instanceof BOp) { - annotations.put(e.getKey(), ((BOp) e.getValue()).clone()); - } - } - } - } - - /** - * @param args - * The arguments to the operator. - */ - protected AbstractBOp(final BOp[] args) { - - this(args, null/* annotations */); - - } - - /** - * @param args - * The arguments to the operator. - * @param annotations - * The annotations for the operator (optional). - */ - protected AbstractBOp(final BOp[] args, - final Map<String, Object> annotations) { - - if (args == null) - throw new IllegalArgumentException(); - - checkArgs(args); - - this.args = args; - - this.annotations = (annotations == null ? new LinkedHashMap<String, Object>() - : annotations); - - } - - final public Map<String, Object> annotations() { - - return Collections.unmodifiableMap(annotations); - - } - - public BOp get(final int index) { - - return args[index]; - - } - - public int arity() { - - return args.length; - - } - - final public List<BOp> args() { - - return Collections.unmodifiableList(Arrays.asList(args)); - - } - - /** - * Return the value of the named annotation. - * - * @param name - * The name of the annotation. - * @param defaultValue - * The default value. - * @return The annotation value -or- the <i>defaultValue</i> if the - * annotation was not bound. - * @param <T> - * The generic type of the annotation value. 
- */ - @SuppressWarnings("unchecked") - public <T> T getProperty(final String name, final T defaultValue) { - - if (!annotations.containsKey(name)) - return defaultValue; - - return (T) annotations.get(name); - - } - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java 2010-08-27 13:54:57 UTC (rev 3465) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java 2010-08-27 14:28:04 UTC (rev 3466) @@ -30,11 +30,13 @@ import java.util.Map; import java.util.concurrent.TimeUnit; +import com.bigdata.bop.ap.Predicate; + /** * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -abstract public class AbstractChunkedOrderedIteratorOp<E> extends AbstractBOp +abstract public class AbstractChunkedOrderedIteratorOp<E> extends BOpBase implements ChunkedOrderedIteratorOp<E> { /** @@ -54,6 +56,8 @@ } /** + * Required shallow copy constructor. + * * @param args * @param annotations */ @@ -64,6 +68,16 @@ } + /** + * Required deep copy constructor. 
+ * + * @param op + */ + public AbstractChunkedOrderedIteratorOp( + final AbstractChunkedOrderedIteratorOp<E> op) { + super(op); + } + protected int getChunkCapacity() { return getProperty(Annotations.CHUNK_CAPACITY, Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java 2010-08-27 13:54:57 UTC (rev 3465) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java 2010-08-27 14:28:04 UTC (rev 3466) @@ -30,6 +30,7 @@ import java.util.Map; import java.util.concurrent.TimeUnit; +import com.bigdata.bop.engine.BOpStats; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -37,7 +38,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -abstract public class AbstractPipelineOp<E> extends AbstractBOp implements +abstract public class AbstractPipelineOp<E> extends BOpBase implements PipelineOp<E> { /** @@ -45,11 +46,24 @@ */ private static final long serialVersionUID = 1L; - public interface Annotations extends BindingSetPipelineOp.Annotations { + public interface Annotations extends PipelineOp.Annotations { } - + /** + * Deep copy constructor. + * + * @param op + */ + protected AbstractPipelineOp(final AbstractPipelineOp<E> op) { + + super(op); + + } + + /** + * Shallow copy constructor. 
+ * * @param args * @param annotations */ @@ -60,21 +74,21 @@ } - protected int getChunkCapacity() { + public int getChunkCapacity() { return getProperty(Annotations.CHUNK_CAPACITY, Annotations.DEFAULT_CHUNK_CAPACITY); } - protected int getChunkOfChunksCapacity() { + public int getChunkOfChunksCapacity() { return getProperty(Annotations.CHUNK_OF_CHUNKS_CAPACITY, Annotations.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY); } - protected long getChunkTimeout() { + public long getChunkTimeout() { return getProperty(Annotations.CHUNK_TIMEOUT, Annotations.DEFAULT_CHUNK_TIMEOUT); @@ -86,6 +100,12 @@ */ protected static transient final TimeUnit chunkTimeoutUnit = TimeUnit.MILLISECONDS; + public BOpStats newStats() { + + return new BOpStats(); + + } + public IBlockingBuffer<E[]> newBuffer() { return new BlockingBuffer<E[]>(getChunkOfChunksCapacity(), Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java 2010-08-27 13:54:57 UTC (rev 3465) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java 2010-08-27 14:28:04 UTC (rev 3466) @@ -418,7 +418,7 @@ } - public boolean equals(IBindingSet o) { + public boolean equals(final IBindingSet o) { if (o == this) return true; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-08-27 13:54:57 UTC (rev 3465) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-08-27 14:28:04 UTC (rev 3466) @@ -45,8 +45,10 @@ * set of variables which are selected by a join or distributed hash table), or * those shared by all operators (such as a cost model). 
* <p> - * Operators are immutable, {@link Serializable} to facilitate distributed - * computing, and {@link Cloneable} to facilitate non-destructive tree rewrites. + * Operators are effectively immutable (mutation APIs always return a deep copy + * of the operator to which the mutation has been applied), {@link Serializable} + * to facilitate distributed computing, and {@link Cloneable} to facilitate + * non-destructive tree rewrites. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ @@ -73,12 +75,48 @@ */ List<BOp> args(); + /** A shallow copy of the operator's arguments. */ + BOp[] toArray(); + /** + * A shallow copy of the operator's arguments using the generic type of the + * caller's array. If the array has sufficient room, then the arguments are + * copied into the caller's array. If there is space remaining, a + * <code>null</code> is appended to mark the end of the data. + */ + <T> T[] toArray(final T[] a); + + /** * The operator's annotations. */ Map<String,Object> annotations(); /** + * Return the value of the named annotation. + * + * @param name + * The name of the annotation. + * @param defaultValue + * The default value. + * @return The annotation value -or- the <i>defaultValue</i> if the + * annotation was not bound. + * @param <T> + * The generic type of the annotation value. + */ + <T> T getProperty(final String name, final T defaultValue); + + /** + * Return the value of the named annotation. + * + * @param name + * The name of the annotation. + * + * @return The value of the annotation and <code>null</code> if the + * annotation is not bound. + */ + Object getProperty(final String name); + + /** * Deep copy clone of the operator. */ BOp clone(); @@ -87,13 +125,14 @@ * Interface declaring well known annotations. */ public interface Annotations { - - /** - * The unique identifier for a query. This is used to collect all - * runtime state for a query within the session on a node. 
- */ - String QUERY_ID = "queryId"; +// /** +// * A cross reference to the query identifier. This is required on +// * operators which associate distributed state with a query. [We can +// * probably get this from the evaluation context.] +// */ +// String QUERY_REF = "queryRef"; + /** * The unique identifier within a query for a specific {@link BOp}. The * {@link #QUERY_ID} and the {@link #BOP_ID} together provide a unique Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java (from rev 3455, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractBOp.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-08-27 14:28:04 UTC (rev 3466) @@ -0,0 +1,308 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 16, 2010 + */ + +package com.bigdata.bop; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import com.bigdata.bop.constraint.EQ; + +/** + * Abstract base class for {@link BOp}s. + * <p> + * <h2>Constructor patterns</h2> + * <p> + * {@link BOp}s should define the following public constructors + * <dl> + * <dt>{@link #BOpBase(BOp[], Map)}</dt> + * <dd>A shallow copy constructor. This is used when initializing a {@link BOp} + * from the caller's data or when generating a query plan from Prolog. There are + * some exceptions to this rule. For example, {@link Constant} does not define a + * shallow copy constructor because that would not provide a means to set the + * constant's value.</dd> + * <dt>{@link #BOpBase(BOpBase)}</dt> + * <dd>A deep copy constructor. Mutation methods make a deep copy of the + * {@link BOp}, apply the mutation to the copy, and then return the copy. This + * is the "effectively immutable" contract. Again, there are some exceptions. + * For example, {@link Var} provides a canonicalized mapping such that reference + * tests may be used to determine if two {@link Var}s are the same. 
In order to + * support that contract it overrides {@link Var#clone()}.</dd> + * </dl> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class BOpBase implements BOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * The argument values - <strong>direct access to this field is + * discouraged</strong> - the field is protected to support + * <em>mutation</em> APIs and should not be relied on for other purposes. + * <p> + * Note: This field is reported out as a {@link List} so we can make it + * thread safe and, if desired, immutable. However, it is internally a + * simple array and exposed to subclasses so they can implement mutation + * operations which return deep copies in which the argument values have + * been modified. + * <p> + * If we allow mutation of the arguments then caching of the arguments (or + * annotations) by classes such as {@link EQ} will cause {@link #clone()} to + * fail because (a) it will do a field-by-field copy on the concrete + * implementation class; and (b) it will not consistently update the cached + * references. In order to "fix" this problem, any classes which cache + * arguments or annotations would have to explicitly override + * {@link #clone()} in order to set those fields based on the arguments on + * the cloned {@link BOpBase} class. + */ + protected final BOp[] args; + + /** + * The operator annotations. + */ + protected final Map<String,Object> annotations; + + /** + * Check the operator arguments. + * + * @param args + * The arguments. + * + * @throws IllegalArgumentException + * if the arguments are not valid for the operator. + */ + protected void checkArgs(final Object[] args) { + + } + + /** + * Deep copy clone semantics for {@link #args} and {@link #annotations}. + * <p> + * {@inheritDoc} + */ + public BOpBase clone() { + final Class<? extends BOpBase> cls = getClass(); + final Constructor<? 
extends BOpBase> ctor; + try { + ctor = cls.getConstructor(new Class[] { cls }); + return ctor.newInstance(new Object[] { this }); + } catch (SecurityException e) { + throw new RuntimeException(e); + } catch (NoSuchMethodException e) { + throw new RuntimeException(e); + } catch (InstantiationException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InvocationTargetException e) { + throw new RuntimeException(e); + } + } + + /** + * Each {@link BOp} MUST implement a public copy constructor with the + * signature: + * + * <pre> + * public Foo(Foo) + * </pre> + * + * This construct is invoked by {@link #clone()} using reflection and is + * responsible for the deep copy semantics for the {@link BOp}. + * <p> + * The default implementation makes a deep copy of {@link #args()} and + * {@link #annotations()} but DOES NOT perform field-by-field copying. + * Subclasses may simply delegate the constructor to their super class + * unless they have additional fields which need to be copied. + * <p> + * This design pattern was selected because it preserves the immutable + * contract of the {@link BOp} which gives us our thread safety and + * visibility guarantees. Since the deep copy is realized by the {@link BOp} + * implementation classes, it is important that each class take + * responsibility for the deep copy semantics of any fields it may declare. + * + * @param op + * A deep copy will be made of this {@link BOp}. + * + * @throws NullPointerException + * if the argument is <code>null</code>. + */ + protected BOpBase(final BOpBase op) { + // deep copy the arguments. + args = deepCopy(op.args); + // deep copy the annotations. + annotations = deepCopy(op.annotations); + } + + /** + * @param args + * The arguments to the operator. + */ + protected BOpBase(final BOp[] args) { + + this(args, null/* annotations */); + + } + + /** + * @param args + * The arguments to the operator. 
+ * @param annotations + * The annotations for the operator (optional). + */ + protected BOpBase(final BOp[] args, + final Map<String, Object> annotations) { + + if (args == null) + throw new IllegalArgumentException(); + + checkArgs(args); + + this.args = args; + + this.annotations = (annotations == null ? new LinkedHashMap<String, Object>() + : annotations); + + } + + final public Map<String, Object> annotations() { + + return Collections.unmodifiableMap(annotations); + + } + + public BOp get(final int index) { + + return args[index]; + + } + + public int arity() { + + return args.length; + + } + + final public List<BOp> args() { + + return Collections.unmodifiableList(Arrays.asList(args)); + + } + + // shallow copy + public BOp[] toArray() { + + final BOp[] a = new BOp[args.length]; + + return Arrays.copyOf(args, args.length, a.getClass()); + + } + + // shallow copy + @SuppressWarnings("unchecked") + public <T> T[] toArray(final T[] a) { + if (a.length < args.length) + return (T[]) Arrays.copyOf(args, args.length, a.getClass()); + System.arraycopy(args, 0, a, 0, args.length); + if (a.length > args.length) + a[args.length] = null; + return a; + } + + /** deep copy the arguments. */ + static private BOp[] deepCopy(final BOp[] a) { + final BOp[] t = new BOp[a.length]; + for (int i = 0; i < a.length; i++) { + t[i] = a[i] == null ? null : a[i].clone(); + } + return t; + } + + /** + * Deep copy the annotations. + * <p> + * Note: This does not know how to deep copy annotations which are not + * {@link BOp}s or immutable objects such as {@link String}s or + * {@link Number}s. Such objects should not be used as annotations. + * + * @todo When attaching large data sets to a query plan they should be + * attached using a light weight reference object which allows them to + * be demanded by a node so deep copy remains a light weight + * operation. 
This also has the advantage that the objects are + * materialized on a node only when they are needed, which keeps the + * query plan small. Examples would be sending a temporary graph + * containing an ontology or some conditional assertions with a query + * plan. + */ + static private Map<String,Object> deepCopy(final Map<String,Object> a) { + // allocate map. + final Map<String, Object> t = new LinkedHashMap<String, Object>(a + .size()); + // copy map's entries. + final Iterator<Map.Entry<String, Object>> itr = a.entrySet().iterator(); + while (itr.hasNext()) { + final Map.Entry<String, Object> e = itr.next(); + if (e.getValue() instanceof BOp) { + // deep copy bop annotations. + t.put(e.getKey(), ((BOp) e.getValue()).clone()); + } else { + // shallow copy anything else. + t.put(e.getKey(), e.getValue()); + } + } + // return the copy. + return t; + } + + @SuppressWarnings("unchecked") + public <T> T getProperty(final String name, final T defaultValue) { + + if (!annotations.containsKey(name)) + return defaultValue; + + return (T) annotations.get(name); + + } + + public Object getProperty(final String name) { + + return annotations.get(name); + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-08-27 14:28:04 UTC (rev 3466) @@ -0,0 +1,434 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Aug 26, 2010 + */ +package com.bigdata.bop; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.btree.ILocalBTreeView; +import com.bigdata.journal.IIndexManager; +import com.bigdata.journal.ITx; +import com.bigdata.journal.TimestampUtility; +import com.bigdata.relation.AbstractRelation; +import com.bigdata.relation.IRelation; +import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.relation.locator.IResourceLocator; +import com.bigdata.relation.rule.IRule; +import com.bigdata.relation.rule.eval.IJoinNexus; +import com.bigdata.service.IBigdataFederation; + +/** + * The evaluation context for the operator (NOT serializable). + * + * @param <E> + * The generic type of the objects processed by the operator. + * + * @todo Make it easy to obtain another {@link BOpContext} in which the source + * or sink are different? E.g., for the evaluation of the right operand in + * a join. 
+ */ +public class BOpContext<E> { + + static private final Logger log = Logger.getLogger(BOpContext.class); + + private final IBigdataFederation<?> fed; + + private final IIndexManager indexManager; + + private final long readTimestamp; + + private final long writeTimestamp; + + private final int partitionId; + + private final BOpStats stats; + + private final IAsynchronousIterator<E[]> source; + + private final IBlockingBuffer<E[]> sink; + + private final IBlockingBuffer<E[]> sink2; + + /** + * The {@link IBigdataFederation} IFF the operator is being evaluated on an + * {@link IBigdataFederation}. When evaluating operations against an + * {@link IBigdataFederation}, this reference provides access to the + * scale-out view of the indices and to other bigdata services. + */ + public IBigdataFederation<?> getFederation() { + return fed; + } + + /** + * The <strong>local</strong> {@link IIndexManager}. Query evaluation occurs + * against the local indices. In scale-out, query evaluation proceeds shard + * wise and this {@link IIndexManager} MUST be able to read on the + * {@link ILocalBTreeView}. + */ + public final IIndexManager getIndexManager() { + return indexManager; + } + + /** + * The timestamp or transaction identifier against which the query is + * reading. + */ + public final long getReadTimestamp() { + return readTimestamp; + } + + /** + * The timestamp or transaction identifier against which the query is + * writing. + */ + public final long getWriteTimestamp() { + return writeTimestamp; + } + + /** + * The index partition identifier -or- <code>-1</code> if the index is not + * sharded. + */ + public final int getPartitionId() { + return partitionId; + } + + /** + * The object used to collect statistics about the evaluation of this + * operator. + */ + public final BOpStats getStats() { + return stats; + } + + /** + * Where to read the data to be consumed by the operator. 
+ */ + public final IAsynchronousIterator<E[]> getSource() { + return source; + } + + /** + * Where to write the output of the operator. + */ + public final IBlockingBuffer<E[]> getSink() { + return sink; + } + + /** + * Optional alternative sink for the output of the operator. This is used by + * things like SPARQL optional joins to route failed joins outside of the + * join group. + */ + public final IBlockingBuffer<E[]> getSink2() { + return sink2; + } + + /** + * + * @param fed + * The {@link IBigdataFederation} IFF the operator is being + * evaluated on an {@link IBigdataFederation}. When evaluating + * operations against an {@link IBigdataFederation}, this + * reference provides access to the scale-out view of the indices + * and to other bigdata services. + * @param indexManager + * The <strong>local</strong> {@link IIndexManager}. Query + * evaluation occurs against the local indices. In scale-out, + * query evaluation proceeds shard wise and this + * {@link IIndexManager} MUST be able to read on the + * {@link ILocalBTreeView}. + * @param readTimestamp + * The timestamp or transaction identifier against which the + * query is reading. + * @param writeTimestamp + * The timestamp or transaction identifier against which the + * query is writing. + * @param partitionId + * The index partition identifier -or- <code>-1</code> if the + * index is not sharded. + * @param stats + * The object used to collect statistics about the evaluation of + * this operator. + * @param source + * Where to read the data to be consumed by the operator. + * @param sink + * Where to write the output of the operator. + * @param sink2 + * Alternative sink for the output of the operator (optional). + * This is used by things like SPARQL optional joins to route + * failed joins outside of the join group. 
+ * + * @throws IllegalArgumentException + * if the <i>indexManager</i> is <code>null</code> + * @throws IllegalArgumentException + * if the <i>readTimestamp</i> is {@link ITx#UNISOLATED} + * (queries may not read on the unisolated indices). + * @throws IllegalArgumentException + * if the <i>writeTimestamp</i> is neither + * {@link ITx#UNISOLATED} nor a read-write transaction + * identifier. + * @throws IllegalArgumentException + * if the <i>stats</i> is <code>null</code> + * @throws IllegalArgumentException + * if the <i>source</i> is <code>null</code> (use an empty + * source if the source will be ignored). + * @throws IllegalArgumentException + * if the <i>sink</i> is <code>null</code> + */ + public BOpContext(final IBigdataFederation<?> fed, + final IIndexManager indexManager, final long readTimestamp, + final long writeTimestamp, final int partitionId, + final BOpStats stats, final IAsynchronousIterator<E[]> source, + final IBlockingBuffer<E[]> sink, final IBlockingBuffer<E[]> sink2) { + if (indexManager == null) + throw new IllegalArgumentException(); + if (readTimestamp == ITx.UNISOLATED) + throw new IllegalArgumentException(); + if (TimestampUtility.isReadOnly(writeTimestamp)) + throw new IllegalArgumentException(); + if (stats == null) + throw new IllegalArgumentException(); + if (source == null) + throw new IllegalArgumentException(); + if (sink == null) + throw new IllegalArgumentException(); + this.fed = fed; // may be null + this.indexManager = indexManager; + this.readTimestamp = readTimestamp; + this.writeTimestamp = writeTimestamp; + this.partitionId = partitionId; + this.stats = stats; + this.source = source; + this.sink = sink; + this.sink2 = sink2; // may be null + } + + /** + * Locate and return the view of the relation(s) identified by the + * {@link IPredicate}. + * <p> + * Note: This method is responsible for returning a fused view when more + * than one relation name was specified for the {@link IPredicate}. 
It + * SHOULD be used whenever the {@link IRelation} is selected based on a + * predicate in the tail of an {@link IRule} and could therefore be a fused + * view of more than one relation instance. (The head of the {@link IRule} + * must be a simple {@link IRelation} and not a view.) + * <p> + * Note: The implementation should choose the read timestamp for each + * relation in the view using {@link #getReadTimestamp()}. + * + * @param pred + * The {@link IPredicate}, which MUST be a tail from some + * {@link IRule}. + * + * @return The {@link IRelation}. + * + * @todo Replaces {@link IJoinNexus#getTailRelationView(IPredicate)}. In + * order to support mutation operators we will also have to pass in the + * {@link #writeTimestamp} or differentiate this in the method name. + */ + public IRelation getReadRelation(final IPredicate<?> pred) { + + return (IRelation) getIndexManager().getResourceLocator().locate( + pred.getOnlyRelationName(), getReadTimestamp()); + + } + + /** + * Obtain an access path reading from the relation for the specified predicate + * (from the tail of some rule). + * <p> + * Note that passing in the {@link IRelation} is important since it + * otherwise must be discovered using the {@link IResourceLocator}. By + * requiring the caller to resolve it beforehand and pass it into this + * method the contention and demand on the {@link IResourceLocator} cache is + * reduced. + * + * @param relation + * The relation. + * @param predicate + * The predicate. When {@link IPredicate#getPartitionId()} is + * set, the returned {@link IAccessPath} MUST read on the + * identified local index partition (directly, not via RMI). + * + * @return The access path. + * + * @todo replaces {@link IJoinNexus#getTailAccessPath(IRelation, IPredicate)}. 
+ */ + @SuppressWarnings("unchecked") + public IAccessPath<?> getAccessPath(final IRelation<?> relation, + final IPredicate<?> predicate) { + + if (predicate.getPartitionId() != -1) { + + /* + * Note: This handles a read against a local index partition. For + * scale-out, the [indexManager] will be the data service's local + * index manager. + * + * Note: Expanders ARE NOT applied in this code path. Expanders + * require a total view of the relation, which is not available + * during scale-out pipeline joins. Likewise, the [backchain] + * property will be ignored since it is handled by an expander. + */ + + return ((AbstractRelation<?>) relation) + .getAccessPathForIndexPartition(indexManager, + (IPredicate) predicate); + + } + + // Find the best access path for the predicate for that relation. + final IAccessPath<?> accessPath = relation + .getAccessPath((IPredicate) predicate); + + /* + * @todo No expanders for bops, at least not right now. They could be + * added in easily enough, which would support additional features for + * standalone query evaluation (runtime materialization of some + * entailments). + */ + // final ISolutionExpander expander = predicate.getSolutionExpander(); + // + // if (expander != null) { + // + // // allow the predicate to wrap the access path + // accessPath = expander.getAccessPath(accessPath); + // + // } + + // return that access path. + return accessPath; + } + + /** + * Binds variables from a visited element. + * <p> + * Note: The bindings are propagated before the constraints are verified so + * this method will have a side-effect on the bindings even if the + * constraints were not satisfied. Therefore you should clone the bindings + * before calling this method. + * + * @param pred + * The {@link IPredicate} from which the element was read. + * @param constraints + * The constraints which must be satisfied (optional). + * @param e + * An element materialized by the {@link IAccessPath} for that + * {@link IPredicate}. 
+ * @param bindings + * the bindings to which new bindings from the element will be + * applied. + * + * @return <code>true</code> unless the new bindings would violate any of + * the optional {@link IConstraint}s. + * + * @throws NullPointerException + * if an argument is <code>null</code>. + */ + final public boolean bind(final IPredicate<?> pred, + final IConstraint[] constraints, final Object e, + final IBindingSet bindings) { + + // propagate bindings from the visited object into the binding set. + copyValues((IElement) e, pred, bindings); + + if (constraints != null) { + + // verify constraint. + return isConsistent(constraints, bindings); + + } + + // no constraint. + return true; + + } + + /** + * Check constraints. + * + * @param constraints + * @param bindingSet + * + * @return <code>true</code> iff the constraints are satisfied. + */ + private boolean isConsistent(final IConstraint[] constraints, + final IBindingSet bindingSet) { + + for (int i = 0; i < constraints.length; i++) { + + final IConstraint constraint = constraints[i]; + + if (!constraint.accept(bindingSet)) { + + if (log.isDebugEnabled()) { + + log.debug("Rejected by " + + constraint.getClass().getSimpleName() + " : " + + bindingSet); + + } + + return false; + + } + + } + + return true; + + } + + @SuppressWarnings("unchecked") + final private void copyValues(final IElement e, final IPredicate<?> pred, + final IBindingSet bindingSet) { + + for (int i = 0; i < pred.arity(); i++) { + + final IVariableOrConstant<?> t = pred.get(i); + + if (t.isVar()) { + + final IVariable<?> var = (IVariable<?>) t; + + final Constant<?> newval = new Constant(e.get(i)); + + bindingSet.set(var, newval); + + } + + } + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpList.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpList.java 2010-08-27 13:54:57 UTC (rev 3465) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpList.java 2010-08-27 14:28:04 UTC (rev 3466) @@ -27,7 +27,6 @@ package com.bigdata.bop; -import java.util.Arrays; import java.util.Map; /** @@ -36,35 +35,31 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -public class BOpList extends AbstractBOp { +public class BOpList extends BOpBase { /** * */ private static final long serialVersionUID = 1L; - public BOpList(final BOp[] args) { - super(args); + /** + * Deep copy constructor. + */ + public BOpList(final BOpList op) { + super(op); } + /** + * Shallow copy constructor. + */ public BOpList(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); } - public BOp[] toArray() { - return args.clone(); + public BOpList(final BOp[] args) { + super(args); } - @SuppressWarnings("unchecked") - public <T> T[] toArray(final T[] a) { - if (a.length < args.length) - return (T[]) Arrays.copyOf(args, args.length, a.getClass()); - System.arraycopy(args, 0, a, 0, args.length); - if (a.length > args.length) - a[args.length] = null; - return a; - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-08-27 13:54:57 UTC (rev 3465) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-08-27 14:28:04 UTC (rev 3466) @@ -27,11 +27,16 @@ package com.bigdata.bop; -import java.util.Arrays; import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; +import com.bigdata.bop.BOp.Annotations; +import 
com.bigdata.btree.AbstractNode; +import cutthecrap.utils.striterators.Expander; import cutthecrap.utils.striterators.Filter; +import cutthecrap.utils.striterators.SingleValueIterator; import cutthecrap.utils.striterators.Striterator; /** @@ -39,26 +44,277 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * @todo In general recursive traversal iterators do not protect against loops + * in the operator tree, but see {@link #getIndex(BOp)}. */ public class BOpUtility { /** - * Return all variables recursively present whether in the operator tree or - * on {@link IConstraint}s attached to operators. + * Pre-order recursive visitation of the operator tree (arguments only, no + * annotations). */ @SuppressWarnings("unchecked") - public static Iterator<IVariable<?>> getSpannedVariables(final BOp op) { + public static Iterator<BOp> preOrderIterator(final BOp op) { - return new Striterator(getArgumentVariables(op)).append(new Striterator(op - .annotations().values().iterator()).addFilter(new Filter() { + return new Striterator(new SingleValueIterator(op)) + .append(preOrderIterator2(op)); + + } + + /** + * Visits the children (recursively) using pre-order traversal, but does + * NOT visit this node. + */ + @SuppressWarnings("unchecked") + static private Iterator<AbstractNode> preOrderIterator2(final BOp op) { + + /* + * Iterator visits the direct children, expanding them in turn with a + * recursive application of the pre-order iterator. + */ + + return new Striterator(op.args().iterator()).addFilter(new Expander() { + private static final long serialVersionUID = 1L; + /* + * Expand each child in turn. + */ + protected Iterator expand(final Object childObj) { + + /* + * A child of this node. + */ + + final BOp child = (BOp) childObj; + + if (child.arity() > 0) { + + /* + * The child is a Node (has children). + * + * Visit the children (recursive pre-order traversal). 
+ */ + + final Striterator itr = new Striterator( + new SingleValueIterator(child)); + + // append the child's descendants after the child (pre-order position). + itr.append(preOrderIterator2(child)); + + return itr; + + } else { + + /* + * The child is a leaf. + */ + + // Visit the leaf itself. + return new SingleValueIterator(child); + + } + } + }); + + } + + /** + * Post-order recursive visitation of the operator tree (arguments only, no + * annotations). + */ + @SuppressWarnings("unchecked") + public static Iterator<BOp> postOrderIterator(final BOp op) { + + return new Striterator(postOrderIterator2(op)) + .append(new SingleValueIterator(op)); + + } + + /** + * Visits the children (recursively) using post-order traversal, but does + * NOT visit this node. + */ + @SuppressWarnings("unchecked") + static private Iterator<AbstractNode> postOrderIterator2(final BOp op) { + + /* + * Iterator visits the direct children, expanding them in turn with a + * recursive application of the post-order iterator. + */ + + return new Striterator(op.args().iterator()).addFilter(new Expander() { + + private static final long serialVersionUID = 1L; + + /* + * Expand each child in turn. + */ + protected Iterator expand(final Object childObj) { + + /* + * A child of this node. + */ + + final BOp child = (BOp) childObj; + + if (child.arity() > 0) { + + /* + * The child is a Node (has children). + * + * Visit the children (recursive post-order traversal). + */ + + final Striterator itr = new Striterator( + postOrderIterator2(child)); + + // append this node in post-order position. + itr.append(new SingleValueIterator(child)); + + return itr; + + } else { + + /* + * The child is a leaf. + */ + + // Visit the leaf itself. + return new SingleValueIterator(child); + + } + } + }); + + } + + /** + * Visit all annotations which are {@link BOp}s (non-recursive). + * + * @param op + * An operator. + * + * @return An iterator which visits the {@link BOp} annotations in an + * arbitrary order. 
+ */ + @SuppressWarnings("unchecked") + public static Iterator<BOp> annotationOpIterator(final BOp op) { + + return new Striterator(op.annotations().values().iterator()) + .addFilter(new Filter() { + private static final long serialVersionUID = 1L; + + @Override + protected boolean isValid(Object arg0) { + return arg0 instanceof BOp; + } + }); + + } + +// /** +// * Pre-order traversal of the annotations of the operator which are +// * themselves operators without recursion through the children of the given +// * operator (the children of each annotation are visited, but the +// * annotations of annotations are not). +// * +// * @param op +// * An operator. +// * +// * @return An iterator which visits the pre-order traversal or the operator +// * annotations. +// */ +// @SuppressWarnings("unchecked") +// public static Iterator<BOp> annotationOpPreOrderIterator(final BOp op) { +// +// // visit the node's operator annotations. +// final Striterator itr = new Striterator(annotationOpIterator(op)); +// +// // expand each operator annotation with a pre-order traversal. +// itr.addFilter(new Expander() { +// private static final long serialVersionUID = 1L; +// +// @Override +// protected Iterator<?... [truncated message content] |
From: <tho...@us...> - 2010-08-31 16:21:56
|
Revision: 3472 http://bigdata.svn.sourceforge.net/bigdata/?rev=3472&view=rev Author: thompsonbry Date: 2010-08-31 16:21:47 +0000 (Tue, 31 Aug 2010) Log Message: ----------- Unit tests with zero, one, two join queries are running against the query engine. Folded in updates from mroy for DistinctSPOIterator and DefaultGraphSolutionExpander which reduce the cost of default graph queries when there are a large number of graphs in the "default graph" data set. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BOpStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/IQueryClient.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/DefaultGraphSolutionExpander.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/DistinctSPOIterator.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BindingSetChunk.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-08-30 20:18:18 UTC (rev 3471) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -141,6 +141,18 @@ */ String BOP_ID = "bopId"; + /** + * The timeout for the operator evaluation (milliseconds). + * + * @see #DEFAULT_TIMEOUT + */ + String TIMEOUT = "timeout"; + + /** + * The default timeout for operator evaluation. + */ + long DEFAULT_TIMEOUT = Long.MAX_VALUE; + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-08-30 20:18:18 UTC (rev 3471) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -305,4 +305,24 @@ } + public String toString() { + + final StringBuilder sb = new StringBuilder(); + sb.append(getClass().getName()); + sb.append("("); + for (int i = 0; i < args.length; i++) { + final BOp t = args[i]; + if (i > 0) + sb.append(','); + sb.append(t.getClass().getSimpleName()); + } + sb.append(")["); + final Integer id = (Integer) annotations.get(Annotations.BOP_ID); + if (id != null) + sb.append("Annotations.BOP_ID=" + id); + sb.append("]"); + return sb.toString(); + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-08-30 20:18:18 UTC (rev 3471) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -395,4 +395,51 @@ return map; } + /** + * Return the parent of the operator in the operator tree (this does not + * search the annotations, just the children). 
+ * <p> + * Note that {@link Var} is a singleton pattern for each distinct variable + * node, so there can be multiple parents for a {@link Var}. + * + * @param root + * The root of the operator tree (or at least a known ancestor of + * the operator). + * @param op + * The operator. + * + * @return The parent -or- <code>null</code> if <i>op</i> is not found in + * the operator tree. + * + * @throws IllegalArgumentException + * if either argument is <code>null</code>. + */ + static public BOp getParent(final BOp root, final BOp op) { + + if (root == null) + throw new IllegalArgumentException(); + + if (op == null) + throw new IllegalArgumentException(); + + final Iterator<BOp> itr = root.args().iterator(); + + while (itr.hasNext()) { + + final BOp current = itr.next(); + + if (current == op) + return root; + + final BOp found = getParent(current, op); + + if (found != null) + return found; + + } + + return null; + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-08-30 20:18:18 UTC (rev 3471) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -107,7 +107,10 @@ * key-range partitions, then the fan-in for the sources may be larger * than one as there will be one {@link JoinTask} for each index partition * touched by each join dimension. - * @todo provide more control over the access path (fully buffered read thresholds). + * + * @todo provide more control over the access path (fully buffered read + * thresholds). + * * @todo Do we need to hook the source and sink {@link Future}s? * * @todo Break the star join logic out into its own join operator. 
Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-08-30 20:18:18 UTC (rev 3471) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AccessPath.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -157,6 +157,9 @@ * The maximum <em>limit</em> that is allowed for a fully-buffered read. * The {@link #asynchronousIterator(Iterator)} will always be used above * this limit. + * + * @todo This should probably be close to the branching factor or chunk + * capacity. */ protected static final int MAX_FULLY_BUFFERED_READ_LIMIT = 250000; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties 2010-08-30 20:18:18 UTC (rev 3471) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties 2010-08-31 16:21:47 UTC (rev 3472) @@ -89,6 +89,7 @@ log4j.logger.com.bigdata.bop=INFO log4j.logger.com.bigdata.bop.engine.QueryEngine=ALL +log4j.logger.com.bigdata.bop.engine.RunningQuery=ALL #log4j.logger.com.bigdata.relation.rule.eval.RuleLog=INFO #log4j.logger.com.bigdata.relation.rule.eval=INFO #log4j.logger.com.bigdata.relation.rule.eval.RuleState=DEBUG Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2010-08-30 20:18:18 UTC (rev 3471) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -417,8 +417,6 @@ /** * Unit test for {@link BOpUtility#getIndex(BOp)}. 
- * - * @todo test for correct detection of duplicates. */ public void test_getIndex() { @@ -511,4 +509,62 @@ } + /** + * Unit test for {@link BOpUtility#getParent(BOp, BOp)}. + */ + public void test_getParent() { + + final BOp a1 = new BOpBase(new BOp[]{Var.var("a")},NV.asMap(new NV[]{// + new NV(BOp.Annotations.BOP_ID,1),// + })); + final BOp a2 = new BOpBase(new BOp[]{Var.var("b")},NV.asMap(new NV[]{// + new NV(BOp.Annotations.BOP_ID,2),// + })); + // Note: [a3] tests recursion (annotations of annotations). + final BOp a3 = new BOpBase(new BOp[] { Var.var("z") , a1}, NV + .asMap( + new NV[] { // + new NV("baz", a2),// + new NV("baz2", "skip")// + }// + )); + + final BOp op2 = new BOpBase(new BOp[] { Var.var("x") , a3 }, NV.asMap(new NV[]{// + new NV("foo1",a1),// + new NV("foo3", "skip"),// + new NV(BOp.Annotations.BOP_ID,3),// + })); + + // root + final BOp root = new BOpBase(new BOp[] { // root args[] + new Constant<String>("12"), Var.var("y"), op2 }, NV.asMap(new NV[]{// + new NV(BOp.Annotations.BOP_ID, 4),// + })); + + assertTrue(root == BOpUtility.getParent(root, op2)); + + assertTrue(op2 == BOpUtility.getParent(root, Var.var("x"))); + + assertTrue(op2 == BOpUtility.getParent(root, a3)); + + assertTrue(a3 == BOpUtility.getParent(root, a1)); + + try { + BOpUtility.getParent(null/* root */, op2); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + try { + BOpUtility.getParent(root, null/* op */); + fail("Expecting: " + IllegalArgumentException.class); + } catch (IllegalArgumentException ex) { + if (log.isInfoEnabled()) + log.info("Ignoring expected exception: " + ex); + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BOpStats.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BOpStats.java 
2010-08-30 20:18:18 UTC (rev 3471) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BOpStats.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -106,7 +106,7 @@ sb.append("{chunksIn=" + chunksIn.estimate_get()); sb.append(",unitsIn=" + unitsIn.estimate_get()); sb.append(",chunksOut=" + chunksOut.estimate_get()); - sb.append(",unitsIn=" + unitsOut.estimate_get()); + sb.append(",unitsOut=" + unitsOut.estimate_get()); toString(sb); sb.append("}"); return sb.toString(); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BindingSetChunk.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BindingSetChunk.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BindingSetChunk.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -0,0 +1,51 @@ +package com.bigdata.bop.engine; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.relation.accesspath.IAsynchronousIterator; + +/** + * A chunk of intermediate results which are ready to be consumed by some + * {@link BOp} in a specific query. + */ +public class BindingSetChunk { + + /** + * The query identifier. + */ + final long queryId; + + /** + * The target {@link BOp}. + */ + final int bopId; + + /** + * The index partition which is being targeted for that {@link BOp}. + */ + final int partitionId; + + /** + * The binding sets to be consumed by that {@link BOp}. 
+ */ + final IAsynchronousIterator<IBindingSet[]> source; + + public BindingSetChunk(final long queryId, final int bopId, + final int partitionId, + final IAsynchronousIterator<IBindingSet[]> source) { + if (source == null) + throw new IllegalArgumentException(); + this.queryId = queryId; + this.bopId = bopId; + this.partitionId = partitionId; + this.source = source; + } + + public String toString() { + + return getClass().getName() + "{queryId=" + queryId + ",bopId=" + bopId + + ",partitionId=" + partitionId + "}"; + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BindingSetChunk.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/IQueryClient.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/IQueryClient.java 2010-08-30 20:18:18 UTC (rev 3471) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/IQueryClient.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -5,10 +5,6 @@ import java.util.UUID; import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IPredicate; -import com.bigdata.relation.accesspath.IAsynchronousIterator; -import com.bigdata.striterator.IChunkedIterator; /** * Interface for a client executing queries. @@ -76,30 +72,69 @@ public BOp getQuery(long queryId) throws RemoteException; /** - * Notify the client that execution has started for some query, - * operator, node, and index partition. + * Notify the client that execution has started for some query, operator, + * node, and index partition. * * @param queryId + * The query identifier. * @param opId + * The operator identifier. + * @param partitionId + * The index partition identifier. 
* @param serviceId - * @param partitionId + * The node on which the operator will execute. + * @param nchunks + * The #of chunks which form the input to that operator (for the + * atomic termination condition decision). */ - public void startOp(long queryId, int opId, UUID serviceId, - int partitionId) throws RemoteException; + public void startOp(long queryId, int opId, int partitionId, UUID serviceId, final int nchunks) + throws RemoteException; /** * Notify the client that execution has halted for some query, operator, - * node and index partition. If execution halted abnormally, then the - * cause is sent as well. + * node and index partition. If execution halted abnormally, then the cause + * is sent as well. * * @param queryId + * The query identifier. * @param opId + * The operator whose execution phase has terminated for a + * specific index partition and input chunk. + * @param partitionId + * The index partition against which the operator was executed. * @param serviceId - * @param partitionId + * The node which executed the operator. * @param cause * <code>null</code> unless execution halted abnormally. + * @param nchunks + * The #of chunks which were output by the operator (for the + * atomic termination decision). This is ONE (1) for scale-up. + * For scale-out, this is one per index partition over which the + * intermediate results were mapped. + * @param taskStats + * The statistics for the execution of that bop on that shard and + * service. */ - public void haltOp(long queryId, int opId, UUID serviceId, - int partitionId, Throwable cause) throws RemoteException; + public void haltOp(long queryId, int opId, int partitionId, UUID serviceId, + Throwable cause, int nchunks, BOpStats taskStats) + throws RemoteException; -} \ No newline at end of file +// /** +// * Notify the query controller that a chunk of intermediate results is +// * available for the query. +// * +// * @param queryId +// * The query identifier. 
+// */ +// public void addChunk(long queryId) throws RemoteException; +// +// /** +// * Notify the query controller that a chunk of intermediate results was +// * taken for processing by the query. +// * +// * @param queryId +// * The query identifier. +// */ +// public void takeChunk(long queryId) throws RemoteException; + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/QueryEngine.java 2010-08-30 20:18:18 UTC (rev 3471) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/QueryEngine.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -30,36 +30,23 @@ import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.rmi.RemoteException; -import java.util.Map; import java.util.UUID; -import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; -import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.PriorityBlockingQueue; import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; import org.apache.log4j.Logger; import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.BOpUtility; import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.NoSuchBOpException; -import com.bigdata.bop.PipelineOp; import com.bigdata.journal.IIndexManager; -import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.resources.ResourceManager; import com.bigdata.service.IBigdataFederation; 
-import com.bigdata.striterator.ICloseableIterator; /** * A class managing execution of concurrent queries against a local @@ -201,15 +188,13 @@ private final static transient Logger log = Logger .getLogger(QueryEngine.class); -// public static class Config { -// -// public int nIOThreads = 10; -// -// public int maxBuffers = (int) Runtime.getRuntime().maxMemory() / 2 -// / DirectBufferPool.INSTANCE.getBufferCapacity(); -// -// } - + /** + * The {@link IBigdataFederation} iff running in scale-out. + * <p> + * Note: The {@link IBigdataFederation} is required in scale-out in order to + * perform shard locator scans when mapping binding sets across the next + * join in a query plan. + */ private final IBigdataFederation<?> fed; /** @@ -219,7 +204,7 @@ * locks. The {@link QueryEngine} is intended to run only against committed * index views for which no locks are required. */ - private final IIndexManager indexManager; + private final IIndexManager localIndexManager; /** * A service used to expose {@link ByteBuffer}s and managed index resources @@ -260,370 +245,43 @@ // private final ForkJoinPool fjpool; /** - * A chunk of intermediate results which are ready to be consumed by some - * {@link BOp} in a specific query. + * The {@link UUID} of the service in which this {@link QueryEngine} is + * running. + * + * @return The {@link UUID} of the service in which this {@link QueryEngine} + * is running -or- <code>null</code> if the {@link QueryEngine} is + * not running against an {@link IBigdataFederation}. */ - public static class BindingSetChunk { + protected UUID getServiceId() { - /** - * The target {@link BOp}. - */ - final int bopId; - - /** - * The index partition which is being targetted for that {@link BOp}. - */ - final int partitionId; + return fed == null ? null : fed.getServiceUUID(); - /** - * The binding sets to be consumed by that {@link BOp}. 
- */ - final IAsynchronousIterator<IBindingSet[]> source; - - public BindingSetChunk(final int bopId, final int partitionId, - final IAsynchronousIterator<IBindingSet[]> source) { - if (source == null) - throw new IllegalArgumentException(); - this.bopId = bopId; - this.partitionId = partitionId; - this.source = source; - } - } /** - * Metadata about running queries. - * - * @todo Cache any resources materialized for the query on this node (e.g., - * temporary graphs materialized from a peer or the client). A bop - * should be able to demand those data from the cache and otherwise - * have them be materialized. - * - * @todo metadata for priority queues (e.g., time remaining or priority). - * [metadata about resource allocations is part of the query plan.] - * - * @todo HA aspects of running queries? - * - * @todo Cancelled queries must reject or drop new chunks, etc. Halted - * queries must release all of their resources. + * The {@link IBigdataFederation} iff running in scale-out. + * <p> + * Note: The {@link IBigdataFederation} is required in scale-out in order to + * perform shard locator scans when mapping binding sets across the next + * join in a query plan. */ - public static class RunningQuery<V extends BOpStats> implements Future<V> { - - /** - * The run state of the query and the result of the computation iff it - * completes execution normally (without being interrupted, cancelled, - * etc). - */ - final private Haltable<V> future = new Haltable<V>(); - - /** - * The runtime statistics for the query. - * - * @todo This has to be per-{@link BOp}. - */ - @SuppressWarnings("unchecked") - final private V stats = (V) new BOpStats(); + public IBigdataFederation<?> getFederation() { - /** - * The class executing the query on this node. - */ - final QueryEngine queryEngine; + return fed; - /** The unique identifier for this query. */ - final long queryId; - - /** - * The timestamp or transaction identifier against which the query - * is reading. 
- */ - final long readTimestamp; - - /** - * The timestamp or transaction identifier against which the query - * is writing. - */ - final long writeTimestamp; + } + + /** + * Access to the indices. + * <p> + * Note: You MUST NOT use unisolated indices without obtaining the necessary + * locks. The {@link QueryEngine} is intended to run only against committed + * index views for which no locks are required. + */ + public IIndexManager getLocalIndexManager() { - /** - * The timestamp when the query was accepted by this node (ns). - * - * @todo add a [deadline] field, which is when the query is due. - */ - final long begin; - - /** - * The client executing this query. - */ - final IQueryClient clientProxy; + return localIndexManager; - /** The query iff materialized on this node. */ - final AtomicReference<BOp> queryRef; - - /** - * The buffer used for the overall output of the query pipeline. - * - * @todo How does the pipeline get attached to this buffer? Via a - * special operator? Or do we just target the coordinating - * {@link QueryEngine} as the sink of the last operator so - * we can use NIO transfers? - */ - final IBlockingBuffer<IBindingSet[]> queryBuffer; - - /** - * A map associating resources with running queries. When a query halts, - * the resources listed in its resource map are released. Resources can - * include {@link ByteBuffer}s backing either incoming or outgoing - * {@link BindingSetChunk}s, temporary files associated with the query, - * hash tables, etc. - * - * @todo only use the values in the map for transient objects, such as a - * hash table which is not backed by the disk. For - * {@link ByteBuffer}s we want to make the references go through - * the {@link BufferService}. For files, through the - * {@link ResourceManager}. - * - * @todo We need to track the resources in use by the query so they can - * be released when the query terminates. 
This includes: buffers; - * joins for which there is a chunk of binding sets that are - * currently being executed; downstream joins (they depend on the - * source joins to notify them when they are complete in order to - * decide their own termination condition); local hash tables - * which are part of a DHT (especially when they are persistent); - * buffers and disk resources allocated to N-way merge sorts, etc. - * - * @todo The set of buffers having data which has been accepted for this - * query. - * - * @todo The set of buffers having data which has been generated for - * this query. - * - * @todo The counter for each open join of the #of active sources. This - * must be coordinated with the client to decide when a join is - * done. This decision determines when the sinks of the join will - * be notified that a given source is done and hence when the sink - * joins will decide that they are done. [Think this through more - * in terms of the client coordination for optional gotos.] - */ - private final ConcurrentHashMap<UUID, Object> resourceMap = new ConcurrentHashMap<UUID, Object>(); - - /** - * The chunks available for immediate processing. - */ - private final BlockingQueue<BindingSetChunk> chunksIn = new LinkedBlockingDeque<BindingSetChunk>(); - - /** - * The chunks generated by this query. - * - * @todo remove chucks from this queue when they are consumed, whether - * by a local process or by transferring the data to a remote - * service. When the data are transferred from a managed - * {@link ByteBuffer} to a remote service, release the - * {@link ByteBuffer} back to the {@link BufferService}. - */ - private final BlockingQueue<BindingSetChunk> chunksOut = new LinkedBlockingDeque<BindingSetChunk>(); - - /** - * An index from the {@link BOp.Annotations#BOP_ID} to the {@link BOp}. - */ - private final Map<Integer,BOp> bopIndex; - - /** - * A collection of the currently executing futures. 
{@link Future}s are - * added to this collection by {@link #newChunkTask(BindingSetChunk)}. - * They are removed when they are {@link Future#isDone()}. - * {@link Future}s are cancelled if the {@link RunningQuery} is halted. - */ - private final ConcurrentHashMap<Future<?>, Future<?>> futures = new ConcurrentHashMap<Future<?>, Future<?>>(); - - /** - * - * @param queryId - * @param begin - * @param clientProxy - * @param query - * The query (optional). - */ - public RunningQuery(final QueryEngine queryEngine, final long queryId, - final long readTimestamp, final long writeTimestamp, - final long begin, final IQueryClient clientProxy, - final BOp query, - final IBlockingBuffer<IBindingSet[]> queryBuffer) { - this.queryEngine = queryEngine; - this.queryId = queryId; - this.readTimestamp = readTimestamp; - this.writeTimestamp = writeTimestamp; - this.begin = begin; - this.clientProxy = clientProxy; - this.queryRef = new AtomicReference<BOp>(query); - this.queryBuffer = queryBuffer; - this.bopIndex = BOpUtility.getIndex(query); - } - - /** - * Create a {@link BindingSetChunk} from a sink and add it to the queue. - * - * @param sinkId - * @param sink - * - * @todo In scale-out, this is where we need to map the binding sets - * over the shards for the target operator. - */ - private void add(final int sinkId, final IBlockingBuffer<?> sink) { - throw new UnsupportedOperationException(); - } - - /** - * Make a chunk of binding sets available for consumption by the query. - * - * @param chunk - * The chunk. - */ - public void add(final BindingSetChunk chunk) { - if (chunk == null) - throw new IllegalArgumentException(); - future.halted(); - chunksIn.add(chunk); - queryEngine.priorityQueue.add(this); - } - - /** - * Return the current statistics for the query. - */ - public BOpStats getStats() { - return stats; - } - - /** - * Return a {@link FutureTask} which will consume the binding set chunk. 
- * - * @param chunk - * - * FIXME The chunk task should notice if the {@link Haltable} - * on the {@link RunningQuery} is halted and should terminate - * eagerly. There can be more than one chunk task running at - * a time for the same query and even for the same - * {@link BOp} for a given query. We need to keep those - * {@link Future}s in a collection so we can cancel them if - * the query is halted. - */ - @SuppressWarnings("unchecked") - protected Future<Void> newChunkTask(final BindingSetChunk chunk) { - /* - * Look up the BOp in the index, create the BOpContext for that BOp, - * and return the value returned by BOp.eval(context). - * - * @todo We have to provide for the sink, which can be a backed by - * one, or many, NIO buffers for high volume query and which will be - * just a transient blocking buffer otherwise. - * - * @todo When eval of that chunk is done, the sink gets wrapped as a - * chunk for the next bop (by its bopId) and submitted back to the - * query engine (in scale-out, it gets mapped over the shards or - * nodes). - * - * @todo If eval of the chunk fails, halt() the query. - * - * @todo evaluation of element[] pipelines might run into type - * problems with the [queryBuffer]. - */ - final BOp bop = bopIndex.get(chunk.bopId); - if (bop == null) { - throw new NoSuchBOpException(chunk.bopId); - } - if (!(bop instanceof PipelineOp<?>)) { - throw new UnsupportedOperationException(bop.getClass() - .getName()); - } - // sink - final Integer sinkId = null;// @todo from annotation (it is the parent). - final IBlockingBuffer<?> sink = ((PipelineOp<?>) bop).newBuffer(); - // altSink - final Integer altSinkId = (Integer) bop - .getProperty(BindingSetPipelineOp.Annotations.ALT_SINK_REF); - if (altSinkId != null && !bopIndex.containsKey(altSinkId)) { - throw new NoSuchBOpException(altSinkId); - } - final IBlockingBuffer<?> altSink = altSinkId == null ? 
null - : ((PipelineOp<?>) bop).newBuffer(); - // context - final BOpContext context = new BOpContext(queryEngine.fed, - queryEngine.indexManager, readTimestamp, writeTimestamp, - chunk.partitionId, stats, chunk.source, sink, altSink); - // FutureTask for operator execution (not running yet). - final FutureTask<Void> f = ((PipelineOp)bop).eval(context); - // Hook the FutureTask. - final Runnable r = new Runnable() { - public void run() { - try { - f.run(); // run - f.get(); // verify success - add(sinkId, sink); // handle output chunk. - if (altSink != null) // handle alt sink output chunk. - add(altSinkId, altSink); - } catch (Throwable t) { - // operator failed on this chunk. - RunningQuery.this - .cancel(true/* mayInterruptIfRunning */); - log.error("queryId=" + queryId + ",bopId=" - + chunk.bopId + ",partitionId=" - + chunk.partitionId + " : " + t); - } - } - }; - // wrap runnable. - final FutureTask<Void> f2 = new FutureTask(r, null/* result */); - // add to list of active futures. - futures.put(f2, f2); - return f; - } - - /** - * Return an iterator which will drain the solutions from the query. The - * query will be cancelled if the iterator is - * {@link ICloseableIterator#close() closed}. - * - * @return - * - * @todo Do all queries produce solutions (mutation operations might - * return a mutation count, but they do not return solutions). - */ - public IAsynchronousIterator<IBindingSet[]> iterator() { - return queryBuffer.iterator(); - } - - /* - * Future - * - * Note: This is implemented using delegation to the Haltable so we can - * hook various methods in order to clean up the state of a completed - * query. 
- */ - - final public boolean cancel(final boolean mayInterruptIfRunning) { - for (Future<?> f : futures.keySet()) { - f.cancel(mayInterruptIfRunning); - } - return future.cancel(mayInterruptIfRunning); - } - - final public V get() throws InterruptedException, ExecutionException { - return future.get(); - } - - final public V get(long arg0, TimeUnit arg1) - throws InterruptedException, ExecutionException, - TimeoutException { - return future.get(arg0, arg1); - } - - final public boolean isCancelled() { - return future.isCancelled(); - } - - final public boolean isDone() { - return future.isDone(); - } - } /** @@ -638,18 +296,18 @@ * have not yet been demanded. Once we receive notice that a query has been * cancelled it is removed from this collection. * - * @todo if a query is halted, it needs to be removed from this collection. - * - * @todo a race is possible where a query is cancelled on a node where the - * node receives notice to start the query after the cancelled message - * has arrived. to avoid having such queries linger, we should have a - * a concurrent hash set with an approximate LRU policy containing the - * identifiers for queries which have been cancelled, possibly paired - * with the cause (null if normal execution). That will let us handle - * any reasonable concurrent indeterminism between cancel and start - * notices for a query. + * @todo If a query is halted, it needs to be removed from this collection. * <p> - * another way in which this might be addressed in involving the + * However, a race is possible where a query is cancelled on a node + * where the node receives notice to start the query after the + * cancelled message has arrived. To avoid having such queries linger, + * we should have a concurrent hash set with an approximate LRU + * policy containing the identifiers for queries which have been + * cancelled, possibly paired with the cause (null if normal + * execution).
That will let us handle any reasonable concurrent + * indeterminism between cancel and start notices for a query. + * <p> + * Another way in which this might be addressed is by involving the * client each time a query start is propagated to a node. if we * notify the client that the query will start on the node first, then * the client can always issue the cancel notices [unless the client @@ -661,13 +319,13 @@ * active queries (their statistics) and administrative operations to * kill a query. */ - private final ConcurrentHashMap<Long/* queryId */, RunningQuery<?>> runningQueries = new ConcurrentHashMap<Long, RunningQuery<?>>(); + final ConcurrentHashMap<Long/* queryId */, RunningQuery> runningQueries = new ConcurrentHashMap<Long, RunningQuery>(); /** * A priority queue of {@link RunningQuery}s having binding set chunks * available for consumption. */ - private final PriorityBlockingQueue<RunningQuery<?>> priorityQueue = new PriorityBlockingQueue<RunningQuery<?>>(); + final private PriorityBlockingQueue<RunningQuery> priorityQueue = new PriorityBlockingQueue<RunningQuery>(); /** * @@ -676,13 +334,9 @@ * @param indexManager * The <em>local</em> index manager. * @param bufferService - * @param nThreads - * - * @todo nThreads is not used right now since tasks are being run against - * the index manager's executor service. */ public QueryEngine(final IBigdataFederation<?> fed, final IIndexManager indexManager, - final ManagedBufferService bufferService, final int nThreads) { + final ManagedBufferService bufferService) { if (indexManager == null) throw new IllegalArgumentException(); @@ -690,7 +344,7 @@ throw new IllegalArgumentException(); this.fed = fed; // MAY be null.
- this.indexManager = indexManager; + this.localIndexManager = indexManager; this.bufferService = bufferService; // this.iopool = new LatchedExecutor(indexManager.getExecutorService(), // nThreads); @@ -712,7 +366,7 @@ if (engineFuture.compareAndSet(null/* expect */, ft)) { - indexManager.getExecutorService().execute(ft); + localIndexManager.getExecutorService().execute(ft); } else { @@ -730,16 +384,35 @@ /** * Volatile flag is set for normal termination. */ - private volatile boolean shutdown = false; - + private volatile boolean shutdown = false; + /** * Runnable submits chunks available for evaluation against running queries. + * + * @todo Handle priority for selective queries based on the time remaining + * until the timeout. + * <p> + * Handle priority for unselective queries based on the order in which + * they are submitted? + * @todo The approach taken by the {@link QueryEngine} executes one task per + * pipeline bop per chunk. Outside of how the tasks are scheduled, + * this corresponds closely to the historical pipeline query + * evaluation. The other difference is that there is less opportunity + * for concatenation of chunks. However, chunk concatenation could be + * performed here if we (a) mark the BindingSetChunk with a flag to + * indicate when it has been accepted; and (b) rip through the + * incoming chunks for the query for the target bop and combine them + * to feed the task. Chunks which have already been assigned would be + * dropped when take() discovers them above. [The chunk combination + * could also be done when we output the chunk if the sink has not + * been taken, e.g., by combining the chunk into the same target + * ByteBuffer, or when we add the chunk to the RunningQuery.] 
*/ private class QueryEngineTask implements Runnable { public void run() { try { while (true) { - final RunningQuery<?> q = priorityQueue.take(); + final RunningQuery q = priorityQueue.take(); if (q.isCancelled()) continue; final BindingSetChunk chunk = q.chunksIn.poll(); @@ -750,39 +423,13 @@ continue; } if (log.isTraceEnabled()) - log.trace("Accepted chunk: queryId=" + q.queryId); + log.trace("Accepted chunk: queryId=" + q.queryId + + ", bopId=" + chunk.bopId); try { - /* - * @todo The approach taken by the {@link QueryEngine} - * executes one task per pipeline bop per chunk. Outside - * of how the tasks are scheduled, this corresponds - * closely to the historical pipeline query evaluation. - * The other difference is that there is less - * opportunity for concatenation of chunks. However, - * chunk concatenation could be performed here if we (a) - * mark the BindingSetChunk with a flag to indicate when - * it has been accepted; and (b) rip through the - * incoming chunks for the query for the target bop and - * combine them to feed the task. Chunks which have - * already been assigned would be dropped when take() - * discovers them above. [The chunk combination could - * also be done when we output the chunk if the sink has - * not been taken, e.g., by combining the chunk into the - * same target ByteBuffer, or when we add the chunk to - * the RunningQuery.] - * - * Note: newChunkTask() returns a Future which is - * already executing against a thread pool. - * - * @todo Do we need to watch the futures? [Yes, since we - * need to mark the RunningQuery as halted if the Future - * reports an error. It would be best to do that using - * a hook on the Future.] - */ - final Future<?> ft = q.newChunkTask(chunk); -// iopool.execute(ft); - if (log.isDebugEnabled()) - log.debug("Running chunk: queryId=" + q.queryId); + // create task. + final FutureTask<?> ft = q.newChunkTask(chunk); + // execute task. 
+ localIndexManager.getExecutorService().execute(ft); } catch (RejectedExecutionException ex) { // shutdown of the pool (should be an unbounded pool). log.warn("Dropping chunk: queryId=" + q.queryId); @@ -799,6 +446,32 @@ } } } // QueryEngineTask + + /** + * Add a chunk of intermediate results for consumption by some query. The + * chunk will be attached to the query and the query will be scheduled for + * execution. + * + * @param chunk + * A chunk of intermediate results. + */ + void add(final BindingSetChunk chunk) { + + if (chunk == null) + throw new IllegalArgumentException(); + + final RunningQuery q = runningQueries.get(chunk.queryId); + + if(q == null) + throw new IllegalStateException(); + + // add chunk to the query's input queue on this node. + q.add(chunk); + + // add query to the engine's task queue. + priorityQueue.add(q); + + } /** * Do not accept new queries, but run existing queries to completion. @@ -829,13 +502,22 @@ f.cancel(true/* mayInterruptIfRunning */); } + /** + * @todo SCALEOUT: Override in scale-out to release buffers associated with + * chunks buffered for this query (buffers may be for received chunks + * or chunks which are awaiting transfer to another node). + */ + protected void releaseResources(final RunningQuery q) { + + } + /* * IQueryPeer */ public void bufferReady(IQueryClient clientProxy, InetSocketAddress serviceAddr, long queryId, int bopId) { - // TODO Auto-generated method stub + // TODO SCALEOUT } @@ -843,34 +525,37 @@ * IQueryClient */ - public BOp getQuery(long queryId) throws RemoteException { - // TODO Auto-generated method stub + /** + * @todo Define the behavior for these methods if the queryId is not found + * whether because the caller has the wrong value or because the query + * has terminated. 
+ */ + public BOp getQuery(final long queryId) throws RemoteException { + final RunningQuery q = runningQueries.get(queryId); + if (q != null) { + return q.queryRef.get(); + } return null; } - public void startOp(long queryId, int opId, UUID serviceId, int partitionId) + public void startOp(final long queryId, final int opId, + final int partitionId, final UUID serviceId, final int nchunks) throws RemoteException { - // TODO Auto-generated method stub - + final RunningQuery q = runningQueries.get(queryId); + if (q != null) { + q.startOp(opId, partitionId, serviceId, nchunks); + } } - public void haltOp(long queryId, int opId, UUID serviceId, int partitionId, - Throwable cause) throws RemoteException { - // TODO Auto-generated method stub - + public void haltOp(final long queryId, final int opId, + final int partitionId, final UUID serviceId, final Throwable cause, + final int nchunks, final BOpStats taskStats) throws RemoteException { + final RunningQuery q = runningQueries.get(queryId); + if (q != null) { + q.haltOp(opId, partitionId, serviceId, cause, nchunks, taskStats); + } } -// public IChunkedIterator<?> eval(final long queryId, final long timestamp, -// final BOp query) throws Exception { -// -// runningQueries.put(queryId, new RunningQuery(queryId, -// System.nanoTime()/* begin */, this/* clientProxy */, query, -// null/* source */)); -// -// return null; -// -// } - /** * Evaluate a query which visits {@link IBindingSet}s, such as a join. This * node will serve as the controller for the query. @@ -894,19 +579,28 @@ * annotations. Closure would then rewrite the query plan for each * pass, replacing the readTimestamp with the new read-behind * timestamp. + * + * @todo The initial binding set used to declare the variables used by a + * rule. With this refactor we should pay attention instead to the + * binding sets output by each {@link BOp} and compressed + * representations of those binding sets. 
*/ - public RunningQuery<?> eval(final long queryId, final long readTimestamp, + public RunningQuery eval(final long queryId, final long readTimestamp, final long writeTimestamp, final BindingSetPipelineOp query) throws Exception { if (query == null) throw new IllegalArgumentException(); - - @SuppressWarnings("unchecked") - final RunningQuery<?> runningQuery = new RunningQuery(this, queryId, - readTimestamp, writeTimestamp, System.nanoTime()/* begin */, - this/* clientProxy */, query, newQueryBuffer(query)); + final long timeout = query.getProperty(BOp.Annotations.TIMEOUT, + BOp.Annotations.DEFAULT_TIMEOUT); + + final RunningQuery runningQuery = new RunningQuery(this, queryId, + readTimestamp, writeTimestamp, + System.currentTimeMillis()/* begin */, timeout, + true/* controller */, this/* clientProxy */, query, + newQueryBuffer(query)); + runningQueries.put(queryId, runningQuery); return runningQuery; Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java 2010-08-31 16:21:47 UTC (rev 3472) @@ -0,0 +1,654 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Aug 31, 2010 + */ +package com.bigdata.bop.engine; + +import java.nio.ByteBuffer; +import java.rmi.RemoteException; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.FutureTask; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.NoSuchBOpException; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.resources.ResourceManager; +import com.bigdata.striterator.ICloseableIterator; + +/** + * Metadata about running queries. + * + * @todo HA aspects of running queries? Checkpoints for long running queries? + */ +public class RunningQuery implements Future<Map<Integer,BOpStats>> { + + private final static transient Logger log = Logger + .getLogger(RunningQuery.class); + + /** + * The run state of the query and the result of the computation iff it + * completes execution normally (without being interrupted, cancelled, etc). 
+ */ + final private Haltable<Map<Integer,BOpStats>> future = new Haltable<Map<Integer,BOpStats>>(); + + /** + * The runtime statistics for each {@link BOp} in the query and + * <code>null</code> unless this is the query controller. + */ + final private ConcurrentHashMap<Integer/* bopId */, BOpStats> statsMap; + + /** + * The class executing the query on this node. + */ + final QueryEngine queryEngine; + + /** The unique identifier for this query. */ + final long queryId; + + /** + * The timestamp or transaction identifier against which the query is + * reading. + */ + final long readTimestamp; + + /** + * The timestamp or transaction identifier against which the query is + * writing. + */ + final long writeTimestamp; + + /** + * The timestamp when the query was accepted by this node (ms). + */ + final long begin; + + /** + * How long the query is allowed to run (elapsed milliseconds) -or- + * {@link Long#MAX_VALUE} if there is no deadline. + */ + final long timeout; + + /** + * <code>true</code> iff the outer {@link QueryEngine} is the controller for + * this query. + */ + final boolean controller; + + /** + * The client executing this query. + */ + final IQueryClient clientProxy; + + /** The query iff materialized on this node. */ + final AtomicReference<BOp> queryRef; + + /** + * The buffer used for the overall output of the query pipeline. + * + * @todo How does the pipeline get attached to this buffer? Via a special + * operator? Or do we just target the coordinating {@link QueryEngine} + * as the sink of the last operator so we can use NIO transfers? + */ + final IBlockingBuffer<IBindingSet[]> queryBuffer; + + /** + * A map associating resources with running queries. When a query halts, the + * resources listed in its resource map are released. Resources can include + * {@link ByteBuffer}s backing either incoming or outgoing + * {@link BindingSetChunk}s, temporary files associated with the query, hash + * tables, etc. 
+ * + * @todo Cache any resources materialized for the query on this node (e.g., + * temporary graphs materialized from a peer or the client). A bop + * should be able to demand those data from the cache and otherwise + * have them be materialized. + * + * @todo only use the values in the map for transient objects, such as a + * hash table which is not backed by the disk. For {@link ByteBuffer}s + * we want to make the references go through the {@link BufferService} + * . For files, through the {@link ResourceManager}. + * + * @todo We need to track the resources in use by the query so they can be + * released when the query terminates. This includes: buffers; joins + * for which there is a chunk of binding sets that are currently being + * executed; downstream joins (they depend on the source joins to + * notify them when they are complete in order to decide their own + * termination condition); local hash tables which are part of a DHT + * (especially when they are persistent); buffers and disk resources + * allocated to N-way merge sorts, etc. + * + * @todo The set of buffers having data which has been accepted for this + * query. + * + * @todo The set of buffers having data which has been generated for this + * query. + */ + private final ConcurrentHashMap<UUID, Object> resourceMap = new ConcurrentHashMap<UUID, Object>(); + + /** + * The chunks available for immediate processing. + */ + final BlockingQueue<BindingSetChunk> chunksIn = new LinkedBlockingDeque<BindingSetChunk>(); + + /** + * An index from the {@link BOp.Annotations#BOP_ID} to the {@link BOp}. + */ + private final Map<Integer, BOp> bopIndex; + + /** + * A collection of the currently executing future for operators for this + * query. + */ + private final ConcurrentHashMap<Future<?>, Future<?>> operatorFutures = new ConcurrentHashMap<Future<?>, Future<?>>(); + + /** + * A lock guarding {@link #runningTaskCount} and + * {@link #availableChunkCount}. 
+ */ + private final Lock runStateLock = new ReentrantLock(); + + /** + * The #of tasks for this query which have started but not yet halted and + * ZERO (0) if this is not the query coordinator. + */ + private long runningTaskCount = 0; + + /** + * The #of chunks for this query which a running task has made available + * but which have not yet been accepted for processing by another task and + * ZERO (0) if this is not the query coordinator. + */ + private long availableChunkCount = 0; + + /** + * Return <code>true</code> iff this is the query controller. + */ + public boolean isController() { + + return controller; + + } + + /** + * Return the current statistics for the query and <code>null</code> unless + * this is the query controller. + * + * @todo When the query is done, there will be one entry in this map for + * each operator in the pipeline. Non-pipeline operators such as + * {@link Predicate}s do not currently make it into this map. + */ + public Map<Integer/*bopId*/,BOpStats> getStats() { + + return statsMap; + + } + + /** + * + * @param queryId + * @param begin + * @param clientProxy + * @param query + * The query (optional). + */ + public RunningQuery(final QueryEngine queryEngine, final long queryId, + final long readTimestamp, final long writeTimestamp, + final long begin, final long timeout, final boolean controller, + final IQueryClient clientProxy, final BOp query, + final IBlockingBuffer<IBindingSet[]> queryBuffer) { + this.queryEngine = queryEngine; + this.queryId = queryId; + this.readTimestamp = readTimestamp; + this.writeTimestamp = writeTimestamp; + this.begin = begin; + this.timeout = timeout; + this.controller = controller; + this.clientProxy = clientProxy; + this.queryRef = new AtomicReference<BOp>(query); + if (controller && quer... [truncated message content] |
From: <tho...@us...> - 2010-09-02 18:52:46
|
Revision: 3495 http://bigdata.svn.sourceforge.net/bigdata/?rev=3495&view=rev Author: thompsonbry Date: 2010-09-02 18:52:36 +0000 (Thu, 02 Sep 2010) Log Message: ----------- Test suite for distinct binding sets operator and for an optional pipeline join using only the default sink. Reorganized the operator hierarchy somewhat. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/HashBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineStartOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/DistinctElementFilter.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQ.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/NE.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/NEConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/OR.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/eval/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/ThreadLocalBufferFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/IElementFilter.java branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestDeepCopy.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestDistinctBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicateAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MapBindingSetsOverShards.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/PipelineDelayOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/ReceiveBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestPipelineUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/inf/OwlSameAsPropertiesExpandingIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/constraints/AbstractInlineConstraint.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/IRISUtils.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/MatchRule.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RejectAnythingSameAsItself.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Union.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BOpConstraint.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INConstraint.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INHashMap.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestUnionBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestEQConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestInBinarySearch.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestInHashMap.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestNE.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestNEConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/constraint/TestOR.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Union.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/IN.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -30,8 +30,6 @@ import java.util.Map; import java.util.concurrent.TimeUnit; -import com.bigdata.bop.ap.Predicate; - /** * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -35,6 +35,9 @@ import com.bigdata.relation.accesspath.IBlockingBuffer; /** + * Abstract base class for pipelined operators regardless of the type of data + * moving along the pipeline. + * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ArrayBindingSet.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -50,19 +50,9 @@ private static final long serialVersionUID = -6468905602211956490L; - protected static final Logger log = Logger.getLogger(ArrayBindingSet.class); - - /** - * True iff the {@link #log} level is INFO or less. - */ - protected static final boolean INFO = log.isInfoEnabled(); + private static final Logger log = Logger.getLogger(ArrayBindingSet.class); /** - * True iff the {@link #log} level is DEBUG or less. - */ - protected static final boolean DEBUG = log.isDebugEnabled(); - - /** * A dense array of the bound variables. */ private final IVariable[] vars; @@ -77,7 +67,7 @@ /** * Copy constructor. */ - protected ArrayBindingSet(ArrayBindingSet bindingSet) { + protected ArrayBindingSet(final ArrayBindingSet bindingSet) { if (bindingSet == null) throw new IllegalArgumentException(); @@ -100,18 +90,17 @@ * @param vals * Their bound values. 
*/ - public ArrayBindingSet(IVariable[] vars, IConstant[] vals) { + public ArrayBindingSet(final IVariable[] vars, final IConstant[] vals) { -// if (vars == null) -// throw new IllegalArgumentException(); -// -// if (vals == null) -// throw new IllegalArgumentException(); + if (vars == null) + throw new IllegalArgumentException(); - assert vars != null; - assert vals != null; - assert vars.length == vals.length; + if (vals == null) + throw new IllegalArgumentException(); + if(vars.length != vals.length) + throw new IllegalArgumentException(); + // for (int i = 0; i < vars.length; i++) { // // if (vars[i] == null) @@ -139,7 +128,7 @@ * @throws IllegalArgumentException * if the <i>capacity</i> is negative. */ - public ArrayBindingSet(int capacity) { + public ArrayBindingSet(final int capacity) { if (capacity < 0) throw new IllegalArgumentException(); @@ -244,7 +233,7 @@ * Since the array is dense (no gaps), {@link #clear(IVariable)} requires * that we copy down any remaining elements in the array by one position. 
*/ - public void clear(IVariable var) { + public void clear(final IVariable var) { if (var == null) throw new IllegalArgumentException(); @@ -282,7 +271,7 @@ } - public IConstant get(IVariable var) { + public IConstant get(final IVariable var) { if (var == null) throw new IllegalArgumentException(); @@ -301,7 +290,7 @@ } - public boolean isBound(IVariable var) { + public boolean isBound(final IVariable var) { return get(var) != null; @@ -315,10 +304,11 @@ if (val == null) throw new IllegalArgumentException(); - if(DEBUG) { - - log.debug("var=" + var + ", val=" + val + ", nbound=" + nbound+", capacity="+vars.length); - + if (log.isTraceEnabled()) { + + log.trace("var=" + var + ", val=" + val + ", nbound=" + nbound + + ", capacity=" + vars.length); + } for (int i = 0; i < nbound; i++) { @@ -343,7 +333,7 @@ public String toString() { - StringBuilder sb = new StringBuilder(); + final StringBuilder sb = new StringBuilder(); sb.append("{"); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -1,15 +1,51 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 2, 2010 + */ + package com.bigdata.bop; +import java.util.Map; + /** - * Interface for evaluating pipeline operations producing and consuming chunks - * of binding sets. + * Abstract base class for pipeline operators where the data moving along the + * pipeline is chunks of {@link IBindingSet}s. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ */ -public interface BindingSetPipelineOp extends PipelineOp<IBindingSet> { +abstract public class BindingSetPipelineOp extends + AbstractPipelineOp<IBindingSet> { - public interface Annotations extends PipelineOp.Annotations { + /** + * + */ + private static final long serialVersionUID = 1L; + public interface Annotations extends AbstractPipelineOp.Annotations { + /** * The value of the annotation is the {@link BOp.Annotations#BOP_ID} of * the ancestor in the operator tree which serves as an alternative sink @@ -20,4 +56,26 @@ } + /** + * Required deep copy constructor. + * + * @param op + */ + protected BindingSetPipelineOp(AbstractPipelineOp<IBindingSet> op) { + super(op); + } + + /** + * Shallow copy constructor. 
+ * + * @param args + * @param annotations + */ + protected BindingSetPipelineOp(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/HashBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/HashBindingSet.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/HashBindingSet.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -32,15 +32,20 @@ import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.Map; import java.util.Map.Entry; - /** * {@link IBindingSet} backed by a {@link HashMap}. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ + * + * @todo Since {@link Var}s allow reference testing, a faster implementation + * could be written based on a {@link LinkedList}. Just scan the list + * until the entry is found with the desired {@link Var} reference and + * then return it. 
*/ public class HashBindingSet implements IBindingSet { @@ -93,7 +98,28 @@ } - public boolean isBound(IVariable var) { + public HashBindingSet(final IVariable[] vars, final IConstant[] vals) { + + if (vars == null) + throw new IllegalArgumentException(); + + if (vals == null) + throw new IllegalArgumentException(); + + if (vars.length != vals.length) + throw new IllegalArgumentException(); + + map = new LinkedHashMap<IVariable, IConstant>(vars.length); + + for (int i = 0; i < vars.length; i++) { + + map.put(vars[i], vals[i]); + + } + + } + + public boolean isBound(final IVariable var) { if (var == null) throw new IllegalArgumentException(); @@ -102,7 +128,7 @@ } - public IConstant get(IVariable var) { + public IConstant get(final IVariable var) { if (var == null) throw new IllegalArgumentException(); @@ -111,7 +137,7 @@ } - public void set(IVariable var, IConstant val) { + public void set(final IVariable var, final IConstant val) { if (var == null) throw new IllegalArgumentException(); @@ -123,7 +149,7 @@ } - public void clear(IVariable var) { + public void clear(final IVariable var) { if (var == null) throw new IllegalArgumentException(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -30,7 +30,7 @@ import java.io.Serializable; -import com.bigdata.bop.ap.Union; +import com.bigdata.bop.join.PipelineJoin; import com.bigdata.mdi.PartitionLocator; import com.bigdata.relation.IMutableRelation; import com.bigdata.relation.IRelation; @@ -85,6 +85,8 @@ /** * <code>true</code> iff the predicate is optional (the right operand of * a left join). + * + * @deprecated This flag is being moved to the join operator. 
+ */ String OPTIONAL = "optional"; @@ -131,8 +133,8 @@ * @param index * The index into the array of relation names in the view. * - * @deprecated Unions of predicates must be handled explicitly. See - * {@link Union}. + * @deprecated Unions of predicates must be handled explicitly as a union of + * pipeline operators reading against the different predicates. */ public String getRelationName(int index); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineStartOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineStartOp.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineStartOp.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -29,7 +29,6 @@ import java.util.Map; import java.util.concurrent.Callable; -import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import com.bigdata.bop.engine.BOpStats; @@ -44,8 +43,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -public class PipelineStartOp extends AbstractPipelineOp<IBindingSet> implements - BindingSetPipelineOp { +public class PipelineStartOp extends BindingSetPipelineOp { /** * @@ -82,8 +80,6 @@ */ static private class CopyTask implements Callable<Void> { -// private final BOpContext<IBindingSet> context; - private final BOpStats stats; private final IAsynchronousIterator<IBindingSet[]> source; @@ -91,8 +87,6 @@ private final IBlockingBuffer<IBindingSet[]> sink; CopyTask(final BOpContext<IBindingSet> context) { - -// this.context = context; stats = context.getStats(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java 2010-09-02 13:49:55 UTC (rev 3494) +++ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -7,10 +7,10 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.FutureTask; -import com.bigdata.bop.AbstractPipelineOp; -import com.bigdata.bop.ArrayBindingSet; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.HashBindingSet; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IVariable; @@ -25,14 +25,14 @@ * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z * thompsonbry $ */ -public class DistinctBindingSetOp extends AbstractPipelineOp<IBindingSet>{ +public class DistinctBindingSetOp extends BindingSetPipelineOp { /** * */ private static final long serialVersionUID = 1L; - public interface Annotations extends BOp.Annotations { + public interface Annotations extends BindingSetPipelineOp.Annotations { /** * The initial capacity of the {@link ConcurrentHashMap} used to impose @@ -40,7 +40,7 @@ * * @see #DEFAULT_INITIAL_CAPACITY */ - String INITIAL_CAPACITY = "initialCapacity"; + String INITIAL_CAPACITY = DistinctBindingSetOp.class.getName()+".initialCapacity"; int DEFAULT_INITIAL_CAPACITY = 16; @@ -50,7 +50,7 @@ * * @see #DEFAULT_LOAD_FACTOR */ - String LOAD_FACTOR = "loadFactor"; + String LOAD_FACTOR = DistinctBindingSetOp.class.getName()+".loadFactor"; float DEFAULT_LOAD_FACTOR = .75f; @@ -60,7 +60,7 @@ * * @see #DEFAULT_CONCURRENCY_LEVEL */ - String CONCURRENCY_LEVEL = "concurrencyLevel"; + String CONCURRENCY_LEVEL = DistinctBindingSetOp.class.getName()+".concurrencyLevel"; int DEFAULT_CONCURRENCY_LEVEL = 16; @@ -170,7 +170,7 @@ if (vals.length != t.vals.length) return false; for (int i = 0; i < vals.length; i++) { - // @todo allow for nulls. + // @todo verify that this allows for nulls with a unit test. 
if (vals[i] == t.vals[i]) continue; if (vals[i] == null) @@ -185,7 +185,7 @@ /** * Task executing on the node. */ - private class DistinctTask implements Callable<Void> { + static private class DistinctTask implements Callable<Void> { private final BOpContext<IBindingSet> context; @@ -209,8 +209,8 @@ this.vars = op.getVariables(); this.map = new ConcurrentHashMap<Solution, Solution>( - getInitialCapacity(), getLoadFactor(), - getConcurrencyLevel()); + op.getInitialCapacity(), op.getLoadFactor(), + op.getConcurrencyLevel()); } @@ -230,16 +230,13 @@ for (int i = 0; i < vars.length; i++) { - if ((r[i] = bset.get(vars[i])) == null) { + /* + * Note: This allows null's. + * + * @todo write a unit test when some variables are not bound. + */ + r[i] = bset.get(vars[i]); - /* - * @todo probably allow nulls, but write a unit test for it. - */ - - throw new RuntimeException("Not bound: " + vars[i]); - - } - } final Solution s = new Solution(r); @@ -283,14 +280,7 @@ // System.err.println("accepted: " // + Arrays.toString(vals)); - /* - * @todo This may cause problems since the - * ArrayBindingSet does not allow mutation with - * variables not declared up front. In that case use - * new HashBindingSet( new ArrayBindingSet(...)). - */ - - accepted.add(new ArrayBindingSet(vars, vals)); + accepted.add(new HashBindingSet(vars, vals)); naccepted++; Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Union.java (from rev 3466, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Union.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Union.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Union.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -0,0 +1,135 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 18, 2010 + */ + +package com.bigdata.bop.aggregation; + +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.engine.Haltable; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.rdf.rules.TMUtility; +import com.bigdata.relation.RelationFusedView; + +/** + * The union of two or more {@link BindingSetPipelineOp} operators. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * + * @todo I have some basic questions about the ability to use a UNION of two + * predicates in scale-out. I think that this might be more accurately + * modeled as the UNION of two joins. That is, rather than: + * + * <pre> + * JOIN( ..., + * UNION( foo.spo(A,loves,B), + * bar.spo(A,loves,B) ) + * ) + * </pre> + * + * using + * + * <pre> + * UNION( JOIN( ..., foo.spo(A,loves,B) ), + * JOIN( ..., bar.spo(A,loves,B) ) + * ) + * </pre> + * + * which would be a binding set union rather than an element union. + * + * @todo The union of access paths was historically handled by + * {@link RelationFusedView}. That class should be removed once queries + * are rewritten to use the union of joins. 
+ * + * @todo The {@link TMUtility} will have to be updated to use this operator + * rather than specifying multiple source "names" for the relation of the + * predicate. + * + * @todo The FastClosureRuleTask will also need to be updated to use a + * {@link Union} over the joins rather than a {@link RelationFusedView}. + */ +public class Union extends BindingSetPipelineOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * @param args + * Two or more operators whose union is desired. + * @param annotations + */ + public Union(final BindingSetPipelineOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + + if (args.length < 2) + throw new IllegalArgumentException(); + + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new UnionTask(this, context)); + + } + + /** + * Pipeline union impl. + * + * FIXME All this does is copy its inputs to its outputs. Since we only run + * one chunk of input at a time, it seems that the easiest way to implement + * a union is to have the operators in the union just target the same sink. 
+ */ + private static class UnionTask extends Haltable<Void> implements Callable<Void> { + + public UnionTask(// + final Union op,// + final BOpContext<IBindingSet> context + ) { + + if (op == null) + throw new IllegalArgumentException(); + if (context == null) + throw new IllegalArgumentException(); + } + + public Void call() throws Exception { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/DistinctElementFilter.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/DistinctElementFilter.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/DistinctElementFilter.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -1,58 +1,46 @@ package com.bigdata.bop.ap; -import java.util.UUID; -import java.util.concurrent.Callable; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.FutureTask; +import com.bigdata.bop.BOp; import com.bigdata.bop.BOpBase; -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpList; +import com.bigdata.bop.BOpContext; import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.NV; -import com.bigdata.bop.aggregation.DistinctBindingSetOp; -import com.bigdata.btree.keys.KeyBuilder; -import com.bigdata.rdf.relation.rule.BindingSetSortKeyBuilder; import com.bigdata.rdf.spo.DistinctSPOIterator; -import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.relation.rule.eval.IJoinNexus; -import com.bigdata.relation.rule.eval.ISolution; +import com.bigdata.rdf.spo.SPO; +import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.striterator.DistinctFilter; import com.bigdata.striterator.IChunkConverter; import com.bigdata.striterator.MergeFilter; /** - * A DISTINCT operator based on a hash 
table. + * A DISTINCT operator based for elements in a relation. The operator is based + * on a hash table. New elements are constructed for each original element in + * which only the distinct fields are preserved. If the new element is distinct + * then it is passed by the filter. + * <p> + * The filter is capable of changing the type of the accepted elements. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z * thompsonbry $ + * * @param <E> + * The generic type of the source elements for the filter. + * @param <F> + * The generic type of the elements passed by the filter. * - * @todo could have an implementation backed by a persistent hash map using an - * extensible hash function to automatically grow the persistence store. - * This could be a general purpose persistent hash functionality, but it - * could also operate against a temporary file when used in the context of - * a query (the backing file can be destroyed afterwards or the data can - * be simply written onto the current temporary store). + * @todo support changing the generic type as part of the filter. this is + * similar to the {@link IChunkConverter}. * - * @todo Consider the use of lock amortization (batching) to reduce contention - * for the backing map. Alternatively, we could accept entire blocks of - * elements from a single source at a time, which would single thread us - * through the map. Or bound the #of threads hitting the map at once, - * increase the map concurrency level, etc. - * * @todo Reconcile with {@link IChunkConverter}, {@link DistinctFilter} (handles * solutions) and {@link MergeFilter} (handles comparables), - * {@link DistinctSPOIterator}, {@link DistinctBindingSetOp}, etc. + * {@link DistinctSPOIterator}, etc. 
*/ -public class DistinctElementFilter<E> -extends BOpBase -//extends AbstractChunkedIteratorOp<E> -//implements IElementFilter<E>, -// implements IConstraint, -// implements ChunkedIteratorOp<E> -{ +public class DistinctElementFilter<E> extends BOpBase implements + IElementFilter<E> { /** * @@ -61,104 +49,272 @@ public interface Annotations extends BOp.Annotations { - String INITIAL_CAPACITY = "initialCapacity"; + /** + * The initial capacity of the {@link ConcurrentHashMap} used to impose + * the distinct constraint. + * + * @see #DEFAULT_INITIAL_CAPACITY + */ + String INITIAL_CAPACITY = DistinctElementFilter.class.getName() + + ".initialCapacity"; - String LOAD_FACTOR = "loadFactor"; + int DEFAULT_INITIAL_CAPACITY = 16; - String CONCURRENCY_LEVEL = "concurrencyLevel"; + /** + * The load factor of the {@link ConcurrentHashMap} used to impose the + * distinct constraint. + * + * @see #DEFAULT_LOAD_FACTOR + */ + String LOAD_FACTOR = DistinctElementFilter.class.getName() + + ".loadFactor"; - } + float DEFAULT_LOAD_FACTOR = .75f; - public DistinctElementFilter(final IVariable<?>[] distinctList, - final UUID masterUUID) { + /** + * The concurrency level of the {@link ConcurrentHashMap} used to impose + * the distinct constraint. + * + * @see #DEFAULT_CONCURRENCY_LEVEL + */ + String CONCURRENCY_LEVEL = DistinctElementFilter.class.getName() + + ".concurrencyLevel"; - super(distinctList, NV.asMap(new NV[] { - // new NV(Annotations.QUERY_ID, masterUUID), - // new NV(Annotations.BOP_ID, bopId) - })); + int DEFAULT_CONCURRENCY_LEVEL = 16; - if (masterUUID == null) - throw new IllegalArgumentException(); + /** + * The set of fields whose values must be distinct. + * + * @todo abstract base class to allow easy override for specific element + * types such as {@link SPO}. + */ + String FIELDS = DistinctElementFilter.class.getName() + ".fields"; + /** + * An optional constraint on the runtime type of the elements which are + * acceptable to this filter. 
+ * + * @see IElementFilter#canAccept(Object) + * + * @todo I am not convinced that we need this. It parallels something + * which was introduced into the {@link IElementFilter} interface, + * but I suspect that we do not need that either. + */ + String CLASS_CONSTRAINT = DistinctElementFilter.class.getName() + + ".classConstraint"; + } -// public Future<Void> eval(final IBigdataFederation<?> fed, -// final IJoinNexus joinNexus, final IBlockingBuffer<E[]> buffer) { -// -// final FutureTask<Void> ft = new FutureTask<Void>(new DHTTask(joinNexus, -// buffer)); -// -// joinNexus.getIndexManager().getExecutorService().execute(ft); -// -// return ft; -// -// } + /** + * Required deep copy constructor. + */ + public DistinctElementFilter(final DistinctElementFilter<E> op) { + super(op); + } /** - * Task executing on the node. + * Required shallow copy constructor. */ - private class DHTTask implements Callable<Void> { + public DistinctElementFilter(final BOp[] args, + final Map<String, Object> annotations) { - private final IJoinNexus joinNexus; + super(args, annotations); - private final IBlockingBuffer<E[]> buffer; + final int[] fields = getFields(); - private final ConcurrentHashMap<byte[], Void> map; + if (fields == null) + throw new IllegalArgumentException(); - /* Note: This is NOT thread safe! 
*/ - private final BindingSetSortKeyBuilder sortKeyBuilder; - - DHTTask(final IJoinNexus joinNexus, - final IBlockingBuffer<E[]> buffer) { + if (fields.length == 0) + throw new IllegalArgumentException(); - this.joinNexus = joinNexus; - - this.buffer = buffer; + } - final IVariable<?>[] vars = ((BOpList) get(0/* distinctList */)) - .toArray(new IVariable[0]); + /** + * @see Annotations#INITIAL_CAPACITY + */ + public int getInitialCapacity() { - this.sortKeyBuilder = new BindingSetSortKeyBuilder(KeyBuilder - .newInstance(), vars); + return getProperty(Annotations.INITIAL_CAPACITY, + Annotations.DEFAULT_INITIAL_CAPACITY); - this.map = new ConcurrentHashMap<byte[], Void>(/* - * @todo initialCapacity using annotations - * @todo loadFactor ... - * @todo concurrencyLevel ... - */); - } + } - private boolean accept(final IBindingSet bset) { + /** + * @see Annotations#LOAD_FACTOR + */ + public float getLoadFactor() { - return map.putIfAbsent(sortKeyBuilder.getSortKey(bset), null) == null; + return getProperty(Annotations.LOAD_FACTOR, + Annotations.DEFAULT_LOAD_FACTOR); - } + } - public Void call() throws Exception { + /** + * @see Annotations#CONCURRENCY_LEVEL + */ + public int getConcurrencyLevel() { - /* - * FIXME Setup to drain binding sets from the source. Note that the - * sort key builder is not thread safe, so a pool of key builders - * with a non-default initial capacity (LT 1024) might be used to - * allow higher concurrency for key building. - * - * Alternatively, the caller could generate the keys (SOUNDS GOOD) - * and just ship the byte[] keys to the DHTFilter. - * - * The DHTFilter needs to send back its boolean[] responses bit - * coded or run length coded. See AbstractArrayIndexProcedure which - * already does some of that. Those responses should move through - * NIO Buffers just like everything else, but the response will be - * much smaller than the incoming byte[][] (aka IRaba). 
- */ - throw new UnsupportedOperationException(); + return getProperty(Annotations.CONCURRENCY_LEVEL, + Annotations.DEFAULT_CONCURRENCY_LEVEL); - } + } + public int[] getFields() { + + return (int[]) getProperty(Annotations.FIELDS); + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - // public ResultBitBuffer bulkFilter(final K[] elements) { - // - // } +// return new FutureTask<Void>(new DistinctTask<E>(this, context)); + throw new UnsupportedOperationException(); + + } + public boolean accept(E e) { + // TODO Auto-generated method stub + return false; + } + + public boolean canAccept(Object o) { + // @todo by annotation giving an optional type constraint. + return true; + } + +// /** +// * Task executing on the node. +// */ +// static private class DistinctTask<E> implements Callable<Void> { +// +// private final BOpContext<IBindingSet> context; +// +// /** +// * A concurrent map whose keys are the bindings on the specified +// * variables (the keys and the values are the same since the map +// * implementation does not allow <code>null</code> values). +// */ +// private /*final*/ ConcurrentHashMap<E, E> map; +// +// /** +// * The variables used to impose a distinct constraint. +// */ +// private final int[] fields; +// +// DistinctTask(final DistinctElementFilter<E> op, +// final BOpContext<IBindingSet> context) { +// +// this.context = context; +// +// this.fields = op.getFields(); +// +// this.map = new ConcurrentHashMap<E, E>( +// op.getInitialCapacity(), op.getLoadFactor(), +// op.getConcurrencyLevel()); +// +// } +// +// /** +// * Construct an element are distinct for the configured variables then return +// * those bindings. +// * +// * @param bset +// * The binding set to be filtered. +// * +// * @return The distinct as bound values -or- <code>null</code> if the +// * binding set duplicates a solution which was already accepted. 
+// */ +// private E accept(final E e) { +// +// final E e2 = newElement(e); +// +// final boolean distinct = map.putIfAbsent(e2, e2) == null; +// +// return distinct ? e2 : null; +// +// } +// +// public Void call() throws Exception { +// +// final BOpStats stats = context.getStats(); +// +// final IAsynchronousIterator<IBindingSet[]> itr = context +// .getSource(); +// +// final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); +// +// try { +// +// while (itr.hasNext()) { +// +// final IBindingSet[] a = itr.next(); +// +// stats.chunksIn.increment(); +// stats.unitsIn.add(a.length); +// +// final List<IBindingSet> accepted = new LinkedList<IBindingSet>(); +// +// int naccepted = 0; +// +// for (IBindingSet bset : a) { +// +//// System.err.println("considering: " + bset); +// +// final IConstant<?>[] vals = accept(bset); +// +// if (vals != null) { +// +//// System.err.println("accepted: " +//// + Arrays.toString(vals)); +// +// /* +// * @todo This may cause problems since the +// * ArrayBindingSet does not allow mutation with +// * variables not declared up front. In that case use +// * new HashBindingSet( new ArrayBindingSet(...)). +// */ +// +// accepted.add(new ArrayBindingSet(vars, vals)); +// +// naccepted++; +// +// } +// +// } +// +// if (naccepted > 0) { +// +// final IBindingSet[] b = accepted +// .toArray(new IBindingSet[naccepted]); +// +//// System.err.println("output: " +//// + Arrays.toString(b)); +// +// sink.add(b); +// +// stats.unitsOut.add(naccepted); +// stats.chunksOut.increment(); +// +// } +// +// } +// +// // done. +// return null; +// +// } finally { +// +// sink.flush(); +// sink.close(); +// +// // discard the map. 
+// map = null; +// +// } +// +// } +// +// } + } Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Union.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Union.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Union.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -1,139 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 18, 2010 - */ - -package com.bigdata.bop.ap; - -import java.util.Map; - -import com.bigdata.bop.AbstractChunkedOrderedIteratorOp; -import com.bigdata.bop.BOp; -import com.bigdata.bop.ChunkedOrderedIteratorOp; -import com.bigdata.bop.IPredicate; -import com.bigdata.bop.engine.MapBindingSetsOverShards; -import com.bigdata.rdf.rules.TMUtility; -import com.bigdata.relation.RelationFusedView; -import com.bigdata.relation.rule.eval.IJoinNexus; -import com.bigdata.service.IBigdataFederation; -import com.bigdata.service.proxy.IRemoteChunkedIterator; -import com.bigdata.striterator.ChunkedOrderedStriterator; -import com.bigdata.striterator.IChunkedOrderedIterator; -import com.ibm.icu.impl.ByteBuffer; - -/** - * An operator which returns the union of two {@link IPredicate}s. Elements are - * consumed first from the left predicate and then from the right predicate. - * This operator does not cross network boundaries. An intermediate send / - * receive operator pattern must be applied when this operator is used in a - * scale-out context. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * - * @todo I have some basic questions about the ability to use a UNION of two - * predicates in scale-out. I think that this might be more accurately - * modeled as the UNION of two joins. That is, rather than: - * - * <pre> - * JOIN( ..., - * UNION( foo.spo(A,loves,B), - * bar.spo(A,loves,B) ) - * ) - * </pre> - * using - * <pre> - * UNION( JOIN( ..., foo.spo(A,loves,B) ), - * JOIN( ..., bar.spo(A,loves,B) ) - * ) - * </pre> - * which would be a binding set union rather than an element union. - * - * @todo This was historically handled by {@link RelationFusedView} which should - * be removed when this class is implemented. 
- * - * @todo The {@link TMUtility} will have to be updated to use this operator - * rather than specifying multiple source "names" for the relation of the - * predicate. - * - * @todo The FastClosureRuleTask will also need to be updated to use a - * {@link Union} rather than a {@link RelationFusedView}. - * - * @todo It would be a trivial generalization to make this an N-ary union. - * - * @todo A similar operator could be defined where child operands to execute - * concurrently and the result is no longer strongly ordered. - * - * @todo Implement the send/receive pattern. - * <p> - * This COULD be done using {@link IRemoteChunkedIterator} if the send and - * receive operators are appropriately decorated in order to pass the - * proxy object along. - * <p> - * This SHOULD be implemented using an NIO direct {@link ByteBuffer} - * pattern similar to {@link MapBindingSetsOverShards}. - */ -public class Union<E> extends AbstractChunkedOrderedIteratorOp<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * @param left - * @param rigtht - * @param annotations - */ - public Union(final ChunkedOrderedIteratorOp<E> left, - final ChunkedOrderedIteratorOp<E> right, - final Map<String, Object> annotations) { - - super(new BOp[] { left, right }, annotations); - - } - - @SuppressWarnings("unchecked") - protected ChunkedOrderedIteratorOp<E> left() { - return (ChunkedOrderedIteratorOp<E>)get(0); - } - - @SuppressWarnings("unchecked") - protected ChunkedOrderedIteratorOp<E> right() { - return (ChunkedOrderedIteratorOp<E>)get(1); - } - - @SuppressWarnings("unchecked") - public IChunkedOrderedIterator<E> eval(final IBigdataFederation<?> fed, - final IJoinNexus joinNexus) { - - return (IChunkedOrderedIterator<E>) new ChunkedOrderedStriterator<IChunkedOrderedIterator<E>, E>(// - left().eval(fed, joinNexus)).append(// - right().eval(fed, joinNexus)// - ); - - } - -} Added: 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BOpConstraint.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BOpConstraint.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BOpConstraint.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -0,0 +1,66 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 2, 2010 + */ + +package com.bigdata.bop.constraint; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpBase; +import com.bigdata.bop.IConstraint; + +/** + * Abstract base class for constraint operators. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +abstract public class BOpConstraint extends BOpBase implements IConstraint { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. + * @param op + */ + public BOpConstraint(BOpBase op) { + super(op); + } + + /** + * Required shallow copy constructor. 
+ * @param args + * @param annotations + */ + public BOpConstraint(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/BOpConstraint.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQ.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQ.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQ.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -26,11 +26,9 @@ import java.util.Map; -import com.bigdata.bop.BOpBase; import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; -import com.bigdata.bop.IConstraint; import com.bigdata.bop.IVariable; /** @@ -39,7 +37,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -public class EQ extends BOpBase implements IConstraint { +public class EQ extends BOpConstraint { private static final long serialVersionUID = 1L; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQConstant.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQConstant.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/EQConstant.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -26,11 +26,9 @@ import java.util.Map; -import com.bigdata.bop.BOpBase; import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; -import com.bigdata.bop.IConstraint; import com.bigdata.bop.IVariable; /** @@ -39,7 +37,7 @@ * @author <a 
href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -public class EQConstant extends BOpBase implements IConstraint { +public class EQConstant extends BOpConstraint { /** * @@ -62,7 +60,7 @@ public EQConstant(final IVariable<?> var, final IConstant<?> val) { - super(new BOp[] { var, val }); + super(new BOp[] { var, val }, null/*annotations*/); if (var == null) throw new IllegalArgumentException(); Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/IN.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/IN.java 2010-09-02 13:49:55 UTC (rev 3494) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/IN.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -1,197 +0,0 @@ -/** - -The Notice below must appear in each file of the Source Code of any -copy you distribute of the Licensed Product. Contributors to any -Modifications may add their own copyright notices to identify their -own contributions. - -License: - -The contents of this file are subject to the CognitiveWeb Open Source -License Version 1.1 (the License). You may not copy or use this file, -in either source code or executable form, except in compliance with -the License. You may obtain a copy of the License from - - http://www.CognitiveWeb.org/legal/license/ - -Software distributed under the License is distributed on an AS IS -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See -the License for the specific language governing rights and limitations -under the License. - -Copyrights: - -Portions created by or assigned to CognitiveWeb are Copyright -(c) 2003-2003 CognitiveWeb. All Rights Reserved. Contact -information for CognitiveWeb is available at - - http://www.CognitiveWeb.org - -Portions Copyright (c) 2002-2003 Bryan Thompson. 
- -Acknowledgements: - -Special thanks to the developers of the Jabber Open Source License 1.0 -(JOSL), from which this License was derived. This License contains -terms that differ from JOSL. - -Special thanks to the CognitiveWeb Open Source Contributors for their -suggestions and support of the Cognitive Web. - -Modifications: - -*/ -/* - * Created on Jun 17, 2008 - */ - -package com.bigdata.bop.constraint; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.Map; - -import com.bigdata.bop.BOpBase; -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpList; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; -import com.bigdata.bop.IConstraint; -import com.bigdata.bop.IVariable; -import com.bigdata.rdf.spo.InGraphBinarySearchFilter; -import com.bigdata.rdf.spo.InGraphHashSetFilter; - -/** - * A constraint that a variable may only take on the bindings enumerated by some - * set. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * - * @todo This uses binary search, which is thread-safe. It could also use a - * {@link HashSet}, but the {@link HashSet} needs to be thread-safe since - * the filter could be applied concurrently during evaluation. - * - * FIXME Reconcile this with {@link InGraphBinarySearchFilter} and - * {@link InGraphHashSetFilter} and also with the use of an in-memory join - * against the incoming binding sets to handle SPARQL data sets. - */ -public class IN<T> extends BOpBase implements IConstraint { - -// /** -// * -// */ -// private static final long serialVersionUID = 5805883429399100605L; -// -// private final IVariable<T> x; -// -// private final T[] set; - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * The sorted data (cached). - * <p> - * Note: This cache is redundant with the 2nd argument to the operator. It - * is not serialized and is compiled on demand when the operator is used. 
- */ - private transient volatile T[] set; - - /** - * Deep copy constructor. - */ - public IN(final IN<T> op) { - super(op); - } - - /** - * Shallow copy constructor. - */ - public IN(final BOp[] args, final Map<String, Object> annotations) { - - // @todo validate args? - super(args, annotations); - - } - - /** - * - * @param x - * Some variable. - * @param set - * A set of legal term identifiers providing a constraint on the - * allowable values for that variable. - */ - public IN(final IVariable<T> x, final IConstant<T>[] set) { - - super(new BOp[] { x, new BOpList(set) }); - - if (x == null || set == null) - throw new IllegalArgumentException(); - - if (set.length == 0) - throw new IllegalArgumentException(); - - } - - @SuppressWarnings("unchecked") - static private <T> T[] sort(final BOpList set) { - - final int n = set.arity(); - - if (n == 0) - throw new IllegalArgumentException(); - - final T firstValue = ((IConstant<T>) set.get(0)).get(); - - // allocate an array of the correct type. - final T[] tmp = (T[]) java.lang.reflect.Array.newInstance(firstValue - .getClass(), n); - - for (int i = 0; i < n; i++) { - - // dereference the constants to their bound values. - tmp[i] = ((IConstant<T>) set.get(i)).get(); - - } - - // sort the bound values. - Arrays.sort(tmp); - - return tmp; - - } - - public boolean accept(final IBindingSet bindingSet) { - - if(set == null) { - - set = sort((BOpList) get(1)); - - } - - // get binding for "x". - @SuppressWarnings("unchecked") - final IConstant<T> x = bindingSet.get((IVariable<?>) get(0)/* x */); - - if (x == null) { - - // not yet bound. - return true; - - } - - final T v = x.get(); - - // lookup the bound value in the set of values. - final int pos = Arrays.binarySearch(set, v); - - // true iff the bound value was found in the set. 
- return pos >= 0; - - } - -} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java (from rev 3466, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/IN.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/constraint/INBinarySearch.java 2010-09-02 18:52:36 UTC (rev 3495) @@ -0,0 +1,201 @@ +/** + +The Notice below must appear in each file of the Source Code of any +copy you distribute of the Licensed Product. Contributors to any +Modifications may add their own copyright notices to identify their +own contributions. + +License: + +The contents of this file are subject to the CognitiveWeb Open Source +License Version 1.1 (the License). You may not copy or use this file, +in either source code or executable form, except in compliance with +the License. You may obtain a copy of the License from + + http://www.CognitiveWeb.org/legal/license/ + +Software distributed under the License is distributed on an AS IS +basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +the License for the specific language governing rights and limitations +under the License. + +Copyrights: + +Portion... [truncated message content] |
From: <tho...@us...> - 2010-09-02 20:42:50
|
Revision: 3498 http://bigdata.svn.sourceforge.net/bigdata/?rev=3498&view=rev Author: thompsonbry Date: 2010-09-02 20:42:43 +0000 (Thu, 02 Sep 2010) Log Message: ----------- Added IKeyOrder#getKey(IKeyBuilder,E element) to format a key from an element. Added support and unit tests for optional pipeline joins in which the alternative sink is specified. This is to support jumping out of an optional join group. (Note that the star join does not currently support this feature and does not have a unit test for this feature). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/IKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 
2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -65,8 +65,11 @@ /** * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} + * + * @todo was 1000. dialed down to reduce heap consumption for arrays. + * test performance @ 100 and 1000. */ - int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 1000; + int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 100; /** * Sets the capacity of the {@link IBuffer}s used to accumulate a chunk Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -87,7 +87,8 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ * - * @todo Break the star join logic out into its own join operator. + * @todo Break the star join logic out into its own join operator and test + * suite. */ public class PipelineJoin extends BindingSetPipelineOp { @@ -324,12 +325,12 @@ /** * The join that is being executed. */ - final protected PipelineJoin joinOp; + final private PipelineJoin joinOp; /** * The constraint (if any) specified for the join operator. */ - final IConstraint[] constraints; + final private IConstraint[] constraints; /** * The maximum parallelism with which the {@link JoinTask} will @@ -337,101 +338,59 @@ * * @see Annotations#MAX_PARALLEL */ - final int maxParallel; + final private int maxParallel; /** * The service used for executing subtasks (optional). * * @see #maxParallel */ - final Executor service; + final private Executor service; /** * True iff the {@link #right} operand is an optional pattern (aka if * this is a SPARQL style left join).
*/ - final boolean optional; + final private boolean optional; /** - * The alternative sink to use when the join is {@link #optional} but - * the failed joined needs to jump out of a join group rather than - * routing directly to the ancestor in the operator tree. - * - * FIXME Support for the {@link #optionalSink} is not finished. When the - * optional target is not simply the direct ancestor in the operator - * tree then we need to have a separate thread local buffer in front of - * the optional sink for the join task. This means that we need to use - * two {@link #threadLocalBufferFactory}s, one for the default sink and - * one for the alternative sink. All of this only matters when the - * binding sets are being routed out of an optional join group. When the - * tails are independent optionals then the target is the same as the - * target for binding sets which do join. - */ - final IBlockingBuffer<IBindingSet[]> optionalSink; - - /** * The variables to be retained by the join operator. Variables not * appearing in this list will be stripped before writing out the - * binding set onto the {@link #sink}. + * binding set onto the output sink(s). */ - final IVariable<?>[] variablesToKeep; + final private IVariable<?>[] variablesToKeep; - /** - * The source for the binding sets. - */ - final BindingSetPipelineOp left; +// /** +// * The source for the binding sets. +// */ +// final BindingSetPipelineOp left; /** * The source for the elements to be joined. */ - final IPredicate<?> right; + final private IPredicate<?> right; /** * The relation associated with the {@link #right} operand. */ - final IRelation<?> relation; + final private IRelation<?> relation; /** * The partition identifier -or- <code>-1</code> if we are not reading * on an index partition. */ - final int partitionId; + final private int partitionId; /** * The evaluation context. 
*/ - final protected BOpContext<IBindingSet> context; + final private BOpContext<IBindingSet> context; /** * The statistics for this {@link JoinTask}. */ - final PipelineJoinStats stats; + final private PipelineJoinStats stats; - final private ThreadLocalBufferFactory<AbstractUnsynchronizedArrayBuffer<IBindingSet>, IBindingSet> threadLocalBufferFactory = new ThreadLocalBufferFactory<AbstractUnsynchronizedArrayBuffer<IBindingSet>, IBindingSet>() { - - @Override - protected AbstractUnsynchronizedArrayBuffer<IBindingSet> initialValue() { - - // new buffer created by the concrete JoinClass impl. - return newUnsyncOutputBuffer(); - - } - - @Override - protected void halted() { - - JoinTask.this.halted(); - - } - - }; - - public String toString() { - - return getClass().getName() + "{ joinOp=" + joinOp + "}"; - - } - /** * The source from which we read the binding set chunks. * <p> @@ -459,17 +418,36 @@ final private IBlockingBuffer<IBindingSet[]> sink; /** + * The alternative sink to use when the join is {@link #optional} AND + * {@link BOpContext#getSink2()} returns a distinct buffer for the + * alternative sink. The binding sets from the source are copied onto the + * alternative sink for an optional join if the join fails. Normally the + * {@link BOpContext#getSink()} can be used for both the joins which + * succeed and those which fail. The alternative sink is only necessary + * when the failed join needs to jump out of a join group rather than + * routing directly to the ancestor in the operator tree. + */ + final private IBlockingBuffer<IBindingSet[]> sink2; + + /** + * The thread-local buffer factory for the default sink. + */ + final private TLBFactory threadLocalBufferFactory; + + /** + * The thread-local buffer factory for the optional sink (iff the + * optional sink is defined). 
+ */ + final private TLBFactory threadLocalBufferFactory2; + + /** * Instances of this class MUST be created in the appropriate execution * context of the target {@link DataService} so that the federation and * the joinNexus references are both correct and so that it has access * to the local index object for the specified index partition. * * @param joinOp - * @param joinNexus - * @param sink - * The sink on which the {@link IBindingSet} chunks are - * written. - * @param requiredVars + * @param context */ public JoinTask(// final PipelineJoin joinOp,// @@ -483,7 +461,7 @@ // this.fed = context.getFederation(); this.joinOp = joinOp; - this.left = joinOp.left(); +// this.left = joinOp.left(); this.right = joinOp.right(); this.constraints = joinOp.constraints(); this.maxParallel = joinOp.getMaxParallel(); @@ -506,15 +484,26 @@ this.relation = context.getReadRelation(right); this.source = context.getSource(); this.sink = context.getSink(); - this.optionalSink = context.getSink2(); + this.sink2 = context.getSink2(); this.partitionId = context.getPartitionId(); this.stats = (PipelineJoinStats) context.getStats(); + this.threadLocalBufferFactory = new TLBFactory(sink); + + this.threadLocalBufferFactory2 = sink2 == null ? null + : new TLBFactory(sink2); + if (log.isDebugEnabled()) log.debug("joinOp=" + joinOp); } + public String toString() { + + return getClass().getName() + "{ joinOp=" + joinOp + "}"; + + } + /** * Runs the {@link JoinTask}. * @@ -536,6 +525,8 @@ * Flush and close the thread-local output buffers. 
*/ threadLocalBufferFactory.flush(); + if (threadLocalBufferFactory2 != null) + threadLocalBufferFactory2.flush(); // flush the sync buffer flushAndCloseBuffersAndAwaitSinks(); @@ -560,6 +551,8 @@ try { // resetUnsyncBuffers(); threadLocalBufferFactory.reset(); + if (threadLocalBufferFactory2 != null) + threadLocalBufferFactory2.reset(); } catch (Throwable t2) { log.error(t2.getLocalizedMessage(), t2); } @@ -642,27 +635,6 @@ } /** - * A method used by the {@link #threadLocalBufferFactory} to create new - * output buffer as required. The output buffer will be used to - * aggregate {@link IBindingSet}s generated by this {@link JoinTask}. - */ - final protected AbstractUnsynchronizedArrayBuffer<IBindingSet> newUnsyncOutputBuffer() { - - /* - * The index is not key-range partitioned. This means that there is - * ONE (1) JoinTask per predicate in the rule. The bindingSets are - * aggregated into chunks by this buffer. On overflow, the buffer - * writes onto a BlockingBuffer. The sink JoinTask reads from that - * BlockingBuffer's iterator. - */ - - // flushes to the syncBuffer. - return new UnsyncLocalOutputBuffer<IBindingSet>(stats, joinOp - .getChunkCapacity(), sink); - - } - - /** * Flush and close all output buffers and await sink {@link JoinTask} * (s). * <p> @@ -694,6 +666,11 @@ sink.flush(); sink.close(); + if(sink2!=null) { + sink2.flush(); + sink2.close(); + } + } /** @@ -709,7 +686,19 @@ if (sink.getFuture() != null) { sink.getFuture().cancel(true/* mayInterruptIfRunning */); + + } + + if (sink2 != null) { + sink2.reset(); + + if (sink2.getFuture() != null) { + + sink2.getFuture().cancel(true/* mayInterruptIfRunning */); + + } + } } @@ -1300,6 +1289,10 @@ final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer = threadLocalBufferFactory .get(); + // Thread-local buffer iff optional sink is in use. + final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer2 = threadLocalBufferFactory2 == null ? 
null + : threadLocalBufferFactory2.get(); + while (itr.hasNext()) { final Object[] chunk = itr.nextChunk(); @@ -1329,7 +1322,13 @@ for (IBindingSet bs : this.bindingSets) { - unsyncBuffer.add(bs); + if (unsyncBuffer2 == null) { + // use the default sink. + unsyncBuffer.add(bs); + } else { + // use the alternative sink. + unsyncBuffer2.add(bs); + } } @@ -1358,6 +1357,10 @@ final IStarJoin starJoin = (IStarJoin) accessPath .getPredicate(); + /* + * FIXME The star join does not handle the alternative sink yet. + * See the ChunkTask for the normal join. + */ final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer = threadLocalBufferFactory .get(); @@ -1727,6 +1730,54 @@ }// class ChunkTask + /** + * Concrete implementation with hooks to halt a join. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + */ + private class TLBFactory + extends + ThreadLocalBufferFactory<AbstractUnsynchronizedArrayBuffer<IBindingSet>, IBindingSet> { + + final private IBlockingBuffer<IBindingSet[]> sink; + + /** + * + * @param sink + * The thread-safe buffer onto which the thread-local + * buffer overflow. + */ + public TLBFactory(final IBlockingBuffer<IBindingSet[]> sink) { + + if (sink == null) + throw new IllegalArgumentException(); + + this.sink = sink; + + } + + @Override + protected AbstractUnsynchronizedArrayBuffer<IBindingSet> initialValue() { + + /* + * Wrap the buffer provider to the constructor with a thread + * local buffer. 
+ */ + + return new UnsyncLocalOutputBuffer<IBindingSet>(stats, joinOp + .getChunkCapacity(), sink); + + } + + @Override + protected void halted() { + + JoinTask.this.halted(); + + } + + } // class TLBFactory + }// class JoinTask } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -209,41 +209,13 @@ } /** - * This handles a request for an access path that is restricted to a - * specific index partition. + * {@inheritDoc} * <p> - * Note: This path is used with the scale-out JOIN strategy, which - * distributes join tasks onto each index partition from which it needs to - * read. Those tasks constrain the predicate to only read from the index - * partition which is being serviced by that join task. - * <p> * Note: Since the relation may materialize the index views for its various * access paths, and since we are restricted to a single index partition and * (presumably) an index manager that only sees the index partitions local * to a specific data service, we create an access path view for an index * partition without forcing the relation to be materialized. - * <p> - * Note: Expanders ARE NOT applied in this code path. Expanders require a - * total view of the relation, which is not available during scale-out - * pipeline joins. - * - * @param indexManager - * This MUST be the data service local index manager so that the - * returned access path will read against the local shard. - * @param predicate - * The predicate. {@link IPredicate#getPartitionId()} MUST return - * a valid index partition identifier. - * - * @throws IllegalArgumentException - * if either argument is <code>null</code>. 
- * @throws IllegalArgumentException - * unless the {@link IIndexManager} is a <em>local</em> index - * manager providing direct access to the specified shard. - * @throws IllegalArgumentException - * unless the predicate identifies a specific shard using - * {@link IPredicate#getPartitionId()}. - * - * @todo Raise this method into the {@link IRelation} interface. */ public IAccessPath<E> getAccessPathForIndexPartition( final IIndexManager indexManager, // Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -175,13 +175,35 @@ */ IAccessPath<E> getAccessPath(IPredicate<E> predicate); - /* - * @todo raise this method into this interface. it is currently implemented - * by AbstractRelation and overridden by SPORelation to handle the different - * index families for triples versus quads. + /** + * This handles a request for an access path that is restricted to a + * specific index partition. This access path is used with the scale-out + * JOIN strategy, which distributes join tasks onto each index partition + * from which it needs to read. Those tasks constrain the predicate to only + * read from the index partition which is being serviced by that join task. + * <p> + * Note: Expanders ARE NOT applied in this code path. Expanders require a + * total view of the relation, which is not available during scale-out + * pipeline joins. + * + * @param indexManager + * This MUST be the data service local index manager so that the + * returned access path will read against the local shard. + * @param predicate + * The predicate. {@link IPredicate#getPartitionId()} MUST return + * a valid index partition identifier. 
+ * + * @throws IllegalArgumentException + * if either argument is <code>null</code>. + * @throws IllegalArgumentException + * unless the {@link IIndexManager} is a <em>local</em> index + * manager providing direct access to the specified shard. + * @throws IllegalArgumentException + * unless the predicate identifies a specific shard using + * {@link IPredicate#getPartitionId()}. */ -// IAccessPath<E> getAccessPathForIndexPartition(IIndexManager indexManager, IPredicate<E> predicate); - + IAccessPath<E> getAccessPathForIndexPartition(IIndexManager indexManager, + IPredicate<E> predicate); /** * The fully qualified name of the index. * Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -208,4 +208,9 @@ throw new UnsupportedOperationException(); } + public IAccessPath<E> getAccessPathForIndexPartition( + IIndexManager indexManager, IPredicate<E> predicate) { + throw new UnsupportedOperationException(); + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractKeyOrder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractKeyOrder.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractKeyOrder.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -27,6 +27,7 @@ package com.bigdata.striterator; +import com.bigdata.bop.IElement; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.btree.keys.IKeyBuilder; @@ -42,13 +43,36 @@ abstract public class AbstractKeyOrder<E> 
implements IKeyOrder<E> { /** - * This implementation should work fine unless you need to override the - * manner in which a bound value in the {@link IPredicate} is converted into - * a key. - * <p> * {@inheritDoc} + * + * @todo While you can override + * {@link #appendKeyComponent(IKeyBuilder, int, Object)} to use a + * different encoding, this does not really let you handle something + * which does not implement {@link IElement} without overriding + * {@link #getKey(IKeyBuilder, Object)} as well. */ - public byte[] getFromKey(final IKeyBuilder keyBuilder, + public byte[] getKey(final IKeyBuilder keyBuilder, final E element) { + + keyBuilder.reset(); + + final int keyArity = getKeyArity(); // use the key's "arity". + + for (int i = 0; i < keyArity; i++) { + + /* + * Note: If you need to override the default IKeyBuilder behavior do + * it in the invoked method. + */ + appendKeyComponent(keyBuilder, i, ((IElement) element) + .get(getKeyOrder(i))); + + } + + return keyBuilder.getKey(); + + } + + final public byte[] getFromKey(final IKeyBuilder keyBuilder, final IPredicate<E> predicate) { keyBuilder.reset(); @@ -67,7 +91,7 @@ /* * Note: If you need to override the default IKeyBuilder behavior do - * it here. + * it in the invoked method. */ appendKeyComponent(keyBuilder, i, term.get()); @@ -79,6 +103,15 @@ } + final public byte[] getToKey(final IKeyBuilder keyBuilder, + final IPredicate<E> predicate) { + + final byte[] from = getFromKey(keyBuilder, predicate); + + return from == null ? null : SuccessorUtil.successor(from); + + } + /** * Encodes a value into the key. This implementation uses the default * behavior of {@link IKeyBuilder}. If you need to specialize how a value @@ -90,14 +123,5 @@ keyBuilder.append(keyComponent); } - - public byte[] getToKey(final IKeyBuilder keyBuilder, - final IPredicate<E> predicate) { - final byte[] from = getFromKey(keyBuilder, predicate); - - return from == null ?
null : SuccessorUtil.successor(from); - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/IKeyOrder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/IKeyOrder.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/IKeyOrder.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -73,16 +73,44 @@ /* * New methods. */ - + /** + * Return the key for an element of the relation. + * + * @param keyBuilder + * The object which will be used to construct the key. + * @param element + * An element for the associated relation. + * + * @return The key for the index associated with this {@link IKeyOrder}. + */ + byte[] getKey(IKeyBuilder keyBuilder, E element); + + /** * Return the inclusive lower bound which would be used for a query against * this {@link IKeyOrder} for the given {@link IPredicate}. + * + * @param keyBuilder + * The object which will be used to construct the key. + * @param predicate + * A predicate describing bound and unbound fields for the key. + * + * @return The key corresponding to the inclusive lower bound for a query + * against that {@link IPredicate}. */ byte[] getFromKey(IKeyBuilder keyBuilder, IPredicate<E> predicate); /** * Return the exclusive upper bound which would be used for a query against * this {@link IKeyOrder} for the given {@link IPredicate}. + * + * @param keyBuilder + * The object which will be used to construct the key. + * @param predicate + * A predicate describing bound and unbound fields for the key. + * + * @return The key corresponding to the exclusive upper bound for a query + * against that {@link IPredicate}. 
*/ byte[] getToKey(IKeyBuilder keyBuilder, IPredicate<E> predicate); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -181,9 +181,7 @@ final E e = itr.next(); - // @todo this is not declarative! - final byte[] key = keyBuilder.reset().append(e.name) - .append(e.value).getKey(); + final byte[] key = primaryKeyOrder.getKey(keyBuilder, e); if (!ndx.contains(key)) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -58,6 +58,7 @@ import com.bigdata.bop.NoSuchBOpException; import com.bigdata.bop.aggregation.Union; import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.ap.R; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.IElementFilter; @@ -189,9 +190,6 @@ * <p> * This is guarded by the {@link #runningStateLock}. * - * FIXME Declarative generation of a key for an index from an element (see - * {@link R}). 
- * * FIXME Unit tests for non-distinct {@link IElementFilter}s on an * {@link IPredicate}, unit tests for distinct element filter on an * {@link IPredicate} which is capable of distributed operations, handling Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -590,6 +590,10 @@ for (int i = 0; i < e.length; i++) { if (log.isInfoEnabled()) log.info(n + " : " + e[i]); + if (n >= expected.length) { + fail("Willing to deliver too many solutions: n=" + n + + " : " + e[i]); + } if (!expected[n].equals(e[i])) { fail("n=" + n + ", expected=" + expected[n] + ", actual=" + e[i]); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -74,9 +74,6 @@ * source binding sets or in the access path. Joins are pretty quick so it * is really difficult to test this outside of a stress test. BSBM is a * good way to validate this (run the qualification trials). - * - * @todo Write unit tests for star-joins (in their own test suite and ideally - * factor them out from the standard {@link PipelineJoin} operator). */ public class TestPipelineJoin extends TestCase2 { @@ -483,8 +480,6 @@ * * @throws ExecutionException * @throws InterruptedException - * - * @todo test w/ and w/o the alternative sink. 
*/ public void test_optionalJoin() throws InterruptedException, ExecutionException { @@ -593,5 +588,128 @@ ft.get(); // verify nothing thrown. } + + /** + * Unit test for an optional {@link PipelineJoin} when the + * {@link BOpContext#getSink2() alternative sink} is specified. + * + * @throws InterruptedException + * @throws ExecutionException + */ + public void test_optionalJoin_withAltSink() throws InterruptedException, + ExecutionException { + final Var<?> x = Var.var("x"); + + final int startId = 1; + final int joinId = 2; + final int predId = 3; + + final PipelineJoin query = new PipelineJoin( + // left + new PipelineStartOp(new BOp[] {}, NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + })), + // right + new Predicate<E>(new IVariableOrConstant[] { + new Constant<String>("Mary"), x }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.PARTITION_ID, + Integer.valueOf(-1)),// + new NV(Predicate.Annotations.OPTIONAL, + Boolean.FALSE),// + new NV(Predicate.Annotations.CONSTRAINT, null),// + new NV(Predicate.Annotations.EXPANDER, null),// + new NV(Predicate.Annotations.BOP_ID, predId),// + })), + // join annotations + NV + .asMap(new NV[] { // + new NV(BOpBase.Annotations.BOP_ID, + joinId), + new NV(PipelineJoin.Annotations.OPTIONAL, + Boolean.TRUE),// +// + })// + ); + + /* + * Setup the source with two initial binding sets. One has nothing bound + * and will join with (Mary,x:=John) and (Mary,x:=Paul). The other has + * x:=Luke which does not join. However, this is an optional join so + * x:=Luke should output anyway. 
+ */ + final IAsynchronousIterator<IBindingSet[]> source; + { + final IBindingSet bset1 = new HashBindingSet(); + final IBindingSet bset2 = new HashBindingSet(); + { + + bset2.set(x, new Constant<String>("Luke")); + + } + source = new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { bset1, bset2 } }); + } + + // the expected solutions for the default sink. + final IBindingSet[] expected = new IBindingSet[] {// + new ArrayBindingSet(// + new IVariable[] { x },// + new IConstant[] { new Constant<String>("John") }// + ),// + new ArrayBindingSet(// + new IVariable[] { x },// + new IConstant[] { new Constant<String>("Paul") }// + ),// + }; + + // the expected solutions for the alternative sink. + final IBindingSet[] expected2 = new IBindingSet[] {// + new ArrayBindingSet(// + new IVariable[] { x },// + new IConstant[] { new Constant<String>("Luke") }// + ),// + }; + + final IBlockingBuffer<IBindingSet[]> sink = query.newBuffer(); + + final IBlockingBuffer<IBindingSet[]> sink2 = query.newBuffer(); + + final PipelineJoinStats stats = query.newStats(); + + final BOpContext<IBindingSet> context = new BOpContext<IBindingSet>( + null/* fed */, jnl/* indexManager */, + ITx.READ_COMMITTED/* readTimestamp */, + ITx.UNISOLATED/* writeTimestamp */, -1/* partitionId */, stats, + source, sink, sink2); + + // get task. + final FutureTask<Void> ft = query.eval(context); + + // execute task. 
+ jnl.getExecutorService().execute(ft); + + TestQueryEngine.assertSolutions(expected, sink.iterator()); + TestQueryEngine.assertSolutions(expected2, sink2.iterator()); + + // join task + assertEquals(1L, stats.chunksIn.get()); + assertEquals(2L, stats.unitsIn.get()); + assertEquals(3L, stats.unitsOut.get()); + assertEquals(2L, stats.chunksOut.get()); + // access path + assertEquals(0L, stats.accessPathDups.get()); + assertEquals(2L, stats.accessPathCount.get()); + assertEquals(1L, stats.chunkCount.get()); + assertEquals(2L, stats.elementCount.get()); + + assertTrue(ft.isDone()); + assertFalse(ft.isCancelled()); + ft.get(); // verify nothing thrown. + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconKeyOrder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconKeyOrder.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconKeyOrder.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -19,7 +19,7 @@ * @todo define a BigdataValuePredicate that interoperates with this class to * support joins against the lexicon. 
*/ -public class LexiconKeyOrder extends AbstractKeyOrder<BigdataValue> implements IKeyOrder<BigdataValue> { +public class LexiconKeyOrder extends AbstractKeyOrder<BigdataValue> { /* * Note: these constants make it possible to use switch(index()) Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicKeyOrder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicKeyOrder.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicKeyOrder.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -7,7 +7,7 @@ import com.bigdata.striterator.AbstractKeyOrder; import com.bigdata.striterator.IKeyOrder; -public class MagicKeyOrder extends AbstractKeyOrder<IMagicTuple> implements IKeyOrder<IMagicTuple>, Serializable { +public class MagicKeyOrder extends AbstractKeyOrder<IMagicTuple> implements Serializable { /** * Generated serialization version. Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java 2010-09-02 20:20:34 UTC (rev 3497) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOKeyOrder.java 2010-09-02 20:42:43 UTC (rev 3498) @@ -58,7 +58,7 @@ * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ */ -public class SPOKeyOrder extends AbstractKeyOrder<ISPO> implements IKeyOrder<ISPO>, Serializable { +public class SPOKeyOrder extends AbstractKeyOrder<ISPO> implements Serializable { /** * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-09-03 18:33:19
|
Revision: 3506 http://bigdata.svn.sourceforge.net/bigdata/?rev=3506&view=rev Author: thompsonbry Date: 2010-09-03 18:33:10 +0000 (Fri, 03 Sep 2010) Log Message: ----------- Changed IRelation#newElement() to be compatible with construction of new elements (SPOs, etc.) from a binding set and an ordered list of variables and constants. The old method signature was basically reusing the IPredicate for this purpose. Added a method to IVariableOrConstant to permit the retrieval of the "as bound" value without testing to see if the object is a variable or a constant. This should clear up a pattern of conditional logic based on the nature of the object (variable or constant). Some more clean up of the operator hierarchy. More notes on how to handle different operator constructs. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariableOrConstant.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/AbstractSampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/Solution.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/locator/TestDefaultResourceLocator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -1,119 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 18, 2010 - */ - -package com.bigdata.bop; - -import java.util.Map; -import java.util.concurrent.TimeUnit; - -import com.bigdata.bop.engine.BOpStats; -import com.bigdata.relation.accesspath.BlockingBuffer; -import com.bigdata.relation.accesspath.IBlockingBuffer; - -/** - * Abstract base class for pipelined operators regardless of the type of data - * moving along the pipeline. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -abstract public class AbstractPipelineOp<E> extends BOpBase implements - PipelineOp<E> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public interface Annotations extends PipelineOp.Annotations { - - } - - /** - * Deep copy constructor. - * - * @param op - */ - protected AbstractPipelineOp(final AbstractPipelineOp<E> op) { - - super(op); - - } - - /** - * Shallow copy constructor. - * - * @param args - * @param annotations - */ - protected AbstractPipelineOp(final BOp[] args, - final Map<String, Object> annotations) { - - super(args, annotations); - - } - - public int getChunkCapacity() { - - return getProperty(Annotations.CHUNK_CAPACITY, - Annotations.DEFAULT_CHUNK_CAPACITY); - - } - - public int getChunkOfChunksCapacity() { - - return getProperty(Annotations.CHUNK_OF_CHUNKS_CAPACITY, - Annotations.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY); - - } - - public long getChunkTimeout() { - - return getProperty(Annotations.CHUNK_TIMEOUT, - Annotations.DEFAULT_CHUNK_TIMEOUT); - - } - - /** - * The {@link TimeUnit}s in which the {@link #chunkTimeout} is measured. 
- */ - protected static transient final TimeUnit chunkTimeoutUnit = TimeUnit.MILLISECONDS; - - public BOpStats newStats() { - - return new BOpStats(); - - } - - public IBlockingBuffer<E[]> newBuffer() { - - return new BlockingBuffer<E[]>(getChunkOfChunksCapacity(), - getChunkCapacity(), getChunkTimeout(), chunkTimeoutUnit); - - } - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -465,6 +465,66 @@ } /** + * Copy the values for variables in the predicate from the element, applying + * them to the caller's {@link IBindingSet}. + * + * @param e + * The element. + * @param pred + * The predicate. + * @param bindingSet + * The binding set, which is modified as a side-effect. + */ + @SuppressWarnings("unchecked") + final private void copyValues(final IElement e, final IPredicate<?> pred, + final IBindingSet bindingSet) { + + for (int i = 0; i < pred.arity(); i++) { + + final IVariableOrConstant<?> t = pred.get(i); + + if (t.isVar()) { + + final IVariable<?> var = (IVariable<?>) t; + + final Constant<?> newval = new Constant(e.get(i)); + + bindingSet.set(var, newval); + + } + + } + + } + + /** + * Copy the bound values from the element into a binding set using the + * caller's variable names. + * + * @param vars + * The ordered list of variables. + * @param e + * The element. + * @param bindingSet + * The binding set, which is modified as a side-effect. 
+ */ + final public void bind(final IVariable<?>[] vars, final IElement e, + final IBindingSet bindingSet) { + + for (int i = 0; i < vars.length; i++) { + + final IVariable<?> var = vars[i]; + + @SuppressWarnings("unchecked") + final Constant<?> newval = new Constant(e.get(i)); + + bindingSet.set(var, newval); + + } + + } + + /** * Check constraints. * * @param constraints @@ -499,26 +559,4 @@ } - @SuppressWarnings("unchecked") - final private void copyValues(final IElement e, final IPredicate<?> pred, - final IBindingSet bindingSet) { - - for (int i = 0; i < pred.arity(); i++) { - - final IVariableOrConstant<?> t = pred.get(i); - - if (t.isVar()) { - - final IVariable<?> var = (IVariable<?>) t; - - final Constant<?> newval = new Constant(e.get(i)); - - bindingSet.set(var, newval); - - } - - } - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -37,14 +37,14 @@ * @version $Id$ */ abstract public class BindingSetPipelineOp extends - AbstractPipelineOp<IBindingSet> { + PipelineOp<IBindingSet> { /** * */ private static final long serialVersionUID = 1L; - public interface Annotations extends AbstractPipelineOp.Annotations { + public interface Annotations extends PipelineOp.Annotations { /** * The value of the annotation is the {@link BOp.Annotations#BOP_ID} of @@ -61,7 +61,7 @@ * * @param op */ - protected BindingSetPipelineOp(AbstractPipelineOp<IBindingSet> op) { + protected BindingSetPipelineOp(PipelineOp<IBindingSet> op) { super(op); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -23,7 +23,6 @@ */ package com.bigdata.bop; -import java.util.Map; /** @@ -153,13 +152,19 @@ // // } - public E get() { + final public E get() { return value; } - public String getName() { + final public E get(final IBindingSet bindingSet) { + + return value; + + } + + final public String getName() { throw new UnsupportedOperationException(); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPipelineOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPipelineOp.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -0,0 +1,75 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 3, 2010 + */ + +package com.bigdata.bop; + +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.relation.accesspath.IBlockingBuffer; + +/** + * An pipeline operator reads from a source and writes on a sink. + * + * @param <E> + * The generic type of the objects processed by the operator. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface IPipelineOp<E> extends BOp { + + /** + * Return a new object which can be used to collect statistics on the + * operator evaluation (this may be overridden to return a more specific + * class depending on the operator). + */ + BOpStats newStats(); + + /** + * Instantiate a buffer suitable as a sink for this operator. The buffer + * will be provisioned based on the operator annotations. + * + * @return The buffer. + */ + IBlockingBuffer<E[]> newBuffer(); + + /** + * Return a {@link FutureTask} which computes the operator against the + * evaluation context. The caller is responsible for executing the + * {@link FutureTask} (this gives them the ability to hook the completion of + * the computation). + * + * @param context + * The evaluation context. + * + * @return The {@link FutureTask} which will compute the operator's + * evaluation. 
+ */ + FutureTask<Void> eval(BOpContext<E> context); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPipelineOp.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -30,7 +30,6 @@ import java.io.Serializable; -import com.bigdata.bop.join.PipelineJoin; import com.bigdata.mdi.PartitionLocator; import com.bigdata.relation.IMutableRelation; import com.bigdata.relation.IRelation; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariableOrConstant.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariableOrConstant.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IVariableOrConstant.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -64,6 +64,24 @@ E get(); /** + * Return the <i>as bound</i> value of the variable or constant. The <i>as + * bound</i> value of an {@link IConstant} is the contant's value. The <i>as + * bound</i> value of an {@link IVariable} is the bound value in the given + * {@link IBindingSet} -or- <code>null</code> if the variable is not bound + * in the {@link IBindingSet}. + * + * @param bindingSet + * The binding set. + * + * @return The as bound value of the constant or variable. + * + * @throws IllegalArgumentException + * if this is an {@link IVariable} and the <i>bindingSet</i> is + * <code>null</code>. + */ + E get(IBindingSet bindingSet); + + /** * Return the name of a variable. 
* * @throws UnsupportedOperationException Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -1,182 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 18, 2010 - */ - -package com.bigdata.bop; - -import java.util.concurrent.FutureTask; - -import com.bigdata.bop.engine.BOpStats; -import com.bigdata.btree.IRangeQuery; -import com.bigdata.relation.accesspath.AccessPath; -import com.bigdata.relation.accesspath.BlockingBuffer; -import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.relation.accesspath.IBuffer; - -/** - * An pipeline operator reads from a source and writes on a sink. - * - * @param <E> - * The generic type of the objects processed by the operator. 
- * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - * - * @todo It is too confusion to have an interface hierarchy which is separate - * from the class hierarchy for the operators. Therefore roll this - * interface into {@link AbstractPipelineOp} and then rename that class to - * {@link PipelineOp} - */ -public interface PipelineOp<E> extends BOp { - - /** - * Well known annotations pertaining to the binding set pipeline. - */ - public interface Annotations extends BOp.Annotations { - - /** - * The maximum #of chunks that can be buffered before an the producer - * would block (default {@value #DEFAULT_CHUNK_OF_CHUNKS_CAPACITY}). - * Note that partial chunks may be combined into full chunks whose - * nominal capacity is specified by {@link #CHUNK_CAPACITY}. - * - * @see #DEFAULT_CHUNK_OF_CHUNKS_CAPACITY - */ - String CHUNK_OF_CHUNKS_CAPACITY = PipelineOp.class.getName() - + ".chunkOfChunksCapacity"; - - /** - * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} - * - * @todo was 100. dialed down to reduce heap consumption for arrays. - * test performance @ 100 and 1000. - */ - int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 100; - - /** - * Sets the capacity of the {@link IBuffer}s used to accumulate a chunk - * of {@link IBindingSet}s (default {@value #CHUNK_CAPACITY}). Partial - * chunks may be automatically combined into full chunks. - * - * @see #DEFAULT_CHUNK_CAPACITY - * @see #CHUNK_OF_CHUNKS_CAPACITY - */ - String CHUNK_CAPACITY = PipelineOp.class.getName() + ".chunkCapacity"; - - /** - * Default for {@link #CHUNK_CAPACITY} - */ - int DEFAULT_CHUNK_CAPACITY = 100; - - /** - * The timeout in milliseconds that the {@link BlockingBuffer} will wait - * for another chunk to combine with the current chunk before returning - * the current chunk (default {@value #DEFAULT_CHUNK_TIMEOUT}). This may - * be ZERO (0) to disable the chunk combiner. 
- * - * @see #DEFAULT_CHUNK_TIMEOUT - */ - String CHUNK_TIMEOUT = PipelineOp.class.getName() + ".chunkTimeout"; - - /** - * The default for {@link #CHUNK_TIMEOUT}. - * - * @todo this is probably much larger than we want. Try 10ms. - */ - int DEFAULT_CHUNK_TIMEOUT = 1000; - - /** - * If the estimated rangeCount for an {@link AccessPath#iterator()} is - * LTE this threshold then use a fully buffered (synchronous) iterator. - * Otherwise use an asynchronous iterator whose capacity is governed by - * {@link #CHUNK_OF_CHUNKS_CAPACITY}. - * - * @see #DEFAULT_FULLY_BUFFERED_READ_THRESHOLD - */ - String FULLY_BUFFERED_READ_THRESHOLD = PipelineOp.class.getName() - + ".fullyBufferedReadThreshold"; - - /** - * Default for {@link #FULLY_BUFFERED_READ_THRESHOLD}. - * - * @todo try something closer to the branching factor, e.g., 100. - */ - int DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = 1000; - - /** - * Flags for the iterator ({@link IRangeQuery#KEYS}, - * {@link IRangeQuery#VALS}, {@link IRangeQuery#PARALLEL}). - * <p> - * Note: The {@link IRangeQuery#PARALLEL} flag here is an indication - * that the iterator may run in parallel across the index partitions. - * This only effects scale-out and only for simple triple patterns since - * the pipeline join does something different (it runs inside the index - * partition using the local index, not the client's view of a - * distributed index). - * - * @see #DEFAULT_FLAGS - */ - String FLAGS = PipelineOp.class.getName() + ".flags"; - - /** - * The default flags will visit the keys and values of the non-deleted - * tuples and allows parallelism in the iterator (when supported). - */ - final int DEFAULT_FLAGS = IRangeQuery.KEYS | IRangeQuery.VALS - | IRangeQuery.PARALLEL; - - } - - /** - * Return a new object which can be used to collect statistics on the - * operator evaluation (this may be overridden to return a more specific - * class depending on the operator). 
- */ - BOpStats newStats(); - - /** - * Instantiate a buffer suitable as a sink for this operator. The buffer - * will be provisioned based on the operator annotations. - * - * @return The buffer. - */ - IBlockingBuffer<E[]> newBuffer(); - - /** - * Return a {@link FutureTask} which computes the operator against the - * evaluation context. The caller is responsible for executing the - * {@link FutureTask} (this gives them the ability to hook the completion of - * the computation). - * - * @param context - * The evaluation context. - * - * @return The {@link FutureTask} which will compute the operator's - * evaluation. - */ - FutureTask<Void> eval(BOpContext<E> context); - -} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java (from rev 3495, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractPipelineOp.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -0,0 +1,219 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 18, 2010 + */ + +package com.bigdata.bop; + +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.btree.IRangeQuery; +import com.bigdata.relation.accesspath.AccessPath; +import com.bigdata.relation.accesspath.BlockingBuffer; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.relation.accesspath.IBuffer; + +/** + * An pipeline operator reads from a source and writes on a sink. This is an + * abstract base class for pipelined operators regardless of the type of data + * moving along the pipeline. + * + * @param <E> + * The generic type of the objects processed by the operator. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +abstract public class PipelineOp<E> extends BOpBase implements IPipelineOp<E> { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Well known annotations pertaining to the binding set pipeline. + */ + public interface Annotations extends BOp.Annotations { + + /** + * The maximum #of chunks that can be buffered before an the producer + * would block (default {@value #DEFAULT_CHUNK_OF_CHUNKS_CAPACITY}). + * Note that partial chunks may be combined into full chunks whose + * nominal capacity is specified by {@link #CHUNK_CAPACITY}. + * + * @see #DEFAULT_CHUNK_OF_CHUNKS_CAPACITY + */ + String CHUNK_OF_CHUNKS_CAPACITY = PipelineOp.class.getName() + + ".chunkOfChunksCapacity"; + + /** + * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} + * + * @todo was 100. dialed down to reduce heap consumption for arrays. + * test performance @ 100 and 1000. 
+ */ + int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 100; + + /** + * Sets the capacity of the {@link IBuffer}s used to accumulate a chunk + * of {@link IBindingSet}s (default {@value #CHUNK_CAPACITY}). Partial + * chunks may be automatically combined into full chunks. + * + * @see #DEFAULT_CHUNK_CAPACITY + * @see #CHUNK_OF_CHUNKS_CAPACITY + */ + String CHUNK_CAPACITY = PipelineOp.class.getName() + ".chunkCapacity"; + + /** + * Default for {@link #CHUNK_CAPACITY} + */ + int DEFAULT_CHUNK_CAPACITY = 100; + + /** + * The timeout in milliseconds that the {@link BlockingBuffer} will wait + * for another chunk to combine with the current chunk before returning + * the current chunk (default {@value #DEFAULT_CHUNK_TIMEOUT}). This may + * be ZERO (0) to disable the chunk combiner. + * + * @see #DEFAULT_CHUNK_TIMEOUT + */ + String CHUNK_TIMEOUT = PipelineOp.class.getName() + ".chunkTimeout"; + + /** + * The default for {@link #CHUNK_TIMEOUT}. + * + * @todo this is probably much larger than we want. Try 10ms. + */ + int DEFAULT_CHUNK_TIMEOUT = 1000; + + /** + * If the estimated rangeCount for an {@link AccessPath#iterator()} is + * LTE this threshold then use a fully buffered (synchronous) iterator. + * Otherwise use an asynchronous iterator whose capacity is governed by + * {@link #CHUNK_OF_CHUNKS_CAPACITY}. + * + * @see #DEFAULT_FULLY_BUFFERED_READ_THRESHOLD + */ + String FULLY_BUFFERED_READ_THRESHOLD = PipelineOp.class.getName() + + ".fullyBufferedReadThreshold"; + + /** + * Default for {@link #FULLY_BUFFERED_READ_THRESHOLD}. + * + * @todo try something closer to the branching factor, e.g., 100. + */ + int DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = 1000; + + /** + * Flags for the iterator ({@link IRangeQuery#KEYS}, + * {@link IRangeQuery#VALS}, {@link IRangeQuery#PARALLEL}). + * <p> + * Note: The {@link IRangeQuery#PARALLEL} flag here is an indication + * that the iterator may run in parallel across the index partitions. 
+ * This only effects scale-out and only for simple triple patterns since + * the pipeline join does something different (it runs inside the index + * partition using the local index, not the client's view of a + * distributed index). + * + * @see #DEFAULT_FLAGS + */ + String FLAGS = PipelineOp.class.getName() + ".flags"; + + /** + * The default flags will visit the keys and values of the non-deleted + * tuples and allows parallelism in the iterator (when supported). + */ + final int DEFAULT_FLAGS = IRangeQuery.KEYS | IRangeQuery.VALS + | IRangeQuery.PARALLEL; + + } + + /** + * Deep copy constructor. + * + * @param op + */ + protected PipelineOp(final PipelineOp<E> op) { + + super(op); + + } + + /** + * Shallow copy constructor. + * + * @param args + * @param annotations + */ + protected PipelineOp(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + + } + + public int getChunkCapacity() { + + return getProperty(Annotations.CHUNK_CAPACITY, + Annotations.DEFAULT_CHUNK_CAPACITY); + + } + + public int getChunkOfChunksCapacity() { + + return getProperty(Annotations.CHUNK_OF_CHUNKS_CAPACITY, + Annotations.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY); + + } + + public long getChunkTimeout() { + + return getProperty(Annotations.CHUNK_TIMEOUT, + Annotations.DEFAULT_CHUNK_TIMEOUT); + + } + + /** + * The {@link TimeUnit}s in which the {@link #chunkTimeout} is measured. 
+ */ + protected static transient final TimeUnit chunkTimeoutUnit = TimeUnit.MILLISECONDS; + + public BOpStats newStats() { + + return new BOpStats(); + + } + + public IBlockingBuffer<E[]> newBuffer() { + + return new BlockingBuffer<E[]>(getChunkOfChunksCapacity(), + getChunkCapacity(), getChunkTimeout(), chunkTimeoutUnit); + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -139,6 +139,18 @@ } + public E get(final IBindingSet bindingSet) { + + if (bindingSet == null) + throw new IllegalArgumentException(); + + @SuppressWarnings("unchecked") + final IConstant<E> c = bindingSet.get(this); + + return c == null ? null : c.get(); + + } + public String getName() { return name; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -309,24 +309,26 @@ public Object asBound(final int index, final IBindingSet bindingSet) { - if (bindingSet == null) - throw new IllegalArgumentException(); + return get(index).get(bindingSet); - final IVariableOrConstant<?> t = get(index); +// if (bindingSet == null) +// throw new IllegalArgumentException(); +// +// final IVariableOrConstant<?> t = get(index); +// +// final IConstant<?> c; +// if (t.isVar()) { +// +// c = bindingSet.get((IVariable<?>) t); +// +// } else { +// +// c = (IConstant<?>) t; +// +// } +// +// return c == null ? 
null : c.get(); - final IConstant<?> c; - if (t.isVar()) { - - c = bindingSet.get((IVariable<?>) t); - - } else { - - c = (IConstant<?>) t; - - } - - return c == null ? null : c.get(); - } @SuppressWarnings("unchecked") Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -32,7 +32,7 @@ import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; -import com.bigdata.bop.AbstractPipelineOp; +import com.bigdata.bop.PipelineOp; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; import com.bigdata.bop.BindingSetPipelineOp; @@ -62,7 +62,7 @@ */ private static final long serialVersionUID = 1L; - public interface Annotations extends AbstractPipelineOp.Annotations { + public interface Annotations extends PipelineOp.Annotations { /** * An {@link IConstraint} which specifies the condition. When the Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/AbstractSampleIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/AbstractSampleIndex.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/AbstractSampleIndex.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -28,7 +28,7 @@ package com.bigdata.bop.ndx; -import com.bigdata.bop.AbstractPipelineOp; +import com.bigdata.bop.PipelineOp; import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; @@ -59,7 +59,7 @@ * @todo This needs to operation on element chunks, not {@link IBindingSet} * chunks. 
It also may not require pipelining. */ -abstract public class AbstractSampleIndex<E> extends AbstractPipelineOp<E> { +abstract public class AbstractSampleIndex<E> extends PipelineOp<E> { /** * Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -28,11 +28,13 @@ package com.bigdata.relation; +import java.util.List; import java.util.Set; import java.util.concurrent.ExecutorService; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariableOrConstant; import com.bigdata.btree.IIndex; import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleIterator; @@ -40,8 +42,6 @@ import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.locator.ILocatableResource; -import com.bigdata.relation.rule.IRule; -import com.bigdata.relation.rule.eval.ISolution; import com.bigdata.striterator.IKeyOrder; /** @@ -75,25 +75,45 @@ */ Class<E> getElementClass(); +// /** +// * Create and return a new element. The element is constructed from the +// * predicate given the bindings. Typically, this is used when generating an +// * {@link ISolution} for an {@link IRule} during either a query or mutation +// * operations. The element is NOT inserted into the relation. +// * +// * @param predicate +// * The predicate that is the head of some {@link IRule}. +// * @param bindingSet +// * A set of bindings for that {@link IRule}. +// * +// * @return The new element. +// * +// * @throws IllegalArgumentException +// * if any parameter is <code>null</code>. 
+// * @throws IllegalStateException +// * if the predicate is not fully bound given those bindings. +// */ +// E newElement(IPredicate<E> predicate, IBindingSet bindingSet); + /** * Create and return a new element. The element is constructed from the - * predicate given the bindings. Typically, this is used when generating an - * {@link ISolution} for an {@link IRule} during either a query or mutation - * operations. The element is NOT inserted into the relation. + * ordered list of variables and constants. Variables are replaced using the + * given the bindings. The element is NOT inserted into the relation. * - * @param predicate - * The predicate that is the head of some {@link IRule}. + * @param a + * An ordered list of variables and/or constants. * @param bindingSet - * A set of bindings for that {@link IRule}. + * A set of bindings. * * @return The new element. * * @throws IllegalArgumentException * if any parameter is <code>null</code>. * @throws IllegalStateException - * if the predicate is not fully bound given those bindings. + * if there exists a variable which is not bound given those + * bindings. */ - E newElement(IPredicate<E> predicate, IBindingSet bindingSet); + E newElement(List<IVariableOrConstant<?>> a, IBindingSet bindingSet); /** * Return the {@link IKeyOrder} for the primary index for the relation. 
Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -1,12 +1,14 @@ package com.bigdata.relation; import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.concurrent.ExecutorService; import com.bigdata.bop.BOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariableOrConstant; import com.bigdata.btree.IIndex; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.TemporaryStore; @@ -122,10 +124,17 @@ } - public E newElement(final IPredicate<E> predicate, +// public E newElement(final IPredicate<E> predicate, +// final IBindingSet bindingSet) { +// +// return relation1.newElement(predicate, bindingSet); +// +// } + + public E newElement(final List<IVariableOrConstant<?>> a, final IBindingSet bindingSet) { - return relation1.newElement(predicate, bindingSet); + return relation1.newElement(a, bindingSet); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/Solution.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/Solution.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/Solution.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -101,9 +101,12 @@ // the relation for the head of the rule. final IRelation relation = joinNexus.getHeadRelationView(head); + + // use the relation's element factory. + this.e = (E) relation.newElement(head.args(), bindingSet); - // use the relation's element factory. 
- this.e = (E) relation.newElement(head, bindingSet); +// // use the relation's element factory. +// this.e = (E) relation.newElement(head, bindingSet); } else { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -66,6 +66,7 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariableOrConstant; import com.bigdata.btree.BytesUtil; import com.bigdata.btree.IIndex; import com.bigdata.btree.ISimpleSplitHandler; @@ -1354,7 +1355,7 @@ } @SuppressWarnings("unchecked") - public Object newElement(IPredicate predicate, + public Object newElement(List a, IBindingSet bindingSet) { throw new UnsupportedOperationException(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -29,19 +29,20 @@ import java.util.Comparator; import java.util.HashSet; +import java.util.List; import java.util.Properties; import java.util.Set; import java.util.UUID; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariableOrConstant; import com.bigdata.btree.BytesUtil; import com.bigdata.btree.IIndex; import com.bigdata.btree.IndexMetadata; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.journal.IIndexManager; import com.bigdata.relation.AbstractRelation; -import com.bigdata.relation.IMutableRelation; import 
com.bigdata.relation.locator.ILocatableResource; import com.bigdata.striterator.AbstractKeyOrder; import com.bigdata.striterator.IChunkedOrderedIterator; @@ -52,7 +53,7 @@ * <p> * Note: This has to be public in order to be an {@link ILocatableResource}. */ -public class R extends AbstractRelation<E> implements IMutableRelation<E> { +public class R extends AbstractRelation<E> { /** * Metadata about the index orders for this relation. @@ -134,16 +135,27 @@ } - public E newElement(final IPredicate<E> predicate, +// public E newElement(final IPredicate<E> predicate, +// final IBindingSet bindingSet) { +// +// final String name = (String) predicate.asBound(0, bindingSet); +// +// final String value = (String) predicate.asBound(1, bindingSet); +// +// return new E(name, value); +// } + + public E newElement(final List<IVariableOrConstant<?>> a, final IBindingSet bindingSet) { - final String name = (String) predicate.asBound(0, bindingSet); + final String name = (String) a.get(0).get(bindingSet); - final String value = (String) predicate.asBound(1, bindingSet); - - return new E(name, value); + final String value = (String) a.get(1).get(bindingSet); + + return new E(name,value); + } - + public Set<String> getIndexNames() { final Set<String> tmp = new HashSet<String>(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -52,17 +52,25 @@ import com.bigdata.bop.BOpContext; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.ChunkedOrderedIteratorOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import 
com.bigdata.bop.NoSuchBOpException; import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.bset.Union; +import com.bigdata.relation.IMutableRelation; +import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.IElementFilter; +import com.bigdata.relation.rule.Program; import com.bigdata.resources.ResourceManager; +import com.bigdata.service.ndx.IAsynchronousWriteBufferFactory; +import com.bigdata.striterator.ChunkedArrayIterator; +import com.bigdata.striterator.ChunkedOrderedStriterator; +import com.bigdata.striterator.IChunkedOrderedIterator; import com.bigdata.striterator.ICloseableIterator; import com.bigdata.util.concurrent.Haltable; @@ -193,15 +201,70 @@ * * FIXME Unit tests for non-distinct {@link IElementFilter}s on an * {@link IPredicate}, unit tests for distinct element filter on an - * {@link IPredicate} which is capable of distributed operations + * {@link IPredicate} which is capable of distributed operations. Do not use + * distinct where not required (SPOC, only one graph, etc). + * <p> + * It seems like the right way to approach this is by unifying the stackable + * CTC striterator pattern with the chunked iterator pattern and passing the + * query engine (or the bop context) into the iterator construction process + * (or simply requesting that the query engine construct the iterator + * stack). + * <p> + * In terms of harmonization, it is difficult to say which way would work + * better. In the short term we could simply allow both and mask the + * differences in how we construct the filters, but the conversion to/from + * striterators and chunked iterators seems to waste a bit of effort. 
+ * <p> + * The trickiest part of all of this is to allow a distributed filter + * pattern where the filter gets created on a set of nodes identified by the + * operator and the elements move among those nodes using the query engine's + * buffers. + * <p> + * To actually implement the distributed distinct filter we need to stack + * the following: * + * <pre> + * - ITupleIterator + * - Resolve ITuple to Element (e.g., SPOC). + * - Layer on optional IElementFilter associated with the IPredicate. + * - Layer on SameVariableConstraint iff required (done by AccessPath) + * - Resolve SPO to SPO, stripping off the context position. + * - Chunk SPOs (SPO[], IKeyOrder), where the key order is from the access path. + * - Filter SPO[] using DHT constructed on specified nodes of the cluster. + * The SPO[] chunks should be packaged into NIO buffers and shipped to those + * nodes. The results should be shipped back as a bit vectors packaged into + * a NIO buffers. + * - Dechunk SPO[] to SPO since that is the current expectation for the filter + * stack. + * - The result then gets wrapped as a {@link IChunkedOrderedIterator} by + * the AccessPath using a {@link ChunkedArrayIterator}. + * </pre> + * + * This stack is a bit complex(!). But it is certainly easy enough to + * generate the necessary bits programmatically. + * * FIXME Handling the {@link Union} of binding sets. * - * FIXME conditional routing for binding sets in the pipeline (to route - * around an optional join group based on an {@link IConstraint}). This - * should probably wrap the {@link BindingSetPipelineOp} such that we simply - * stream the grouped operator. + * FIXME INSERT and DELETE which will construct elements using + * {@link IRelation#newElement(java.util.List, IBindingSet)} from a binding + * set and then use {@link IMutableRelation#insert(IChunkedOrderedIterator)} + * and {@link IMutableRelation#delete(IChunkedOrderedIterator)}. 
For s/o, we + * first need to move the bits into the right places so it makes sense to + * unpack the processing of the loop over the elements and move the data + * around, writing on each index as necessary. There could be eventually + * consistent approaches to this as well. For justifications we need to + * update some additional indices, in which case we are stuck going through + * {@link IRelation} rather than routing data directly or using the + * {@link IAsynchronousWriteBufferFactory}. * + * FIXME Handle {@link Program}s. There are three flavors, which should + * probably be broken into three operators: sequence(ops), set(ops), and + * closure(op). The 'set' version would be parallelized, or at least have an + * annotation for parallel evaluation. These things belong in the same broad + * category as the join graph since they are operators which control the + * evaluation of other operators (the current pipeline join also has that + * characteristic which it uses to do the nested index subqueries). 
+ * * FIXME SPARQL to BOP translation * * FIXME buffer management for s/o, including binding sets movement, element Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/locator/TestDefaultResourceLocator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/locator/TestDefaultResourceLocator.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/locator/TestDefaultResourceLocator.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -28,6 +28,7 @@ package com.bigdata.relation.locator; +import java.util.List; import java.util.Properties; import java.util.Set; import java.util.UUID; @@ -38,6 +39,7 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariableOrConstant; import com.bigdata.btree.IIndex; import com.bigdata.btree.IndexMetadata; import com.bigdata.journal.BufferMode; @@ -227,7 +229,8 @@ private static class MockRelation extends AbstractRelation { - final private String indexName = "foo"; + static final private String indexName = "foo"; + private IIndex ndx; /** @@ -324,11 +327,16 @@ return null; } - public Object newElement(IPredicate predicate, IBindingSet bindingSet) { +// public Object newElement(IPredicate predicate, IBindingSet bindingSet) { +// // TODO Auto-generated method stub +// return null; +// } + + public Object newElement(List a, IBindingSet bindingSet) { // TODO Auto-generated method stub return null; } - + public Class<ISPO> getElementClass() { return null; @@ -344,7 +352,7 @@ // TODO Auto-generated method stub return null; } - + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2010-09-03 13:40:22 UTC (rev 3505) +++ 
branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -56,6 +56,7 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariableOrConstant; import com.bigdata.btree.BytesUtil; import com.bigdata.btree.IIndex; import com.bigdata.btree.IRangeQuery; @@ -804,13 +805,17 @@ * * @throws UnsupportedOperationException */ - public BigdataValue newElement(IPredicate<BigdataValue> predicate, +// public BigdataValue newElement(IPredicate<BigdataValue> predicate, +// IBindingSet bindingSet) { +// +// throw new UnsupportedOperationException(); +// +// } + public BigdataValue newElement(List<IVariableOrConstant<?>> a, IBindingSet bindingSet) { - throw new UnsupportedOperationException(); - } - + public Class<BigdataValue> getElementClass() { return BigdataValue.class; Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicRelation.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicRelation.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -2,6 +2,7 @@ import java.util.ArrayList; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Properties; import java.util.Set; @@ -13,9 +14,7 @@ import org.apache.log4j.Logger; import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; import com.bigdata.bop.IPredicate; -import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.Var; import com.bigdata.btree.BloomFilterFactory; @@ -473,18 +472,44 @@ return indexNames; } - public IMagicTuple newElement(IPredicate<IMagicTuple> predicate, - IBindingSet bindingSet) { +// public IMagicTuple newElement(IPredicate<IMagicTuple> predicate, 
+// IBindingSet bindingSet) { +// +// if (predicate == null) +// throw new IllegalArgumentException(); +// +// if (bindingSet == null) +// throw new IllegalArgumentException(); +// +// final IV[] terms = new IV[arity]; +// for (int i = 0; i < arity; i++) { +// terms[i] = asBound(predicate, i, bindingSet); +// } +// +// final MagicTuple magicTuple = new MagicTuple(terms); +// +// return magicTuple; +// +// } - if (predicate == null) + @SuppressWarnings("unchecked") + public IMagicTuple newElement(final List<IVariableOrConstant<?>> a, + final IBindingSet bindingSet) { + + if (a == null) throw new IllegalArgumentException(); if (bindingSet == null) throw new IllegalArgumentException(); final IV[] terms = new IV[arity]; + + final Iterator<IVariableOrConstant<?>> itr = a.iterator(); + for (int i = 0; i < arity; i++) { - terms[i] = asBound(predicate, i, bindingSet); + + terms[i] = (IV) itr.next().get(bindingSet); + } final MagicTuple magicTuple = new MagicTuple(terms); @@ -492,39 +517,39 @@ return magicTuple; } - + public Class<IMagicTuple> getElementClass() { return IMagicTuple.class; } - /** - * Extract the bound value from the predicate. When the predicate is not - * bound at that index, then extract its binding from the binding set. - * - * @param pred - * The predicate. - * @param index - * The index into that predicate. - * @param bindingSet - * The binding set. - * - * @return The bound value. - */ - private IV asBound(final IPredicate<IMagicTuple> predicate, - final int index, final IBindingSet bindingSet) { +// /** +// * Extract the bound value from the predicate. When the predicate is not +// * bound at that index, then extract its binding from the binding set. +// * +// * @param pred +// * The predicate. +// * @param index +// * The index into that predicate. +// * @param bindingSet +// * The binding set. +// * +// * @return The bound value. 
+// */ +// private IV asBound(final IPredicate<IMagicTuple> predicate, +// final int index, final IBindingSet bindingSet) { +// +// final IVariableOrConstant<IV> t = predicate.get(index); +// final IConstant<IV> c; +// if (t.isVar()) { +// c = bindingSet.get((IVariable) t); +// } else { +// c = (IConstant<IV>) t; +// } +// +// return c.get(); +// +// } - final IVariableOrConstant<IV> t = predicate.get(index); - final IConstant<IV> c; - if (t.isVar()) { - c = bindingSet.get((IVariable) t); - } else { - c = (IConstant<IV>) t; - } - - return c.get(); - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java 2010-09-03 13:40:22 UTC (rev 3505) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java 2010-09-03 18:33:10 UTC (rev 3506) @@ -1494,32 +1494,69 @@ } +// /** +// * @todo This implementation was written early on and works for creating new +// * SPOs licensed by inference against a triple store. It does not +// * allow us to specify the statement type, which is always set to +// * [inferred]. It also does not capture the context if one exists, but +// * that could be done by inspection of the arity of the predicate. It +// * might be better to have an explicit "CONSTRUCT" operator rather +// * than having this implicit relationship between the head of a rule +// * and the element created from that rule. For example, that might let +// * us capture the distinction of inferred versus explicit within the +// * CONSTRUCT operator. 
+// */ +// public SPO newElement(final IPredicate<ISPO> predicate, +// final IBindingSet bindingSet) { +// +// if (predicate == null) +// throw new IllegalArgumentException(); +// +// if (bindingSet == null) +// throw new IllegalArgumentException(); +// +// final IV s = (IV) predicate.asBound(0, bindingSet); +// +// final IV p = (IV) predicate.asBound(1, bindingSet); +// +// final IV o = (IV) predicate.asBound(2, bindingSet); +// +// final SPO spo = new SPO(s, p, o, StatementEnum.Inferred); +// +// if(log.isDebugEnabled()) +// log.debug(spo.toString()); +// +// return spo; +// +// } + /** - * @todo This implementation was written early on and works for creating new - * SPOs licensed by inference against a triple store. It does not - * allow us to specify the statement type, which is always set to - * [inferred]. It also does not capture the context if one exists, but - * that could be done by inspection of the arity of the predicate. It - * might be better to have an explicit "CONSTRUCT" operator rather - * than having this implicit relationship between the head of a rule - * and the element created from that rule. For example, that might let - * us capture the distinction of inferred versus explicit within the - * CONSTRUCT operator. + * @todo This works for creating new SPOs licensed by inference against a + * triple store. However, it does not allow us to specify the + * statement type, which is always set to [inferred]. It also does not + * capture the context if one exists, but that could be done by + * inspection of the arity of the predicate. It might be better to + * have an explicit "CONSTRUCT" operator rather than havin... [truncated message content] |
From: <tho...@us...> - 2010-09-05 17:02:44
|
Revision: 3508 http://bigdata.svn.sourceforge.net/bigdata/?rev=3508&view=rev Author: thompsonbry Date: 2010-09-05 17:02:34 +0000 (Sun, 05 Sep 2010) Log Message: ----------- Refactored the code to map binding sets over shards and made it more efficient (it reuses the same logic that is used by the scale-out index views). Added IRelation#getKeyOrders() which brings us another step towards DDL support. Added an in-memory binding set sort operator and an implementation for ordering binding sets for SPARQL which leverages the openrdf ValueComparator. Added an "INSERT" operator to write on an index. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IMutableRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractUnsynchronizedArrayBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/ThreadLocalBufferFactory.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/IQueryOptions.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/QueryOptions.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/NestedSubqueryWithJoinThreadsTask.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/RuleStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/pipeline/JoinMasterTask.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/AbstractScaleOutFederation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/ndx/AbstractSplitter.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/AbstractKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/IKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestDistinctBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestConditionalRoutingOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/BOpStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestMapBindingSetsOverShards.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/locator/TestDefaultResourceLocator.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/TestSlice.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/service/AbstractEmbeddedFederationTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicIndexWriteProc.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicIndexWriter.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicKeyOrder.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicTupleSerializer.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestSlice.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISlice.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISortOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/MemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Slice.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SparqlBindingSetComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/package.html 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestMemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestSPARQLBindingSetComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/MapBindingSetsOverShardsBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/mutation/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/mutation/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/mutation/TestDelete.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/mutation/TestInsert.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ConstantEval.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/ISlice.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/ISortOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/Slice.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/SortOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestSortBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MapBindingSetsOverShards.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-09-03 19:25:46 UTC (rev 3507) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -114,9 +114,22 @@ * @return The value of the annotation and <code>null</code> if the * annotation is not bound. 
*/ - Object getProperty(final String name); + <T> T getProperty(final String name); /** + * Return the value of the named annotation. + * + * @param name + * The name of the annotation. + * + * @return The value of the annotation. + * + * @throws IllegalArgumentException + * if the named annotation is not bound. + */ + <T> T getRequiredProperty(final String name); + + /** * Deep copy clone of the operator. */ BOp clone(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-09-03 19:25:46 UTC (rev 3507) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -299,12 +299,23 @@ } - public Object getProperty(final String name) { + public <T> T getProperty(final String name) { - return annotations.get(name); + return (T) annotations.get(name); } + public <T> T getRequiredProperty(final String name) { + + final T tmp = (T) annotations.get(name); + + if (tmp == null) + throw new IllegalArgumentException("Required property: " + name); + + return tmp; + + } + public String toString() { final StringBuilder sb = new StringBuilder(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-09-03 19:25:46 UTC (rev 3507) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -27,6 +27,8 @@ */ package com.bigdata.bop; +import java.util.Iterator; + import org.apache.log4j.Logger; import com.bigdata.bop.engine.BOpStats; @@ -36,6 +38,7 @@ import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITx; import com.bigdata.journal.TimestampUtility; +import com.bigdata.mdi.PartitionLocator; 
import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.IAccessPath; @@ -44,8 +47,10 @@ import com.bigdata.relation.locator.IResourceLocator; import com.bigdata.relation.rule.IRule; import com.bigdata.relation.rule.eval.IJoinNexus; +import com.bigdata.service.AbstractScaleOutFederation; import com.bigdata.service.DataService; import com.bigdata.service.IBigdataFederation; +import com.bigdata.service.ndx.IClientIndex; import com.bigdata.striterator.IKeyOrder; /** @@ -270,12 +275,100 @@ */ public IRelation getReadRelation(final IPredicate<?> pred) { + /* + * @todo Cache the resource locator? + * + * @todo This should be using the federation as the index manager when + * locating a resource for scale-out, right? But s/o reads must use the + * local index manager when actually obtaining the index view for the + * relation. + */ return (IRelation) getIndexManager().getResourceLocator().locate( pred.getOnlyRelationName(), getReadTimestamp()); } /** + * Return a writable view of the relation. + * + * @param namespace + * The namespace of the relation. + * + * @return A writable view of the relation. + */ + public IRelation getWriteRelation(final String namespace) { + + /* + * @todo Cache the resource locator? + * + * @todo This should be using the federation as the index manager when + * locating a resource for scale-out, right? But s/o writes must use + * the local index manager when actually obtaining the index view for + * the relation. + */ + return (IRelation) getIndexManager().getResourceLocator().locate( + namespace, getWriteTimestamp()); + + } + + /** + * Return a mutable view of the specified index. + * + * @param <T> + * The generic type of the elements in the relation. + * @param relation + * The relation. + * @param keyOrder + * The key order for that index. + * @param partitionId + * The partition identifier and <code>-1</code> unless running + * against an {@link IBigdataFederation}. 
+ * + * @return The mutable view of the index. + * + * @throws UnsupportedOperationException + * if there is an attempt to read on an index partition when the + * database is not an {@link IBigdataFederation} or when the + * database is an {@link IBigdataFederation} unless the index + * partition was specified. + */ + public <T> ILocalBTreeView getMutableLocalIndexView( + final IRelation<T> relation, final IKeyOrder<T> keyOrder, + final int partitionId) { + + final String namespace = relation.getNamespace(); + + final ILocalBTreeView ndx; + + if (partitionId == -1) { + + if(indexManager instanceof IBigdataFederation<?>) + throw new UnsupportedOperationException(); + + // The index is not partitioned. + ndx = (ILocalBTreeView) indexManager.getIndex(namespace + "." + + keyOrder.getIndexName(), getWriteTimestamp()); + + } else { + + if(!(indexManager instanceof IBigdataFederation<?>)) + throw new UnsupportedOperationException(); + + // The name of the desired index partition. + final String name = DataService.getIndexPartitionName(namespace + + "." + keyOrder.getIndexName(), partitionId); + + // MUST be a local index view. + ndx = (ILocalBTreeView) indexManager.getIndex(name, + getWriteTimestamp()); + + } + + return ndx; + + } + + /** * Obtain an access path reading from relation for the specified predicate * (from the tail of some rule). * <p> @@ -558,5 +651,56 @@ return true; } - + +/* + * I've replaced this with AbstractSplitter for the moment. + */ +// /** +// * Return an iterator visiting the {@link PartitionLocator} for the index +// * partitions from which an {@link IAccessPath} must read in order to +// * materialize all elements which would be visited for that predicate. +// * +// * @param predicate +// * The predicate on which the next stage in the pipeline must +// * read, with whatever bindings already applied. This is used to +// * discover the shard(s) which span the key range against which +// * the access path must read. 
+// * +// * @return The iterator. +// */ +// public Iterator<PartitionLocator> locatorScan(final IPredicate<?> predicate) { +// +// final long timestamp = getReadTimestamp(); +// +// // Note: assumes that we are NOT using a view of two relations. +// final IRelation<?> relation = (IRelation<?>) fed.getResourceLocator() +// .locate(predicate.getOnlyRelationName(), timestamp); +// +// /* +// * Find the best access path for the predicate for that relation. +// * +// * Note: All we really want is the [fromKey] and [toKey] for that +// * predicate and index. This MUST NOT layer on expanders since the +// * layering also hides the [fromKey] and [toKey]. +// */ +// @SuppressWarnings("unchecked") +// final AccessPath<?> accessPath = (AccessPath<?>) relation +// .getAccessPath((IPredicate) predicate); +// +// // Note: assumes scale-out (EDS or JDS). +// final IClientIndex ndx = (IClientIndex) accessPath.getIndex(); +// +// /* +// * Note: could also be formed from relationName + "." + +// * keyOrder.getIndexName(), which is cheaper unless the index metadata +// * is cached. 
+// */ +// final String name = ndx.getIndexMetadata().getName(); +// +// return ((AbstractScaleOutFederation<?>) fed).locatorScan(name, +// timestamp, accessPath.getFromKey(), accessPath.getToKey(), +// false/* reverse */); +// +// } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-09-03 19:25:46 UTC (rev 3507) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -30,12 +30,15 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import org.apache.log4j.Logger; import com.bigdata.bop.BOp.Annotations; +import com.bigdata.bop.engine.BOpStats; import com.bigdata.btree.AbstractNode; import cutthecrap.utils.striterators.Expander; @@ -462,4 +465,65 @@ } + + /** + * Combine chunks drawn from an iterator into a single chunk. + * + * @param itr + * The iterator + * @param stats + * {@link BOpStats#chunksIn} and {@link BOpStats#unitsIn} are + * updated. + * + * @return A single chunk containing all of the chunks visited by the + * iterator. + * + * @todo unit tests. 
+ */ + static public IBindingSet[] toArray(final Iterator<IBindingSet[]> itr, + final BOpStats stats) { + + final List<IBindingSet[]> list = new LinkedList<IBindingSet[]>(); + + int nchunks = 0, nelements = 0; + { + + while (itr.hasNext()) { + + final IBindingSet[] a = itr.next(); + + list.add(a); + + nchunks++; + + nelements += a.length; + + list.add(a); + + } + + stats.chunksIn.add(nchunks); + stats.unitsIn.add(nelements); + + } + + if (nchunks == 0) { + return new IBindingSet[0]; + } else if (nchunks == 1) { + return list.get(0); + } else { + int n = 0; + final IBindingSet[] a = new IBindingSet[nelements]; + final Iterator<IBindingSet[]> itr2 = list.iterator(); + while (itr2.hasNext()) { + final IBindingSet[] t = itr2.next(); + System.arraycopy(t/* src */, 0/* srcPos */, a/* dest */, + n/* destPos */, t.length/* length */); + n += t.length; + } + return a; + } + + } // toArray() + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java 2010-09-03 19:25:46 UTC (rev 3507) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Constant.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -23,8 +23,6 @@ */ package com.bigdata.bop; - - /** * A constant. * Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ConstantEval.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ConstantEval.java 2010-09-03 19:25:46 UTC (rev 3507) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ConstantEval.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -1,19 +0,0 @@ -package com.bigdata.bop; - -/** - * Evaluate a constant - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - * @param <E> - */ -public interface ConstantEval<E> extends BOp { - - /** - * Evaluate a constant. 
- * - * @return The value. - */ - E eval(); - -} \ No newline at end of file Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ComparatorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ComparatorOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ComparatorOp.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -0,0 +1,77 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 4, 2010 + */ + +package com.bigdata.bop.aggregation; + +import java.util.Comparator; +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpBase; +import com.bigdata.bop.IBindingSet; + +/** + * Base class for operators which impose a sort order on binding sets. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +abstract public class ComparatorOp extends BOpBase implements + Comparator<IBindingSet> { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends BOp.Annotations { + + /** + * An {@link ISortOrder}[] specifying the variables on which the sort + * will be imposed and the order (ascending or descending) for each + * variable. + */ + String ORDER = ComparatorOp.class.getName() + ".order"; + + } + + /** + * @param op + */ + public ComparatorOp(BOpBase op) { + super(op); + } + + /** + * @param args + * @param annotations + */ + public ComparatorOp(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ComparatorOp.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java 2010-09-03 19:25:46 UTC (rev 3507) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -88,14 +88,6 @@ super(args, annotations); - final IVariable<?>[] vars = getVariables(); - - if (vars == null) - throw new IllegalArgumentException(); - - if (vars.length == 0) - throw new IllegalArgumentException(); - } /** @@ -133,7 +125,7 @@ */ public IVariable<?>[] getVariables() { - return (IVariable<?>[]) annotations.get(Annotations.VARIABLES); + return getRequiredProperty(Annotations.VARIABLES); } @@ -208,6 +200,12 @@ this.vars = op.getVariables(); + if (vars == null) + throw new IllegalArgumentException(); + + if 
(vars.length == 0) + throw new IllegalArgumentException(); + this.map = new ConcurrentHashMap<Solution, Solution>( op.getInitialCapacity(), op.getLoadFactor(), op.getConcurrencyLevel()); @@ -305,12 +303,13 @@ } + sink.flush(); + // done. return null; } finally { - sink.flush(); sink.close(); // discard the map. Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISlice.java (from rev 3423, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/ISlice.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISlice.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISlice.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -0,0 +1,69 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Sep 24, 2008 + */ + +package com.bigdata.bop.aggregation; + +import java.io.Serializable; + +import com.bigdata.relation.accesspath.IAccessPath; + +/** + * Indicates the first solution to be returned to the caller (offset) and the + * #of solutions to be returned (limit). 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface ISlice extends Serializable { + + /** + * The first solution to be returned to the caller. A value of ZERO (0) + * indicates that all solutions should be returned. + */ + public long getOffset(); + + /** + * The maximum #of solutions to be returned to the caller. A value of + * {@link Long#MAX_VALUE} indicates that there is no limit. + * + * @todo modify to be consistent with + * {@link IAccessPath#iterator(long, long, int)} where a limit of ZERO + * (0L) is interpreted as NO limit and a limit of + * {@link Long#MAX_VALUE} is interpreted as ZERO (0L) (that is, also + * no limit). + */ + public long getLimit(); + + /** + * The index of the last solution that we will generate (OFFSET + LIMIT). If + * OFFSET + LIMIT would be greater than {@link Long#MAX_VALUE}, then use + * {@link Long#MAX_VALUE} instead. + */ + public long getLast(); + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISortOrder.java (from rev 3448, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/ISortOrder.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISortOrder.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISortOrder.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -0,0 +1,55 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Sep 24, 2008 + */ + +package com.bigdata.bop.aggregation; + +import java.io.Serializable; + +import com.bigdata.bop.IVariable; + +/** + * A variable and an order that will be imposed on the values for that variable. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface ISortOrder<E> extends Serializable { + + /** + * The variable whose values will be sorted. + */ + IVariable<E> getVariable(); + + /** + * <code>true</code> iff the values will be placed into an ascending sort + * and <code>false</code> if the values will be placed into a descending + * sort. 
+ */ + boolean isAscending(); + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/MemorySortOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/MemorySortOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/MemorySortOp.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -0,0 +1,112 @@ +package com.bigdata.bop.aggregation; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.relation.accesspath.IBlockingBuffer; + +/** + * An in-memory merge sort for binding sets. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z + * thompsonbry $ + * + * @todo unit tests. + * @todo do an external merge sort operator. + */ +public class MemorySortOp extends SortOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. + */ + public MemorySortOp(final MemorySortOp op) { + super(op); + } + + /** + * Required shallow copy constructor. + */ + public MemorySortOp(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new SortTask(this, context)); + + } + + /** + * Task executing on the node. + */ + static private class SortTask implements Callable<Void> { + + private final BOpContext<IBindingSet> context; + + /** + * The binding set comparator. 
+ */ + private final Comparator<IBindingSet> comparator; + + SortTask(final MemorySortOp op, + final BOpContext<IBindingSet> context) { + + this.context = context; + + this.comparator = op.getComparator(); + + } + + public Void call() throws Exception { + + final BOpStats stats = context.getStats(); + + final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); + + try { + + final IBindingSet[] all = BOpUtility.toArray(context + .getSource(), stats); + + // sort. + Arrays.sort(all, comparator); + + // update counters. + stats.unitsOut.add(all.length); + stats.chunksOut.increment(); + + // write output and flush. + sink.add(all); + sink.flush(); + + // done. + return null; + + } finally { + + sink.close(); + + } + + } + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/MemorySortOp.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Slice.java (from rev 3423, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/Slice.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Slice.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Slice.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -0,0 +1,129 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Sep 24, 2008 + */ + +package com.bigdata.bop.aggregation; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.math.BigInteger; + +/** + * Default implementation. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class Slice implements ISlice, Externalizable { + + /** + * + */ + private static final long serialVersionUID = 5396509164843609197L; + + private long offset; + private long limit; + private long last; + + /** + * A slice corresponding to all results (offset is zero, limit is + * {@link Long#MAX_VALUE}). + */ + public static final transient ISlice ALL = new Slice(0, Long.MAX_VALUE); + + /** + * + * @param offset + * @param limit + * + * @throws IllegalArgumentException + * if offset is negative. + * @throws IllegalArgumentException + * if limit is non-positive. + */ + public Slice(final long offset, final long limit) { + + if (offset < 0) + throw new IllegalArgumentException(); + + if (limit <= 0) + throw new IllegalArgumentException(); + + this.offset = offset; + + this.limit = limit; + + // @todo what is a cheaper way to do this? 
+ this.last = BigInteger.valueOf(offset).add(BigInteger.valueOf(limit)) + .min(BigInteger.valueOf(Long.MAX_VALUE)).longValue(); + + } + + public long getOffset() { + + return offset; + + } + + public long getLimit() { + + return limit; + + } + + public long getLast() { + + return last; + + } + + public String toString() { + + return "Slice{offset="+offset+", limit="+limit+", last="+last+"}"; + + } + + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + + offset = in.readLong(); + limit = in.readLong(); + last = in.readLong(); + + } + + public void writeExternal(ObjectOutput out) throws IOException { + + out.writeLong(offset); + out.writeLong(limit); + out.writeLong(last); + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOp.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -0,0 +1,86 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 4, 2010 + */ + +package com.bigdata.bop.aggregation; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.PipelineOp; + +/** + * Base class for operators which sort binding sets. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +abstract public class SortOp extends BindingSetPipelineOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends BindingSetPipelineOp.Annotations { + + /** + * The {@link ComparatorOp} which will impose the ordering on the + * binding sets. + * + * @see ComparatorOp + */ + String COMPARATOR = MemorySortOp.class.getName() + ".comparator"; + + } + + /** + * @param op + */ + public SortOp(PipelineOp<IBindingSet> op) { + super(op); + } + + /** + * @param args + * @param annotations + */ + public SortOp(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + + /** + * @see Annotations#COMPARATOR + */ + public ComparatorOp getComparator() { + + return getRequiredProperty(Annotations.COMPARATOR); + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOp.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOrder.java (from rev 3448, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/SortOrder.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOrder.java (rev 0) +++ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOrder.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -0,0 +1,80 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Sep 24, 2008 + */ + +package com.bigdata.bop.aggregation; + +import com.bigdata.bop.IVariable; + +/** + * Default impl. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class SortOrder<E> implements ISortOrder<E> { + + /** + * + */ + private static final long serialVersionUID = -669873421670514139L; + + private final IVariable<E> var; + private final boolean asc; + + /** + * + * @param var + * The variable. + * @param asc + * <code>true</code> for an ascending sort and + * <code>false</code> for a descending sort. 
+ */ + public SortOrder(final IVariable<E> var, final boolean asc) { + + if (var == null) + throw new IllegalArgumentException(); + + this.var = var; + + this.asc = asc; + + } + + public IVariable<E> getVariable() { + + return var; + + } + + public boolean isAscending() { + + return asc; + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SparqlBindingSetComparatorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SparqlBindingSetComparatorOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SparqlBindingSetComparatorOp.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -0,0 +1,101 @@ +package com.bigdata.bop.aggregation; + +import java.util.Comparator; +import java.util.Map; + +import org.openrdf.query.algebra.evaluation.util.ValueComparator; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IVariable; + +/** + * A comparator for SPARQL binding sets. + * + * @see http://www.w3.org/TR/rdf-sparql-query/#modOrderBy + * @see ValueComparator + * + * @todo unit tests. + */ +public class SparqlBindingSetComparatorOp extends ComparatorOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. + */ + public SparqlBindingSetComparatorOp(final SparqlBindingSetComparatorOp op) { + super(op); + } + + /** + * Required shallow copy constructor. + */ + public SparqlBindingSetComparatorOp(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + + } + + /** + * @see Annotations#ORDER + */ + public ISortOrder<?>[] getOrder() { + + return getRequiredProperty(Annotations.ORDER); + + } + + /** + * The sort order to be imposed. 
+ */ + private transient ISortOrder<?>[] order; + + private transient Comparator vc; + + public int compare(final IBindingSet bs1, final IBindingSet bs2) { + + if (order == null) { + + // lazy initialization. + order = getOrder(); + + if (order == null) + throw new IllegalArgumentException(); + + if (order.length == 0) + throw new IllegalArgumentException(); + + // comparator for RDF Value objects. + vc = new ValueComparator(); + + } + + for (int i = 0; i < order.length; i++) { + + final ISortOrder<?> o = order[i]; + + final IVariable v = o.getVariable(); + + int ret = vc.compare(bs1.get(v).get(), bs2.get(v).get()); + + if (!o.isAscending()) + ret = -ret; + + if (ret != 0) { + // not equal for this variable. + return ret; + } + + } + + // equal for all variables. + return 0; + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SparqlBindingSetComparatorOp.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html 2010-09-03 19:25:46 UTC (rev 3507) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html 2010-09-05 17:02:34 UTC (rev 3508) @@ -1,6 +1,6 @@ <html> <head> -<title>distinct, sort, and aggregation operators</title> +<title>solution modifier operators (distinct, sort, slice, and aggregation)</title> </head> <body> Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java 
2010-09-05 17:02:34 UTC (rev 3508) @@ -0,0 +1,271 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 25, 2010 + */ + +package com.bigdata.bop.mutation; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.btree.ILocalBTreeView; +import com.bigdata.btree.ITupleSerializer; +import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.relation.IRelation; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.striterator.IKeyOrder; + +/** + * This operator writes elements constructed from binding sets and an ordered + * list of variables and constants on an index. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * + * @param <E> + * The generic type of the elements written onto the index. 
+ */ +public class InsertOp<E> extends BindingSetPipelineOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends BindingSetPipelineOp.Annotations { + + /** + * An ordered {@link IVariableOrConstant}[]. Elements will be created + * using the binding sets which flow through the operator and + * {@link IRelation#newElement(java.util.List, IBindingSet)}. + */ + String SELECTED = InsertOp.class.getName() + ".selected"; + + /** + * The namespace of the relation to which the index belongs. + */ + String RELATION = InsertOp.class.getName() + ".relation"; + + /** + * The {@link IKeyOrder} for the index. + */ + String KEY_ORDER = InsertOp.class.getName() + ".keyOrder"; + + } + + /** + * Deep copy constructor. + * + * @param op + */ + public InsertOp(InsertOp<E> op) { + super(op); + } + + /** + * Shallow copy constructor. + * + * @param args + * @param annotations + */ + public InsertOp(BOp[] args, Map<String, Object> annotations) { + + super(args, annotations); + + getRequiredProperty(Annotations.SELECTED); + + } + + /** + * @see Annotations#SELECTED + */ + public IVariableOrConstant<?>[] getSelected() { + +// return (IVariableOrConstant<?>[]) getProperty(Annotations.SELECTED); + return getRequiredProperty(Annotations.SELECTED); + + } + + /** + * @see Annotations#RELATION + */ + public String getRelation() { + + return getRequiredProperty(Annotations.RELATION); + + } + + /** + * @see Annotations#KEY_ORDER + */ + public IKeyOrder<E> getKeyOrder() { + + return getRequiredProperty(Annotations.KEY_ORDER); + + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new InsertTask<E>(this, context)); + + } + + /** + * Create elements from the selected bindings and insert them onto the named + * index. 
+ */ + static private class InsertTask<E> implements Callable<Void> { + + private final BOpStats stats; + + private final BOpContext<IBindingSet> context; + + private final IAsynchronousIterator<IBindingSet[]> source; + + /** + * Only used to close the sink when we are done. + */ + private final IBlockingBuffer<IBindingSet[]> sink; + + private List<IVariableOrConstant<?>> selected; + + private final IRelation<E> relation; + + private final IKeyOrder<E> keyOrder; + + @SuppressWarnings("unchecked") + InsertTask(final InsertOp<E> op, final BOpContext<IBindingSet> context) { + + this.context = context; + + stats = context.getStats(); + + source = context.getSource(); + + sink = context.getSink(); + + selected = Arrays.asList(op.getSelected()); + + relation = context.getWriteRelation(op.getRelation()); + + keyOrder = op.getKeyOrder(); + + } + + /** + * + * @todo This does not order the tuples before writing on the local + * index. I am not sure that it should. I think that order is + * generally obtained from how we organize the tuples when mapping + * them across shards. However, for standalone databases it may + * make sense to insert a SORT on the selected attributes before + * the INSERT. + */ + public Void call() throws Exception { + + /* + * @todo validate for s/o. Since this goes through a common code + * path, what we really need to test is getMutableLocalIndexView(). + * The rest of the insert operation can be tested against a local + * Journal. 
+ */ + final ILocalBTreeView ndx = context.getMutableLocalIndexView( + relation, keyOrder, context.getPartitionId()); + + final IKeyBuilder keyBuilder = ndx.getIndexMetadata() + .getKeyBuilder(); + + final ITupleSerializer tupleSer = ndx.getIndexMetadata() + .getTupleSerializer(); + + try { + + while (source.hasNext()) { + + final IBindingSet[] chunk = source.next(); + + stats.chunksIn.increment(); + stats.unitsIn.add(chunk.length); + + int nwritten = 0; + for (int i = 0; i < chunk.length; i++) { + + final IBindingSet bset = chunk[i]; + + final E e = relation.newElement(selected, bset); + + final byte[] key = keyOrder.getKey(keyBuilder, e); + + if (!ndx.contains(key)) { + + final byte[] val = tupleSer.serializeVal(e); + + ndx.insert(key, val); + + nwritten++; + + } + + } + + if (nwritten > 0) { + stats.unitsOut.add(nwritten); + stats.chunksOut.increment(); + } + + } + + return null; + + } finally { + + sink.close(); + + } + + } + + } + + + // E[] a = null; + // Note: useful if we will sort before writing on the index. 
+// if (i == 0) +// a = (E[]) java.lang.reflect.Array.newInstance(e +// .getClass()); +// +// a[i] = e; + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/package.html =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/package.html (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/package.html 2010-09-05 17:02:34 UTC (rev 3508) @@ -0,0 +1,14 @@ +<html> +<head> +<title>mutation operators</title> +</head> +<body> + +<p> + + + +</p> + +</body> +</html> \ No newline at end of file Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/package.html ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2010-09-03 19:25:46 UTC (rev 3507) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -28,8 +28,14 @@ package com.bigdata.relation; +import java.util.ArrayList; +import java.util.List; import java.util.Properties; import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicLong; import com.bigdata.bop.IPredicate; import com.bigdata.btree.IIndex; @@ -43,10 +49,25 @@ import com.bigdata.journal.Journal; import 
com.bigdata.journal.TemporaryRawStore; import com.bigdata.journal.TemporaryStore; +import com.bigdata.rdf.lexicon.LexiconRelation; +import com.bigdata.rdf.model.StatementEnum; +import com.bigdata.rdf.spo.ISPO; +import com.bigdata.rdf.spo.JustificationRemover; +import com.bigdata.rdf.spo.SPO; +import com.bigdata.rdf.spo.SPOAccessPath; +import com.bigdata.rdf.spo.SPOIndexRemover; +import com.bigdata.rdf.spo.SPOIndexWriter; +import com.bigdata.rdf.spo.SPOKeyOrder; +import com.bigdata.rdf.spo.SPORelation; +import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.relation.accesspath.IElementFilter; +import com.bigdata.relation.rule.eval.ISolution; +import com.bigdata.relation.rule.eval.AbstractSolutionBuffer.InsertSolutionBuffer; import com.bigdata.service.DataService; import com.bigdata.service.IBigdataFederation; +import com.bigdata.striterator.IChunkedOrderedIterator; import com.bigdata.striterator.IKeyOrder; /** @@ -56,11 +77,6 @@ * @version $Id$ * @param <E> * The generic type of the [E]lements of the relation. - * - * @todo It would be interesting to do a GOM relation with its secondary index - * support and the addition of clustered indices. We would then get - * efficient JOINs via the rules layer for free and a high-level query - * language could be mapped onto those JOINs. 
*/ abstract public class AbstractRelation<E> extends AbstractResource<IRelation<E>> implements IMutableRelation<E> { @@ -323,5 +339,5 @@ getChunkCapacity(), getFullyBufferedReadThreshold()).init(); } - + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IMutableRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IMutableRelation.java 2010-09-03 19:25:46 UTC (rev 3507) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IMutableRelation.java 2010-09-05 17:02:34 UTC (rev 3508) @@ -80,71 +80,4 @@ */ public long delete(IChunkedOrderedIterator<E> itr); - /* - * @todo update is notional. it has not been implemented yet (you can use - * delete+insert). i suspect that the implementation will eventually involve - * the "transform" being specified as an extension to the rule, e.g., - * - * update [relation] set z=foo from [relation] (x,y,bar) - * - * if the update causes a change in the key for either the primary index or - * any secondary index for the relation then it must be realized as a - * delete+insert since the changes to the underlying tuples might not be - * local (e.g., they could be in an different index partition, on a - * different data service, on even on a different host). such non-local - * changes will not be atomic unless you use a full transaction or - * read-behind from a last known consistent commit point. - * - * @todo the RDF DB faces exactly this problem with truth maintenance (the - * problem is somewhat more severe since inference against an unstable KB - * state can magnify any inconsistencies). - */ - -// /** -// * Update elements on the relation. -// * <p> -// * The implemention must locate each element in the relation and, if found, -// * update its state using the <i>transform</i>. It is an error if the -// * transformed element has a different "primary key" than the visited -// * element. 
-// * <p> -// * Note: While UPDATE is often realized as "DELETE + INSERT" within the same -// * transaction, the advantage of this formulation is that is one-half of the -// * cost since each element in the relation is visited only once. However, if -// * you need to update parts of the primary key then "DELETE + INSERT" is the -// * way to go since each the "delete" and the "insert" may operate on -// * different tuples located in different parts of the index. For scale-out -// * indices, those tuples can even lie on different machines. -// * -// * @param itr -// * An iterator visiting the elements selected for update. -// * @param transform -// * A transform that produces the new state for each visited -// * element. -// * -// * @return The #of elements that were actually modified in the relation. -// */ -// public long update(IChunkedOrderedIterator... [truncated message content] |
From: <tho...@us...> - 2010-09-05 18:16:10
|
Revision: 3509 http://bigdata.svn.sourceforge.net/bigdata/?rev=3509&view=rev Author: thompsonbry Date: 2010-09-05 18:16:01 +0000 (Sun, 05 Sep 2010) Log Message: ----------- Added a "slice" operator and a basic unit test for that operator. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/IQueryOptions.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/QueryOptions.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/NestedSubqueryWithJoinThreadsTask.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/RuleStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/pipeline/JoinMasterTask.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestConditionalRoutingOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/relation/rule/TestSlice.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/rdf/rules/TestSlice.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ISortOrder.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SparqlBindingSetComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/ISlice.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/Slice.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestDistinctBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSPARQLBindingSetComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISlice.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISortOrder.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/MemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Slice.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOrder.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SparqlBindingSetComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/package.html branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestDistinctBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestMemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/aggregation/TestSPARQLBindingSetComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestDistinctBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSortBindingSets.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -32,6 +32,8 @@ import org.apache.log4j.Logger; import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.solutions.SliceOp; import com.bigdata.btree.IIndex; import com.bigdata.btree.ILocalBTreeView; import com.bigdata.btree.IRangeQuery; @@ -652,6 +654,24 @@ } + /** + * Cancel the running query (normal termination). + * <p> + * Note: This method provides a means for an operator to indicate that the + * query should halt immediately. It is used by {@link SliceOp}, which + * needs to terminate the entire query once the slice has been satisfied. 
+ * (If {@link SliceOp} just jumped out of its own evaluation loop then the + * query would not produce more results, but it would continue to run and + * the over produced results would just be thrown away.) + * <p> + * Note: When an individual {@link BOp} evaluation throws an exception, the + * {@link QueryEngine} will catch that exception and halt query evaluation + * with that thrown cause. + */ + public void halt() { + + } + /* * I've replaced this with AbstractSplitter for the moment. */ Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ComparatorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ComparatorOp.java 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ComparatorOp.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -1,77 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Sep 4, 2010 - */ - -package com.bigdata.bop.aggregation; - -import java.util.Comparator; -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpBase; -import com.bigdata.bop.IBindingSet; - -/** - * Base class for operators which impose a sort order on binding sets. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -abstract public class ComparatorOp extends BOpBase implements - Comparator<IBindingSet> { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public interface Annotations extends BOp.Annotations { - - /** - * An {@link ISortOrder}[] specifying the variables on which the sort - * will be imposed and the order (ascending or descending) for each - * variable. - */ - String ORDER = ComparatorOp.class.getName() + ".order"; - - } - - /** - * @param op - */ - public ComparatorOp(BOpBase op) { - super(op); - } - - /** - * @param args - * @param annotations - */ - public ComparatorOp(BOp[] args, Map<String, Object> annotations) { - super(args, annotations); - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -1,324 +0,0 @@ -package com.bigdata.bop.aggregation; - -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import 
java.util.concurrent.FutureTask; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.BindingSetPipelineOp; -import com.bigdata.bop.HashBindingSet; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.engine.BOpStats; -import com.bigdata.relation.accesspath.IAsynchronousIterator; -import com.bigdata.relation.accesspath.IBlockingBuffer; - -/** - * A pipelined DISTINCT operator based on a hash table. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z - * thompsonbry $ - */ -public class DistinctBindingSetOp extends BindingSetPipelineOp { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public interface Annotations extends BindingSetPipelineOp.Annotations { - - /** - * The initial capacity of the {@link ConcurrentHashMap} used to impose - * the distinct constraint. - * - * @see #DEFAULT_INITIAL_CAPACITY - */ - String INITIAL_CAPACITY = DistinctBindingSetOp.class.getName()+".initialCapacity"; - - int DEFAULT_INITIAL_CAPACITY = 16; - - /** - * The load factor of the {@link ConcurrentHashMap} used to impose - * the distinct constraint. - * - * @see #DEFAULT_LOAD_FACTOR - */ - String LOAD_FACTOR = DistinctBindingSetOp.class.getName()+".loadFactor"; - - float DEFAULT_LOAD_FACTOR = .75f; - - /** - * The concurrency level of the {@link ConcurrentHashMap} used to impose - * the distinct constraint. - * - * @see #DEFAULT_CONCURRENCY_LEVEL - */ - String CONCURRENCY_LEVEL = DistinctBindingSetOp.class.getName()+".concurrencyLevel"; - - int DEFAULT_CONCURRENCY_LEVEL = 16; - - /** - * The variables on which the distinct constraint will be imposed. - * Binding sets with distinct values for the specified variables will be - * passed on. - */ - String VARIABLES = DistinctBindingSetOp.class.getName() + ".variables"; - - } - - /** - * Required deep copy constructor. 
- */ - public DistinctBindingSetOp(final DistinctBindingSetOp op) { - super(op); - } - - /** - * Required shallow copy constructor. - */ - public DistinctBindingSetOp(final BOp[] args, - final Map<String, Object> annotations) { - - super(args, annotations); - - } - - /** - * @see Annotations#INITIAL_CAPACITY - */ - public int getInitialCapacity() { - - return getProperty(Annotations.INITIAL_CAPACITY, - Annotations.DEFAULT_INITIAL_CAPACITY); - - } - - /** - * @see Annotations#LOAD_FACTOR - */ - public float getLoadFactor() { - - return getProperty(Annotations.LOAD_FACTOR, - Annotations.DEFAULT_LOAD_FACTOR); - - } - - /** - * @see Annotations#CONCURRENCY_LEVEL - */ - public int getConcurrencyLevel() { - - return getProperty(Annotations.CONCURRENCY_LEVEL, - Annotations.DEFAULT_CONCURRENCY_LEVEL); - - } - - /** - * @see Annotations#VARIABLES - */ - public IVariable<?>[] getVariables() { - - return getRequiredProperty(Annotations.VARIABLES); - - } - - public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - - return new FutureTask<Void>(new DistinctTask(this, context)); - - } - - /** - * Wrapper used for the as bound solutions in the {@link ConcurrentHashMap}. - */ - private static class Solution { - private final int hash; - - private final IConstant<?>[] vals; - - public Solution(final IConstant<?>[] vals) { - this.vals = vals; - this.hash = java.util.Arrays.hashCode(vals); - } - - public int hashCode() { - return hash; - } - - public boolean equals(final Object o) { - if (this == o) - return true; - if (!(o instanceof Solution)) { - return false; - } - final Solution t = (Solution) o; - if (vals.length != t.vals.length) - return false; - for (int i = 0; i < vals.length; i++) { - // @todo verify that this allows for nulls with a unit test. - if (vals[i] == t.vals[i]) - continue; - if (vals[i] == null) - return false; - if (!vals[i].equals(t.vals[i])) - return false; - } - return true; - } - } - - /** - * Task executing on the node. 
- */ - static private class DistinctTask implements Callable<Void> { - - private final BOpContext<IBindingSet> context; - - /** - * A concurrent map whose keys are the bindings on the specified - * variables (the keys and the values are the same since the map - * implementation does not allow <code>null</code> values). - */ - private /*final*/ ConcurrentHashMap<Solution, Solution> map; - - /** - * The variables used to impose a distinct constraint. - */ - private final IVariable<?>[] vars; - - DistinctTask(final DistinctBindingSetOp op, - final BOpContext<IBindingSet> context) { - - this.context = context; - - this.vars = op.getVariables(); - - if (vars == null) - throw new IllegalArgumentException(); - - if (vars.length == 0) - throw new IllegalArgumentException(); - - this.map = new ConcurrentHashMap<Solution, Solution>( - op.getInitialCapacity(), op.getLoadFactor(), - op.getConcurrencyLevel()); - - } - - /** - * If the bindings are distinct for the configured variables then return - * those bindings. - * - * @param bset - * The binding set to be filtered. - * - * @return The distinct as bound values -or- <code>null</code> if the - * binding set duplicates a solution which was already accepted. - */ - private IConstant<?>[] accept(final IBindingSet bset) { - - final IConstant<?>[] r = new IConstant<?>[vars.length]; - - for (int i = 0; i < vars.length; i++) { - - /* - * Note: This allows null's. - * - * @todo write a unit test when some variables are not bound. - */ - r[i] = bset.get(vars[i]); - - } - - final Solution s = new Solution(r); - - final boolean distinct = map.putIfAbsent(s, s) == null; - - return distinct ? 
r : null; - - } - - public Void call() throws Exception { - - final BOpStats stats = context.getStats(); - - final IAsynchronousIterator<IBindingSet[]> itr = context - .getSource(); - - final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); - - try { - - while (itr.hasNext()) { - - final IBindingSet[] a = itr.next(); - - stats.chunksIn.increment(); - stats.unitsIn.add(a.length); - - final List<IBindingSet> accepted = new LinkedList<IBindingSet>(); - - int naccepted = 0; - - for (IBindingSet bset : a) { - -// System.err.println("considering: " + bset); - - final IConstant<?>[] vals = accept(bset); - - if (vals != null) { - -// System.err.println("accepted: " -// + Arrays.toString(vals)); - - accepted.add(new HashBindingSet(vars, vals)); - - naccepted++; - - } - - } - - if (naccepted > 0) { - - final IBindingSet[] b = accepted - .toArray(new IBindingSet[naccepted]); - -// System.err.println("output: " -// + Arrays.toString(b)); - - sink.add(b); - - stats.unitsOut.add(naccepted); - stats.chunksOut.increment(); - - } - - } - - sink.flush(); - - // done. - return null; - - } finally { - - sink.close(); - - // discard the map. - map = null; - - } - - } - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISlice.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISlice.java 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISlice.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -1,69 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -/* - * Created on Sep 24, 2008 - */ - -package com.bigdata.bop.aggregation; - -import java.io.Serializable; - -import com.bigdata.relation.accesspath.IAccessPath; - -/** - * Indicates the first solution to be returned to the caller (offset) and the - * #of solutions to be returned (limit). - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public interface ISlice extends Serializable { - - /** - * The first solution to be returned to the caller. A value of ZERO (0) - * indicates that all solutions should be returned. - */ - public long getOffset(); - - /** - * The maximum #of solutions to be returned to the caller. A value of - * {@link Long#MAX_VALUE} indicates that there is no limit. - * - * @todo modify to be consistent with - * {@link IAccessPath#iterator(long, long, int)} where a limit of ZERO - * (0L) is interpreted as NO limit and a limit of - * {@link Long#MAX_VALUE} is interpreted as ZERO (0L) (that is, also - * no limit). - */ - public long getLimit(); - - /** - * The index of the last solution that we will generate (OFFSET + LIMIT). If - * OFFSET + LIMIT would be greater than {@link Long#MAX_VALUE}, then use - * {@link Long#MAX_VALUE} instead. 
- */ - public long getLast(); - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISortOrder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISortOrder.java 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISortOrder.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -1,55 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -/* - * Created on Sep 24, 2008 - */ - -package com.bigdata.bop.aggregation; - -import java.io.Serializable; - -import com.bigdata.bop.IVariable; - -/** - * A variable and an order that will be imposed on the values for that variable. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public interface ISortOrder<E> extends Serializable { - - /** - * The variable whose values will be sorted. - */ - IVariable<E> getVariable(); - - /** - * <code>true</code> iff the values will be placed into an ascending sort - * and <code>false</code> if the values will be placed into a descending - * sort. 
- */ - boolean isAscending(); - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/MemorySortOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/MemorySortOp.java 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/MemorySortOp.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -1,112 +0,0 @@ -package com.bigdata.bop.aggregation; - -import java.util.Arrays; -import java.util.Comparator; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.FutureTask; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpContext; -import com.bigdata.bop.BOpUtility; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.engine.BOpStats; -import com.bigdata.relation.accesspath.IBlockingBuffer; - -/** - * An in-memory merge sort for binding sets. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z - * thompsonbry $ - * - * @todo unit tests. - * @todo do an external merge sort operator. - */ -public class MemorySortOp extends SortOp { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * Required deep copy constructor. - */ - public MemorySortOp(final MemorySortOp op) { - super(op); - } - - /** - * Required shallow copy constructor. - */ - public MemorySortOp(final BOp[] args, - final Map<String, Object> annotations) { - - super(args, annotations); - - } - - public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - - return new FutureTask<Void>(new SortTask(this, context)); - - } - - /** - * Task executing on the node. - */ - static private class SortTask implements Callable<Void> { - - private final BOpContext<IBindingSet> context; - - /** - * The binding set comparator. 
- */ - private final Comparator<IBindingSet> comparator; - - SortTask(final MemorySortOp op, - final BOpContext<IBindingSet> context) { - - this.context = context; - - this.comparator = op.getComparator(); - - } - - public Void call() throws Exception { - - final BOpStats stats = context.getStats(); - - final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); - - try { - - final IBindingSet[] all = BOpUtility.toArray(context - .getSource(), stats); - - // sort. - Arrays.sort(all, comparator); - - // update counters. - stats.unitsOut.add(all.length); - stats.chunksOut.increment(); - - // write output and flush. - sink.add(all); - sink.flush(); - - // done. - return null; - - } finally { - - sink.close(); - - } - - } - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Slice.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Slice.java 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/Slice.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -1,129 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -/* - * Created on Sep 24, 2008 - */ - -package com.bigdata.bop.aggregation; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.math.BigInteger; - -/** - * Default implementation. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class Slice implements ISlice, Externalizable { - - /** - * - */ - private static final long serialVersionUID = 5396509164843609197L; - - private long offset; - private long limit; - private long last; - - /** - * A slice corresponding to all results (offset is zero, limit is - * {@link Long#MAX_VALUE}). - */ - public static final transient ISlice ALL = new Slice(0, Long.MAX_VALUE); - - /** - * - * @param offset - * @param limit - * - * @throws IllegalArgumentException - * if offset is negative. - * @throws IllegalArgumentException - * if limit is non-positive. - */ - public Slice(final long offset, final long limit) { - - if (offset < 0) - throw new IllegalArgumentException(); - - if (limit <= 0) - throw new IllegalArgumentException(); - - this.offset = offset; - - this.limit = limit; - - // @todo what is a cheaper way to do this? 
- this.last = BigInteger.valueOf(offset).add(BigInteger.valueOf(limit)) - .min(BigInteger.valueOf(Long.MAX_VALUE)).longValue(); - - } - - public long getOffset() { - - return offset; - - } - - public long getLimit() { - - return limit; - - } - - public long getLast() { - - return last; - - } - - public String toString() { - - return "Slice{offset="+offset+", limit="+limit+", last="+last+"}"; - - } - - public void readExternal(ObjectInput in) throws IOException, - ClassNotFoundException { - - offset = in.readLong(); - limit = in.readLong(); - last = in.readLong(); - - } - - public void writeExternal(ObjectOutput out) throws IOException { - - out.writeLong(offset); - out.writeLong(limit); - out.writeLong(last); - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOp.java 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOp.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -1,86 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Sep 4, 2010 - */ - -package com.bigdata.bop.aggregation; - -import java.util.Map; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BindingSetPipelineOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.PipelineOp; - -/** - * Base class for operators which sort binding sets. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -abstract public class SortOp extends BindingSetPipelineOp { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public interface Annotations extends BindingSetPipelineOp.Annotations { - - /** - * The {@link ComparatorOp} which will impose the ordering on the - * binding sets. - * - * @see ComparatorOp - */ - String COMPARATOR = MemorySortOp.class.getName() + ".comparator"; - - } - - /** - * @param op - */ - public SortOp(PipelineOp<IBindingSet> op) { - super(op); - } - - /** - * @param args - * @param annotations - */ - public SortOp(BOp[] args, Map<String, Object> annotations) { - super(args, annotations); - } - - /** - * @see Annotations#COMPARATOR - */ - public ComparatorOp getComparator() { - - return getRequiredProperty(Annotations.COMPARATOR); - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOrder.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOrder.java 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SortOrder.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -1,80 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... 
- -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -/* - * Created on Sep 24, 2008 - */ - -package com.bigdata.bop.aggregation; - -import com.bigdata.bop.IVariable; - -/** - * Default impl. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class SortOrder<E> implements ISortOrder<E> { - - /** - * - */ - private static final long serialVersionUID = -669873421670514139L; - - private final IVariable<E> var; - private final boolean asc; - - /** - * - * @param var - * The variable. - * @param asc - * <code>true</code> for an ascending sort and - * <code>false</code> for a descending sort. 
- */ - public SortOrder(final IVariable<E> var, final boolean asc) { - - if (var == null) - throw new IllegalArgumentException(); - - this.var = var; - - this.asc = asc; - - } - - public IVariable<E> getVariable() { - - return var; - - } - - public boolean isAscending() { - - return asc; - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SparqlBindingSetComparatorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SparqlBindingSetComparatorOp.java 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/SparqlBindingSetComparatorOp.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -1,101 +0,0 @@ -package com.bigdata.bop.aggregation; - -import java.util.Comparator; -import java.util.Map; - -import org.openrdf.query.algebra.evaluation.util.ValueComparator; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IVariable; - -/** - * A comparator for SPARQL binding sets. - * - * @see http://www.w3.org/TR/rdf-sparql-query/#modOrderBy - * @see ValueComparator - * - * @todo unit tests. - */ -public class SparqlBindingSetComparatorOp extends ComparatorOp { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * Required deep copy constructor. - */ - public SparqlBindingSetComparatorOp(final SparqlBindingSetComparatorOp op) { - super(op); - } - - /** - * Required shallow copy constructor. - */ - public SparqlBindingSetComparatorOp(final BOp[] args, - final Map<String, Object> annotations) { - - super(args, annotations); - - } - - /** - * @see Annotations#ORDER - */ - public ISortOrder<?>[] getOrder() { - - return getRequiredProperty(Annotations.ORDER); - - } - - /** - * The sort order to be imposed. 
- */ - private transient ISortOrder<?>[] order; - - private transient Comparator vc; - - public int compare(final IBindingSet bs1, final IBindingSet bs2) { - - if (order == null) { - - // lazy initialization. - order = getOrder(); - - if (order == null) - throw new IllegalArgumentException(); - - if (order.length == 0) - throw new IllegalArgumentException(); - - // comparator for RDF Value objects. - vc = new ValueComparator(); - - } - - for (int i = 0; i < order.length; i++) { - - final ISortOrder<?> o = order[i]; - - final IVariable v = o.getVariable(); - - int ret = vc.compare(bs1.get(v).get(), bs2.get(v).get()); - - if (!o.isAscending()) - ret = -ret; - - if (ret != 0) { - // not equal for this variable. - return ret; - } - - } - - // equal for all variables. - return 0; - - } - -} Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html 2010-09-05 17:02:34 UTC (rev 3508) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/package.html 2010-09-05 18:16:01 UTC (rev 3509) @@ -1,17 +0,0 @@ -<html> -<head> -<title>solution modifier operators (distinct, sort, slice, and aggregation)</title> -</head> -<body> - -<p> - - This package provides distinct, sort, and aggregation operators. All of - these are potentially high volume hash partitioned operations against a - clustered database. Both in memory and disk based versions of the each - operator should be implemented. 
- -</p> - -</body> -</html> \ No newline at end of file Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ComparatorOp.java (from rev 3508, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ComparatorOp.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ComparatorOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ComparatorOp.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -0,0 +1,77 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 4, 2010 + */ + +package com.bigdata.bop.solutions; + +import java.util.Comparator; +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpBase; +import com.bigdata.bop.IBindingSet; + +/** + * Base class for operators which impose a sort order on binding sets. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +abstract public class ComparatorOp extends BOpBase implements + Comparator<IBindingSet> { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends BOp.Annotations { + + /** + * An {@link ISortOrder}[] specifying the variables on which the sort + * will be imposed and the order (ascending or descending) for each + * variable. + */ + String ORDER = ComparatorOp.class.getName() + ".order"; + + } + + /** + * @param op + */ + public ComparatorOp(BOpBase op) { + super(op); + } + + /** + * @param args + * @param annotations + */ + public ComparatorOp(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java (from rev 3508, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/DistinctBindingSetOp.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -0,0 +1,324 @@ +package com.bigdata.bop.solutions; + +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.HashBindingSet; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; + +/** + 
* A pipelined DISTINCT operator based on a hash table. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z + * thompsonbry $ + */ +public class DistinctBindingSetOp extends BindingSetPipelineOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends BindingSetPipelineOp.Annotations { + + /** + * The initial capacity of the {@link ConcurrentHashMap} used to impose + * the distinct constraint. + * + * @see #DEFAULT_INITIAL_CAPACITY + */ + String INITIAL_CAPACITY = DistinctBindingSetOp.class.getName()+".initialCapacity"; + + int DEFAULT_INITIAL_CAPACITY = 16; + + /** + * The load factor of the {@link ConcurrentHashMap} used to impose + * the distinct constraint. + * + * @see #DEFAULT_LOAD_FACTOR + */ + String LOAD_FACTOR = DistinctBindingSetOp.class.getName()+".loadFactor"; + + float DEFAULT_LOAD_FACTOR = .75f; + + /** + * The concurrency level of the {@link ConcurrentHashMap} used to impose + * the distinct constraint. + * + * @see #DEFAULT_CONCURRENCY_LEVEL + */ + String CONCURRENCY_LEVEL = DistinctBindingSetOp.class.getName()+".concurrencyLevel"; + + int DEFAULT_CONCURRENCY_LEVEL = 16; + + /** + * The variables on which the distinct constraint will be imposed. + * Binding sets with distinct values for the specified variables will be + * passed on. + */ + String VARIABLES = DistinctBindingSetOp.class.getName() + ".variables"; + + } + + /** + * Required deep copy constructor. + */ + public DistinctBindingSetOp(final DistinctBindingSetOp op) { + super(op); + } + + /** + * Required shallow copy constructor. 
+ */ + public DistinctBindingSetOp(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + + } + + /** + * @see Annotations#INITIAL_CAPACITY + */ + public int getInitialCapacity() { + + return getProperty(Annotations.INITIAL_CAPACITY, + Annotations.DEFAULT_INITIAL_CAPACITY); + + } + + /** + * @see Annotations#LOAD_FACTOR + */ + public float getLoadFactor() { + + return getProperty(Annotations.LOAD_FACTOR, + Annotations.DEFAULT_LOAD_FACTOR); + + } + + /** + * @see Annotations#CONCURRENCY_LEVEL + */ + public int getConcurrencyLevel() { + + return getProperty(Annotations.CONCURRENCY_LEVEL, + Annotations.DEFAULT_CONCURRENCY_LEVEL); + + } + + /** + * @see Annotations#VARIABLES + */ + public IVariable<?>[] getVariables() { + + return getRequiredProperty(Annotations.VARIABLES); + + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new DistinctTask(this, context)); + + } + + /** + * Wrapper used for the as bound solutions in the {@link ConcurrentHashMap}. + */ + private static class Solution { + private final int hash; + + private final IConstant<?>[] vals; + + public Solution(final IConstant<?>[] vals) { + this.vals = vals; + this.hash = java.util.Arrays.hashCode(vals); + } + + public int hashCode() { + return hash; + } + + public boolean equals(final Object o) { + if (this == o) + return true; + if (!(o instanceof Solution)) { + return false; + } + final Solution t = (Solution) o; + if (vals.length != t.vals.length) + return false; + for (int i = 0; i < vals.length; i++) { + // @todo verify that this allows for nulls with a unit test. + if (vals[i] == t.vals[i]) + continue; + if (vals[i] == null) + return false; + if (!vals[i].equals(t.vals[i])) + return false; + } + return true; + } + } + + /** + * Task executing on the node. 
+ */ + static private class DistinctTask implements Callable<Void> { + + private final BOpContext<IBindingSet> context; + + /** + * A concurrent map whose keys are the bindings on the specified + * variables (the keys and the values are the same since the map + * implementation does not allow <code>null</code> values). + */ + private /*final*/ ConcurrentHashMap<Solution, Solution> map; + + /** + * The variables used to impose a distinct constraint. + */ + private final IVariable<?>[] vars; + + DistinctTask(final DistinctBindingSetOp op, + final BOpContext<IBindingSet> context) { + + this.context = context; + + this.vars = op.getVariables(); + + if (vars == null) + throw new IllegalArgumentException(); + + if (vars.length == 0) + throw new IllegalArgumentException(); + + this.map = new ConcurrentHashMap<Solution, Solution>( + op.getInitialCapacity(), op.getLoadFactor(), + op.getConcurrencyLevel()); + + } + + /** + * If the bindings are distinct for the configured variables then return + * those bindings. + * + * @param bset + * The binding set to be filtered. + * + * @return The distinct as bound values -or- <code>null</code> if the + * binding set duplicates a solution which was already accepted. + */ + private IConstant<?>[] accept(final IBindingSet bset) { + + final IConstant<?>[] r = new IConstant<?>[vars.length]; + + for (int i = 0; i < vars.length; i++) { + + /* + * Note: This allows null's. + * + * @todo write a unit test when some variables are not bound. + */ + r[i] = bset.get(vars[i]); + + } + + final Solution s = new Solution(r); + + final boolean distinct = map.putIfAbsent(s, s) == null; + + return distinct ? 
r : null; + + } + + public Void call() throws Exception { + + final BOpStats stats = context.getStats(); + + final IAsynchronousIterator<IBindingSet[]> itr = context + .getSource(); + + final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); + + try { + + while (itr.hasNext()) { + + final IBindingSet[] a = itr.next(); + + stats.chunksIn.increment(); + stats.unitsIn.add(a.length); + + final List<IBindingSet> accepted = new LinkedList<IBindingSet>(); + + int naccepted = 0; + + for (IBindingSet bset : a) { + +// System.err.println("considering: " + bset); + + final IConstant<?>[] vals = accept(bset); + + if (vals != null) { + +// System.err.println("accepted: " +// + Arrays.toString(vals)); + + accepted.add(new HashBindingSet(vars, vals)); + + naccepted++; + + } + + } + + if (naccepted > 0) { + + final IBindingSet[] b = accepted + .toArray(new IBindingSet[naccepted]); + +// System.err.println("output: " +// + Arrays.toString(b)); + + sink.add(b); + + stats.unitsOut.add(naccepted); + stats.chunksOut.increment(); + + } + + } + + sink.flush(); + + // done. + return null; + + } finally { + + sink.close(); + + // discard the map. + map = null; + + } + + } + + } + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ISortOrder.java (from rev 3508, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/ISortOrder.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ISortOrder.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/ISortOrder.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -0,0 +1,55 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +*/ +/* + * Created on Sep 24, 2008 + */ + +package com.bigdata.bop.solutions; + +import java.io.Serializable; + +import com.bigdata.bop.IVariable; + +/** + * A variable and an order that will be imposed on the values for that variable. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface ISortOrder<E> extends Serializable { + + /** + * The variable whose values will be sorted. + */ + IVariable<E> getVariable(); + + /** + * <code>true</code> iff the values will be placed into an ascending sort + * and <code>false</code> if the values will be placed into a descending + * sort. 
+ */ + boolean isAscending(); + +} Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java (from rev 3508, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/aggregation/MemorySortOp.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -0,0 +1,112 @@ +package com.bigdata.bop.solutions; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.relation.accesspath.IBlockingBuffer; + +/** + * An in-memory merge sort for binding sets. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z + * thompsonbry $ + * + * @todo unit tests. + * @todo do an external merge sort operator. + */ +public class MemorySortOp extends SortOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Required deep copy constructor. + */ + public MemorySortOp(final MemorySortOp op) { + super(op); + } + + /** + * Required shallow copy constructor. + */ + public MemorySortOp(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new SortTask(this, context)); + + } + + /** + * Task executing on the node. 
+ */ + static private class SortTask implements Callable<Void> { + + private final BOpContext<IBindingSet> context; + + /** + * The binding set comparator. + */ + private final Comparator<IBindingSet> comparator; + + SortTask(final MemorySortOp op, + final BOpContext<IBindingSet> context) { + + this.context = context; + + this.comparator = op.getComparator(); + + } + + public Void call() throws Exception { + + final BOpStats stats = context.getStats(); + + final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); + + try { + + final IBindingSet[] all = BOpUtility.toArray(context + .getSource(), stats); + + // sort. + Arrays.sort(all, comparator); + + // update counters. + stats.unitsOut.add(all.length); + stats.chunksOut.increment(); + + // write output and flush. + sink.add(all); + sink.flush(); + + // done. + return null; + + } finally { + + sink.close(); + + } + + } + + } + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2010-09-05 18:16:01 UTC (rev 3509) @@ -0,0 +1,275 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 5, 2010 + */ + +package com.bigdata.bop.solutions; + +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.RunningQuery; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.relation.accesspath.UnsynchronizedArrayBuffer; +import com.bigdata.service.IBigdataFederation; + +/** + * An operator which imposes an offset/limit on a binding set pipeline. + * <p> + * Note: join processing typically involves concurrent processes, hence the + * order of the results will not be stable unless the results are sorted before + * applying the slice. When a slice is applied without a sort, the same query + * may return different results each time it is evaluated. + * <p> + * Note: When running on an {@link IBigdataFederation}, this operator must be + * imposed on the query controller so it can count the solutions as they flow + * through. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * + * @todo If this operator is invoked for each chunk output by a query onto the + * pipeline then it will over produce unless (A) it is given the same + * {@link BOpStats} each time; and (B) it is not invoked for two chunks + * concurrently. + * <p> + * A safer way to impose the slice constraint is by wrapping the query + * buffer on the query controller. Once the slice is satisfied, it can + * just cancel the query. 
The only drawback of this approach is that + * wrapping a buffer is not really the same as applying a {@link BOp} to + * the pipeline so it falls outside of the standard operator evaluation + * logic. + * + * @todo If we allow complex operator trees in which "subqueries" can also use a + * slice then either they need to run as their own query with their own + * {@link RunningQuery} state or the API for cancelling a running query as + * used here needs to only cancel evaluation of the child operators. + * Otherwise we could cancel all operator evaluation for the query, + * including operators which are ancestors of the {@link SliceOp}. + */ +public class SliceOp extends BindingSetPipelineOp { + + /** + * + */ + private static final long serialVersionUID ... [truncated message content] |
From: <tho...@us...> - 2010-09-08 20:52:16
|
Revision: 3524 http://bigdata.svn.sourceforge.net/bigdata/?rev=3524&view=rev Author: thompsonbry Date: 2010-09-08 20:52:07 +0000 (Wed, 08 Sep 2010) Log Message: ----------- Added a class (DirectBufferPoolAllocator) to manage allocations of direct ByteBuffer slices against the DirectBufferPool for use with the FederatedQueryEngine. There is a limited test suite for this class. Partly integrated the DirectBufferPoolAllocator with the FederatedQueryEngine. The generated intermediate results are now written onto direct ByteBuffer slices allocated on the DirectBufferPool and notification messages are sent to the receiving services. I have not yet modified the receiving FederatedQueryEngine to demand the data from the remote query engine. Harmonized InsertOp and PipelineJoin as IShardwiseBindingSetOps. This makes it possible to find the predicate for the access path on which we need to read/write and map the results to the appropriate shards. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryPeer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/IRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/RelationFusedView.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/resources/StoreManager.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/DataService.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/IDataService.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/ManagedResourceService.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/ResourceService.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/io/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/resources/AbstractResourceManagerTestCase.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/service/jini/DataServer.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/service/jini/JiniFederation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IShardwisePipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/DirectBufferPoolAllocator.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/io/TestDirectBufferPoolAllocator.java Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IShardwisePipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IShardwisePipelineOp.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IShardwisePipelineOp.java 2010-09-08 20:52:07 UTC (rev 3524) @@ -0,0 +1,48 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. 
All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 8, 2010 + */ + +package com.bigdata.bop; + +/** + * An interface for {@link BindingSetPipelineOp}s which are mapped across + * shards. + * + * @param <E> + * The generic type of the elements in the relation on which the + * predicate will read or write. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface IShardwisePipelineOp<E> { + + /** + * The predicate which reads or writes on the shard. 
+ */ + IPredicate<E> getPredicate(); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IShardwisePipelineOp.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryPeer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryPeer.java 2010-09-08 20:11:49 UTC (rev 3523) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryPeer.java 2010-09-08 20:52:07 UTC (rev 3524) @@ -3,8 +3,10 @@ import java.net.InetSocketAddress; import java.rmi.Remote; import java.rmi.RemoteException; +import java.util.UUID; import com.bigdata.bop.BOp; +import com.bigdata.service.IService; /** * Interface for a node participating in the exchange of NIO buffers to @@ -13,6 +15,14 @@ public interface IQueryPeer extends Remote { /** + * The {@link UUID} of the service within which the {@link IQueryPeer} is + * running. + * + * @see IService#getServiceUUID() + */ + UUID getServiceUUID() throws RemoteException; + + /** * Notify a service that a buffer having data for some {@link BOp} in some * running query is available. The receiver may request the data when they * are ready. If the query is cancelled, then the sender will drop the @@ -30,7 +40,9 @@ * @return <code>true</code> unless the receiver knows that the query has * already been cancelled. */ +// * @param nbytes +// * The #of bytes of data which are available for that operator. 
void bufferReady(IQueryClient clientProxy, InetSocketAddress serviceAddr, - long queryId, int bopId) throws RemoteException; + long queryId, int bopId/*, int nbytes*/) throws RemoteException; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-09-08 20:11:49 UTC (rev 3523) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-09-08 20:52:07 UTC (rev 3524) @@ -372,7 +372,7 @@ * is running -or- <code>null</code> if the {@link QueryEngine} is * not running against an {@link IBigdataFederation}. */ - protected UUID getServiceId() { + public UUID getServiceUUID() { return null; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-09-08 20:11:49 UTC (rev 3523) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-09-08 20:52:07 UTC (rev 3524) @@ -59,7 +59,6 @@ import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.resources.ResourceManager; import com.bigdata.service.IBigdataFederation; import com.bigdata.striterator.ICloseableIterator; import com.bigdata.util.concurrent.Haltable; @@ -209,40 +208,6 @@ private final Set<Integer/*bopId*/> startedSet = new LinkedHashSet<Integer>(); /** - * A map associating resources with running queries. When a query halts, the - * resources listed in its resource map are released. 
Resources can include - * {@link ByteBuffer}s backing either incoming or outgoing - * {@link BindingSetChunk}s, temporary files associated with the query, hash - * tables, etc. - * - * @todo Cache any resources materialized for the query on this node (e.g., - * temporary graphs materialized from a peer or the client). A bop - * should be able to demand those data from the cache and otherwise - * have them be materialized. - * - * @todo only use the values in the map for transient objects, such as a - * hash table which is not backed by the disk. For {@link ByteBuffer}s - * we want to make the references go through the {@link BufferService} - * . For files, through the {@link ResourceManager}. - * - * @todo We need to track the resources in use by the query so they can be - * released when the query terminates. This includes: buffers; joins - * for which there is a chunk of binding sets that are currently being - * executed; downstream joins (they depend on the source joins to - * notify them when they are complete in order to decide their own - * termination condition); local hash tables which are part of a DHT - * (especially when they are persistent); buffers and disk resources - * allocated to N-way merge sorts, etc. - * - * @todo The set of buffers having data which has been accepted for this - * query. - * - * @todo The set of buffers having data which has been generated for this - * query. - */ - private final ConcurrentHashMap<UUID, Object> resourceMap = new ConcurrentHashMap<UUID, Object>(); - - /** * The chunks available for immediate processing. * <p> * Note: This is package private so it will be visible to the @@ -261,7 +226,7 @@ * combined before we execute the operator. For unselective operators, * we are going to run over all the data anyway. 
*/ - final BlockingQueue<BindingSetChunk> chunksIn = new LinkedBlockingDeque<BindingSetChunk>(); + final /*private*/ BlockingQueue<BindingSetChunk> chunksIn = new LinkedBlockingDeque<BindingSetChunk>(); /** * The class executing the query on this node. @@ -601,7 +566,7 @@ final long elapsed = System.currentTimeMillis() - begin; if (log.isTraceEnabled()) log.trace("bopId=" + msg.bopId + ",partitionId=" + msg.partitionId - + ",serviceId=" + queryEngine.getServiceId() + + ",serviceId=" + queryEngine.getServiceUUID() + ", nchunks=" + fanOut + " : runningTaskCount=" + runningTaskCount + ", availableChunkCount=" + availableChunkCount + ", elapsed=" + elapsed); @@ -742,7 +707,7 @@ .getProperty(BindingSetPipelineOp.Annotations.BOP_ID); final IBlockingBuffer<IBindingSet[]> sink = (p == null ? queryBuffer : op.newBuffer()); - // altSink [@todo altSink=null or sink when not specified?] + // altSink (null when not specified). final Integer altSinkId = (Integer) op .getProperty(BindingSetPipelineOp.Annotations.ALT_SINK_REF); if (altSinkId != null && !bopIndex.containsKey(altSinkId)) { @@ -758,7 +723,7 @@ // Hook the FutureTask. 
final Runnable r = new Runnable() { public void run() { - final UUID serviceId = queryEngine.getServiceId(); + final UUID serviceId = queryEngine.getServiceUUID(); int fanIn = 1; int sinkChunksOut = 0; int altSinkChunksOut = 0; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2010-09-08 20:11:49 UTC (rev 3523) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2010-09-08 20:52:07 UTC (rev 3524) @@ -27,17 +27,18 @@ package com.bigdata.bop.fed; +import java.io.IOException; import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.bset.ConditionalRoutingOp; import com.bigdata.bop.engine.IQueryClient; +import com.bigdata.bop.engine.IQueryPeer; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.engine.RunningQuery; -import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.solutions.SliceOp; import com.bigdata.journal.IIndexManager; import com.bigdata.relation.accesspath.IAsynchronousIterator; @@ -45,6 +46,7 @@ import com.bigdata.relation.accesspath.IBuffer; import com.bigdata.service.DataService; import com.bigdata.service.IBigdataFederation; +import com.bigdata.service.IDataService; import com.bigdata.service.ManagedResourceService; import com.bigdata.service.ResourceService; @@ -55,50 +57,14 @@ * @version $Id: FederatedQueryEngine.java 3508 2010-09-05 17:02:34Z thompsonbry * $ * - * @todo Modify the {@link FederatedQueryEngine} to actually run a distributed - * query. 
Since we are in the same JVM, the {@link IBindingSet} chunks can - * be used directly without being marshalled onto {@link ByteBuffer}s and - * transferred over the network. - * <p> - * Distributed query will fail until each {@link FederatedQueryEngine} is - * receiving chunks and running operators against its local - * {@link IIndexManager}. This requires that we map the output chunks for - * an operator over the shards for the next operator, that we send the - * appropriate messages to the query engine peers, that they demand the - * necessary data from their peers, etc. - * <p> - * Once distributed query is running, begin to marshall the chunks onto - * buffers [this might have to be done immediately to get the notification - * protocol working]. + * @todo buffer management for s/o including bindingSet[] movement for the + * pipeline and element[] movement for DHT on access path. * - * @todo buffer management for s/o, including binding sets movement, element - * chunk movement for DHT on access path, and on demand materialization of - * large query resources for large data sets, parallel closure, etc.; - * grouping operators which will run locally (such as a pipeline join plus - * a conditional routing operator) so we do not marshall binding sets - * between operators when they will not cross a network boundary. Also, - * handle mutation, programs and closure operators. - * - * @todo I have not yet figured out how to mark operators to indicate when their - * output should be mapped across shards or handled locally. It would - * appear that this is a concern of their parent in the operator tree. For - * example, the {@link ConditionalRoutingOp} would be applied to transform - * the output of a {@link PipelineJoin} before mapping the output over the - * shards. - * <p> - * The operator themselves could carry this information either as a Java - * method or as an annotation. - * <p> - * This could interact with how we combine {@link RunningQuery#chunksIn}. 
- * * @todo Override to release buffers associated with chunks buffered for a query * when it terminates (buffers may be for received chunks or chunks which * are awaiting transfer to another node). [This might be handled by a * {@link RunningQuery} override.] * - * @todo Override protocol hooks for moving data around among the - * {@link QueryEngine}s - * * @todo Compressed representations of binding sets with the ability to read * them in place or materialize them onto the java heap. The * representation should be ammenable to processing in C since we want to @@ -167,7 +133,7 @@ } @Override - protected UUID getServiceId() { + public UUID getServiceUUID() { return fed.getServiceUUID(); @@ -190,13 +156,18 @@ return resourceService; } - + + /** + * FIXME Once buffers are ready their data needs to be materialized on this + * node and the chunks queued for processing. + * + * @todo What is the cost of passing the proxy around like this? Should it + * be discovered instead from a registrar? + */ @Override public void bufferReady(IQueryClient clientProxy, InetSocketAddress serviceAddr, long queryId, int bopId) { - // @todo notify peer when a buffer is ready. - } /** @@ -246,4 +217,47 @@ } + /** + * Resolve an {@link IQueryPeer}. + * <p> + * Note: This only resolves the peers running on the {@link IDataService}s. + * It will not resolve a query controller unless an {@link IDataService} is + * being used as the query controller. + * + * @param serviceUUID + * The service {@link UUID}. + * + * @return The proxy for the query peer. 
+ */ + protected IQueryPeer getQueryPeer(final UUID serviceUUID) { + + IQueryPeer proxy = proxyMap.get(serviceUUID); + + if (proxy == null) { + + final IDataService dataService = getFederation().getDataService( + serviceUUID); + + if (dataService == null) + throw new RuntimeException("No such service: " + serviceUUID); + + try { + proxy = dataService.getQueryEngine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + + proxyMap.put(serviceUUID, proxy); + + } + + return proxy; + + } + + /** + * Cache for {@link #getQueryPeer(UUID)}. + */ + private final ConcurrentHashMap<UUID, IQueryPeer> proxyMap = new ConcurrentHashMap<UUID, IQueryPeer>(); + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java 2010-09-08 20:11:49 UTC (rev 3523) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java 2010-09-08 20:52:07 UTC (rev 3524) @@ -27,19 +27,39 @@ package com.bigdata.bop.fed; +import java.net.InetSocketAddress; import java.nio.ByteBuffer; +import java.rmi.RemoteException; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IShardwisePipelineOp; +import com.bigdata.bop.engine.BindingSetChunk; import com.bigdata.bop.engine.IQueryClient; +import com.bigdata.bop.engine.IQueryPeer; +import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.engine.RunningQuery; +import com.bigdata.io.DirectBufferPoolAllocator; +import com.bigdata.io.SerializerUtil; +import com.bigdata.io.DirectBufferPoolAllocator.IAllocation; +import 
com.bigdata.io.DirectBufferPoolAllocator.IAllocationContext; import com.bigdata.mdi.PartitionLocator; +import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.IBuffer; +import com.bigdata.resources.ResourceManager; import com.bigdata.service.IBigdataFederation; -import com.bigdata.service.jini.master.IAsynchronousClientTask; +import com.bigdata.service.ManagedResourceService; +import com.bigdata.service.ResourceService; import com.bigdata.striterator.IKeyOrder; /** @@ -69,6 +89,59 @@ * */ public class FederatedRunningQuery extends RunningQuery { + /** + * The {@link UUID} of the service which is the {@link IQueryClient} running + * this query. + */ + private final UUID queryControllerUUID; + + /** + * A map associating resources with running queries. When a query halts, the + * resources listed in its resource map are released. Resources can include + * {@link ByteBuffer}s backing either incoming or outgoing + * {@link BindingSetChunk}s, temporary files associated with the query, hash + * tables, etc. + * + * @todo This map will eventually need to be moved into {@link RunningQuery} + * in order to support temporary graphs or other disk-backed resources + * associated with the evaluation of a query against a standalone + * database. However, the main use case is the resources associated + * with query against an {@link IBigdataFederation} which is why it is + * being developed in the {@link FederatedRunningQuery} class. + * + * @todo Cache any resources materialized for the query on this node (e.g., + * temporary graphs materialized from a peer or the client). A bop + * should be able to demand those data from the cache and otherwise + * have them be materialized. + * + * @todo Only use the values in the map for transient objects, such as a + * hash table which is not backed by the disk. 
For {@link ByteBuffer}s + * we want to make the references go through the {@link ResourceService} + * . For files, through the {@link ResourceManager}. + * + * @todo We need to track the resources in use by the query so they can be + * released when the query terminates. This includes: buffers; joins + * for which there is a chunk of binding sets that are currently being + * executed; downstream joins (they depend on the source joins to + * notify them when they are complete in order to decide their own + * termination condition); local hash tables which are part of a DHT + * (especially when they are persistent); buffers and disk resources + * allocated to N-way merge sorts, etc. + * + * @todo The set of buffers having data which has been accepted for this + * query. + * + * @todo The set of buffers having data which has been generated for this + * query. + */ + private final ConcurrentHashMap<UUID, Object> resourceMap = new ConcurrentHashMap<UUID, Object>(); + + /** + * @todo Maintain multiple allocation contexts. Some can be query wide. + * Others might be specific to a serviceId and/or sinkId. + */ + private final ConcurrentHashMap<Object/* key */, IAllocationContext> allocationContexts = new ConcurrentHashMap<Object, IAllocationContext>(); + public FederatedRunningQuery(FederatedQueryEngine queryEngine, long queryId, long readTimestamp, long writeTimestamp, long begin, long timeout, boolean controller, IQueryClient clientProxy, @@ -76,6 +149,17 @@ super(queryEngine, queryId, readTimestamp, writeTimestamp, begin, timeout, controller, clientProxy, query, queryBuffer); + + /* + * Note: getServiceUUID() should be a smart proxy method and thus not + * actually do RMI here. However, it is resolved eagerly and cached + * anyway. 
+ */ + try { + this.queryControllerUUID = getQueryController().getServiceUUID(); + } catch (RemoteException e) { + throw new RuntimeException(e); + } } @@ -87,38 +171,148 @@ } /** + * The allocation context key groups together allocations onto the same + * direct {@link ByteBuffer}s. There are different implementations depending + * on how it makes sense to group data for a given query. + */ + static abstract private class AllocationContextKey { + + /** + * Must be overridden. The queryId must be a part of each hashCode() in + * order to ensure that the hash codes are well distributed across + * different queries on the same node. + */ + @Override + abstract public int hashCode(); + + /** + * Must be overridden. + */ + @Override + abstract public boolean equals(Object o); + + } + + /** + * An allocation context which is shared by all operators running in the + * same query. + */ + static private class QueryContext extends AllocationContextKey { + private final Long queryId; + + QueryContext(final Long queryId) { + this.queryId = Long.valueOf(queryId); + } + + public int hashCode() { + return queryId.hashCode(); + } + + public boolean equals(final Object o) { + if (this == o) + return true; + if (!(o instanceof QueryContext)) + return false; + if (!queryId.equals(((QueryContext) o).queryId)) + return false; + return true; + } + } + + /** + * An allocation context which is shared by all operators running in the + * same query which target the same service. 
+ */ + static private class ServiceContext extends AllocationContextKey { + private final Long queryId; + + private final UUID serviceUUID; + + ServiceContext(final Long queryId, final UUID serviceUUID) { + this.queryId = queryId; + this.serviceUUID = serviceUUID; + } + + public int hashCode() { + return queryId.hashCode() * 31 + serviceUUID.hashCode(); + } + + public boolean equals(final Object o) { + if (this == o) + return true; + if (!(o instanceof ServiceContext)) + return false; + if (!queryId.equals(((ServiceContext) o).queryId)) + return false; + if (!serviceUUID.equals(((ServiceContext) o).serviceUUID)) + return false; + return true; + } + } + + /** + * An allocation context which is shared by all operators running in the + * same query which target the same shard (the same shard implies the same + * service, at least until we have HA with shard affinity). + */ + static private class ShardContext extends AllocationContextKey { + + private final Long queryId; + + private final int partitionId; + + ShardContext(final Long queryId, final int partitionId) { + this.queryId = queryId; + this.partitionId = partitionId; + } + + public int hashCode() { + return queryId.hashCode() * 31 + partitionId; + } + + public boolean equals(final Object o) { + if (this == o) + return true; + if (!(o instanceof ShardContext)) + return false; + if (!queryId.equals(((ShardContext) o).queryId)) + return false; + if (partitionId != partitionId) + return false; + return true; + } + } + + /** + * Return the {@link IAllocationContext} for the given key. + * + * @param key + * The key. + * + * @return The allocation context. + */ + private IAllocationContext getAllocationContext( + final AllocationContextKey key) { + + return getQueryEngine().getResourceService().getAllocator() + .getAllocationContext(key); + + } + + /** * {@inheritDoc} - * - * @return The #of chunks made available for consumption by the sink. This - * will always be ONE (1) for scale-up. 
For scale-out, there will be - * one chunk per index partition over which the intermediate results - * were mapped. - * - * FIXME SCALEOUT: This is where we need to map the binding sets - * over the shards for the target operator. Once they are mapped, - * write the binding sets onto an NIO buffer for the target node and - * then send an RMI message to the node telling it that there is a - * chunk available for the given (queryId,bopId,partitionId). - * <p> - * For selective queries in s/o, first format the data onto a list - * of byte[]s, one per target shard/node. Then, using a lock, obtain - * a ByteBuffer if there is none associated with the query yet. - * Otherwise, using the same lock, obtain a slice onto that - * ByteBuffer and put as much of the byte[] as will fit, continuing - * onto a newly recruited ByteBuffer if necessary. Release the lock - * and notify the target of the ByteBuffer slice (buffer#, off, - * len). Consider pushing the data proactively for selective - * queries. - * <p> - * For unselective queries in s/o, proceed as above but we need to - * get the data off the heap and onto the {@link ByteBuffer}s - * quickly (incrementally) and we want the consumers to impose flow - * control on the producers to bound the memory demand (this needs - * to be coordinated carefully to avoid deadlocks). Typically, large - * result sets should result in multiple passes over the consumer's - * shard rather than writing the intermediate results onto the disk. - * - * */ + * <p> + * This method is overridden to organize the output from one operator in + * order to make it available to another operator running on a different + * node. There are several cases which have to be handled and which are + * identified by the {@link BOp#getEvaluationContext()}. In addition, we + * need to handle low latency and high data volume queries somewhat + * differently. 
Except for {@link BOpEvaluationContext#ANY}, all of these + * cases wind up writing the intermediate results onto a direct + * {@link ByteBuffer} and notifying the receiving service that there are + * intermediate results which it can pull when it is ready to process them. + * This pattern allows the receiver to impose flow control on the producer. + */ @Override protected <E> int add(final int sinkId, final IBlockingBuffer<IBindingSet[]> sink) { @@ -132,27 +326,31 @@ throw new IllegalArgumentException(); switch (bop.getEvaluationContext()) { - case ANY: + case ANY: { return super.add(sinkId, sink); + } case HASHED: { /* - * FIXME The sink self describes the nodes over which the - * binding sets will be mapped and the hash function to be applied - * so we look up those metadata and apply them to distributed the - * binding sets across the nodes. + * @todo The sink must use annotations to describe the nodes over + * which the binding sets will be mapped and the hash function to be + * applied. Look up those annotations and apply them to distribute + * the binding sets across the nodes. */ throw new UnsupportedOperationException(); } case SHARDED: { /* - * FIXME The sink must read or write on a shard so we map the - * binding sets across the access path for the sink. + * The sink must read or write on a shard so we map the binding sets + * across the access path for the sink. * * @todo For a pipeline join, the predicate is the right hand * operator of the sink. This might be true for INSERT and DELETE - * operators as well. + * operators as well. [It is not, but make it so and document this + * pattern or have a common interface method which returns the + * IPredicate regardless of whether it is an operand or an + * annotation.] * - * @todo IKeyOrder tells us which index will be used and should be + * Note: IKeyOrder tells us which index will be used and should be * set on the predicate by the join optimizer. 
* * @todo Use the read or write timestamp depending on whether the @@ -163,28 +361,21 @@ * data contained in the sink (in fact, we should just process the * sink data in place). */ - final IPredicate<E> pred = null; // @todo - final IKeyOrder<E> keyOrder = null; // @todo - final long timestamp = getReadTimestamp(); // @todo + @SuppressWarnings("unchecked") + final IPredicate<E> pred = ((IShardwisePipelineOp) bop).getPredicate(); + final IKeyOrder<E> keyOrder = pred.getKeyOrder(); + final long timestamp = getReadTimestamp(); // @todo read vs write timestamp. final int capacity = 1000;// @todo + final int capacity2 = 1000;// @todo final MapBindingSetsOverShardsBuffer<IBindingSet, E> mapper = new MapBindingSetsOverShardsBuffer<IBindingSet, E>( getFederation(), pred, keyOrder, timestamp, capacity) { - - @Override - IBuffer<IBindingSet> newBuffer(PartitionLocator locator) { - // TODO Auto-generated method stub - return null; - } - + @Override + IBuffer<IBindingSet> newBuffer(PartitionLocator locator) { + return new BlockingBuffer<IBindingSet>(capacity2); + } }; /* * Map the binding sets over shards. - * - * FIXME The buffers created above need to become associated with - * this query as resources of the query. Once we are done mapping - * the binding sets over the shards, the target node for each buffer - * needs to be set an RMI message to let it know that there is a - * chunk available for it for the target operator. */ { final IAsynchronousIterator<IBindingSet[]> itr = sink @@ -201,18 +392,52 @@ sink.close(); } } + /* + * The allocation context. + * + * @todo use (queryId, serviceId, sinkId) when the target bop is + * high volume operator (this requires annotation by the query + * planner of the operator tree). + */ + final IAllocationContext allocationContext = getAllocationContext(new QueryContext( + getQueryId())); + + /* + * Generate the output chunks and notify the receivers. 
+ * + * @todo This stage should probably be integrated with the stage + * which maps the binding sets over the shards (immediately above) + * to minimize copying or visiting in the data. + */ + for (Map.Entry<PartitionLocator, IBuffer<IBindingSet>> e : mapper + .getSinks().entrySet()) { + + final PartitionLocator locator = e.getKey(); + + final IBuffer<IBindingSet> shardSink = e.getValue(); + + // FIXME harmonize IBuffer<IBindingSet> vs IBuffer<IBindingSet[]> +// sendOutputChunkReadyMessage(newOutputChunk(locator +// .getDataServiceUUID(), sinkId, allocationContext, +// shardSink)); + throw new UnsupportedOperationException(); + } - throw new UnsupportedOperationException(); } case CONTROLLER: { - final IQueryClient clientProxy = getQueryController(); + /* + * Format the binding sets onto a ByteBuffer and publish that + * ByteBuffer as a manager resource for the query and notify the + * query controller that data is available for it. + */ -// getQueryEngine().getResourceService().port; -// -// clientProxy.bufferReady(clientProxy, serviceAddr, getQueryId(), sinkId); + final IAllocationContext allocationContext = getAllocationContext(new QueryContext( + getQueryId())); - throw new UnsupportedOperationException(); + sendOutputChunkReadyMessage(newOutputChunk(queryControllerUUID, + sinkId, allocationContext, sink)); + } default: throw new AssertionError(bop.getEvaluationContext()); @@ -220,4 +445,193 @@ } + /** + * Create an {@link OutputChunk} from some intermediate results. + * + * @param serviceUUID + * The {@link UUID} of the {@link IQueryPeer} who is the + * recipient. + * @param sinkId + * The identifier of the target {@link BOp}. + * @param allocationContext + * The allocation context within which the {@link ByteBuffer}s + * will be managed for this {@link OutputChunk}. + * @param source + * The binding sets to be formatted onto a buffer. + * + * @return The {@link OutputChunk}. 
+ */ + protected OutputChunk newOutputChunk( + final UUID serviceUUID, + final int sinkId, + final IAllocationContext allocationContext, + final IBlockingBuffer<IBindingSet[]> source) { + + if (serviceUUID == null) + throw new IllegalArgumentException(); + + if (allocationContext == null) + throw new IllegalArgumentException(); + + if (source == null) + throw new IllegalArgumentException(); + + int nbytes = 0; + + final List<IAllocation> allocations = new LinkedList<IAllocation>(); + + final IAsynchronousIterator<IBindingSet[]> itr = source.iterator(); + + try { + + while (itr.hasNext()) { + + // Next chunk to be serialized. + final IBindingSet[] chunk = itr.next(); + + // serialize the chunk of binding sets. + final byte[] data = SerializerUtil.serialize(chunk); + + // track size of the allocations. + nbytes += data.length; + + // allocate enough space for those data. + final IAllocation[] tmp; + try { + tmp = allocationContext.alloc(data.length); + } catch (InterruptedException ex) { + throw new RuntimeException(ex); + } + + // copy the data into the allocations. + DirectBufferPoolAllocator.put(data, tmp); + + // append the new allocations. + allocations.addAll(Arrays.asList(tmp)); + + } + + } finally { + + itr.close(); + + } + + return new OutputChunk(getQueryId(), serviceUUID, sinkId, nbytes, + allocations); + + } + + protected IQueryPeer getQueryPeer(final UUID serviceUUID) { + + if (serviceUUID == null) + throw new IllegalArgumentException(); + + final IQueryPeer queryPeer; + + if (serviceUUID.equals(queryControllerUUID)) { + + // The target is the query controller. + queryPeer = getQueryController(); + + } else { + + // The target is some data service. + queryPeer = getQueryEngine().getQueryPeer(serviceUUID); + + } + + return queryPeer; + + } + + /** + * Notify a remote {@link IQueryPeer} that data is available for it. + * + * @todo If the target for the {@link OutputChunk} is this node then just + * drop it onto the {@link QueryEngine}. 
+ * + * @todo Report the #of bytes available with this message. However, first + * figure out whether that is the #of bytes in this {@link OutputChunk} or + * across all {@link OutputChunk}s available for the target service + * and sink. + * + * @todo Consider a fast path with inline RMI based transfer for small sets + * of data. We might just serialize to a byte[] and send that directly + * using a different message to notify the {@link IQueryPeer}. + */ + protected void sendOutputChunkReadyMessage(final OutputChunk outputChunk) { + + try { + + // The peer to be notified. + final IQueryPeer peerProxy = getQueryPeer(outputChunk.serviceId); + + // The Internet address and port where the peer can read the data + // from this node. + final InetSocketAddress serviceAddr = getQueryEngine() + .getResourceService().getAddr(); + + peerProxy.bufferReady(getQueryController(), serviceAddr, + getQueryId(), outputChunk.sinkId); + + } catch (RemoteException e) { + + throw new RuntimeException(e); + + } + + } + + /** + * A chunk of outputs. + * + * @todo We probably need to use the {@link DirectBufferPoolAllocator} to + * receive the chunks within the {@link ManagedResourceService} as + * well. + * + * @todo Release the allocations associated with each output chunk once it + * is received by the remote service. + * <p> + * When the query terminates all output chunks targeting any node + * EXCEPT the query controller should be immediately dropped. + * <p> + * If there is an error during query evaluation, then the output + * chunks for the query controller should be immediately dropped. + * <p> + * If the iterator draining the results on the query controller is + * closed, then the output chunks for the query controller should be + * immediately dropped. + * + * @todo There are a few things where the resource must be made available to + * more than one operator evaluation phase.
The best examples are + * temporary graphs for parallel closure and large collections of + * graphIds for SPARQL "NAMED FROM DATA SET" extensions. + */ + private static class OutputChunk { + + final long queryId; + + final UUID serviceId; + + final int sinkId; + + final int nbytes; + + final List<IAllocation> allocations; + + public OutputChunk(final long queryId, final UUID serviceId, + final int sinkId, final int nbytes, + final List<IAllocation> allocations) { + + this.queryId = queryId; + this.serviceId = serviceId; + this.sinkId = sinkId; + this.nbytes = nbytes; + this.allocations = allocations; + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-09-08 20:11:49 UTC (rev 3523) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-09-08 20:52:07 UTC (rev 3524) @@ -50,6 +50,7 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IShardwisePipelineOp; import com.bigdata.bop.IVariable; import com.bigdata.bop.engine.BOpStats; import com.bigdata.btree.BytesUtil; @@ -93,7 +94,8 @@ * @todo Break the star join logic out into its own join operator and test * suite. */ -public class PipelineJoin extends BindingSetPipelineOp { +public class PipelineJoin<E> extends BindingSetPipelineOp implements + IShardwisePipelineOp<E> { static private final Logger log = Logger.getLogger(PipelineJoin.class); @@ -256,22 +258,36 @@ } - protected BindingSetPipelineOp left() { + /** + * The left hand operator, which is the previous join in the pipeline join + * path. + */ + public BindingSetPipelineOp left() { return (BindingSetPipelineOp) get(0); } - protected IPredicate<?> right() { + /** + * The right hand operator, which is the {@link IPredicate}. 
+ */ + @SuppressWarnings("unchecked") + public IPredicate<E> right() { - return (IPredicate<?>) get(1); + return (IPredicate<E>) get(1); } + + public IPredicate<E> getPredicate() { + + return right(); + + } /** * @see Annotations#CONSTRAINTS */ - protected IConstraint[] constraints() { + public IConstraint[] constraints() { return getProperty(Annotations.CONSTRAINTS, null/* defaultValue */); @@ -280,7 +296,7 @@ /** * @see Annotations#OPTIONAL */ - protected boolean isOptional() { + public boolean isOptional() { return getProperty(Annotations.OPTIONAL, Annotations.DEFAULT_OPTIONAL); @@ -289,7 +305,7 @@ /** * @see Annotations#MAX_PARALLEL */ - protected int getMaxParallel() { + public int getMaxParallel() { return getProperty(Annotations.MAX_PARALLEL, Annotations.DEFAULT_MAX_PARALLEL); @@ -298,7 +314,7 @@ /** * @see Annotations#SELECT */ - protected IVariable<?>[] variablesToKeep() { + public IVariable<?>[] variablesToKeep() { return getProperty(Annotations.SELECT, null/* defaultValue */); @@ -325,7 +341,7 @@ /** * The join that is being executed. */ - final private PipelineJoin joinOp; + final private PipelineJoin<?> joinOp; /** * The constraint (if any) specified for the join operator. 
@@ -450,7 +466,7 @@ * @param context */ public JoinTask(// - final PipelineJoin joinOp,// + final PipelineJoin<?> joinOp,// final BOpContext<IBindingSet> context ) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java 2010-09-08 20:11:49 UTC (rev 3523) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java 2010-09-08 20:52:07 UTC (rev 3524) @@ -27,8 +27,6 @@ package com.bigdata.bop.mutation; -import java.util.Arrays; -import java.util.List; import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; @@ -39,7 +37,7 @@ import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; -import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.IShardwisePipelineOp; import com.bigdata.bop.engine.BOpStats; import com.bigdata.btree.ILocalBTreeView; import com.bigdata.btree.ITupleSerializer; @@ -59,7 +57,8 @@ * @param <E> * The generic type of the elements written onto the index. */ -public class InsertOp<E> extends BindingSetPipelineOp { +public class InsertOp<E> extends BindingSetPipelineOp implements + IShardwisePipelineOp<E> { /** * @@ -69,12 +68,12 @@ public interface Annotations extends BindingSetPipelineOp.Annotations { /** - * An ordered {@link IVariableOrConstant}[]. Elements will be created - * using the binding sets which flow through the operator and - * {@link IRelation#newElement(java.util.List, IBindingSet)}. + * An {@link IPredicate}. The {@link IPredicate#asBound(IBindingSet)} + * predicate will be used to create the elements to be inserted into + * the relation. * - * @todo This should be an {@link IPredicate} and should be the right - * hand operand just like for a JOIN. 
+ * @see IPredicate#asBound(IBindingSet) + * @see IRelation#newElement(java.util.List, IBindingSet) */ String SELECTED = InsertOp.class.getName() + ".selected"; @@ -116,9 +115,8 @@ /** * @see Annotations#SELECTED */ - public IVariableOrConstant<?>[] getSelected() { + public IPredicate<E> getPredicate() { -// return (IVariableOrConstant<?>[]) getProperty(Annotations.SELECTED); return getRequiredProperty(Annotations.SELECTED); } @@ -164,7 +162,7 @@ */ private final IBlockingBuffer<IBindingSet[]> sink; - private List<IVariableOrConstant<?>> selected; + private IPredicate<E> predicate; private final IRelation<E> relation; @@ -181,7 +179,7 @@ sink = context.getSink(); - selected = Arrays.asList(op.getSelected()); + predicate = op.getPredicate(); relation = context.getWriteRelation(op.getRelation()); @@ -229,7 +227,7 @@ final IBindingSet bset = chunk[i]; - final E e = relation.newElement(selected, bset); + final E e = relation.newElement(predicate.args(), bset); final byte[] key = keyOrder.getKey(keyBuilder, e); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/DirectBufferPoolAllocator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/DirectBufferPoolAllocator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/io/DirectBufferPoolAllocator.java 2010-09-08 20:52:07 UTC (rev 3524) @@ -0,0 +1,653 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 8, 2010 + */ + +package com.bigdata.io; + +import java.nio.BufferOverflowException; +import java.nio.ByteBuffer; +import java.util.LinkedList; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantLock; + +import com.bigdata.service.ResourceService; +import com.bigdata.util.concurrent.Haltable; + +/** + * An allocator for {@link ByteBuffer} slices backed by direct + * {@link ByteBuffer}s allocated against a {@link DirectBufferPool}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + * + * @todo Make the type of the identifier for the {@link IAllocation} generic + * using a factory pattern (we need {@link UUID} for scale-out, but the + * class could be reused for other purposes as well). [The allocation + * context identifier should continue to be an application specified + * object.] + */ +public class DirectBufferPoolAllocator { + + /** + * The pool from which the direct {@link ByteBuffer}s are allocated. + */ + private final DirectBufferPool directBufferPool; + + /** + * The set of allocation contexts. + */ + private final ConcurrentHashMap<Object/* key */, AllocationContext> allocationContexts = new ConcurrentHashMap<Object, AllocationContext>(); + + /** + * The set of {@link IAllocation} outstanding against the + * {@link #directBufferPool}. + */ + private final ConcurrentHashMap<UUID, Allocation> allocations = new ConcurrentHashMap<UUID, Allocation>(); + + /** + * @todo Maybe replace this with a private {@link Haltable} (or extend + * {@link Haltable}) so we can test {@link Haltable#halted()} in + * critical methods? 
If we expose the {@link Haltable} then the + * {@link ResourceService} can also check it to see whether all + * allocations have been invalidated. However, that will not help us + * to invalidate a specific {@link IAllocationContext}. For that + * purpose we would need to do pretty much the same thing recursively. + */ + private final AtomicBoolean open = new AtomicBoolean(true); + + /** + * + * @param pool + * The pool from which the direct {@link ByteBuffer}s are + * allocated. + */ + public DirectBufferPoolAllocator(final DirectBufferPool pool) { + + this.directBufferPool = pool; + + } + + /** + * Extended to {@link #close()} the allocator. + */ + @Override + protected void finalize() throws Throwable { + + close(); + + super.finalize(); + + } + + /** + * Releases all {@link AllocationContext}s and all direct {@link ByteBuffer} + * s which they are using. + */ + public void close() { + + if (open.compareAndSet(true/* expect */, false/* update */)) { + + for (AllocationContext c : allocationContexts.values()) { + + c.release(); + + } + + } + + } + + /** + * The maximum #of bytes in a single {@link IAllocation}. + */ + public int getMaxSlotSize() { + + return directBufferPool.getBufferCapacity(); + + } + + /** + * Return an allocation context for the key. If none exists for that key, + * then one is atomically created and returned. + * + * @param key + * A key which uniquely identifies that context. The key will be + * inserted into a hash table and therefore must have appropriate + * hashCode() and equals() methods. + * + * @return The allocation context. + */ + public IAllocationContext getAllocationContext(final Object key) { + + AllocationContext c = allocationContexts.get(key); + + if (c == null) { + + final AllocationContext t = allocationContexts.putIfAbsent(key, + c = new AllocationContext(key)); + + if (t != null) { + + // lost the race to another thread. + c = t; + + } + + } + + return c; + + } + + /** + * Return the allocation associated with that id. 
+ * + * @param id + * The allocation identifier. + * + * @return The allocation -or- <code>null</code> if there is no such + * allocation. + */ + public IAllocation getAllocation(final UUID id) { + + return allocations.get(id); + + } + +// /** +// * A direct {@link ByteBuffer} allocated from the {@link #directBufferPool} +// * together with the identifier assigned to that {@link ByteBuffer} (we can +// * not directly insert {@link ByteBuffer}s into the keys of a hash map since +// * their hash code is a function of their content). +// */ +// private class DirectBufferAllocation { +// +// private final Long id; +// +// private final ByteBuffer directBuffer; +// +// public DirectBufferAllocation(final Long id, +// final ByteBuffer directBuffer) { +// +// if (id == null) +// throw new IllegalArgumentException(); +// +// if (directBuffer == null) +// throw new IllegalArgumentException(); +// +// this.id = id; +// +// this.directBuffer = directBuffer; +// +// } +// +// } + + /** + * An allocation context links some application specified key with a list + * of direct {@link ByteBuffer}s on which allocations have been made by + * the application. + */ + public interface IAllocationContext { + + /** + * Allocate a series of {@link ByteBuffer} slices on which the + * application may write data. The application is encouraged to maintain + * the order of the allocations in the array in order to preserve the + * ordering of data written onto those allocation. + * + * @param nbytes + * The #of bytes required. + * + * @return The {@link UUID}s of those allocations. + * + * @throws InterruptedException + */ + IAllocation[] alloc(int nbytes) throws InterruptedException; + + /** + * Release all allocations made against this allocation context. + */ + void release(); + + } + + /** + * An allocation against a direct {@link ByteBuffer}. + */ + public interface IAllocation { + + /** The allocation identifier. 
*/ + public UUID getId(); + + /** + * The allocated {@link ByteBuffer#slice()}. + */ + public ByteBuffer getSlice(); + + /** + * Release this allocation. + * <p> + * Note: The implementation is encouraged to release the associated + * direct {@link ByteBuffer} if there are no remaining allocations + * against it and MAY make the slice of the buffer available for + * reallocation. + * <p> + * Note: An {@link InterruptedException} MAY be thrown. This allows us + * to handle cases where a concurrent process (such as a query) was + * halted and its component threads were interrupted. By looking for the + * interrupt, we can avoid attempts to release an allocation in some + * thread where the entire {@link IAllocationContext} has already been + * released by another thread. + * + * @throws InterruptedException + */ + public void release() throws InterruptedException; + + } + + /** + * An allocation against a direct {@link ByteBuffer}. + */ + // Note: package private for the unit tests. + /*private*/ class Allocation implements IAll... [truncated message content] |
From: <tho...@us...> - 2010-09-14 14:46:10
|
Revision: 3543 http://bigdata.svn.sourceforge.net/bigdata/?rev=3543&view=rev Author: thompsonbry Date: 2010-09-14 14:46:01 +0000 (Tue, 14 Sep 2010) Log Message: ----------- Refactored the RunState and ChunkTask out of the RunningQuery. Working on the federation based unit test setup. We can not use the EmbeddedFederation for this because the serviceId is shared by both data service instances. Unfortunately, we can no longer easily use the JiniServiceHelper either due to things like the jini group setup. I am going to tackle this next on a workstation with more RAM so I can attach to a running federation. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BindingSetChunk.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/HaltOpMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StartOpMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/MapBindingSetsOverShardsBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/NIOChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/ThickChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/R.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestMapBindingSetsOverShards.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestNIOChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestThickChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/jini/start/config/JiniCoreServicesConfiguration.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/jini/start/process/JiniCoreServicesProcessHelper.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkAccessor.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/service/jini/util/ branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/service/jini/util/JiniCoreServicesHelper.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/service/jini/util/JiniServicesHelper.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/service/jini/util/JiniCoreServicesHelper.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/java/com/bigdata/service/jini/util/JiniServicesHelper.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BindingSetChunk.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BindingSetChunk.java 2010-09-14 13:50:31 UTC (rev 3542) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BindingSetChunk.java 2010-09-14 14:46:01 UTC (rev 3543) @@ -3,7 +3,6 @@ import java.io.Serializable; import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; import 
com.bigdata.bop.fed.FederatedRunningQuery; import com.bigdata.relation.accesspath.IAsynchronousIterator; @@ -94,8 +93,16 @@ // NOP } - public IAsynchronousIterator<E[]> iterator() { - return source; + public IChunkAccessor<E> getChunkAccessor() { + return new ChunkAccessor(); } + private class ChunkAccessor implements IChunkAccessor<E> { + + public IAsynchronousIterator<E[]> iterator() { + return source; + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/HaltOpMessage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/HaltOpMessage.java 2010-09-14 13:50:31 UTC (rev 3542) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/HaltOpMessage.java 2010-09-14 14:46:01 UTC (rev 3543) @@ -18,68 +18,66 @@ private static final long serialVersionUID = 1L; /** The identifier of the query. */ - final long queryId; + final public long queryId; /** The identifier of the operator. */ - final int bopId; + final public int bopId; /** - * The index partition identifier against which the operator was - * executing. + * The index partition identifier against which the operator was executing. */ - final int partitionId; + final public int partitionId; /** * The identifier of the service on which the operator was executing. */ - final UUID serviceId; + final public UUID serviceId; /** * * The cause and <code>null</code> if the operator halted normally. */ - final Throwable cause; + final public Throwable cause; /** - * The operator identifier for the primary sink -or- <code>null</code> - * if there is no primary sink (for example, if this is the last - * operator in the pipeline). + * The operator identifier for the primary sink -or- <code>null</code> if + * there is no primary sink (for example, if this is the last operator in + * the pipeline). 
*/ - final Integer sinkId; + final public Integer sinkId; /** - * The number of the {@link BindingSetChunk}s that were output for the - * primary sink. (This information is used for the atomic termination - * decision.) + * The number of the {@link IChunkMessage}s that were output for the primary + * sink. (This information is used for the atomic termination decision.) * <p> * For a given downstream operator this is ONE (1) for scale-up. For - * scale-out, this is one per index partition over which the - * intermediate results were mapped. + * scale-out, this is one per index partition over which the intermediate + * results were mapped. */ - final int sinkChunksOut; + final public int sinkChunksOut; /** - * The operator identifier for the alternative sink -or- - * <code>null</code> if there is no alternative sink. + * The operator identifier for the alternative sink -or- <code>null</code> + * if there is no alternative sink. */ - final Integer altSinkId; + final public Integer altSinkId; /** - * The number of the {@link BindingSetChunk}s that were output for the - * alternative sink. (This information is used for the atomic - * termination decision.) + * The number of the {@link IChunkMessage}s that were output for the + * alternative sink. (This information is used for the atomic termination + * decision.) * <p> * For a given downstream operator this is ONE (1) for scale-up. For - * scale-out, this is one per index partition over which the - * intermediate results were mapped. It is zero if there was no - * alternative sink for the operator. + * scale-out, this is one per index partition over which the intermediate + * results were mapped. It is zero if there was no alternative sink for the + * operator. */ - final int altSinkChunksOut; + final public int altSinkChunksOut; /** - * The statistics for the execution of the bop against the partition on - * the service. + * The statistics for the execution of the bop against the partition on the + * service. 
*/ - final BOpStats taskStats; + final public BOpStats taskStats; /** * @param queryId @@ -88,19 +86,18 @@ * The operator whose execution phase has terminated for a * specific index partition and input chunk. * @param partitionId - * The index partition against which the operator was - * executed. + * The index partition against which the operator was executed. * @param serviceId * The node which executed the operator. * @param cause * <code>null</code> unless execution halted abnormally. * @param chunksOut - * A map reporting the #of binding set chunks which were - * output for each downstream operator for which at least one - * chunk of output was produced. + * A map reporting the #of binding set chunks which were output + * for each downstream operator for which at least one chunk of + * output was produced. * @param taskStats - * The statistics for the execution of that bop on that shard - * and service. + * The statistics for the execution of that bop on that shard and + * service. */ public HaltOpMessage( // @@ -110,17 +107,6 @@ final Integer altSinkId, final int altSinkChunksOut,// final BOpStats taskStats) { - if (altSinkId != null && sinkId == null) { - // The primary sink must be defined if the altSink is defined. - throw new IllegalArgumentException(); - } - - if (sinkId != null && altSinkId != null - && sinkId.intValue() == altSinkId.intValue()) { - // The primary and alternative sink may not be the same operator. 
- throw new IllegalArgumentException(); - } - this.queryId = queryId; this.bopId = bopId; this.partitionId = partitionId; @@ -132,4 +118,5 @@ this.altSinkChunksOut = altSinkChunksOut; this.taskStats = taskStats; } -} \ No newline at end of file + +} Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkAccessor.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkAccessor.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkAccessor.java 2010-09-14 14:46:01 UTC (rev 3543) @@ -0,0 +1,96 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 13, 2010 + */ + +package com.bigdata.bop.engine; + +import java.nio.ByteBuffer; +import java.util.concurrent.BlockingQueue; + +import com.bigdata.bop.IBindingSet; +import com.bigdata.relation.accesspath.BlockingBuffer; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.striterator.IChunkedIterator; + +/** + * API providing a variety of ways to access chunks of data (data are typically + * elements or binding sets). 
+ * + * @todo Expose an {@link IChunkedIterator}, which handles both element at a + * time and chunk at a time. + * + * @todo Expose a mechanism to visit the direct {@link ByteBuffer} slices in + * which the data are stored. For an operator which executes on a GPU, we + * want to transfer the data from the direct {@link ByteBuffer} in which + * it was received into a direct {@link ByteBuffer} which is a slice onto + * its VRAM. (And obviously we need to do the reverse with the outputs of + * a GPU operator). + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface IChunkAccessor<E> { + + /** + * Visit the binding sets in the chunk. + * + * @deprecated We do not need to use {@link IAsynchronousIterator} any more. + * This could be much more flexible and should be harmonized to + * support high volume operators, GPU operators, etc. probably + * the right thing to do is introduce another interface here + * with a getChunk():IChunk where IChunk let's you access the + * chunks data in different ways (and chunks can be both + * {@link IBindingSet}[]s and element[]s so we might need to + * raise that into the interfaces and/or generics as well). + * + * @todo It is likely that we can convert to the use of + * {@link BlockingQueue} instead of {@link BlockingBuffer} in the + * operators and then handle the logic for combining chunks inside of + * the {@link QueryEngine}. E.g., by scanning this list for chunks for + * the same bopId and combining them logically into a single chunk. + * <p> + * For scale-out, chunk combination will naturally occur when the node + * on which the operator will run requests the {@link ByteBuffer}s + * from the source nodes. Those will get wrapped up logically into a + * source for processing. For selective operators, those chunks can be + * combined before we execute the operator. For unselective operators, + * we are going to run over all the data anyway. 
+ */ + IAsynchronousIterator<E[]> iterator(); + +// /** +// * Chunked iterator pattern. The iterator may be used for element at a time +// * processing, but the underlying iterator operators in chunks. The size of +// * the chunks depends originally on the data producer, but smaller chunks +// * may be automatically combined into larger chunks both during production +// * and when data are buffered, whether to get them off of the heap or to +// * transfer them among nodes. +// * +// * @return +// */ +// IChunkedIterator<E> chunkedIterator(); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkAccessor.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkMessage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkMessage.java 2010-09-14 13:50:31 UTC (rev 3542) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkMessage.java 2010-09-14 14:46:01 UTC (rev 3543) @@ -57,35 +57,10 @@ * Discard the materialized data. */ void release(); - + /** - * Visit the binding sets in the chunk. - * - * @todo we do not need to use {@link IAsynchronousIterator} any more. This - * could be much more flexible and should be harmonized to support - * high volume operators, GPU operators, etc. probably the right thing - * to do is introduce another interface here with a getChunk():IChunk - * where IChunk let's you access the chunks data in different ways - * (and chunks can be both {@link IBindingSet}[]s and element[]s so we - * might need to raise that into the interfaces and/or generics as - * well). 
- * - * @todo It is likely that we can convert to the use of - * {@link BlockingQueue} instead of {@link BlockingBuffer} in the - * operators and then handle the logic for combining chunks inside of - * the {@link QueryEngine}. E.g., by scanning this list for chunks for - * the same bopId and combining them logically into a single chunk. - * <p> - * For scale-out, chunk combination will naturally occur when the node - * on which the operator will run requests the {@link ByteBuffer}s - * from the source nodes. Those will get wrapped up logically into a - * source for processing. For selective operators, those chunks can be - * combined before we execute the operator. For unselective operators, - * we are going to run over all the data anyway. - * - * @throws IllegalStateException - * if the payload is not materialized. + * Return an interface which may be used to access the chunk's data. */ - IAsynchronousIterator<E[]> iterator(); + IChunkAccessor<E> getChunkAccessor(); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java 2010-09-14 13:50:31 UTC (rev 3542) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java 2010-09-14 14:46:01 UTC (rev 3543) @@ -2,21 +2,24 @@ import java.rmi.RemoteException; +import com.bigdata.bop.BindingSetPipelineOp; + /** * Interface for a client executing queries (the query controller). */ public interface IQueryClient extends IQueryPeer { -// /** -// * Return the query. -// * -// * @param queryId -// * The query identifier. -// * @return The query. -// * -// * @throws RemoteException -// */ -// public BOp getQuery(long queryId) throws RemoteException; + /** + * Return the query. + * + * @param queryId + * The query identifier. + * @return The query. 
+ * + * @throws IllegalArgumentException + * if there is no such query. + */ + public BindingSetPipelineOp getQuery(long queryId) throws RemoteException; /** * Notify the client that execution has started for some query, operator, Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-09-14 13:50:31 UTC (rev 3542) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-09-14 14:46:01 UTC (rev 3543) @@ -45,7 +45,6 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; import com.bigdata.bop.bset.Union; -import com.bigdata.bop.fed.FederatedQueryEngine; import com.bigdata.btree.BTree; import com.bigdata.btree.IndexSegment; import com.bigdata.btree.view.FusedView; @@ -54,7 +53,6 @@ import com.bigdata.rdf.spo.SPORelation; import com.bigdata.relation.IMutableRelation; import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.rule.IRule; import com.bigdata.relation.rule.Program; @@ -413,7 +411,7 @@ /** * The currently executing queries. 
*/ - final ConcurrentHashMap<Long/* queryId */, RunningQuery> runningQueries = new ConcurrentHashMap<Long, RunningQuery>(); + final protected ConcurrentHashMap<Long/* queryId */, RunningQuery> runningQueries = new ConcurrentHashMap<Long, RunningQuery>(); /** * A priority queue of {@link RunningQuery}s having binding set chunks @@ -513,7 +511,8 @@ */ private class QueryEngineTask implements Runnable { public void run() { - System.err.println("QueryEngine running: " + this); + if(log.isInfoEnabled()) + log.info("running: " + this); while (true) { try { final RunningQuery q = priorityQueue.take(); @@ -522,11 +521,12 @@ continue; final IChunkMessage<IBindingSet> chunk = q.chunksIn.poll(); if (log.isTraceEnabled()) - log.trace("Accepted chunk: queryId=" + queryId - + ", bopId=" + chunk.getBOpId()); - // create task. + log.trace("Accepted chunk: " + chunk); try { + // create task. final FutureTask<?> ft = q.newChunkTask(chunk); + if (log.isDebugEnabled()) + log.debug("Running chunk: " + chunk); // execute task. localIndexManager.getExecutorService().execute(ft); } catch (RejectedExecutionException ex) { @@ -670,6 +670,9 @@ // remove from the set of running queries. runningQueries.remove(q.getQueryId(), q); + + if (log.isInfoEnabled()) + log.info("Removed entry for query: " + q.getQueryId()); } @@ -800,6 +803,17 @@ return runningQueries.get(queryId); } + + public BindingSetPipelineOp getQuery(final long queryId) { + + final RunningQuery q = getRunningQuery(queryId); + + if (q == null) + throw new IllegalArgumentException(); + + return q.getQuery(); + + } /** * Places the {@link RunningQuery} object into the internal map. @@ -827,30 +841,8 @@ final IQueryClient clientProxy, final BindingSetPipelineOp query) { return new RunningQuery(this, queryId, true/* controller */, - this/* clientProxy */, query, newQueryBuffer(query)); + this/* clientProxy */, query); } - /** - * Return a buffer onto which the solutions will be written. 
- * - * @todo This method is probably in the wrong place. We should use whatever - * is associated with the top-level {@link BOp} in the query and then - * rely on the NIO mechanisms to move the data around as necessary. - * - * @todo Could return a data structure which encapsulates the query results - * and could allow multiple results from a query, e.g., one per step - * in a program. - * - * @deprecated This is going away. - * - * @see FederatedQueryEngine#newQueryBuffer(BindingSetPipelineOp) - */ - protected IBlockingBuffer<IBindingSet[]> newQueryBuffer( - final BindingSetPipelineOp query) { - - return query.newBuffer(); - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-09-14 13:50:31 UTC (rev 3542) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-09-14 14:46:01 UTC (rev 3543) @@ -53,6 +53,7 @@ import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.NoSuchBOpException; +import com.bigdata.bop.PipelineOp; import com.bigdata.bop.bset.CopyBindingSetOp; import com.bigdata.bop.solutions.SliceOp; import com.bigdata.journal.IIndexManager; @@ -72,6 +73,12 @@ .getLogger(RunningQuery.class); /** + * Logger for the {@link ChunkTask}. + */ + private final static Logger chunkTaskLog = Logger + .getLogger(ChunkTask.class); + + /** * The run state of the query and the result of the computation iff it * completes execution normally (without being interrupted, cancelled, etc). */ @@ -91,11 +98,6 @@ /** The unique identifier for this query. */ final private long queryId; -// /** -// * The timestamp when the query was accepted by this node (ms). -// */ -// final private long begin; - /** * The query deadline. 
The value is the system clock time in milliseconds * when the query is due and {@link Long#MAX_VALUE} if there is no deadline. @@ -105,12 +107,6 @@ final private AtomicLong deadline = new AtomicLong(Long.MAX_VALUE); /** - * How long the query is allowed to run (elapsed milliseconds) -or- - * {@link Long#MAX_VALUE} if there is no deadline. - */ - final private long timeout; - - /** * <code>true</code> iff the outer {@link QueryEngine} is the controller for * this query. */ @@ -126,7 +122,7 @@ final private IQueryClient clientProxy; /** The query. */ - final private BOp query; + final private BindingSetPipelineOp query; /** * The buffer used for the overall output of the query pipeline. @@ -153,59 +149,20 @@ private final ConcurrentHashMap<BSBundle, Future<?>> operatorFutures = new ConcurrentHashMap<BSBundle, Future<?>>(); /** - * A lock guarding {@link #runningTaskCount}, {@link #availableChunkCount}, - * {@link #availableChunkCountMap}. + * A lock guarding {@link RunState#runningTaskCount}, + * {@link RunState#availableChunkCount}, + * {@link RunState#availableChunkCountMap}. This is <code>null</code> unless + * this is the query controller. + * + * @see RunState */ - private final ReentrantLock runStateLock = new ReentrantLock(); + private final ReentrantLock runStateLock; /** - * The #of tasks for this query which have started but not yet halted and - * ZERO (0) if this is not the query coordinator. - * <p> - * This is guarded by the {@link #runningStateLock}. + * The run state of this query and <code>null</code> unless this is the + * query controller. */ - private long runningTaskCount = 0; - - /** - * The #of chunks for this query of which a running task has made available - * but which have not yet been accepted for processing by another task and - * ZERO (0) if this is not the query coordinator. - * <p> - * This is guarded by the {@link #runningStateLock}. 
- */ - private long availableChunkCount = 0; - - /** - * A map reporting the #of chunks available for each operator in the - * pipeline (we only report chunks for pipeline operators). The total #of - * chunks available for any given operator in the pipeline is reported by - * {@link #availableChunkCount}. - * <p> - * The movement of the intermediate binding set chunks forms an acyclic - * directed graph. This map is used to track the #of chunks available for - * each bop in the pipeline. When a bop has no more incoming chunks, we send - * an asynchronous message to all nodes on which that bop had executed - * informing the {@link QueryEngine} on that node that it should immediately - * release all resources associated with that bop. - * <p> - * This is guarded by the {@link #runningStateLock}. - */ - private final Map<Integer/* bopId */, AtomicLong/* availableChunkCount */> availableChunkCountMap = new LinkedHashMap<Integer, AtomicLong>(); - - /** - * A collection reporting on the #of instances of a given {@link BOp} which - * are concurrently executing. - * <p> - * This is guarded by the {@link #runningStateLock}. - */ - private final Map<Integer/*bopId*/, AtomicLong/*runningCount*/> runningCountMap = new LinkedHashMap<Integer, AtomicLong>(); - - /** - * A collection of the operators which have executed at least once. - * <p> - * This is guarded by the {@link #runningStateLock}. - */ - private final Set<Integer/*bopId*/> startedSet = new LinkedHashSet<Integer>(); + final private RunState runState; /** * The chunks available for immediate processing (they must have been @@ -285,7 +242,7 @@ /** * Return the operator tree for this query. 
*/ - public BOp getQuery() { + public BindingSetPipelineOp getQuery() { return query; } @@ -329,8 +286,8 @@ public RunningQuery(final QueryEngine queryEngine, final long queryId, // final long begin, final boolean controller, - final IQueryClient clientProxy, final BOp query, - final IBlockingBuffer<IBindingSet[]> queryBuffer) { + final IQueryClient clientProxy, final BindingSetPipelineOp query + ) { if (queryEngine == null) throw new IllegalArgumentException(); @@ -342,21 +299,41 @@ throw new IllegalArgumentException(); this.queryEngine = queryEngine; + this.queryId = queryId; -// this.begin = begin; + this.controller = controller; + this.clientProxy = clientProxy; + this.query = query; - this.queryBuffer = queryBuffer; + this.bopIndex = BOpUtility.getIndex(query); + this.statsMap = controller ? new ConcurrentHashMap<Integer, BOpStats>() : null; + + runStateLock = controller ? new ReentrantLock() : null; - this.timeout = query.getProperty(BOp.Annotations.TIMEOUT, - BOp.Annotations.DEFAULT_TIMEOUT); + runState = controller ? new RunState(this) : null; + + this.queryBuffer = newQueryBuffer(); + + } - if (timeout < 0) - throw new IllegalArgumentException(); + /** + * Return the buffer on which the solutions will be written (if any). This + * is based on the top-level operator in the query plan. + * + * @return The buffer for the solutions -or- <code>null</code> if the + * top-level operator in the query plan is a mutation operator. + */ + protected IBlockingBuffer<IBindingSet[]> newQueryBuffer() { + + if (query.isMutation()) + return null; + + return ((BindingSetPipelineOp) query).newBuffer(); } @@ -423,27 +400,88 @@ if (log.isDebugEnabled()) log.debug("queryId=" + queryId + ", chunksIn.size()=" - + chunksIn.size()); + + chunksIn.size() + ", msg=" + msg); } /** - * Invoked once by the query controller with the initial - * {@link BindingSetChunk} which gets the query moving. - * - * @todo this should reject multiple invocations for a given query instance. 
+ * The run state for the query. */ - public void startQuery(final IChunkMessage<IBindingSet> chunk) { - if (!controller) - throw new UnsupportedOperationException(); - if (chunk == null) - throw new IllegalArgumentException(); - if (chunk.getQueryId() != queryId) // @todo equals() if queryId is UUID. - throw new IllegalArgumentException(); - final int bopId = chunk.getBOpId(); - runStateLock.lock(); - try { - lifeCycleSetUpQuery(); + static private class RunState { + + /** + * The query. + */ + private final RunningQuery query; + + /** + * The query identifier. + */ + private final long queryId; + + /** + * The #of tasks for this query which have started but not yet halted + * and ZERO (0) if this is not the query coordinator. + * <p> + * This is guarded by the {@link #runningStateLock}. + */ + private long runningTaskCount = 0; + + /** + * The #of chunks for this query of which a running task has made + * available but which have not yet been accepted for processing by + * another task and ZERO (0) if this is not the query coordinator. + * <p> + * This is guarded by the {@link #runningStateLock}. + */ + private long availableChunkCount = 0; + + /** + * A map reporting the #of chunks available for each operator in the + * pipeline (we only report chunks for pipeline operators). The total + * #of chunks available across all operators in the pipeline is reported + * by {@link #availableChunkCount}. + * <p> + * The movement of the intermediate binding set chunks forms an acyclic + * directed graph. This map is used to track the #of chunks available + * for each bop in the pipeline. When a bop has no more incoming chunks, + * we send an asynchronous message to all nodes on which that bop had + * executed informing the {@link QueryEngine} on that node that it + * should immediately release all resources associated with that bop. + * <p> + * This is guarded by the {@link #runningStateLock}. 
+ */ + private final Map<Integer/* bopId */, AtomicLong/* availableChunkCount */> availableChunkCountMap = new LinkedHashMap<Integer, AtomicLong>(); + + /** + * A collection reporting on the #of instances of a given {@link BOp} + * which are concurrently executing. + * <p> + * This is guarded by the {@link #runningStateLock}. + */ + private final Map<Integer/* bopId */, AtomicLong/* runningCount */> runningCountMap = new LinkedHashMap<Integer, AtomicLong>(); + + /** + * A collection of the operators which have executed at least once. + * <p> + * This is guarded by the {@link #runningStateLock}. + */ + private final Set<Integer/* bopId */> startedSet = new LinkedHashSet<Integer>(); + + public RunState(final RunningQuery query) { + + this.query = query; + + this.queryId = query.queryId; + + } + + public void startQuery(final IChunkMessage<?> msg) { + + query.lifeCycleSetUpQuery(); + + final Integer bopId = Integer.valueOf(msg.getBOpId()); + availableChunkCount++; { AtomicLong n = availableChunkCountMap.get(bopId); @@ -451,114 +489,81 @@ availableChunkCountMap.put(bopId, n = new AtomicLong()); n.incrementAndGet(); } + if (log.isInfoEnabled()) log.info("queryId=" + queryId + ",runningTaskCount=" + runningTaskCount + ",availableChunks=" + availableChunkCount); + System.err.println("startQ : bopId=" + bopId + ",running=" + runningTaskCount + ",available=" + availableChunkCount); - queryEngine.acceptChunk(chunk); - } finally { - runStateLock.unlock(); + } - } - /** - * Message provides notice that the operator has started execution and will - * consume some specific number of binding set chunks. - * - * @param bopId - * The identifier of the operator. - * @param partitionId - * The index partition identifier against which the operator is - * executing. - * @param serviceId - * The identifier of the service on which the operator is - * executing. - * @param fanIn - * The #of chunks that will be consumed by the operator - * execution. 
- * - * @throws UnsupportedOperationException - * If this node is not the query coordinator. - */ - public void startOp(final StartOpMessage msg) { - if (!controller) - throw new UnsupportedOperationException(); - final Integer bopId = Integer.valueOf(msg.bopId); - runStateLock.lock(); - try { + public void startOp(final StartOpMessage msg) { + + final Integer bopId = Integer.valueOf(msg.bopId); + runningTaskCount++; { AtomicLong n = runningCountMap.get(bopId); if (n == null) runningCountMap.put(bopId, n = new AtomicLong()); n.incrementAndGet(); - if(startedSet.add(bopId)) { + if (startedSet.add(bopId)) { // first evaluation pass for this operator. - lifeCycleSetUpOperator(msg.bopId); + query.lifeCycleSetUpOperator(bopId); } } + availableChunkCount -= msg.nchunks; + { AtomicLong n = availableChunkCountMap.get(bopId); if (n == null) throw new AssertionError(); n.addAndGet(-msg.nchunks); } - System.err.println("startOp: bopId=" + msg.bopId + ",running=" + + System.err.println("startOp: bopId=" + bopId + ",running=" + runningTaskCount + ",available=" + availableChunkCount + ",fanIn=" + msg.nchunks); - if (deadline.get() < System.currentTimeMillis()) { + + // check deadline. + if (query.deadline.get() < System.currentTimeMillis()) { + if (log.isTraceEnabled()) - log.trace("queryId: deadline expired."); - future.halt(new TimeoutException()); - cancel(true/* mayInterruptIfRunning */); + log.trace("expired: queryId=" + queryId + ", deadline=" + + query.deadline); + + query.future.halt(new TimeoutException()); + + query.cancel(true/* mayInterruptIfRunning */); + } - } finally { - runStateLock.unlock(); + } - } - /** - * Message provides notice that the operator has ended execution. The - * termination conditions for the query are checked. (For scale-out, the - * node node controlling the query needs to be involved for each operator - * start/stop in order to make the termination decision atomic). 
- * - * @throws UnsupportedOperationException - * If this node is not the query coordinator. - */ - public void haltOp(final HaltOpMessage msg) { - if (!controller) - throw new UnsupportedOperationException(); - runStateLock.lock(); - try { - // update per-operator statistics. - { - final BOpStats stats = statsMap.get(msg.bopId); - if (stats == null) { - statsMap.put(msg.bopId, msg.taskStats); - } else { - stats.add(msg.taskStats); - } - } - /* - * Update termination criteria counters. - */ + /** + * Update termination criteria counters. + */ + public void haltOp(final HaltOpMessage msg) { + // chunks generated by this task. final int fanOut = msg.sinkChunksOut + msg.altSinkChunksOut; availableChunkCount += fanOut; if (msg.sinkId != null) { AtomicLong n = availableChunkCountMap.get(msg.sinkId); if (n == null) - availableChunkCountMap.put(msg.sinkId, n = new AtomicLong()); + availableChunkCountMap + .put(msg.sinkId, n = new AtomicLong()); n.addAndGet(msg.sinkChunksOut); } if (msg.altSinkId != null) { AtomicLong n = availableChunkCountMap.get(msg.altSinkId); if (n == null) - availableChunkCountMap.put(msg.altSinkId, n = new AtomicLong()); + availableChunkCountMap.put(msg.altSinkId, + n = new AtomicLong()); n.addAndGet(msg.altSinkChunksOut); } // one less task is running. @@ -575,7 +580,7 @@ * No more chunks can appear for this operator so invoke its end * of life cycle hook. 
*/ - lifeCycleTearDownOperator(msg.bopId); + query.lifeCycleTearDownOperator(msg.bopId); } System.err.println("haltOp : bopId=" + msg.bopId + ",running=" + runningTaskCount + ",available=" + availableChunkCount @@ -584,61 +589,156 @@ + runningTaskCount; assert availableChunkCount >= 0 : "availableChunkCount=" + availableChunkCount; -// final long elapsed = System.currentTimeMillis() - begin; if (log.isTraceEnabled()) - log.trace("bopId=" + msg.bopId + ",partitionId=" + msg.partitionId - + ",serviceId=" + queryEngine.getServiceUUID() - + ", nchunks=" + fanOut + " : runningTaskCount=" - + runningTaskCount + ", availableChunkCount=" - + availableChunkCount);// + ", elapsed=" + elapsed); + log.trace("bopId=" + msg.bopId + ",partitionId=" + + msg.partitionId + ",serviceId=" + + query.queryEngine.getServiceUUID() + ", nchunks=" + + fanOut + " : runningTaskCount=" + runningTaskCount + + ", availableChunkCount=" + availableChunkCount); // test termination criteria if (msg.cause != null) { // operator failed on this chunk. log.error("Error: Canceling query: queryId=" + queryId + ",bopId=" + msg.bopId + ",partitionId=" + msg.partitionId, msg.cause); - future.halt(msg.cause); - cancel(true/* mayInterruptIfRunning */); + query.future.halt(msg.cause); + query.cancel(true/* mayInterruptIfRunning */); } else if (runningTaskCount == 0 && availableChunkCount == 0) { // success (all done). 
- future.halt(getStats()); - cancel(true/* mayInterruptIfRunning */); - } else if (deadline.get() < System.currentTimeMillis()) { if (log.isTraceEnabled()) - log.trace("queryId: deadline expired."); - future.halt(new TimeoutException()); - cancel(true/* mayInterruptIfRunning */); + log.trace("success: queryId=" + queryId); + query.future.halt(query.getStats()); + query.cancel(true/* mayInterruptIfRunning */); + } else if (query.deadline.get() < System.currentTimeMillis()) { + if (log.isTraceEnabled()) + log.trace("expired: queryId=" + queryId + ", deadline=" + + query.deadline); + query.future.halt(new TimeoutException()); + query.cancel(true/* mayInterruptIfRunning */); } + } + + /** + * Return <code>true</code> the specified operator can no longer be + * triggered by the query. The specific criteria are that no operators + * which are descendants of the specified operator are running or have + * chunks available against which they could run. Under those conditions + * it is not possible for a chunk to show up which would cause the + * operator to be executed. + * + * @param bopId + * Some operator identifier. + * + * @return <code>true</code> if the operator can not be triggered given + * the current query activity. + * + * @throws IllegalMonitorStateException + * unless the {@link #runStateLock} is held by the caller. + */ + protected boolean isOperatorDone(final int bopId) { + + return PipelineUtility.isDone(bopId, query.getQuery(), + query.bopIndex, runningCountMap, availableChunkCountMap); + + } + + } // class RunState + + /** + * Invoked once by the query controller with the initial + * {@link BindingSetChunk} which gets the query moving. + * + * @todo this should reject multiple invocations for a given query instance. 
+ */ + public void startQuery(final IChunkMessage<IBindingSet> msg) { + + if (!controller) + throw new UnsupportedOperationException(); + + if (msg == null) + throw new IllegalArgumentException(); + + if (msg.getQueryId() != queryId) // @todo equals() if queryId is UUID. + throw new IllegalArgumentException(); + + runStateLock.lock(); + + try { + + runState.startQuery(msg); + + queryEngine.acceptChunk(msg); + } finally { + runStateLock.unlock(); + } + } /** - * Return <code>true</code> the specified operator can no longer be - * triggered by the query. The specific criteria are that no operators which - * are descendants of the specified operator are running or have chunks - * available against which they could run. Under those conditions it is not - * possible for a chunk to show up which would cause the operator to be - * executed. + * Message provides notice that the operator has started execution and will + * consume some specific number of binding set chunks. * - * @param bopId - * Some operator identifier. + * @param msg The {@link StartOpMessage}. * - * @return <code>true</code> if the operator can not be triggered given the - * current query activity. + * @throws UnsupportedOperationException + * If this node is not the query coordinator. + */ + public void startOp(final StartOpMessage msg) { + + if (!controller) + throw new UnsupportedOperationException(); + + runStateLock.lock(); + + try { + + runState.startOp(msg); + + } finally { + + runStateLock.unlock(); + + } + + } + + /** + * Message provides notice that the operator has ended execution. The + * termination conditions for the query are checked. (For scale-out, the + * node node controlling the query needs to be involved for each operator + * start/stop in order to make the termination decision atomic). * - * @throws IllegalMonitorStateException - * unless the {@link #runStateLock} is held by the caller. 
+ * @param msg The {@link HaltOpMessage} + * + * @throws UnsupportedOperationException + * If this node is not the query coordinator. */ - protected boolean isOperatorDone(final int bopId) { + public void haltOp(final HaltOpMessage msg) { + + if (!controller) + throw new UnsupportedOperationException(); - if (!runStateLock.isHeldByCurrentThread()) - throw new IllegalMonitorStateException(); + // update per-operator statistics. + final BOpStats tmp = statsMap.putIfAbsent(msg.bopId, msg.taskStats); - return PipelineUtility.isDone(bopId, query, bopIndex, runningCountMap, - availableChunkCountMap); + if (tmp != null) + tmp.add(msg.taskStats); + runStateLock.lock(); + + try { + + runState.haltOp(msg); + + } finally { + + runStateLock.unlock(); + + } + } /** @@ -703,100 +803,222 @@ * A chunk to be consumed. */ @SuppressWarnings("unchecked") - protected FutureTask<Void> newChunkTask(final IChunkMessage<IBindingSet> chunk) { - /* - * Look up the BOp in the index, create the BOpContext for that BOp, and - * return the value returned by BOp.eval(context). - */ - final int bopId = chunk.getBOpId(); - final int partitionId = chunk.getPartitionId(); - final BOp bop = bopIndex.get(bopId); - if (bop == null) { - throw new NoSuchBOpException(bopId); - } - if (!(bop instanceof BindingSetPipelineOp)) { - /* - * @todo evaluation of element[] pipelines needs to use pretty much - * the same code, but it needs to be typed for E[] rather than - * IBindingSet[]. - * - * @todo evaluation of Monet style BATs would also operate under - * different assumptions, closer to those of an element[]. - */ - throw new UnsupportedOperationException(bop.getClass().getName()); - } - // self - final BindingSetPipelineOp op = ((BindingSetPipelineOp) bop); - // parent (null if this is the root of the operator tree). - final BOp p = BOpUtility.getParent(query, op); - // sink (null unless parent is defined) - final Integer sinkId = p == null ? 
null : (Integer) p - .getProperty(BindingSetPipelineOp.Annotations.BOP_ID); - final IBlockingBuffer<IBindingSet[]> sink = (p == null ? queryBuffer - : op.newBuffer()); - // altSink (null when not specified). - final Integer altSinkId = (Integer) op - .getProperty(BindingSetPipelineOp.Annotations.ALT_SINK_REF); - if (altSinkId != null && !bopIndex.containsKey(altSinkId)) { - throw new NoSuchBOpException(altSinkId); - } - final IBlockingBuffer<IBindingSet[]> altSink = altSinkId == null ? null - : op.newBuffer(); - // context - final BOpContext context = new BOpContext(this, partitionId, op - .newStats(), chunk.iterator(), sink, altSink); - // FutureTask for operator execution (not running yet). - final FutureTask<Void> f = op.eval(context); - // Hook the FutureTask. - final Runnable r = new Runnable() { - public void run() { - final UUID serviceId = queryEngine.getServiceUUID(); - int fanIn = 1; - int sinkChunksOut = 0; - int altSinkChunksOut = 0; - try { - clientProxy.startOp(new StartOpMessage(queryId, - bopId, partitionId, serviceId, fanIn)); - if (log.isDebugEnabled()) - log.debug("Running chunk: queryId=" + queryId - + ", bopId=" + bopId + ", bop=" + bop); - f.run(); // run - f.get(); // verify success - if (sink != queryBuffer && !sink.isEmpty()) { - // handle output chunk. - sinkChunksOut += handleOutputChunk(sinkId, sink); - } - if (altSink != queryBuffer && altSink != null - && !altSink.isEmpty()) { - // handle alt sink output chunk. 
- altSinkChunksOut += handleOutputChunk(altSinkId, altSink); - } - clientProxy.haltOp(new HaltOpMessage(queryId, bopId, - partitionId, serviceId, null/* cause */, - sinkId, sinkChunksOut, altSinkId, - altSinkChunksOut, context.getStats())); - } catch (Throwable t) { - try { - clientProxy.haltOp(new HaltOpMessage(queryId, - bopId, partitionId, serviceId, - t/* cause */, sinkId, sinkChunksOut, altSinkId, - altSinkChunksOut, context.getStats())); - } catch (RemoteException e) { - cancel(true/* mayInterruptIfRunning */); - log.error("queryId=" + queryId, e); - } - } - } - }; + protected FutureTask<Void> newChunkTask( + final IChunkMessage<IBindingSet> chunk) { + + // create runnable to evaluate a chunk for an operator and partition. + final Runnable r = new ChunkTask(chunk); + // wrap runnable. final FutureTask<Void> f2 = new FutureTask(r, null/* result */); + // add to list of active futures for this query. - operatorFutures.put(new BSBundle(bopId, partitionId), f2); + operatorFutures.put(new BSBundle(chunk.getBOpId(), chunk + .getPartitionId()), f2); + // return : caller will execute. return f2; + } /** + * Runnable evaluates an operator for some chunk of inputs. In scale-out, + * the operator may be evaluated against some partition of a scale-out + * index. + */ + private class ChunkTask implements Runnable { + + /** Alias for the {@link ChunkTask}'s logger. */ + private final Logger log = chunkTaskLog; + + /** The index of the bop which is being evaluated. */ + private final int bopId; + + /** + * The index partition against which the operator is being evaluated and + * <code>-1</code> if the operator is not being evaluated against a + * shard. + */ + private final int partitionId; + + /** The operator which is being evaluated. */ + private final BOp bop; + + /** + * The index of the operator which is the default sink for outputs + * generated by this evaluation. This is the + * {@link BOp.Annotations#BOP_ID} of the parent of this operator. 
This + * will be <code>null</code> if the operator does not have a parent and + * is not a query since no outputs will be generated in that case. + */ + private final Integer sinkId; + + /** + * The index of the operator which is the alternative sink for outputs + * generated by this evaluation. This is <code>null</code> unless the + * operator explicitly specifies an alternative sink using + * {@link BindingSetPipelineOp.Annotations#ALT_SINK_REF}. + */ + private final Integer altSinkId; + + /** + * The sink on which outputs destined for the {@link #sinkId} operator + * will be written and <code>null</code> if {@link #sinkId} is + * <code>null</code>. + */ + private final IBlockingBuffer<IBindingSet[]> sink; + + /** + * The sink on which outputs destined for the {@link #altSinkId} + * operator will be written and <code>null</code> if {@link #altSinkId} + * is <code>null</code>. + */ + private final IBlockingBuffer<IBindingSet[]> altSink; + + /** + * The evaluation context for this operator. + */ + private final BOpContext<IBindingSet> context; + + /** + * {@link FutureTask} which evaluates the operator (evaluation is + * delegated to this {@link FutureTask}). + */ + private final FutureTask<Void> ft; + + /** + * Create a task to consume a chunk. This looks up the {@link BOp} which + * is the target for the message in the {@link RunningQuery#bopIndex}, + * creates the sink(s) for the {@link BOp}, creates the + * {@link BOpContext} for that {@link BOp}, and wraps the value returned + * by {@link PipelineOp#eval(BOpContext)} in order to handle the outputs + * written on those sinks. + * + * @param chunk + * A message containing the materialized chunk and metadata + * about the operator which will consume that chunk. 
+ */ + public ChunkTask(final IChunkMessage<IBindingSet> chunk) { + bopId = chunk.getBOpId(); + partitionId = chunk.getPartitionId(); + bop = bopIndex.get(bopId); + if (bop == null) { + throw new NoSuchBOpException(bopId); + } + if (!(bop instanceof BindingSetPipelineOp)) { + /* + * @todo evaluation of element[] pipelines needs to use pretty + * much the same code, but it needs to be typed for E[] rather + * than IBindingSet[]. + * + * @todo evaluation of Monet style BATs would also operate under + * different assumptions, closer to those of an element[]. + */ + throw new UnsupportedOperationException(bop.getClass() + .getName()); + } + + // self + final BindingSetPipelineOp op = ((BindingSetPipelineOp) bop); + + // parent (null if this is the root of the operator tree). + final BOp p = BOpUtility.getParent(query, op); + + // sink (null unless parent is defined) + sinkId = p == null ? null : (Integer) p + .getProperty(BindingSetPipelineOp.Annotations.BOP_ID); + + // altSink (null when not specified). + altSinkId = (Integer) op + .getProperty(BindingSetPipelineOp.Annotations.ALT_SINK_REF); + + if (altSinkId != null && !bopIndex.containsKey(altSinkId)) + throw new NoSuchBOpException(altSinkId); + + if (altSinkId != null && sinkId == null) { + throw new RuntimeException( + "The primary sink must be defined if the altSink is defined: " + + bop); + } + + if (sinkId != null && altSinkId != null + && sinkId.intValue() == altSinkId.intValue()) { + throw new RuntimeException( + "The primary and alternative sink may not be the same operator: " + + bop); + } + + sink = (p == null ? queryBuffer : op.newBuffer()); + + altSink = altSinkId == null ? null : op.newBuffer(); + + // context + context = new BOpContext<IBindingSet>(RunningQuery.this, + partitionId, op.newStats(), chunk.getChunkAccessor() + .iterator(), sink, altSink); + + // FutureTask for operator execution (not running yet). + ft = op.eval(context); + + } + + /** + * Evaluate the {@link IChunkMessage}. 
+ */ + public void run() { + final UUID serviceId = queryEngine.getServiceUUID(); + int fanIn = 1; + int sinkChunksOut = 0; + int altSinkChunksOut = 0; + try { + clientProxy.startOp(new StartOpMessage(queryId, + bopId, partitionId, serviceId, fanIn)); + if (log.isDebugEnabled()) + log.debug("Running chunk: queryId=" + queryId + ", bopId=" + + bopId + ", bop=" + bop); + ft.run(); // run + ft.get(); // verify success + if (sink != null && sink != queryBuffer && !sink.isEmpty()) { + /* + * Handle sink output, sending appropriate chunk + * message(s). + * + * Note: This maps output over shards/nodes in s/o. + ... [truncated message content] |
From: <tho...@us...> - 2010-09-14 19:55:17
|
Revision: 3552 http://bigdata.svn.sourceforge.net/bigdata/?rev=3552&view=rev Author: thompsonbry Date: 2010-09-14 19:55:11 +0000 (Tue, 14 Sep 2010) Log Message: ----------- Fixed import of a class since removed in AbstractTripleStore. BOp#getRequiredProperty(String name) was not compiling under ant due to a generic type parameter for the return type. I have removed the generic type parameter and added explicit casts everywhere this method is used. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SparqlBindingSetComparatorOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-09-14 19:18:42 UTC (rev 3551) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-09-14 19:55:11 UTC (rev 3552) @@ -116,18 +116,36 @@ */ <T> T getProperty(final String name); - /** - * Return the value of the named annotation. - * - * @param name - * The name of the annotation. - * - * @return The value of the annotation. - * - * @throws IllegalArgumentException - * if the named annotation is not bound. 
- */ - <T> T getRequiredProperty(final String name); +// /** +// * Return the value of the named annotation. +// * +// * @param name +// * The name of the annotation. +// * +// * @return The value of the annotation. +// * +// * @throws IllegalArgumentException +// * if the named annotation is not bound. +// */ +// <T> T getRequiredProperty(final String name); + + /** + * Return the value of the named annotation. + * + * @param name + * The name of the annotation. + * + * @return The value of the annotation. + * + * @throws IllegalArgumentException + * if the named annotation is not bound. + * + * @todo Note: This variant without generics is required for some java + * compiler versions. + * + * @see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6302954 + */ + public Object getRequiredProperty(final String name); /** * Deep copy clone of the operator. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-09-14 19:18:42 UTC (rev 3551) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-09-14 19:55:11 UTC (rev 3552) @@ -306,11 +306,22 @@ } - public <T> T getRequiredProperty(final String name) { +// public <T> T getRequiredProperty(final String name) { +// +// @SuppressWarnings("unchecked") +// final T tmp = (T) annotations.get(name); +// +// if (tmp == null) +// throw new IllegalArgumentException("Required property: " + name); +// +// return tmp; +// +// } - @SuppressWarnings("unchecked") - final T tmp = (T) annotations.get(name); + public Object getRequiredProperty(final String name) { + final Object tmp = annotations.get(name); + if (tmp == null) throw new IllegalArgumentException("Required property: " + name); @@ -358,7 +369,7 @@ public final long getTimestamp() { - return getRequiredProperty(Annotations.TIMESTAMP); + return (Long) 
getRequiredProperty(Annotations.TIMESTAMP); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-09-14 19:18:42 UTC (rev 3551) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-09-14 19:55:11 UTC (rev 3552) @@ -319,7 +319,7 @@ final IIndexManager tmp = getFederation() == null ? getIndexManager() : getFederation(); - final long timestamp = pred + final long timestamp = (Long) pred .getRequiredProperty(BOp.Annotations.TIMESTAMP); return (IRelation<?>) tmp.getResourceLocator().locate( @@ -391,7 +391,7 @@ final int partitionId = predicate.getPartitionId(); - final long timestamp = predicate + final long timestamp = (Long) predicate .getRequiredProperty(BOp.Annotations.TIMESTAMP); final int flags = predicate.getProperty( Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java 2010-09-14 19:18:42 UTC (rev 3551) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java 2010-09-14 19:55:11 UTC (rev 3552) @@ -113,34 +113,36 @@ super(args, annotations); - getRequiredProperty(Annotations.SELECTED); - - } + getRequiredProperty(Annotations.SELECTED); - /** - * @see Annotations#SELECTED - */ - public IPredicate<E> getPredicate() { + } - return getRequiredProperty(Annotations.SELECTED); + /** + * @see Annotations#SELECTED + */ + @SuppressWarnings("unchecked") + public IPredicate<E> getPredicate() { - } - + return (IPredicate<E>) getRequiredProperty(Annotations.SELECTED); + + } + /** * @see Annotations#RELATION */ public String getRelation() { - return getRequiredProperty(Annotations.RELATION); + return (String) 
getRequiredProperty(Annotations.RELATION); } /** * @see Annotations#KEY_ORDER */ - public IKeyOrder<E> getKeyOrder() { + @SuppressWarnings("unchecked") + public IKeyOrder<E> getKeyOrder() { - return getRequiredProperty(Annotations.KEY_ORDER); + return (IKeyOrder<E>) getRequiredProperty(Annotations.KEY_ORDER); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java 2010-09-14 19:18:42 UTC (rev 3551) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java 2010-09-14 19:55:11 UTC (rev 3552) @@ -125,7 +125,7 @@ */ public IVariable<?>[] getVariables() { - return getRequiredProperty(Annotations.VARIABLES); + return (IVariable<?>[]) getRequiredProperty(Annotations.VARIABLES); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2010-09-14 19:18:42 UTC (rev 3551) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2010-09-14 19:55:11 UTC (rev 3552) @@ -140,7 +140,7 @@ */ public long getOffset() { - return getRequiredProperty(Annotations.OFFSET); + return (Long) getRequiredProperty(Annotations.OFFSET); } @@ -149,7 +149,7 @@ */ public long getLimit() { - return getRequiredProperty(Annotations.LIMIT); + return (Long) getRequiredProperty(Annotations.LIMIT); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOp.java 2010-09-14 19:18:42 UTC (rev 3551) +++ 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOp.java 2010-09-14 19:55:11 UTC (rev 3552) @@ -80,7 +80,7 @@ */ public ComparatorOp getComparator() { - return getRequiredProperty(Annotations.COMPARATOR); + return (ComparatorOp) getRequiredProperty(Annotations.COMPARATOR); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SparqlBindingSetComparatorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SparqlBindingSetComparatorOp.java 2010-09-14 19:18:42 UTC (rev 3551) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SparqlBindingSetComparatorOp.java 2010-09-14 19:55:11 UTC (rev 3552) @@ -46,7 +46,7 @@ */ public ISortOrder<?>[] getOrder() { - return getRequiredProperty(Annotations.ORDER); + return (ISortOrder<?>[]) getRequiredProperty(Annotations.ORDER); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-09-14 19:18:42 UTC (rev 3551) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-09-14 19:55:11 UTC (rev 3552) @@ -146,7 +146,6 @@ import com.bigdata.relation.rule.eval.IRuleTaskFactory; import com.bigdata.relation.rule.eval.ISolution; import com.bigdata.search.FullTextIndex; -import com.bigdata.service.AbstractEmbeddedDataService; import com.bigdata.service.DataService; import com.bigdata.service.IBigdataFederation; import com.bigdata.service.ndx.IClientIndex; @@ -1129,8 +1128,7 @@ * and writers. This property depends on primarily on the concurrency * control mechanisms (if any) that are used to prevent concurrent access to * an unisolated index while a thread is writing on that index. 
Stores based - * on the {@link IBigdataFederation} or an - * {@link AbstractEmbeddedDataService} automatically inherit the + * on the {@link IBigdataFederation} automatically inherit the * appropriate concurrency controls as would a store whose index access was * intermediated by the executor service of an {@link IConcurrencyManager}. * <p> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-09-15 15:55:04
|
Revision: 3557 http://bigdata.svn.sourceforge.net/bigdata/?rev=3557&view=rev Author: thompsonbry Date: 2010-09-15 15:54:52 +0000 (Wed, 15 Sep 2010) Log Message: ----------- Changed queryId from 'long' to UUID, which is what the existing scale-out query code is using. Moved the 2DS distributed query test suite into the bigdata-jini module since it has a dependency on JiniClient. Extracted RunState from RunningQuery into its own class. Provided a logger for a table view of the RunState of a query as it evolves. Added some stress tests for concurrent query. Currently working through a concurrency issue in com.bigdata.bop.queryEngine.RunState. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/HaltOpMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryDecl.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/LocalChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryDecl.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StartOpMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/NIOChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/QueryContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/ServiceContext.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/ShardContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/ThickChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestNIOChunkMessage.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestThickChunkMessage.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/ branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/ branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/ branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/HaltOpMessage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/HaltOpMessage.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/HaltOpMessage.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -18,7 +18,7 @@ private static final long serialVersionUID = 1L; /** The identifier of the query. */ - final public long queryId; + final public UUID queryId; /** The identifier of the operator. 
*/ final public int bopId; @@ -101,7 +101,7 @@ */ public HaltOpMessage( // - final long queryId, final int bopId, final int partitionId, + final UUID queryId, final int bopId, final int partitionId, final UUID serviceId, Throwable cause, // final Integer sinkId, final int sinkChunksOut,// final Integer altSinkId, final int altSinkChunksOut,// Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkMessage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkMessage.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkMessage.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -1,13 +1,10 @@ package com.bigdata.bop.engine; -import java.nio.ByteBuffer; -import java.util.concurrent.BlockingQueue; +import java.util.UUID; import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; import com.bigdata.bop.fed.FederatedRunningQuery; -import com.bigdata.relation.accesspath.BlockingBuffer; -import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.btree.raba.IRaba; import com.bigdata.service.ResourceService; /** @@ -32,7 +29,7 @@ IQueryClient getQueryController(); /** The query identifier. */ - long getQueryId(); + UUID getQueryId(); /** The identifier for the target {@link BOp}. 
*/ int getBOpId(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryClient.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -1,6 +1,7 @@ package com.bigdata.bop.engine; import java.rmi.RemoteException; +import java.util.UUID; import com.bigdata.bop.BindingSetPipelineOp; @@ -19,7 +20,7 @@ * @throws IllegalArgumentException * if there is no such query. */ - BindingSetPipelineOp getQuery(long queryId) throws RemoteException; + BindingSetPipelineOp getQuery(UUID queryId) throws RemoteException; /** * Notify the client that execution has started for some query, operator, Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryDecl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryDecl.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IQueryDecl.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -1,5 +1,7 @@ package com.bigdata.bop.engine; +import java.util.UUID; + import com.bigdata.bop.BindingSetPipelineOp; /** @@ -15,7 +17,7 @@ /** * The query identifier. */ - long getQueryId(); + UUID getQueryId(); /** * The query. 
Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -55,24 +55,6 @@ */ IIndexManager getIndexManager(); -// /** -// * The timestamp or transaction identifier against which the query is -// * reading. -// * -// * @deprecated move into the individual operator. See -// * {@link BOp.Annotations#TIMESTAMP} -// */ -// long getReadTimestamp(); -// -// /** -// * The timestamp or transaction identifier against which the query is -// * writing. -// * -// * @deprecated moved into the individual operator. See -// * {@link BOp.Annotations#TIMESTAMP} -// */ -// long getWriteTimestamp(); - /** * Terminate query evaluation */ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/LocalChunkMessage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/LocalChunkMessage.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/LocalChunkMessage.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -1,6 +1,7 @@ package com.bigdata.bop.engine; import java.io.Serializable; +import java.util.UUID; import com.bigdata.bop.BOp; import com.bigdata.bop.fed.FederatedRunningQuery; @@ -21,7 +22,7 @@ /** * The query identifier. */ - private final long queryId; + private final UUID queryId; /** * The target {@link BOp}. 
@@ -42,7 +43,7 @@ return queryController; } - public long getQueryId() { + public UUID getQueryId() { return queryId; } @@ -59,12 +60,15 @@ } public LocalChunkMessage(final IQueryClient queryController, - final long queryId, final int bopId, final int partitionId, + final UUID queryId, final int bopId, final int partitionId, final IAsynchronousIterator<E[]> source) { if (queryController == null) throw new IllegalArgumentException(); + if (queryId == null) + throw new IllegalArgumentException(); + if (source == null) throw new IllegalArgumentException(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryDecl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryDecl.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryDecl.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -28,6 +28,7 @@ package com.bigdata.bop.engine; import java.io.Serializable; +import java.util.UUID; import com.bigdata.bop.BindingSetPipelineOp; @@ -44,18 +45,21 @@ */ private static final long serialVersionUID = 1L; - private final long queryId; + private final UUID queryId; private final IQueryClient clientProxy; private final BindingSetPipelineOp query; - public QueryDecl(final IQueryClient clientProxy, final long queryId, + public QueryDecl(final IQueryClient clientProxy, final UUID queryId, final BindingSetPipelineOp query) { if (clientProxy == null) throw new IllegalArgumentException(); + if (queryId == null) + throw new IllegalArgumentException(); + if (query == null) throw new IllegalArgumentException(); @@ -75,7 +79,7 @@ return clientProxy; } - public long getQueryId() { + public UUID getQueryId() { return queryId; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -28,10 +28,13 @@ package com.bigdata.bop.engine; import java.rmi.RemoteException; +import java.util.Comparator; import java.util.UUID; +import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; +import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.PriorityBlockingQueue; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.atomic.AtomicReference; @@ -412,13 +415,23 @@ /** * The currently executing queries. */ - final protected ConcurrentHashMap<Long/* queryId */, RunningQuery> runningQueries = new ConcurrentHashMap<Long, RunningQuery>(); + final protected ConcurrentHashMap<UUID/* queryId */, RunningQuery> runningQueries = new ConcurrentHashMap<UUID, RunningQuery>(); /** - * A priority queue of {@link RunningQuery}s having binding set chunks - * available for consumption. + * A queue of {@link RunningQuery}s having binding set chunks available for + * consumption. + * + * @todo Be careful when testing out a {@link PriorityBlockingQueue} here. + * First, that collection is intrinsically bounded (it is backed by an + * array) so it will BLOCK under heavy load and could be expected to + * have some resize costs if the queue size becomes too large. Second, + * either {@link RunningQuery} needs to implement an appropriate + * {@link Comparator} or we need to pass one into the constructor for + * the queue. 
*/ - final private PriorityBlockingQueue<RunningQuery> priorityQueue = new PriorityBlockingQueue<RunningQuery>(); + final private BlockingQueue<RunningQuery> priorityQueue = new LinkedBlockingQueue<RunningQuery>(); +// final private BlockingQueue<RunningQuery> priorityQueue = new PriorityBlockingQueue<RunningQuery>( +// ); /** * @@ -480,7 +493,10 @@ * if the query engine is shutting down. */ protected void assertRunning() { - + + if (engineFuture.get() == null) + throw new IllegalStateException("Not initialized."); + if (shutdown) throw new IllegalStateException("Shutting down."); @@ -517,7 +533,7 @@ while (true) { try { final RunningQuery q = priorityQueue.take(); - final long queryId = q.getQueryId(); + final UUID queryId = q.getQueryId(); if (q.isCancelled()) continue; final IChunkMessage<IBindingSet> chunk = q.chunksIn.poll(); @@ -553,7 +569,7 @@ * chunk will be attached to the query and the query will be scheduled for * execution. * - * @param chunk + * @param msg * A chunk of intermediate results. * * @throws IllegalArgumentException @@ -561,25 +577,27 @@ * @throws IllegalStateException * if the chunk is not materialized. */ - void acceptChunk(final IChunkMessage<IBindingSet> chunk) { + protected void acceptChunk(final IChunkMessage<IBindingSet> msg) { - if (chunk == null) + if (msg == null) throw new IllegalArgumentException(); - if (!chunk.isMaterialized()) + if (!msg.isMaterialized()) throw new IllegalStateException(); - final RunningQuery q = runningQueries.get(chunk.getQueryId()); + final RunningQuery q = runningQueries.get(msg.getQueryId()); if(q == null) throw new IllegalStateException(); // add chunk to the query's input queue on this node. - q.acceptChunk(chunk); + q.acceptChunk(msg); + + assertRunning(); // add query to the engine's task queue. 
priorityQueue.add(q); - + } /** @@ -697,20 +715,6 @@ * IQueryClient */ -// public BOp getQuery(final long queryId) throws RemoteException { -// -// final RunningQuery q = runningQueries.get(queryId); -// -// if (q != null) { -// -// return q.getQuery(); -// -// } -// -// return null; -// -// } - public void startOp(final StartOpMessage msg) throws RemoteException { final RunningQuery q = runningQueries.get(msg.queryId); @@ -770,17 +774,20 @@ * needs to talk to a federation. There should be nothing DS * specific about the {@link FederatedQueryEngine}. */ - public RunningQuery eval(final long queryId, + public RunningQuery eval(final UUID queryId, final BindingSetPipelineOp query, final IChunkMessage<IBindingSet> msg) throws Exception { + if (queryId == null) + throw new IllegalArgumentException(); + if (query == null) throw new IllegalArgumentException(); if (msg == null) throw new IllegalArgumentException(); - if (queryId != msg.getQueryId()) // @todo use equals() to compare UUIDs. + if (!queryId.equals(msg.getQueryId())) throw new IllegalArgumentException(); final RunningQuery runningQuery = newRunningQuery(this, queryId, @@ -813,6 +820,8 @@ runningQuery.startQuery(msg); + acceptChunk(msg); + return runningQuery; } @@ -826,13 +835,13 @@ * @return The {@link RunningQuery} -or- <code>null</code> if there is no * query associated with that query identifier. */ - protected RunningQuery getRunningQuery(final long queryId) { + protected RunningQuery getRunningQuery(final UUID queryId) { return runningQueries.get(queryId); } - public BindingSetPipelineOp getQuery(final long queryId) { + public BindingSetPipelineOp getQuery(final UUID queryId) { final RunningQuery q = getRunningQuery(queryId); @@ -851,9 +860,12 @@ * @param runningQuery * The {@link RunningQuery}. 
*/ - protected void putRunningQuery(final long queryId, + protected void putRunningQuery(final UUID queryId, final RunningQuery runningQuery) { + if (queryId == null) + throw new IllegalArgumentException(); + if (runningQuery == null) throw new IllegalArgumentException(); @@ -865,7 +877,7 @@ * Factory for {@link RunningQuery}s. */ protected RunningQuery newRunningQuery(final QueryEngine queryEngine, - final long queryId, final boolean controller, + final UUID queryId, final boolean controller, final IQueryClient clientProxy, final BindingSetPipelineOp query) { return new RunningQuery(this, queryId, true/* controller */, Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -0,0 +1,543 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 15, 2010 + */ + +package com.bigdata.bop.engine; + +import java.rmi.RemoteException; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; + +/** + * The run state for a {@link RunningQuery}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +class RunState { + + static private final Logger log = Logger.getLogger(RunState.class); + + /** + * Inner class provides a 2nd logger used for tabular representations. + */ + static private class TableLog { + + static private final Logger tableLog = Logger.getLogger(TableLog.class); + + } + + /** + * The query. + */ + private final RunningQuery query; + + /** + * The query identifier. + */ + private final UUID queryId; + + /** + * The #of run state transitions which have occurred for this query. + */ + private long nsteps = 0; + + /** + * The #of tasks for this query which have started but not yet halted and + * ZERO (0) if this is not the query coordinator. + * <p> + * This is guarded by the {@link #runningStateLock}. + */ + private long totalRunningTaskCount = 0; + + /** + * The #of chunks for this query of which a running task has made available + * but which have not yet been accepted for processing by another task and + * ZERO (0) if this is not the query coordinator. + * <p> + * This is guarded by the {@link #runningStateLock}. 
+ */ + private long totalAvailableChunkCount = 0; + + /** + * A map reporting the #of chunks available for each operator in the + * pipeline (we only report chunks for pipeline operators). The total #of + * chunks available across all operators in the pipeline is reported by + * {@link #totalAvailableChunkCount}. + * <p> + * The movement of the intermediate binding set chunks forms an acyclic + * directed graph. This map is used to track the #of chunks available for + * each bop in the pipeline. When a bop has no more incoming chunks, we send + * an asynchronous message to all nodes on which that bop had executed + * informing the {@link QueryEngine} on that node that it should immediately + * release all resources associated with that bop. + * <p> + * This is guarded by the {@link #runningStateLock}. + */ + private final Map<Integer/* bopId */, AtomicLong/* availableChunkCount */> availableChunkCountMap = new LinkedHashMap<Integer, AtomicLong>(); + + /** + * A collection reporting on the #of instances of a given {@link BOp} which + * are concurrently executing. + * <p> + * This is guarded by the {@link #runningStateLock}. + */ + private final Map<Integer/* bopId */, AtomicLong/* runningCount */> runningTaskCountMap = new LinkedHashMap<Integer, AtomicLong>(); + + /** + * A collection of the operators which have executed at least once. + * <p> + * This is guarded by the {@link #runningStateLock}. 
+ */ + private final Set<Integer/* bopId */> startedSet = new LinkedHashSet<Integer>(); + + public RunState(final RunningQuery query) { + + this.query = query; + + this.queryId = query.getQueryId(); + + // this.nops = query.bopIndex.size(); + + } + + public void startQuery(final IChunkMessage<?> msg) { + + nsteps++; + + // query.lifeCycleSetUpQuery(); + + final Integer bopId = Integer.valueOf(msg.getBOpId()); + + totalAvailableChunkCount++; + + assert totalAvailableChunkCount == 1 : "totalAvailableChunkCount=" + + totalAvailableChunkCount + " :: msg=" + msg; + + { + + AtomicLong n = availableChunkCountMap.get(bopId); + + if (n == null) + availableChunkCountMap.put(bopId, n = new AtomicLong()); + + final long tmp = n.incrementAndGet(); + + assert tmp == 1 : "availableChunkCount=" + tmp + " for bopId=" + + msg.getBOpId() + " :: msg=" + msg; + + } + + if (log.isInfoEnabled()) + log.info("queryId=" + queryId + ",totalRunningTaskCount=" + + totalRunningTaskCount + ",totalAvailableChunkCount=" + + totalAvailableChunkCount); + + if (TableLog.tableLog.isInfoEnabled()) { + /* + * Note: RunState is only used by the query controller so this will + * not do an RMI and the RemoteException will not be thrown. + */ + final UUID serviceId; + try { + serviceId = msg.getQueryController().getServiceUUID(); + } catch (RemoteException ex) { + throw new AssertionError(ex); + } + TableLog.tableLog.info("\n\nqueryId=" + queryId + "\n"); + // TableLog.tableLog.info(query.getQuery().toString()+"\n"); + TableLog.tableLog.info(getTableHeader()); + TableLog.tableLog.info(getTableRow("startQ", serviceId, + -1/* shardId */, 1/* fanIn */)); + } + + System.err.println("startQ : nstep="+nsteps+", bopId=" + bopId + + ",totalRunningTaskCount=" + totalRunningTaskCount + + ",totalAvailableTaskCount=" + totalAvailableChunkCount); + + } + + /** + * @return <code>true</code> if this is the first time we will evaluate the + * op. 
+ */ + public boolean startOp(final StartOpMessage msg) { + + nsteps++; + + if (log.isTraceEnabled()) + log.trace(msg.toString()); + + final Integer bopId = Integer.valueOf(msg.bopId); + + totalRunningTaskCount++; + + assert totalRunningTaskCount >= 1 : "runningTaskCount=" + + totalRunningTaskCount + " :: msg=" + msg; + final boolean firstTime; + { + + AtomicLong n = runningTaskCountMap.get(bopId); + + if (n == null) + runningTaskCountMap.put(bopId, n = new AtomicLong()); + + final long tmp = n.incrementAndGet(); + + assert tmp >= 0 : "runningTaskCount=" + tmp + " for bopId=" + + msg.bopId + " :: msg=" + msg; + + firstTime = startedSet.add(bopId); + // + // // first evaluation pass for this operator. + // query.lifeCycleSetUpOperator(bopId); + // + // } + + } + + totalAvailableChunkCount -= msg.nchunks; + + assert totalAvailableChunkCount >= 0 : "totalAvailableChunkCount=" + + totalAvailableChunkCount + " :: msg=" + msg; + + { + + AtomicLong n = availableChunkCountMap.get(bopId); + + if (n == null) + throw new AssertionError(); + + final long tmp = n.addAndGet(-msg.nchunks); + + assert tmp >= 0 : "availableChunkCount=" + tmp + " for bopId=" + + msg.bopId + " :: msg=" + msg; + + } + + System.err.println("startOp: nstep="+nsteps+", bopId=" + bopId + + ",totalRunningTaskCount=" + totalRunningTaskCount + + ",totalAvailableChunkCount=" + totalAvailableChunkCount + + ",fanIn=" + msg.nchunks); + + if (TableLog.tableLog.isInfoEnabled()) { + TableLog.tableLog.info(getTableRow("startOp", msg.serviceId, + msg.partitionId, msg.nchunks/* fanIn */)); + } + + // check deadline. + final long deadline = query.getDeadline(); + if (deadline < System.currentTimeMillis()) { + + if (log.isTraceEnabled()) + log.trace("expired: queryId=" + queryId + ", deadline=" + + deadline); + + query.future.halt(new TimeoutException()); + + query.cancel(true/* mayInterruptIfRunning */); + + } + return firstTime; + } + + /** + * Update termination criteria counters. 
@return <code>true</code> if the + * operator life cycle is over. + */ + public boolean haltOp(final HaltOpMessage msg) { + + nsteps++; + + if (log.isTraceEnabled()) + log.trace(msg.toString()); + + // chunks generated by this task. + final int fanOut = msg.sinkChunksOut + msg.altSinkChunksOut; + { + + totalAvailableChunkCount += fanOut; + + assert totalAvailableChunkCount >= 0 : "totalAvailableChunkCount=" + + totalAvailableChunkCount + " :: msg=" + msg; + + if (msg.sinkId != null) { + AtomicLong n = availableChunkCountMap.get(msg.sinkId); + if (n == null) + availableChunkCountMap + .put(msg.sinkId, n = new AtomicLong()); + + final long tmp = n.addAndGet(msg.sinkChunksOut); + + assert tmp >= 0 : "availableChunkCount=" + tmp + " for bopId=" + + msg.sinkId + " :: msg=" + msg; + + } + + if (msg.altSinkId != null) { + + AtomicLong n = availableChunkCountMap.get(msg.altSinkId); + + if (n == null) + availableChunkCountMap.put(msg.altSinkId, + n = new AtomicLong()); + + final long tmp = n.addAndGet(msg.altSinkChunksOut); + + assert tmp >= 0 : "availableChunkCount=" + tmp + " for bopId=" + + msg.altSinkId + " :: msg=" + msg; + + } + + } + + // one less task is running. + totalRunningTaskCount--; + + assert totalRunningTaskCount >= 0 : "runningTaskCount=" + + totalRunningTaskCount + " :: msg=" + msg; + + { + + final AtomicLong n = runningTaskCountMap.get(msg.bopId); + + if (n == null) + throw new AssertionError(); + + final long tmp = n.decrementAndGet(); + + assert tmp >= 0 : "runningTaskCount=" + tmp + " for bopId=" + + msg.bopId + " :: msg=" + msg; + + } + + // Figure out if this operator is done. 
+ final boolean isDone = isOperatorDone(msg.bopId); + + System.err.println("haltOp : nstep=" + nsteps + ", bopId=" + msg.bopId + + ",totalRunningTaskCount=" + totalRunningTaskCount + + ",totalAvailableTaskCount=" + totalAvailableChunkCount + + ",fanOut=" + fanOut); + + if (TableLog.tableLog.isInfoEnabled()) { + TableLog.tableLog.info(getTableRow("haltOp", msg.serviceId, + msg.partitionId, fanOut)); + } + + if (log.isTraceEnabled()) + log.trace("bopId=" + msg.bopId + ",partitionId=" + msg.partitionId + + ",serviceId=" + query.getQueryEngine().getServiceUUID() + + ", nchunks=" + fanOut + " : totalRunningTaskCount=" + + totalRunningTaskCount + ", totalAvailableChunkCount=" + + totalAvailableChunkCount); + + // test termination criteria + final long deadline = query.getDeadline(); + if (msg.cause != null) { + + // operator failed on this chunk. + log.error("Error: Canceling query: queryId=" + queryId + ",bopId=" + + msg.bopId + ",partitionId=" + msg.partitionId, msg.cause); + + query.future.halt(msg.cause); + + query.cancel(true/* mayInterruptIfRunning */); + + } else if (totalRunningTaskCount == 0 && totalAvailableChunkCount == 0) { + + // success (all done). + if (log.isTraceEnabled()) + log.trace("success: queryId=" + queryId); + + query.future.halt(query.getStats()); + + query.cancel(true/* mayInterruptIfRunning */); + + } else if (deadline < System.currentTimeMillis()) { + + if (log.isTraceEnabled()) + log.trace("expired: queryId=" + queryId + ", deadline=" + + deadline); + + query.future.halt(new TimeoutException()); + + query.cancel(true/* mayInterruptIfRunning */); + + } + return isDone; + } + + /** + * Return <code>true</code> the specified operator can no longer be + * triggered by the query. The specific criteria are that no operators which + * are descendants of the specified operator are running or have chunks + * available against which they could run. 
Under those conditions it is not + * possible for a chunk to show up which would cause the operator to be + * executed. + * + * @param bopId + * Some operator identifier. + * + * @return <code>true</code> if the operator can not be triggered given the + * current query activity. + * + * @throws IllegalMonitorStateException + * unless the {@link #runStateLock} is held by the caller. + */ + protected boolean isOperatorDone(final int bopId) { + + return PipelineUtility.isDone(bopId, query.getQuery(), query.bopIndex, + runningTaskCountMap, availableChunkCountMap); + + } + + /* + * Human readable representations of the query run state. + */ + + /** + * Human readable summary of the current {@link RunState}. + *<p> + * Note: You must holding the lock guarding the {@link RunState} to + * guarantee that will return a consistent representation. + */ + public String toString() { + + final StringBuilder sb = new StringBuilder(); + + sb.append(getClass().getName()); + sb.append("{nsteps=" + nsteps); + sb.append(",totalRunningTaskCount=" + totalRunningTaskCount); + sb.append(",totalAvailableTaskCount=" + totalAvailableChunkCount); + sb.append("}"); + + return sb.toString(); + + } + + private String getTableHeader() { + + final StringBuilder sb = new StringBuilder(); + + final Integer[] bopIds = query.bopIndex.keySet() + .toArray(new Integer[0]); + + Arrays.sort(bopIds); + + // header 2. + sb.append("step\tlabel\tshardId\tfanIO\tavail\trun"); + + for (int i = 0; i < bopIds.length; i++) { + + final Integer id = bopIds[i]; + + sb.append("\trun#" + id + "\tavail#" + id); + + } + + sb.append("\tserviceId"); + + sb.append('\n'); + + return sb.toString(); + + } + + /** + * Return a tabular representation of the query {@link RunState}. + *<p> + * Note: You must holding the lock guarding the {@link RunState} to + * guarantee that will return a consistent representation. + * + * @param label + * The state change level (startQ, startOp, haltOp). 
+ * @param serviceId + * The node on which the operator is/was executed. + * @param shardId + * The index partition against which the operator was running and + * <code>-1</code> if the operator was not evaluated against a + * specific index partition. + * @param + * @param fanIO + * The fanIn (startQ,startOp) or fanOut (haltOp). + */ + private String getTableRow(final String label, final UUID serviceId, + final int shardId, final int fanIO) { + + final StringBuilder sb = new StringBuilder(); + + sb.append(Long.toString(nsteps)); + sb.append('\t'); + sb.append(label); + sb.append('\t'); + sb.append(Integer.toString(shardId)); + sb.append('\t'); + sb.append(Integer.toString(fanIO)); + sb.append('\t'); + sb.append(Long.toString(totalAvailableChunkCount)); + sb.append('\t'); + sb.append(Long.toString(totalRunningTaskCount)); + + final Integer[] bopIds = query.bopIndex.keySet() + .toArray(new Integer[0]); + + Arrays.sort(bopIds); + + for (int i = 0; i < bopIds.length; i++) { + + final Integer id = bopIds[i]; + + final AtomicLong nrunning = runningTaskCountMap.get(id); + + final AtomicLong navailable = availableChunkCountMap.get(id); + + sb.append("\t" + (navailable == null ? "N/A" : navailable.get())); + + sb.append("\t" + (nrunning == null ? "N/A" : nrunning.get())); + + } + + // Note: At the end to keep the table pretty. Will be null unless s/o. + sb.append('\t'); + sb.append(serviceId == null ? 
"N/A" : serviceId.toString()); + + sb.append('\n'); + + return sb.toString(); + + } + +} // class RunState Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -28,10 +28,7 @@ package com.bigdata.bop.engine; import java.rmi.RemoteException; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; import java.util.Map; -import java.util.Set; import java.util.UUID; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; @@ -81,8 +78,10 @@ /** * The run state of the query and the result of the computation iff it * completes execution normally (without being interrupted, cancelled, etc). + * <p> + * Note: Package private in order to expose this field to {@link RunState}. */ - final private Haltable<Map<Integer,BOpStats>> future = new Haltable<Map<Integer,BOpStats>>(); + final /*private*/ Haltable<Map<Integer,BOpStats>> future = new Haltable<Map<Integer,BOpStats>>(); /** * The runtime statistics for each {@link BOp} in the query and @@ -96,7 +95,7 @@ final private QueryEngine queryEngine; /** The unique identifier for this query. */ - final private long queryId; + final private UUID queryId; /** * The query deadline. 
The value is the system clock time in milliseconds @@ -149,8 +148,8 @@ private final ConcurrentHashMap<BSBundle, Future<?>> operatorFutures = new ConcurrentHashMap<BSBundle, Future<?>>(); /** - * A lock guarding {@link RunState#runningTaskCount}, - * {@link RunState#availableChunkCount}, + * A lock guarding {@link RunState#totalRunningTaskCount}, + * {@link RunState#totalAvailableChunkCount}, * {@link RunState#availableChunkCountMap}. This is <code>null</code> unless * this is the query controller. * @@ -209,6 +208,19 @@ } /** + * Return the query deadline (the time at which it will terminate regardless + * of its run state). + * + * @return The query deadline (milliseconds since the epoch) and + * {@link Long#MAX_VALUE} if no explicit deadline was specified. + */ + public long getDeadline() { + + return deadline.get(); + + } + + /** * The class executing the query on this node. */ public QueryEngine getQueryEngine() { @@ -233,7 +245,7 @@ /** * The unique identifier for this query. */ - public long getQueryId() { + public UUID getQueryId() { return queryId; @@ -283,15 +295,16 @@ * {@link ITx#UNISOLATED} nor a read-write transaction * identifier. */ - public RunningQuery(final QueryEngine queryEngine, final long queryId, -// final long begin, - final boolean controller, - final IQueryClient clientProxy, final BindingSetPipelineOp query - ) { + public RunningQuery(final QueryEngine queryEngine, final UUID queryId, + final boolean controller, final IQueryClient clientProxy, + final BindingSetPipelineOp query) { if (queryEngine == null) throw new IllegalArgumentException(); + if (queryId == null) + throw new IllegalArgumentException(); + if (clientProxy == null) throw new IllegalArgumentException(); @@ -392,6 +405,12 @@ if (!msg.isMaterialized()) throw new IllegalStateException(); + if (isCancelled()) + throw new IllegalStateException("Cancelled"); + + if (isDone()) + throw new IllegalStateException("Done"); + // verify still running. 
future.halted(); @@ -399,252 +418,11 @@ chunksIn.add(msg); if (log.isDebugEnabled()) - log.debug("queryId=" + queryId + ", chunksIn.size()=" - + chunksIn.size() + ", msg=" + msg); + log.debug("chunksIn.size()=" + chunksIn.size() + ", msg=" + msg); } /** - * The run state for the query. - */ - static private class RunState { - - /** - * The query. - */ - private final RunningQuery query; - - /** - * The query identifier. - */ - private final long queryId; - - /** - * The #of tasks for this query which have started but not yet halted - * and ZERO (0) if this is not the query coordinator. - * <p> - * This is guarded by the {@link #runningStateLock}. - */ - private long runningTaskCount = 0; - - /** - * The #of chunks for this query of which a running task has made - * available but which have not yet been accepted for processing by - * another task and ZERO (0) if this is not the query coordinator. - * <p> - * This is guarded by the {@link #runningStateLock}. - */ - private long availableChunkCount = 0; - - /** - * A map reporting the #of chunks available for each operator in the - * pipeline (we only report chunks for pipeline operators). The total - * #of chunks available across all operators in the pipeline is reported - * by {@link #availableChunkCount}. - * <p> - * The movement of the intermediate binding set chunks forms an acyclic - * directed graph. This map is used to track the #of chunks available - * for each bop in the pipeline. When a bop has no more incoming chunks, - * we send an asynchronous message to all nodes on which that bop had - * executed informing the {@link QueryEngine} on that node that it - * should immediately release all resources associated with that bop. - * <p> - * This is guarded by the {@link #runningStateLock}. 
- */ - private final Map<Integer/* bopId */, AtomicLong/* availableChunkCount */> availableChunkCountMap = new LinkedHashMap<Integer, AtomicLong>(); - - /** - * A collection reporting on the #of instances of a given {@link BOp} - * which are concurrently executing. - * <p> - * This is guarded by the {@link #runningStateLock}. - */ - private final Map<Integer/* bopId */, AtomicLong/* runningCount */> runningCountMap = new LinkedHashMap<Integer, AtomicLong>(); - - /** - * A collection of the operators which have executed at least once. - * <p> - * This is guarded by the {@link #runningStateLock}. - */ - private final Set<Integer/* bopId */> startedSet = new LinkedHashSet<Integer>(); - - public RunState(final RunningQuery query) { - - this.query = query; - - this.queryId = query.queryId; - - } - - public void startQuery(final IChunkMessage<?> msg) { - - query.lifeCycleSetUpQuery(); - - final Integer bopId = Integer.valueOf(msg.getBOpId()); - - availableChunkCount++; - { - AtomicLong n = availableChunkCountMap.get(bopId); - if (n == null) - availableChunkCountMap.put(bopId, n = new AtomicLong()); - n.incrementAndGet(); - } - - if (log.isInfoEnabled()) - log.info("queryId=" + queryId + ",runningTaskCount=" - + runningTaskCount + ",availableChunks=" - + availableChunkCount); - - System.err.println("startQ : bopId=" + bopId + ",running=" - + runningTaskCount + ",available=" + availableChunkCount); - - } - - public void startOp(final StartOpMessage msg) { - - final Integer bopId = Integer.valueOf(msg.bopId); - - runningTaskCount++; - { - AtomicLong n = runningCountMap.get(bopId); - if (n == null) - runningCountMap.put(bopId, n = new AtomicLong()); - n.incrementAndGet(); - if (startedSet.add(bopId)) { - // first evaluation pass for this operator. 
- query.lifeCycleSetUpOperator(bopId); - } - } - - availableChunkCount -= msg.nchunks; - - { - AtomicLong n = availableChunkCountMap.get(bopId); - if (n == null) - throw new AssertionError(); - n.addAndGet(-msg.nchunks); - } - - System.err.println("startOp: bopId=" + bopId + ",running=" - + runningTaskCount + ",available=" + availableChunkCount - + ",fanIn=" + msg.nchunks); - - // check deadline. - if (query.deadline.get() < System.currentTimeMillis()) { - - if (log.isTraceEnabled()) - log.trace("expired: queryId=" + queryId + ", deadline=" - + query.deadline); - - query.future.halt(new TimeoutException()); - - query.cancel(true/* mayInterruptIfRunning */); - - } - - } - - /** - * Update termination criteria counters. - */ - public void haltOp(final HaltOpMessage msg) { - - // chunks generated by this task. - final int fanOut = msg.sinkChunksOut + msg.altSinkChunksOut; - availableChunkCount += fanOut; - if (msg.sinkId != null) { - AtomicLong n = availableChunkCountMap.get(msg.sinkId); - if (n == null) - availableChunkCountMap - .put(msg.sinkId, n = new AtomicLong()); - n.addAndGet(msg.sinkChunksOut); - } - if (msg.altSinkId != null) { - AtomicLong n = availableChunkCountMap.get(msg.altSinkId); - if (n == null) - availableChunkCountMap.put(msg.altSinkId, - n = new AtomicLong()); - n.addAndGet(msg.altSinkChunksOut); - } - // one less task is running. - runningTaskCount--; - { - final AtomicLong n = runningCountMap.get(msg.bopId); - if (n == null) - throw new AssertionError(); - n.decrementAndGet(); - } - // Figure out if this operator is done. - if (isOperatorDone(msg.bopId)) { - /* - * No more chunks can appear for this operator so invoke its end - * of life cycle hook. 
- */ - query.lifeCycleTearDownOperator(msg.bopId); - } - System.err.println("haltOp : bopId=" + msg.bopId + ",running=" - + runningTaskCount + ",available=" + availableChunkCount - + ",fanOut=" + fanOut); - assert runningTaskCount >= 0 : "runningTaskCount=" - + runningTaskCount; - assert availableChunkCount >= 0 : "availableChunkCount=" - + availableChunkCount; - if (log.isTraceEnabled()) - log.trace("bopId=" + msg.bopId + ",partitionId=" - + msg.partitionId + ",serviceId=" - + query.queryEngine.getServiceUUID() + ", nchunks=" - + fanOut + " : runningTaskCount=" + runningTaskCount - + ", availableChunkCount=" + availableChunkCount); - // test termination criteria - if (msg.cause != null) { - // operator failed on this chunk. - log.error("Error: Canceling query: queryId=" + queryId - + ",bopId=" + msg.bopId + ",partitionId=" - + msg.partitionId, msg.cause); - query.future.halt(msg.cause); - query.cancel(true/* mayInterruptIfRunning */); - } else if (runningTaskCount == 0 && availableChunkCount == 0) { - // success (all done). - if (log.isTraceEnabled()) - log.trace("success: queryId=" + queryId); - query.future.halt(query.getStats()); - query.cancel(true/* mayInterruptIfRunning */); - } else if (query.deadline.get() < System.currentTimeMillis()) { - if (log.isTraceEnabled()) - log.trace("expired: queryId=" + queryId + ", deadline=" - + query.deadline); - query.future.halt(new TimeoutException()); - query.cancel(true/* mayInterruptIfRunning */); - } - } - - /** - * Return <code>true</code> the specified operator can no longer be - * triggered by the query. The specific criteria are that no operators - * which are descendants of the specified operator are running or have - * chunks available against which they could run. Under those conditions - * it is not possible for a chunk to show up which would cause the - * operator to be executed. - * - * @param bopId - * Some operator identifier. 
- * - * @return <code>true</code> if the operator can not be triggered given - * the current query activity. - * - * @throws IllegalMonitorStateException - * unless the {@link #runStateLock} is held by the caller. - */ - protected boolean isOperatorDone(final int bopId) { - - return PipelineUtility.isDone(bopId, query.getQuery(), - query.bopIndex, runningCountMap, availableChunkCountMap); - - } - - } // class RunState - - /** * Invoked once by the query controller with the initial * {@link IChunkMessage} which gets the query moving. */ @@ -656,17 +434,17 @@ if (msg == null) throw new IllegalArgumentException(); - if (msg.getQueryId() != queryId) // @todo equals() if queryId is UUID. + if (!queryId.equals(msg.getQueryId())) throw new IllegalArgumentException(); runStateLock.lock(); try { - + + lifeCycleSetUpQuery(); + runState.startQuery(msg); - queryEngine.acceptChunk(msg); - } finally { runStateLock.unlock(); @@ -693,8 +471,9 @@ try { - runState.startOp(msg); - + if (runState.startOp(msg)) + lifeCycleSetUpOperator(msg.bopId); + } finally { runStateLock.unlock(); @@ -729,7 +508,16 @@ try { - runState.haltOp(msg); + if (runState.haltOp(msg)) { + + /* + * No more chunks can appear for this operator so invoke its end + * of life cycle hook. 
+ */ + + lifeCycleTearDownOperator(msg.bopId); + + } } finally { @@ -996,19 +784,49 @@ altSinkChunksOut += handleOutputChunk(altSinkId, altSink); } - clientProxy.haltOp(new HaltOpMessage(queryId, bopId, - partitionId, serviceId, null/* cause */, - sinkId, sinkChunksOut, altSinkId, - altSinkChunksOut, context.getStats())); + final HaltOpMessage msg = new HaltOpMessage(queryId, bopId, + partitionId, serviceId, null/* cause */, sinkId, + sinkChunksOut, altSinkId, altSinkChunksOut, context + .getStats()); + clientProxy.haltOp(msg); } catch (Throwable t) { - try { - clientProxy.haltOp(new HaltOpMessage(queryId, - bopId, partitionId, serviceId, - t/* cause */, sinkId, sinkChunksOut, altSinkId, - altSinkChunksOut, context.getStats())); - } catch (RemoteException e) { - cancel(true/* mayInterruptIfRunning */); - log.error("queryId=" + queryId + ", bopId=" + bopId, e); + /* + * Mark the query as halted on this node regardless of whether + * we are able to communicate with the query controller. + * + * Note: Invoking halt(t) here will log an error. This logged + * error message is necessary in order to catch errors in + * clientProxy.haltOp() (above and below). + */ + // Note: uncomment if paranoid about masked errors after the 1st reported error. +// log.error("queryId=" + queryId + ", bopId=" + bopId, t); + + if (t == future.halt(t)) { + /* + * Send the halt message to the query controller. + * + * Note: Since the exception return from halt(t) is our + * exception, we are responsible for communicating this + * exception to the query controller. If that message does + * not arrive then the query controller will not know that + * we have terminated the query. This can result in a long + * running query which must be explicitly cancelled on the + * query controller. + * + * @todo if we are unable to send the message to the query + * controller then we could retry each time an error is + * thrown for this query. 
+ */ + final HaltOpMessage msg = new HaltOpMessage(queryId, bopId, + partitionId, serviceId, t/* cause */, sinkId, + sinkChunksOut, altSinkId, altSinkChunksOut, context + .getStats()); + try { + clientProxy.haltOp(msg); + } catch (RemoteException e) { + cancel(true/* mayInterruptIfRunning */); + log.error("queryId=" + queryId + ", bopId=" + bopId, e); + } } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StartOpMessage.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StartOpMessage.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StartOpMessage.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -22,7 +22,7 @@ private static final long serialVersionUID = 1L; /** The query identifier. */ - final public long queryId; + final public UUID queryId; /** The operator identifier. */ final public int bopId; @@ -39,7 +39,7 @@ */ final public int nchunks; - public StartOpMessage(final long queryId, final int opId, + public StartOpMessage(final UUID queryId, final int opId, final int partitionId, final UUID serviceId, final int nchunks) { this.queryId = queryId; this.bopId = opId; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2010-09-15 14:30:14 UTC (rev 3556) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java 2010-09-15 15:54:52 UTC (rev 3557) @@ -70,6 +70,11 @@ .getLogger(FederatedQueryEngine.class); /** + * The {@link UUID} associated with this service. + */ + private final UUID serviceUUID; + + /** * The {@link IBigdataFederation} iff running in scale-out. 
* <p> * Note: The {@link IBigdataFederation} is required in scale-out in order to @@ -99,7 +104,7 @@ @Override public UUID getServiceUUID() { - return fed.getServiceUUID(); + return serviceUUID; } @@ -127,7 +132,7 @@ * {@inheritDoc} */ @Override - protected FederatedRunningQuery getRunningQuery(final long queryId) { + protected FederatedRunningQuery getRunningQuery(final UUID queryId) { return (FederatedRunningQuery) super.getRunningQuery(queryId); @@ -147,10 +152,10 @@ */ public FederatedQueryEngine(final DataService dataService) { - this(dataService.getFederation(), + this(dataService.getServiceUUID(), dataService.getFederation(), new DelegateIndexManager(dataService), dataService .getResourceManager().getResourceService()); - + } /** @@ -164,6 +169,7 @@ * @param resourceService */ public FederatedQueryEngine(// + final UUID thisService, final IBigdataFederation<?> fed,// final IIndexManager indexManager,// final ManagedResourceService resourceService// @@ -179,6 +185,8 @@ this.fed = fed; + this.serviceUUID = thisService; + this.resourceService = resourceService; } @@ -277,6 +285,7 @@ if(!accept(msg)) { if(log.isDebugEnabled()) log.debug("dropping: " + msg); + continue; } if(log.isDebugEnabled()) log.debug("accepted: " + msg); @@ -287,7 +296,7 @@ * etc. */ FederatedQueryEngine.this - .bufferReady((IChunkMessage) msg); + .acceptChunk((IChunkMessage) msg); } catch(Throwable t) { if(InnerCause.isInnerCause(t, InterruptedException.class)) { log.warn("Interrupted."); @@ -318,7 +327,7 @@ */ private boolean accept(final IChunkMessage<?> msg) throws RemoteException { - final long queryId = msg.getQueryId(); + final UUID queryId = msg.getQueryId(); // lookup query by id. 
FederatedRunningQuery q = getRunningQuery(queryId); @@ -385,7 +394,7 @@ public void declareQuery(final IQueryDecl queryDecl) { - final long queryId = queryDecl.getQueryId(); + final UUID queryId = queryDecl.getQueryId(); putRunningQuery(queryId, newRunningQuery(this, queryId, false/* controller */, queryDecl.getQueryController(), @@ -411,7 +420,7 @@ */ @Override protected FederatedRunningQuery newRunningQuery( - final QueryEngine queryEngine, final long queryId, + final QueryEngine queryEngine, final UUID queryId, final boolean controller, final IQueryC... [truncated message content] |
From: <tho...@us...> - 2010-09-15 20:04:12
|
Revision: 3559 http://bigdata.svn.sourceforge.net/bigdata/?rev=3559&view=rev Author: thompsonbry Date: 2010-09-15 20:04:06 +0000 (Wed, 15 Sep 2010) Log Message: ----------- Cleaning up System.err usage. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-09-15 19:52:00 UTC (rev 3558) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-09-15 20:04:06 UTC (rev 3559) @@ -538,8 +538,8 @@ */ protected void lifeCycleSetUpOperator(final int bopId) { - System.err.println("lifeCycleSetUpOperator: queryId=" + queryId - + ", bopId=" + bopId); + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId + ", bopId=" + bopId); } @@ -556,8 +556,8 @@ */ protected void lifeCycleTearDownOperator(final int bopId) { - System.err.println("lifeCycleTearDownOperator: queryId=" + queryId - + ", bopId=" + bopId); + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId + ", bopId=" + bopId); } @@ -567,7 +567,8 @@ */ protected void lifeCycleSetUpQuery() { - System.err.println("lifeCycleSetUpQuery: queryId=" + queryId); + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId); } @@ -577,7 +578,8 @@ */ protected void lifeCycleTearDownQuery() { - System.err.println("lifeCycleTearDownQuery: queryId=" + queryId); + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId); } Modified: 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-09-15 19:52:00 UTC (rev 3558) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 2010-09-15 20:04:06 UTC (rev 3559) @@ -224,14 +224,16 @@ // validate the stats map. assertNotNull(statsMap); assertEquals(1, statsMap.size()); - System.err.println(statsMap.toString()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); } // validate the query solution stats. { final BOpStats stats = statsMap.get(startId); assertNotNull(stats); - System.err.println(stats.toString()); + if (log.isInfoEnabled()) + log.info(stats.toString()); // query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -314,14 +316,16 @@ // validate the stats map. assertNotNull(statsMap); assertEquals(2, statsMap.size()); - System.err.println(statsMap.toString()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); } // validate the stats for the start operator. { final BOpStats stats = statsMap.get(startId); assertNotNull(stats); - System.err.println("start: "+stats.toString()); + if (log.isInfoEnabled()) + log.info("start: "+stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -330,24 +334,12 @@ assertEquals(1L, stats.chunksOut.get()); } -// // validate the stats for the access path. -// { -// final BOpStats stats = statsMap.get(predId); -// assertNotNull(stats); -// System.err.println("pred : "+stats.toString()); -// -// // verify query solution stats details. -// assertEquals(1L, stats.chunksIn.get()); -// assertEquals(1L, stats.unitsIn.get()); -// assertEquals(1L, stats.unitsOut.get()); -// assertEquals(1L, stats.chunksOut.get()); -// } - // validate the stats for the join operator. 
{ final BOpStats stats = statsMap.get(joinId); assertNotNull(stats); - System.err.println("join : "+stats.toString()); + if (log.isInfoEnabled()) + log.info("join : "+stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -525,14 +517,16 @@ // validate the stats map. assertNotNull(statsMap); assertEquals(3, statsMap.size()); - System.err.println(statsMap.toString()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); } // validate the stats for the start operator. { final BOpStats stats = statsMap.get(startId); assertNotNull(stats); - System.err.println("start: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("start: " + stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -541,24 +535,12 @@ assertEquals(1L, stats.chunksOut.get()); } - // // validate the stats for the access path. - // { - // final BOpStats stats = statsMap.get(predId); - // assertNotNull(stats); - // System.err.println("pred : "+stats.toString()); - // - // // verify query solution stats details. - // assertEquals(1L, stats.chunksIn.get()); - // assertEquals(1L, stats.unitsIn.get()); - // assertEquals(1L, stats.unitsOut.get()); - // assertEquals(1L, stats.chunksOut.get()); - // } - // validate the stats for the 1st join operator. { final BOpStats stats = statsMap.get(joinId1); assertNotNull(stats); - System.err.println("join1: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("join1: " + stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -571,7 +553,8 @@ { final BOpStats stats = statsMap.get(joinId2); assertNotNull(stats); - System.err.println("join2: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("join2: " + stats.toString()); // verify query solution stats details. 
assertEquals(1L, stats.chunksIn.get()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2010-09-15 19:52:00 UTC (rev 3558) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java 2010-09-15 20:04:06 UTC (rev 3559) @@ -368,14 +368,16 @@ // validate the stats map. assertNotNull(statsMap); assertEquals(1, statsMap.size()); - System.err.println(statsMap.toString()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); } // validate the query solution stats. { final BOpStats stats = statsMap.get(startId); assertNotNull(stats); - System.err.println(stats.toString()); + if (log.isInfoEnabled()) + log.info(stats.toString()); // query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -475,14 +477,16 @@ // validate the stats map. assertNotNull(statsMap); assertEquals(2, statsMap.size()); - System.err.println(statsMap.toString()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); } // validate the stats for the start operator. { final BOpStats stats = statsMap.get(startId); assertNotNull(stats); - System.err.println("start: "+stats.toString()); + if (log.isInfoEnabled()) + log.info("start: "+stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -491,24 +495,12 @@ assertEquals(1L, stats.chunksOut.get()); } -// // validate the stats for the access path. -// { -// final BOpStats stats = statsMap.get(predId); -// assertNotNull(stats); -// System.err.println("pred : "+stats.toString()); -// -// // verify query solution stats details. 
-// assertEquals(1L, stats.chunksIn.get()); -// assertEquals(1L, stats.unitsIn.get()); -// assertEquals(1L, stats.unitsOut.get()); -// assertEquals(1L, stats.chunksOut.get()); -// } - // validate the stats for the join operator. { final BOpStats stats = statsMap.get(joinId); assertNotNull(stats); - System.err.println("join : "+stats.toString()); + if (log.isInfoEnabled()) + log.info("join : "+stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -698,14 +690,16 @@ // validate the stats map. assertNotNull(statsMap); assertEquals(3, statsMap.size()); - System.err.println(statsMap.toString()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); } // validate the stats for the start operator. { final BOpStats stats = statsMap.get(startId); assertNotNull(stats); - System.err.println("start: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("start: " + stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -714,24 +708,12 @@ assertEquals(1L, stats.chunksOut.get()); } - // // validate the stats for the access path. - // { - // final BOpStats stats = statsMap.get(predId); - // assertNotNull(stats); - // System.err.println("pred : "+stats.toString()); - // - // // verify query solution stats details. - // assertEquals(1L, stats.chunksIn.get()); - // assertEquals(1L, stats.unitsIn.get()); - // assertEquals(1L, stats.unitsOut.get()); - // assertEquals(1L, stats.chunksOut.get()); - // } - // validate the stats for the 1st join operator. { final BOpStats stats = statsMap.get(joinId1); assertNotNull(stats); - System.err.println("join1: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("join1: " + stats.toString()); // verify query solution stats details. 
assertEquals(1L, stats.chunksIn.get()); @@ -744,7 +726,8 @@ { final BOpStats stats = statsMap.get(joinId2); assertNotNull(stats); - System.err.println("join2: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("join2: " + stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); // @todo depends on where the shards are. @@ -757,7 +740,8 @@ { final BOpStats stats = statsMap.get(sliceId); assertNotNull(stats); - System.err.println("slice: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("slice: " + stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); // @todo? Modified: branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java 2010-09-15 19:52:00 UTC (rev 3558) +++ branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java 2010-09-15 20:04:06 UTC (rev 3559) @@ -407,14 +407,16 @@ // validate the stats map. assertNotNull(statsMap); assertEquals(1, statsMap.size()); - System.err.println(statsMap.toString()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); } // validate the query solution stats. { final BOpStats stats = statsMap.get(startId); assertNotNull(stats); - System.err.println(stats.toString()); + if (log.isInfoEnabled()) + log.info(stats.toString()); // query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -514,14 +516,16 @@ // validate the stats map. assertNotNull(statsMap); assertEquals(2, statsMap.size()); - System.err.println(statsMap.toString()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); } // validate the stats for the start operator. 
{ final BOpStats stats = statsMap.get(startId); assertNotNull(stats); - System.err.println("start: "+stats.toString()); + if (log.isInfoEnabled()) + log.info("start: "+stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -530,24 +534,12 @@ assertEquals(1L, stats.chunksOut.get()); } -// // validate the stats for the access path. -// { -// final BOpStats stats = statsMap.get(predId); -// assertNotNull(stats); -// System.err.println("pred : "+stats.toString()); -// -// // verify query solution stats details. -// assertEquals(1L, stats.chunksIn.get()); -// assertEquals(1L, stats.unitsIn.get()); -// assertEquals(1L, stats.unitsOut.get()); -// assertEquals(1L, stats.chunksOut.get()); -// } - // validate the stats for the join operator. { final BOpStats stats = statsMap.get(joinId); assertNotNull(stats); - System.err.println("join : "+stats.toString()); + if (log.isInfoEnabled()) + log.info("join : "+stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -737,14 +729,16 @@ // validate the stats map. assertNotNull(statsMap); assertEquals(3, statsMap.size()); - System.err.println(statsMap.toString()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); } // validate the stats for the start operator. { final BOpStats stats = statsMap.get(startId); assertNotNull(stats); - System.err.println("start: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("start: " + stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -753,24 +747,12 @@ assertEquals(1L, stats.chunksOut.get()); } - // // validate the stats for the access path. - // { - // final BOpStats stats = statsMap.get(predId); - // assertNotNull(stats); - // System.err.println("pred : "+stats.toString()); - // - // // verify query solution stats details. 
- // assertEquals(1L, stats.chunksIn.get()); - // assertEquals(1L, stats.unitsIn.get()); - // assertEquals(1L, stats.unitsOut.get()); - // assertEquals(1L, stats.chunksOut.get()); - // } - // validate the stats for the 1st join operator. { final BOpStats stats = statsMap.get(joinId1); assertNotNull(stats); - System.err.println("join1: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("join1: " + stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); @@ -783,7 +765,8 @@ { final BOpStats stats = statsMap.get(joinId2); assertNotNull(stats); - System.err.println("join2: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("join2: " + stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); // @todo depends on where the shards are. @@ -796,7 +779,8 @@ { final BOpStats stats = statsMap.get(sliceId); assertNotNull(stats); - System.err.println("slice: " + stats.toString()); + if (log.isInfoEnabled()) + log.info("slice: " + stats.toString()); // verify query solution stats details. assertEquals(1L, stats.chunksIn.get()); // @todo? This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-09-15 21:43:36
|
Revision: 3561 http://bigdata.svn.sourceforge.net/bigdata/?rev=3561&view=rev Author: mrpersonick Date: 2010-09-15 21:43:28 +0000 (Wed, 15 Sep 2010) Log Message: ----------- adding Sesame to BOp conversion Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultEvaluationPlan2.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataTripleSource.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/ChunkedArraysIterator.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BigdataBindingSetResolverator.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java 2010-09-15 20:45:14 UTC (rev 3560) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/Var.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -2,7 +2,6 @@ import java.io.ObjectStreamException; import java.util.UUID; - import com.bigdata.cache.ConcurrentWeakValueCache; import com.bigdata.relation.rule.Rule; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-09-15 20:45:14 UTC (rev 3560) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -365,6 +365,16 @@ } + public Predicate<E> setBOpId(final int bopId) { + + final Predicate<E> tmp = this.clone(); + + tmp.annotations.put(Annotations.BOP_ID, bopId); + + return tmp; + + } + public String toString() { return toString(null/* bindingSet */); @@ -377,6 +387,8 @@ final StringBuilder sb = new StringBuilder(); + sb.append(getClass().getName()); + sb.append("("); for (int i = 0; i < arity; i++) { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-09-15 20:45:14 UTC (rev 3560) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -27,7 +27,19 @@ package com.bigdata.bop.engine; +import java.util.Iterator; +import java.util.List; import com.bigdata.bop.BOp; +import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.IPredicate; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.NV; +import com.bigdata.bop.Var; +import com.bigdata.bop.ap.E; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.bset.CopyBindingSetOp; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.journal.ITx; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.relation.rule.IProgram; import com.bigdata.relation.rule.IRule; @@ -60,10 +72,15 @@ * * @return */ - public static BOp convert(final IStep step) { + public static BindingSetPipelineOp convert(final IStep step) { + if (step instanceof Rule) + return convert((Rule) step); + else if (step 
instanceof Program) + return convert((Program) step); + throw new UnsupportedOperationException(); - + } /** @@ -73,12 +90,71 @@ * * @return */ - public static BOp convert(final Rule rule) { + public static BindingSetPipelineOp convert(final Rule rule) { - throw new UnsupportedOperationException(); + int bopId = 1; + + BindingSetPipelineOp left = new CopyBindingSetOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, bopId++),// + })); + + Iterator<Predicate> tails = rule.getTail(); + + while (tails.hasNext()) { + + final int joinId = bopId++; + + final Predicate<?> pred = tails.next().setBOpId(bopId++); + + System.err.println(pred); + + final BindingSetPipelineOp joinOp = new PipelineJoin<E>(// + left, pred,// + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, joinId),// + })); + + left = joinOp; + + } + + System.err.println(toString(left)); + + return left; + + } + + private static String toString(BOp bop) { + + StringBuilder sb = new StringBuilder(); + + toString(bop, sb, 0); + + // chop off the last \n + sb.setLength(sb.length()-1); + + return sb.toString(); + + } + + private static void toString(final BOp bop, final StringBuilder sb, + final int indent) { + + for (int i = 0; i < indent; i++) { + sb.append(' '); + } + sb.append(bop).append('\n'); + if (bop != null) { + List<BOp> args = bop.args(); + for (BOp arg : args) { + toString(arg, sb, indent+4); + } + } + } - + /** * Convert a program into an operator tree. 
* @@ -86,7 +162,7 @@ * * @return */ - public static BOp convert(final Program program) { + public static BindingSetPipelineOp convert(final Program program) { throw new UnsupportedOperationException(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultEvaluationPlan2.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultEvaluationPlan2.java 2010-09-15 20:45:14 UTC (rev 3560) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/DefaultEvaluationPlan2.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -31,9 +31,7 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; - import org.apache.log4j.Logger; - import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.journal.ITx; @@ -64,7 +62,7 @@ * @todo not serializable but used by {@link #rangeCount(int)}, which is a * problem. */ - private final IJoinNexus joinNexus; + private final IRangeCountFactory rangeCountFactory; private final IRule rule; @@ -145,15 +143,31 @@ * @param rule * The rule. */ - public DefaultEvaluationPlan2(IJoinNexus joinNexus, IRule rule) { + public DefaultEvaluationPlan2(final IJoinNexus joinNexus, + final IRule rule) { - if (joinNexus == null) + this(joinNexus.getRangeCountFactory(), rule); + + } + + /** + * Computes an evaluation plan for the rule. + * + * @param rangeCountFactory + * The range count factory. + * @param rule + * The rule. 
+ */ + public DefaultEvaluationPlan2(final IRangeCountFactory rangeCountFactory, + final IRule rule) { + + if (rangeCountFactory == null) throw new IllegalArgumentException(); if (rule == null) throw new IllegalArgumentException(); - this.joinNexus = joinNexus; + this.rangeCountFactory = rangeCountFactory; this.rule = rule; @@ -439,7 +453,7 @@ } - final long rangeCount = joinNexus.getRangeCountFactory() + final long rangeCount = rangeCountFactory .rangeCount(rule.getTail(tailIndex)); this.rangeCount[tailIndex] = rangeCount; Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/ChunkedArraysIterator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/ChunkedArraysIterator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/striterator/ChunkedArraysIterator.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -0,0 +1,309 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Oct 24, 2007 + */ + +package com.bigdata.striterator; + +import java.util.Arrays; +import java.util.NoSuchElementException; + +/** + * Fully buffered iterator. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: ChunkedArrayIterator.java 2265 2009-10-26 12:51:06Z thompsonbry $ + */ +public class ChunkedArraysIterator<E> implements IChunkedOrderedIterator<E> { + + private boolean open = true; + + /** buffer iterator. */ + private ICloseableIterator<E[]> bufferIt; + + /** current buffer. */ + private E[] buffer; + + /** The order of the elements in the buffer or <code>null</code> iff not known. */ + private final IKeyOrder<E> keyOrder; + + /** + * The index of the next entry in {@link #buffer} that will be returned by + * {@link #next()}. + */ + private int i = 0; + +// /** +// * The element most recently returned by {@link #next()}. +// */ +// private E current = null; + +// /** +// * The #of elements that this iterator buffered. +// */ +// public int getBufferCount() { +// +// return bufferCount; +// +// } + + /** + * An iterator that visits the elements in the given iterator of arrays. + * + * @param a + * The iterator of arrays of elements. + */ + public ChunkedArraysIterator(final ICloseableIterator<E[]> a) { + + this(a, null); + + } + + /** + * An iterator that visits the elements in the given iterator of arrays. + * + * @param a + * The iterator of arrays of elements. + * @param keyOrder + * The order of the elements in the buffer or <code>null</code> + * iff not known. 
+ */ + public ChunkedArraysIterator(final ICloseableIterator<E[]> a, + final IKeyOrder<E> keyOrder) { + + if (a == null) + throw new IllegalArgumentException(); + + this.bufferIt = a; + + this.keyOrder = keyOrder; + + } + + public boolean hasNext() { + + if(!open) return false; + + if (buffer == null) { + + return bufferIt.hasNext(); + + } +// else { +// +// assert i <= buffer.length; +// +// if (i == buffer.length) { +// +// return false; +// +// } +// +// } + + return true; + + } + + public E next() { + + if (!hasNext()) { + + throw new NoSuchElementException(); + + } + + if (buffer == null) { + + buffer = bufferIt.next(); + + } + + E e = buffer[i++]; + + if (i == buffer.length) { + + buffer = null; + + i = 0; + + } + + return e; + +// current = buffer[i++]; +// +// return current; + + } + + /** + * @throws UnsupportedOperationException + */ + public void remove() { + + throw new UnsupportedOperationException(); + + } + +// /** +// * Return the backing array. +// * +// * @see #getBufferCount() +// */ +// public E[] array() { +// +// assertOpen(); +// +// return buffer; +// +// } + + /** + * Returns the remaining statements. + * + * @throws NoSuchElementException + * if {@link #hasNext()} returns false. + */ + @SuppressWarnings("unchecked") + public E[] nextChunk() { + + if (!hasNext()) { + + throw new NoSuchElementException(); + + } + + final E[] ret; + + if (buffer == null) { + + /* + * We need to fetch the next buffer from the source iterator, and + * then we can just return it directly. + */ + buffer = bufferIt.next(); + + ret = buffer; + + } else if (i == 0) { + + /* + * Nothing has been returned to the caller by next() so we can just + * return the current buffer in this case. + */ + ret = buffer; + + } else { + + /* + * We have a buffer but we've already started return elements from + * it via next(), so we need to create a new buffer to return. 
+ */ + final int remaining = buffer.length - i; + + /* + * Dynamically instantiation an array of the same component type + * as the objects that we are visiting. + */ + + ret = (E[]) java.lang.reflect.Array.newInstance(buffer.getClass() + .getComponentType(), remaining); + + + System.arraycopy(buffer, i, ret, 0, remaining); + + } + + // reset the current buffer + + buffer = null; + + i = 0; + + return ret; + + } + + public IKeyOrder<E> getKeyOrder() { + + return keyOrder; + + } + + public E[] nextChunk(IKeyOrder<E> keyOrder) { + + if (keyOrder == null) + throw new IllegalArgumentException(); + + final E[] chunk = nextChunk(); + + if (!keyOrder.equals(getKeyOrder())) { + + // sort into the required order. + + Arrays.sort(chunk, 0, chunk.length, keyOrder.getComparator()); + + } + + return chunk; + + } + + /* + * Note: Do NOT eagerly close the iterator since the makes it impossible to + * implement {@link #remove()}. + */ + public void close() { + + if (!open) { + + // already closed. + + return; + + } + + bufferIt.close(); + + open = false; + + buffer = null; + + i = 0; + + } + +// private final void assertOpen() { +// +// if (!open) { +// +// throw new IllegalStateException(); +// +// } +// +// } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2010-09-15 20:45:14 UTC (rev 3560) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/internal/IVUtility.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -136,10 +136,8 @@ return iv1.compareTo(iv2); // otherwise we need to try to convert them into comparable numbers - final AbstractLiteralIV num1 = - (AbstractLiteralIV) iv1; - final AbstractLiteralIV num2 = - (AbstractLiteralIV) iv2; + final AbstractLiteralIV num1 = (AbstractLiteralIV) iv1; + final AbstractLiteralIV num2 = (AbstractLiteralIV) iv2; // 
if one's a BigDecimal we should use the BigDecimal comparator for both if (dte1 == DTE.XSDDecimal || dte2 == DTE.XSDDecimal) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java 2010-09-15 20:45:14 UTC (rev 3560) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/rules/RDFJoinNexus.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -605,7 +605,7 @@ } - final SPORelation r = (SPORelation) (IMutableRelation<?>) getRelation(); + final SPORelation r = (SPORelation) (IMutableRelation) getRelation(); /* * Use a thread pool to write out the statement and the Added: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BigdataBindingSetResolverator.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BigdataBindingSetResolverator.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BigdataBindingSetResolverator.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -0,0 +1,222 @@ +package com.bigdata.rdf.store; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; + +import org.openrdf.model.Value; + +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IVariable; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.relation.accesspath.BlockingBuffer; +import com.bigdata.relation.rule.eval.ISolution; +import com.bigdata.striterator.AbstractChunkedResolverator; +import com.bigdata.striterator.IChunkedOrderedIterator; + +/** + * Efficiently resolve term identifiers in 
Bigdata {@link ISolution}s to RDF + * {@link BigdataValue}s. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: BigdataSolutionResolverator.java 3448 2010-08-18 20:55:58Z thompsonbry $ + */ +public class BigdataBindingSetResolverator + extends + AbstractChunkedResolverator<IBindingSet, IBindingSet, AbstractTripleStore> { + + /** + * + * @param db + * Used to resolve term identifiers to {@link Value} objects. + * @param src + * The source iterator (will be closed when this iterator is + * closed). + * + * FIXME must accept reverse bnodes map (from term identifier to + * blank nodes) for resolution of blank nodes within a Sesame + * connection context. + */ + public BigdataBindingSetResolverator(final AbstractTripleStore db, + final IChunkedOrderedIterator<IBindingSet> src) { + + super(db, src, new BlockingBuffer<IBindingSet[]>( + db.getChunkOfChunksCapacity(), + db.getChunkCapacity(), + db.getChunkTimeout(), + TimeUnit.MILLISECONDS)); + + } + + /** + * Strengthens the return type. + */ + public BigdataBindingSetResolverator start(ExecutorService service) { + + return (BigdataBindingSetResolverator) super.start(service); + + } + + /** + * Resolve a chunk of {@link ISolution}s into a chunk of + * {@link IBindingSet}s in which term identifiers have been resolved to + * {@link BigdataValue}s. + */ + protected IBindingSet[] resolveChunk(final IBindingSet[] chunk) { + + if (log.isInfoEnabled()) + log.info("Fetched chunk: size=" + chunk.length); + + /* + * Create a collection of the distinct term identifiers used in this + * chunk. 
+ */ + + final Collection<IV> ids = new HashSet<IV>(chunk.length + * state.getSPOKeyArity()); + + for (IBindingSet solution : chunk) { + + final IBindingSet bindingSet = solution; + + assert bindingSet != null; + + final Iterator<Map.Entry<IVariable, IConstant>> itr = bindingSet + .iterator(); + + while (itr.hasNext()) { + + final Map.Entry<IVariable, IConstant> entry = itr.next(); + + final IV iv = (IV) entry.getValue().get(); + + if (iv == null) { + + throw new RuntimeException("NULL? : var=" + entry.getKey() + + ", " + bindingSet); + + } + + ids.add(iv); + + } + + } + + if (log.isInfoEnabled()) + log.info("Resolving " + ids.size() + " term identifiers"); + + // batch resolve term identifiers to terms. + final Map<IV, BigdataValue> terms = state.getLexiconRelation() + .getTerms(ids); + + /* + * Assemble a chunk of resolved elements. + */ + { + + final IBindingSet[] chunk2 = new IBindingSet[chunk.length]; + int i = 0; + for (IBindingSet e : chunk) { + + final IBindingSet f = getBindingSet(e, terms); + + chunk2[i++] = f; + + } + + // return the chunk of resolved elements. + return chunk2; + + } + + } + + /** + * Resolve the term identifiers in the {@link ISolution} using the map + * populated when we fetched the current chunk and return the + * {@link IBindingSet} for that solution in which term identifiers have been + * resolved to their corresponding {@link BigdataValue}s. + * + * @param solution + * A solution whose {@link Long}s will be interpreted as term + * identifiers and resolved to the corresponding + * {@link BigdataValue}s. + * + * @return The corresponding {@link IBindingSet} in which the term + * identifiers have been resolved to {@link BigdataValue}s. + * + * @throws IllegalStateException + * if the {@link IBindingSet} was not materialized with the + * {@link ISolution}. 
+ * + * @todo this points out a problem where we would be better off strongly + * typing the term identifiers with their own class rather than using + * {@link Long} since we can not distinguish a {@link Long} + * materialized by a join against some non-RDF relation from a + * {@link Long} that is a term identifier. + */ + private IBindingSet getBindingSet(final IBindingSet solution, + final Map<IV, BigdataValue> terms) { + + if (solution == null) + throw new IllegalArgumentException(); + + if (terms == null) + throw new IllegalArgumentException(); + + final IBindingSet bindingSet = solution; + + if(bindingSet == null) { + + throw new IllegalStateException("BindingSet was not materialized"); + + } + + final Iterator<Map.Entry<IVariable, IConstant>> itr = bindingSet + .iterator(); + + while (itr.hasNext()) { + + final Map.Entry<IVariable, IConstant> entry = itr.next(); + + final Object boundValue = entry.getValue().get(); + + if (!(boundValue instanceof IV)) { + + continue; + + } + + final IV iv = (IV) boundValue; + + final BigdataValue value = terms.get(iv); + + if (value == null) { + + throw new RuntimeException("Could not resolve termId=" + + iv); + } + + /* + * Replace the binding. + * + * FIXME This probably needs to strip out the BigdataSail#NULL_GRAPH + * since that should not become bound. 
+ */ + bindingSet.set(entry.getKey(), new Constant<BigdataValue>( + value)); + + } + + return bindingSet; + + } + +} Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-09-15 20:45:14 UTC (rev 3560) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -13,6 +13,7 @@ import java.util.Map; import java.util.Properties; import java.util.Set; +import java.util.UUID; import java.util.concurrent.TimeUnit; import org.apache.log4j.Logger; import org.openrdf.model.Literal; @@ -48,7 +49,9 @@ import org.openrdf.query.algebra.evaluation.iterator.FilterIterator; import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; import com.bigdata.BigdataStatics; +import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.Constant; +import com.bigdata.bop.HashBindingSet; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; @@ -61,6 +64,10 @@ import com.bigdata.bop.constraint.NE; import com.bigdata.bop.constraint.NEConstant; import com.bigdata.bop.constraint.OR; +import com.bigdata.bop.engine.LocalChunkMessage; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.engine.Rule2BOpUtility; +import com.bigdata.bop.engine.RunningQuery; import com.bigdata.bop.solutions.ISortOrder; import com.bigdata.btree.keys.IKeyBuilderFactory; import com.bigdata.rdf.internal.DummyIV; @@ -74,7 +81,6 @@ import com.bigdata.rdf.internal.constraints.InlineNE; import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataValue; -import com.bigdata.rdf.rules.RuleContextEnum; import com.bigdata.rdf.sail.BigdataSail.Options; import 
com.bigdata.rdf.spo.DefaultGraphSolutionExpander; import com.bigdata.rdf.spo.ExplicitSPOFilter; @@ -84,11 +90,13 @@ import com.bigdata.rdf.spo.SPOStarJoin; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.BD; -import com.bigdata.rdf.store.BigdataSolutionResolverator; +import com.bigdata.rdf.store.BigdataBindingSetResolverator; import com.bigdata.rdf.store.IRawTripleStore; import com.bigdata.relation.accesspath.IAccessPath; +import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBuffer; import com.bigdata.relation.accesspath.IElementFilter; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; import com.bigdata.relation.rule.IProgram; import com.bigdata.relation.rule.IQueryOptions; import com.bigdata.relation.rule.IRule; @@ -97,17 +105,13 @@ import com.bigdata.relation.rule.Program; import com.bigdata.relation.rule.QueryOptions; import com.bigdata.relation.rule.Rule; -import com.bigdata.relation.rule.eval.ActionEnum; -import com.bigdata.relation.rule.eval.DefaultEvaluationPlanFactory2; -import com.bigdata.relation.rule.eval.IEvaluationPlanFactory; -import com.bigdata.relation.rule.eval.IJoinNexus; -import com.bigdata.relation.rule.eval.IJoinNexusFactory; import com.bigdata.relation.rule.eval.IRuleTaskFactory; import com.bigdata.relation.rule.eval.ISolution; import com.bigdata.relation.rule.eval.NestedSubqueryWithJoinThreadsTask; import com.bigdata.relation.rule.eval.RuleStats; import com.bigdata.search.FullTextIndex; import com.bigdata.search.IHit; +import com.bigdata.striterator.ChunkedArraysIterator; import com.bigdata.striterator.DistinctFilter; import com.bigdata.striterator.IChunkedOrderedIterator; @@ -473,6 +477,18 @@ return super.evaluate(union, bindings); + } catch (Exception ex) { + + // Use Sesame 2 evaluation + + ex.printStackTrace(); + + if (log.isInfoEnabled()) { + log.info("could not evaluate natively, punting to Sesame"); + } + + return super.evaluate(union, 
bindings); + } } @@ -590,6 +606,18 @@ return super.evaluate(join, bindings); + } catch (Exception ex) { + + // Use Sesame 2 evaluation + + ex.printStackTrace(); + + if (log.isInfoEnabled()) { + log.info("could not evaluate natively, punting to Sesame"); + } + + return super.evaluate(join, bindings); + } } @@ -682,6 +710,18 @@ return super.evaluate(join, bindings); + } catch (Exception ex) { + + // Use Sesame 2 evaluation + + ex.printStackTrace(); + + if (log.isInfoEnabled()) { + log.info("could not evaluate natively, punting to Sesame"); + } + + return super.evaluate(join, bindings); + } } @@ -1598,64 +1638,91 @@ */ protected CloseableIteration<BindingSet, QueryEvaluationException> execute( final IStep step) - throws QueryEvaluationException { + throws Exception { - final boolean backchain = // - tripleSource.getDatabase().getAxioms().isRdfSchema() - && tripleSource.includeInferred - && tripleSource.conn.isQueryTimeExpander(); + final BindingSetPipelineOp query = Rule2BOpUtility.convert(step); - if (log.isDebugEnabled()) { - log.debug("Running tupleExpr as native rule:\n" + step); - log.debug("backchain: " + backchain); + if (log.isInfoEnabled()) { + log.info(query); } - // run the query as a native rule. - final IChunkedOrderedIterator<ISolution> itr1; - try { - final IEvaluationPlanFactory planFactory = - DefaultEvaluationPlanFactory2.INSTANCE; - - /* - * alternative evaluation orders for LUBM Q9 (default is 1 4, 2, 3, - * 0, 5). All three evaluation orders are roughly as good as one - * another. Note that tail[2] (z rdf:type ...) is entailed by the - * ontology and could be dropped from evaluation. 
- */ - // final IEvaluationPlanFactory planFactory = new - // FixedEvaluationPlanFactory( - // // new int[] { 1, 4, 3, 0, 5, 2 } good - // // new int[] { 1, 3, 0, 4, 5, 2 } good - // ); - - final IJoinNexusFactory joinNexusFactory = database - .newJoinNexusFactory(RuleContextEnum.HighLevelQuery, - ActionEnum.Query, IJoinNexus.BINDINGS, - null, // filter - false, // justify - backchain, // - planFactory, // - queryHints - ); - - final IJoinNexus joinNexus = joinNexusFactory.newInstance(database - .getIndexManager()); - itr1 = joinNexus.runQuery(step); - - } catch (Exception ex) { - throw new QueryEvaluationException(ex); - } + final int startId = query.getProperty(Predicate.Annotations.BOP_ID); - /* - * Efficiently resolve term identifiers in Bigdata ISolutions to RDF - * Values in Sesame 2 BindingSets and align the resulting iterator with - * the Sesame 2 API. - */ + final QueryEngine queryEngine = tripleSource.getSail().getQueryEngine(); + + final UUID queryId = UUID.randomUUID(); + final RunningQuery runningQuery = queryEngine.eval(queryId, query, + new LocalChunkMessage<IBindingSet>(queryEngine, queryId, + startId, -1/* partitionId */, + newBindingSetIterator(new HashBindingSet()))); + + final IAsynchronousIterator<IBindingSet[]> it1 = + runningQuery.iterator(); + + final IChunkedOrderedIterator<IBindingSet> it2 = + new ChunkedArraysIterator<IBindingSet>(it1); + CloseableIteration<BindingSet, QueryEvaluationException> result = new Bigdata2Sesame2BindingSetIterator<QueryEvaluationException>( - new BigdataSolutionResolverator(database, itr1).start(database + new BigdataBindingSetResolverator(database, it2).start(database .getExecutorService())); +// final boolean backchain = // +// tripleSource.getDatabase().getAxioms().isRdfSchema() +// && tripleSource.includeInferred +// && tripleSource.conn.isQueryTimeExpander(); +// +// if (log.isDebugEnabled()) { +// log.debug("Running tupleExpr as native rule:\n" + step); +// log.debug("backchain: " + backchain); +// } +// 
+// // run the query as a native rule. +// final IChunkedOrderedIterator<ISolution> itr1; +// try { +// final IEvaluationPlanFactory planFactory = +// DefaultEvaluationPlanFactory2.INSTANCE; +// +// /* +// * alternative evaluation orders for LUBM Q9 (default is 1 4, 2, 3, +// * 0, 5). All three evaluation orders are roughly as good as one +// * another. Note that tail[2] (z rdf:type ...) is entailed by the +// * ontology and could be dropped from evaluation. +// */ +// // final IEvaluationPlanFactory planFactory = new +// // FixedEvaluationPlanFactory( +// // // new int[] { 1, 4, 3, 0, 5, 2 } good +// // // new int[] { 1, 3, 0, 4, 5, 2 } good +// // ); +// +// final IJoinNexusFactory joinNexusFactory = database +// .newJoinNexusFactory(RuleContextEnum.HighLevelQuery, +// ActionEnum.Query, IJoinNexus.BINDINGS, +// null, // filter +// false, // justify +// backchain, // +// planFactory, // +// queryHints +// ); +// +// final IJoinNexus joinNexus = joinNexusFactory.newInstance(database +// .getIndexManager()); +// itr1 = joinNexus.runQuery(step); +// +// } catch (Exception ex) { +// throw new QueryEvaluationException(ex); +// } +// +// /* +// * Efficiently resolve term identifiers in Bigdata ISolutions to RDF +// * Values in Sesame 2 BindingSets and align the resulting iterator with +// * the Sesame 2 API. +// */ +// CloseableIteration<BindingSet, QueryEvaluationException> result = +// new Bigdata2Sesame2BindingSetIterator<QueryEvaluationException>( +// new BigdataSolutionResolverator(database, itr1).start(database +// .getExecutorService())); + // use the basic filter iterator for remaining filters if (step instanceof ProxyRuleWithSesameFilters) { Collection<Filter> filters = @@ -1675,6 +1742,21 @@ } + /** + * Return an {@link IAsynchronousIterator} that will read a single, + * empty {@link IBindingSet}. + * + * @param bindingSet + * the binding set. 
+ */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet bindingSet) { + + return new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { bindingSet } }); + + } + @SuppressWarnings("serial") private class UnknownOperatorException extends RuntimeException { private TupleExpr operator; Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2010-09-15 20:45:14 UTC (rev 3560) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -112,6 +112,7 @@ import org.openrdf.sail.SailConnectionListener; import org.openrdf.sail.SailException; +import com.bigdata.bop.engine.QueryEngine; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITransactionService; import com.bigdata.journal.ITx; @@ -513,6 +514,11 @@ * {@link BigdataSailConnection} instances and across all transactions. */ private Map<String, String> namespaces; + + /** + * The query engine. + */ + final private QueryEngine queryEngine; /** * When true, the RDFS closure will be maintained by the <em>SAIL</em> @@ -915,6 +921,10 @@ namespaces = Collections.synchronizedMap(new LinkedHashMap<String, String>()); + queryEngine = new QueryEngine(database.getIndexManager()); + + queryEngine.init(); + } /** @@ -1332,7 +1342,13 @@ } + public QueryEngine getQueryEngine() { + + return queryEngine; + + } + /** * Inner class implements the {@link SailConnection}. Some additional * functionality is available on this class, including @@ -1406,6 +1422,13 @@ */ private Lock lock; + + public BigdataSail getBigdataSail() { + + return BigdataSail.this; + + } + /** * Return the assertion buffer. 
* <p> Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataTripleSource.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataTripleSource.java 2010-09-15 20:45:14 UTC (rev 3560) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataTripleSource.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -42,6 +42,12 @@ } + public BigdataSail getSail() { + + return conn.getBigdataSail(); + + } + /** * This wraps * {@link BigdataSailConnection#getStatements(Resource, URI, Value, boolean, Resource[])}. Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java 2010-09-15 21:43:28 UTC (rev 3561) @@ -0,0 +1,170 @@ +/** +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 16, 2009 + */ + +package com.bigdata.rdf.sail; + +import java.util.Collection; +import java.util.LinkedList; +import java.util.Properties; +import org.apache.log4j.Logger; +import org.openrdf.model.Literal; +import org.openrdf.model.URI; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.LiteralImpl; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; +import org.openrdf.query.Binding; +import org.openrdf.query.BindingSet; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.TupleQuery; +import org.openrdf.query.TupleQueryResult; +import org.openrdf.query.impl.BindingImpl; +import com.bigdata.rdf.axioms.NoAxioms; +import com.bigdata.rdf.store.BD; +import com.bigdata.rdf.vocab.NoVocabulary; + +/** + * @author <a href="mailto:mrp...@us...">Mike Personick</a> + * @version $Id$ + */ +public class TestBOps extends ProxyBigdataSailTestCase { + + protected static final Logger log = Logger.getLogger(TestBOps.class); + + @Override + public Properties getProperties() { + + Properties props = super.getProperties(); + + props.setProperty(BigdataSail.Options.TRUTH_MAINTENANCE, "false"); + props.setProperty(BigdataSail.Options.AXIOMS_CLASS, NoAxioms.class.getName()); + props.setProperty(BigdataSail.Options.VOCABULARY_CLASS, NoVocabulary.class.getName()); + props.setProperty(BigdataSail.Options.JUSTIFY, "false"); + props.setProperty(BigdataSail.Options.TEXT_INDEX, "false"); + + return props; + + } + + /** + * + */ + public TestBOps() { + } + + /** + * @param arg0 + */ + public TestBOps(String arg0) { + super(arg0); + } + + public void testSimpleJoin() throws Exception { + + final BigdataSail sail = getSail(); + sail.initialize(); + final 
BigdataSailRepository repo = new BigdataSailRepository(sail); + final BigdataSailRepositoryConnection cxn = + (BigdataSailRepositoryConnection) repo.getConnection(); + cxn.setAutoCommit(false); + + try { + + final ValueFactory vf = sail.getValueFactory(); + + final String ns = BD.NAMESPACE; + + URI mike = new URIImpl(ns+"Mike"); + URI bryan = new URIImpl(ns+"Bryan"); + URI person = new URIImpl(ns+"Person"); + URI likes = new URIImpl(ns+"likes"); + URI rdf = new URIImpl(ns+"RDF"); + Literal l1 = new LiteralImpl("Mike"); + Literal l2 = new LiteralImpl("Bryan"); +/**/ + cxn.setNamespace("ns", ns); + + cxn.add(mike, RDF.TYPE, person); + cxn.add(mike, likes, rdf); + cxn.add(mike, RDFS.LABEL, l1); + cxn.add(bryan, RDF.TYPE, person); + cxn.add(bryan, likes, rdf); + cxn.add(bryan, RDFS.LABEL, l2); + + /* + * Note: Either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.flush();//commit(); + + if (log.isInfoEnabled()) { + log.info("\n" + sail.getDatabase().dumpStore()); + } + + { + + String query = + "PREFIX rdf: <"+RDF.NAMESPACE+"> " + + "PREFIX rdfs: <"+RDFS.NAMESPACE+"> " + + "PREFIX ns: <"+ns+"> " + + + "select * " + + "WHERE { " + + " ?s rdf:type ns:Person . " + + " ?s ns:likes ?likes . " + + " ?s rdfs:label ?label . 
" + + "}"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + TupleQueryResult result = tupleQuery.evaluate(); + + Collection<BindingSet> solution = new LinkedList<BindingSet>(); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", mike), + new BindingImpl("likes", rdf), + new BindingImpl("label", l1) + })); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", bryan), + new BindingImpl("likes", rdf), + new BindingImpl("label", l2) + })); + + compare(result, solution); + + } + + } finally { + cxn.close(); + sail.__tearDownUnitTest(); + } + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-09-16 19:43:17
|
Revision: 3573 http://bigdata.svn.sourceforge.net/bigdata/?rev=3573&view=rev Author: thompsonbry Date: 2010-09-16 19:43:08 +0000 (Thu, 16 Sep 2010) Log Message: ----------- Tracked down some problems with distributed query evaluation and added more test suites. Broke out the "map binding sets over shards" capability into its own package, fixed a bug where it was failing on predicates which were only partly bound, updated the unit tests, refactored the implementation to include an interface which may be used to realize a variety of different algorithms for efficiently mapping binding sets across shards, detailed several such implementations, and provided two such implementations - one for fully bound predicates and another which is a general purpose technique and is what we had been using historically. Several of the described algorithms can be significantly more efficient for various conditions. I have filed an issue to implement and test these various alternative algorithms. See https://sourceforge.net/apps/trac/bigdata/ticket/162. Modified the PipelineOp#newBuffer() method to accept the BOpStats from the caller and to wrap the buffer such that it automatically tracks the #of written units and chunks. This was necessary for some operators where we otherwise did not have the necessary scope to properly track those statistics. I plan to do a similar thing with the source. Fixed some problems with SliceOp and how binding sets are routed to the query controller. Still working through the distributed query evaluation test suite. 
Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/DistinctBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/btree/AbstractNode.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/AbstractUnsynchronizedArrayBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/UnsyncLocalOutputBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/pipeline/UnsyncDistributedOutputBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/service/ndx/ISplitter.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/concurrent/Haltable.java branches/QUADS_QUERY_BRANCH/bigdata/src/resources/logging/log4j.properties branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestConditionalRoutingOp.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestCopyBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/join/TestPipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestDistinctBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/shards/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/shards/Algorithm_AsGivenPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/shards/Algorithm_FullyBoundPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/shards/Algorithm_GroupByLocatorScan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/shards/Algorithm_LowShardCount.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/shards/Algorithm_NestedLocatorScan.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/shards/Bundle.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/shards/IShardMapper.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/shards/MapBindingSetsOverShardsBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/shards/Splitter.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestBOpStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/nodes/ 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/nodes/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/nodes/TestMapBindingSetsOverNodes.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/shards/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/shards/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/shards/TestMapBindingSetsOverShards.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/MapBindingSetsOverShardsBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestMapBindingSetsOverNodes.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestMapBindingSetsOverShards.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -137,7 +137,7 @@ * * @return The value of the annotation. * - * @throws IllegalArgumentException + * @throws IllegalStateException * if the named annotation is not bound. * * @todo Note: This variant without generics is required for some java @@ -153,6 +153,14 @@ BOp clone(); /** + * Return the {@link Annotations#BOP_ID}. + * + * @throws IllegalStateException + * if that annotation is not bound. + */ + int getId(); + + /** * Return the evaluation context for the operator. The default is * {@link BOpEvaluationContext#ANY}. 
Operators which must be mapped against * shards, mapped against nodes, or evaluated on the query controller must Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -323,12 +323,18 @@ final Object tmp = annotations.get(name); if (tmp == null) - throw new IllegalArgumentException("Required property: " + name); + throw new IllegalStateException("Required property: " + name); return tmp; } + public int getId() { + + return (Integer) getRequiredProperty(Annotations.BOP_ID); + + } + public String toString() { final StringBuilder sb = new StringBuilder(); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -236,6 +236,9 @@ * @todo modify to accept {@link IChunkMessage} or an interface available * from getChunk() on {@link IChunkMessage} which provides us with * flexible mechanisms for accessing the chunk data. + * <p> + * When doing that, modify to automatically track the {@link BOpStats} + * as the <i>source</i> is consumed. 
*/ // * @throws IllegalArgumentException // * if the <i>indexManager</i> is <code>null</code> Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPipelineOp.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPipelineOp.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -59,9 +59,13 @@ * operators which write on the database) then the operator MAY return an * immutable empty buffer. * + * @param stats + * The statistics on this object will automatically be updated as + * elements and chunks are output onto the returned buffer. + * * @return The buffer. */ - IBlockingBuffer<E[]> newBuffer(); + IBlockingBuffer<E[]> newBuffer(BOpStats stats); /** * Return a {@link FutureTask} which computes the operator against the Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -209,11 +209,68 @@ } - public IBlockingBuffer<E[]> newBuffer() { + public IBlockingBuffer<E[]> newBuffer(final BOpStats stats) { - return new BlockingBuffer<E[]>(getChunkOfChunksCapacity(), - getChunkCapacity(), getChunkTimeout(), chunkTimeoutUnit); + if (stats == null) + throw new IllegalArgumentException(); + + return new BlockingBufferWithStats<E[]>(getChunkOfChunksCapacity(), + getChunkCapacity(), getChunkTimeout(), chunkTimeoutUnit, stats); } + private static class BlockingBufferWithStats<E> extends BlockingBuffer<E> { + + private final BOpStats stats; + + /** + * @param chunkOfChunksCapacity + * @param chunkCapacity + * @param chunkTimeout + * @param 
chunktimeoutunit + * @param stats + */ + public BlockingBufferWithStats(int chunkOfChunksCapacity, + int chunkCapacity, long chunkTimeout, + TimeUnit chunktimeoutunit, final BOpStats stats) { + // Pass the capacity and timeout settings through to BlockingBuffer. + super(chunkOfChunksCapacity, chunkCapacity, chunkTimeout, chunktimeoutunit); + this.stats = stats; + } + + /** + * Overridden to track {@link BOpStats#unitsOut} and + * {@link BOpStats#chunksOut}. + * <p> + * Note: {@link BOpStats#chunksOut} will report the #of chunks added to + * this buffer. However, the buffer MAY combine chunks either on add() + * or when drained by the iterator so the actual #of chunks read back + * from the iterator MAY differ. + * <p> + * {@inheritDoc} + */ + @Override + public boolean add(final E e, final long timeout, final TimeUnit unit) + throws InterruptedException { + + final boolean ret = super.add(e, timeout, unit); + + if (e.getClass().getComponentType() != null) { + + stats.unitsOut.add(((Object[]) e).length); + + } else { + + stats.unitsOut.increment(); + + } + + stats.chunksOut.increment(); + + return ret; + + } + + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/ConditionalRoutingOp.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -183,8 +183,8 @@ sink.add(def); else sink.add(Arrays.copyOf(def, ndef)); - stats.chunksOut.increment(); - stats.unitsOut.add(ndef); +// stats.chunksOut.increment(); +// stats.unitsOut.add(ndef); } if (nalt > 0) { @@ -192,8 +192,8 @@ sink2.add(alt); else sink2.add(Arrays.copyOf(alt, nalt)); - stats.chunksOut.increment(); - stats.unitsOut.add(nalt); +// stats.chunksOut.increment(); +// stats.unitsOut.add(nalt); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyBindingSetOp.java
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyBindingSetOp.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyBindingSetOp.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -36,6 +36,7 @@ import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.IChunkAccessor; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -82,40 +83,39 @@ } /** - * Copy the source to the sink. + * Copy the source to the sink. + * + * @todo Optimize this. When using an {@link IChunkAccessor} we should be + * able to directly output the same chunk. */ static private class CopyTask implements Callable<Void> { - private final BOpStats stats; + private final BOpContext<IBindingSet> context; - private final IAsynchronousIterator<IBindingSet[]> source; - - private final IBlockingBuffer<IBindingSet[]> sink; - CopyTask(final BOpContext<IBindingSet> context) { - stats = context.getStats(); + this.context = context; - this.source = context.getSource(); - - this.sink = context.getSink(); - } public Void call() throws Exception { + final IAsynchronousIterator<IBindingSet[]> source = context.getSource(); + final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); try { + final BOpStats stats = context.getStats(); while (source.hasNext()) { final IBindingSet[] chunk = source.next(); stats.chunksIn.increment(); stats.unitsIn.add(chunk.length); sink.add(chunk); - stats.chunksOut.increment(); - stats.unitsOut.add(chunk.length); +// stats.chunksOut.increment(); +// stats.unitsOut.add(chunk.length); } sink.flush(); return null; } finally { sink.close(); + source.close(); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BOpStats.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -106,7 +106,7 @@ chunksIn.add(o.chunksIn.get()); unitsIn.add(o.unitsIn.get()); unitsOut.add(o.unitsOut.get()); - chunksOut.add(o.chunksIn.get()); + chunksOut.add(o.chunksOut.get()); } public String toString() { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -34,12 +34,15 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import org.apache.log4j.Logger; import com.bigdata.bop.BOp; +import com.bigdata.util.InnerCause; /** * The run state for a {@link RunningQuery}. This class is NOT thread-safe. @@ -83,6 +86,24 @@ private final UUID queryId; /** + * The query deadline. + * + * @see BOp.Annotations#TIMEOUT + * @see RunningQuery#getDeadline() + */ + private final long deadline; + + /** + * Set to <code>true</code> iff the query evaluation is complete due to + * normal termination. + * <p> + * Note: This is package private to expose it to {@link RunningQuery}. + * + * @see #haltOp(HaltOpMessage) + */ + /*private*/ final AtomicBoolean allDone = new AtomicBoolean(false); + + /** * The #of run state transitions which have occurred for this query. 
*/ private long nsteps = 0; @@ -131,6 +152,8 @@ this.queryId = query.getQueryId(); + this.deadline = query.getDeadline(); + // this.nops = query.bopIndex.size(); } @@ -193,8 +216,11 @@ /** * @return <code>true</code> if this is the first time we will evaluate the * op. + * + * @throws TimeoutException + * if the deadline for the query has passed. */ - public boolean startOp(final StartOpMessage msg) { + public boolean startOp(final StartOpMessage msg) throws TimeoutException { nsteps++; @@ -257,35 +283,40 @@ // + ",fanIn=" + msg.nchunks); if (TableLog.tableLog.isInfoEnabled()) { - TableLog.tableLog -.info(getTableRow("startOp", msg.serviceId, + TableLog.tableLog.info(getTableRow("startOp", msg.serviceId, msg.bopId, msg.partitionId, msg.nchunks/* fanIn */, null/* cause */, null/* stats */)); } // check deadline. - final long deadline = query.getDeadline(); - if (deadline < System.currentTimeMillis()) { if (log.isTraceEnabled()) log.trace("expired: queryId=" + queryId + ", deadline=" + deadline); - query.future.halt(new TimeoutException()); + throw new TimeoutException(); - query.cancel(true/* mayInterruptIfRunning */); - } return firstTime; } /** - * Update termination criteria counters. @return <code>true</code> if the - * operator life cycle is over. + * Update termination criteria counters. If the query evaluation is over due + * to normal termination then {@link #allDone} is set to <code>true</code> + * as a side effect. + * + * @return <code>true</code> if the operator life cycle is over. + * + * @throws TimeoutException + * if the deadline has expired. + * @throws ExecutionException + * if the {@link HaltOpMessage#cause} was non-<code>null</code>, + * in which case it wraps {@link HaltOpMessage#cause}. */ - public boolean haltOp(final HaltOpMessage msg) { + public boolean haltOp(final HaltOpMessage msg) throws TimeoutException, + ExecutionException { nsteps++; @@ -354,9 +385,6 @@ } - // Figure out if this operator is done. 
- final boolean isDone = isOperatorDone(msg.bopId); - // System.err.println("haltOp : nstep=" + nsteps + ", bopId=" + msg.bopId // + ",totalRunningTaskCount=" + totalRunningTaskCount // + ",totalAvailableTaskCount=" + totalAvailableChunkCount @@ -378,41 +406,53 @@ /* * Test termination criteria */ - final long deadline = query.getDeadline(); + + // true if this operator is done. + final boolean isOpDone = isOperatorDone(msg.bopId); + // true if the entire query is done. + final boolean isAllDone = totalRunningTaskCount == 0 + && totalAvailableChunkCount == 0; + if (msg.cause != null) { - // operator failed on this chunk. - log.error("Error: Canceling query: queryId=" + queryId + ",bopId=" - + msg.bopId + ",partitionId=" + msg.partitionId, msg.cause); +// /* +// * @todo probably just wrap and throw rather than logging since this +// * class does not have enough insight into non-error exceptions +// * while Haltable does. +// */ +// if (!InnerCause.isInnerCause(msg.cause, InterruptedException.class) +// && !InnerCause.isInnerCause(msg.cause, +// TimeoutException.class)) { +// +// // operator failed on this chunk. +// log.error("Error: Canceling query: queryId=" + queryId +// + ",bopId=" + msg.bopId + ",partitionId=" +// + msg.partitionId, msg.cause); +// } - query.future.halt(msg.cause); + throw new ExecutionException(msg.cause); - query.cancel(true/* mayInterruptIfRunning */); + } else if (isAllDone) { - } else if (totalRunningTaskCount == 0 && totalAvailableChunkCount == 0) { - // success (all done). 
if (log.isTraceEnabled()) log.trace("success: queryId=" + queryId); - query.future.halt(query.getStats()); - - query.cancel(true/* mayInterruptIfRunning */); - + this.allDone.set(true); + } else if (deadline < System.currentTimeMillis()) { if (log.isTraceEnabled()) log.trace("expired: queryId=" + queryId + ", deadline=" + deadline); - query.future.halt(new TimeoutException()); + throw new TimeoutException(); - query.cancel(true/* mayInterruptIfRunning */); - } - return isDone; + return isOpDone; + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -38,6 +38,7 @@ import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; @@ -63,7 +64,7 @@ /** * Metadata about running queries. */ -public class RunningQuery implements Future<Map<Integer,BOpStats>>, IRunningQuery { +public class RunningQuery implements Future<Void>, IRunningQuery { private final static transient Logger log = Logger .getLogger(RunningQuery.class); @@ -75,20 +76,6 @@ .getLogger(ChunkTask.class); /** - * The run state of the query and the result of the computation iff it - * completes execution normally (without being interrupted, cancelled, etc). - * <p> - * Note: Package private in order to expose this field to {@link RunState}. 
- */ - final /*private*/ Haltable<Map<Integer,BOpStats>> future = new Haltable<Map<Integer,BOpStats>>(); - - /** - * The runtime statistics for each {@link BOp} in the query and - * <code>null</code> unless this is the query controller. - */ - final private ConcurrentHashMap<Integer/* bopId */, BOpStats> statsMap; - - /** * The class executing the query on this node. */ final private QueryEngine queryEngine; @@ -123,20 +110,15 @@ final private BindingSetPipelineOp query; /** - * The buffer used for the overall output of the query pipeline. - * <p> - * Note: In scale out, this only exists on the query controller. In order to - * ensure that the results are transferred to the query controller, the - * top-level operator in the query plan must specify - * {@link BOpEvaluationContext#CONTROLLER}. For example, {@link SliceOp} - * uses this {@link BOpEvaluationContext}. + * An index from the {@link BOp.Annotations#BOP_ID} to the {@link BOp}. */ - final private IBlockingBuffer<IBindingSet[]> queryBuffer; + protected final Map<Integer, BOp> bopIndex; /** - * An index from the {@link BOp.Annotations#BOP_ID} to the {@link BOp}. + * The run state of the query and the result of the computation iff it + * completes execution normally (without being interrupted, cancelled, etc). */ - protected final Map<Integer, BOp> bopIndex; + final private Haltable<Void> future = new Haltable<Void>(); /** * A collection of {@link Future}s for currently executing operators for @@ -145,6 +127,23 @@ private final ConcurrentHashMap<BSBundle, Future<?>> operatorFutures = new ConcurrentHashMap<BSBundle, Future<?>>(); /** + * The runtime statistics for each {@link BOp} in the query and + * <code>null</code> unless this is the query controller. + */ + final private ConcurrentHashMap<Integer/* bopId */, BOpStats> statsMap; + + /** + * The buffer used for the overall output of the query pipeline. + * <p> + * Note: In scale out, this only exists on the query controller. 
In order to + * ensure that the results are transferred to the query controller, the + * top-level operator in the query plan must specify + * {@link BOpEvaluationContext#CONTROLLER}. For example, {@link SliceOp} + * uses this {@link BOpEvaluationContext}. + */ + final private IBlockingBuffer<IBindingSet[]> queryBuffer; + + /** * A lock guarding {@link RunState#totalRunningTaskCount}, * {@link RunState#totalAvailableChunkCount}, * {@link RunState#availableChunkCountMap}. This is <code>null</code> unless @@ -159,6 +158,11 @@ * query controller. */ final private RunState runState; + + /** + * Flag used to prevent retriggering of {@link #lifeCycleTearDownQuery()}. + */ + final AtomicBoolean didQueryTearDown = new AtomicBoolean(false); /** * The chunks available for immediate processing (they must have been @@ -193,13 +197,18 @@ // set the deadline. if (!this.deadline .compareAndSet(Long.MAX_VALUE/* expect */, deadline/* update */)) { + // the deadline is already set. throw new IllegalStateException(); + } if (deadline < System.currentTimeMillis()) { + // deadline has already expired. + future.halt(new TimeoutException()); cancel(true/* mayInterruptIfRunning */); + } } @@ -252,7 +261,9 @@ * Return the operator tree for this query. */ public BindingSetPipelineOp getQuery() { + return query; + } /** @@ -276,11 +287,23 @@ } /** - * + * @param queryEngine + * The {@link QueryEngine} on which the query is running. In + * scale-out, a query is typically instantiated on many + * {@link QueryEngine}s. * @param queryId - * @param begin + * The identifier for that query. + * @param controller + * <code>true</code> iff the {@link QueryEngine} is the query + * controller for this query (the {@link QueryEngine} which will + * coordinate the query evaluation). * @param clientProxy + * The query controller. In standalone, this is the same as the + * <i>queryEngine</i>. 
In scale-out, this is a proxy for the + * query controller whenever the query is instantiated on a node + * other than the query controller itself. * @param query + * The query. * * @throws IllegalArgumentException * if any argument is <code>null</code>. @@ -318,20 +341,42 @@ this.query = query; - this.bopIndex = BOpUtility.getIndex(query); + bopIndex = BOpUtility.getIndex(query); - this.statsMap = controller ? new ConcurrentHashMap<Integer, BOpStats>() + statsMap = controller ? new ConcurrentHashMap<Integer, BOpStats>() : null; runStateLock = controller ? new ReentrantLock() : null; runState = controller ? new RunState(this) : null; - // Note: only exists on the query controller. - this.queryBuffer = controller ? newQueryBuffer() : null; - -// System.err -// .println("new RunningQuery:: queryId=" + queryId + if (controller) { + + final BOpStats queryStats = query.newStats(); + + statsMap.put((Integer) query + .getRequiredProperty(BOp.Annotations.BOP_ID), queryStats); + + if (!query.isMutation()) { + + queryBuffer = query.newBuffer(queryStats); + + } else { + + // Note: Not used for mutation queries. + queryBuffer = null; + + } + + } else { + + // Note: only exists on the query controller. + queryBuffer = null; + + } + + // System.err + // .println("new RunningQuery:: queryId=" + queryId // + ", isController=" + controller + ", queryController=" // + clientProxy + ", queryEngine=" // + queryEngine.getServiceUUID()); @@ -339,22 +384,6 @@ } /** - * Return the buffer on which the solutions will be written (if any). This - * is based on the top-level operator in the query plan. - * - * @return The buffer for the solutions -or- <code>null</code> if the - * top-level operator in the query plan is a mutation operator. 
- */ - protected IBlockingBuffer<IBindingSet[]> newQueryBuffer() { - - if (query.isMutation()) - return null; - - return ((BindingSetPipelineOp) query).newBuffer(); - - } - - /** * Take a chunk generated by some pass over an operator and make it * available to the target operator. How this is done depends on whether the * query is running against a standalone database or the scale-out database. @@ -372,10 +401,10 @@ * @param sink * The intermediate results to be passed to that target operator. * - * @return The #of chunks made available for consumption by the sink. This - * will always be ONE (1) for scale-up. For scale-out, there will be - * one chunk per index partition over which the intermediate results - * were mapped. + * @return The #of {@link IChunkMessage} sent. This will always be ONE (1) + * for scale-up. For scale-out, there will be at least one + * {@link IChunkMessage} per index partition over which the + * intermediate results were mapped. */ protected <E> int handleOutputChunk(final int sinkId, final IBlockingBuffer<IBindingSet[]> sink) { @@ -478,6 +507,11 @@ if (runState.startOp(msg)) lifeCycleSetUpOperator(msg.bopId); + } catch(TimeoutException ex) { + + future.halt(ex); + cancel(true/* mayInterruptIfRunning */); + } finally { runStateLock.unlock(); @@ -508,6 +542,8 @@ if (tmp != null) tmp.add(msg.taskStats); + Throwable cause = null; + boolean allDone = false; runStateLock.lock(); try { @@ -520,14 +556,53 @@ */ lifeCycleTearDownOperator(msg.bopId); + + if(runState.allDone.get()) { + + allDone = true; + + } } - + + } catch(Throwable ex) { + + cause = ex; + } finally { runStateLock.unlock(); } + + /* + * Handle query termination once we have released the runStateLock. + * + * Note: In scale-out, query termination can involve RMI to the nodes on + * which query operators are known to be running and to nodes on which + * resources were allocated which were scoped to the query or an + * operator's evaluation. 
Those RMI messages should not go out while we + * are holding the runStateLock since that could cause deadlock with + * call backs on haltOp() from the query peers for that query. + */ + + if (cause != null) { + + /* + * Timeout, interrupted, operator error, or internal error in + * RunState. + */ + + future.halt(cause); + cancel(true/* mayInterruptIfRunning */); + + } else if (allDone) { + + // Normal termination. + future.halt((Void) null); + cancel(true/* mayInterruptIfRunning */); + + } } @@ -753,13 +828,15 @@ + bop); } - sink = (p == null ? queryBuffer : op.newBuffer()); + final BOpStats stats = op.newStats(); + + sink = (p == null ? queryBuffer : op.newBuffer(stats)); - altSink = altSinkId == null ? null : op.newBuffer(); + altSink = altSinkId == null ? null : op.newBuffer(stats); // context : @todo pass in IChunkMessage or IChunkAccessor context = new BOpContext<IBindingSet>(RunningQuery.this, - partitionId, op.newStats(), msg.getChunkAccessor() + partitionId, stats, msg.getChunkAccessor() .iterator(), sink, altSink); // FutureTask for operator execution (not running yet). @@ -903,11 +980,7 @@ * <p> * Since this involves RMI to the nodes, we should not issue those RMIs * while holding the {@link #runStateLock} (and this could even deadlock - * with callback from those nodes). Perhaps - * {@link RunState#haltOp(HaltOpMessage)} should throw back the - * {@link HaltOpMessage} or a {@link TimeoutException} if the deadline has - * expired and then let {@link RunningQuery#haltOp(HaltOpMessage)} handle - * the termination of the query, which it can do without holding the lock. + * with call back from those nodes). * <p> * When the controller sends a node a terminate signal for an operator, it * should not bother to RMI back to the controller (unless this is done for @@ -931,22 +1004,24 @@ // close the output sink. queryBuffer.close(); } - // life cycle hook for the end of the query. 
- lifeCycleTearDownQuery(); + if(didQueryTearDown.compareAndSet(false/*expect*/, true/*update*/)) { + // life cycle hook for the end of the query. + lifeCycleTearDownQuery(); + } // remove from the collection of running queries. queryEngine.runningQueries.remove(queryId, this); // true iff we cancelled something. return cancelled; } - final public Map<Integer, BOpStats> get() throws InterruptedException, + final public Void get() throws InterruptedException, ExecutionException { return future.get(); } - final public Map<Integer, BOpStats> get(long arg0, TimeUnit arg1) + final public Void get(long arg0, TimeUnit arg1) throws InterruptedException, ExecutionException, TimeoutException { return future.get(arg0, arg1); @@ -977,4 +1052,18 @@ } + public String toString() { + final StringBuilder sb = new StringBuilder(getClass().getName()); + sb.append("{queryId=" + queryId); + sb.append(",deadline=" + deadline.get()); + sb.append(",isDone=" + isDone()); + sb.append(",isCancelled=" + isCancelled()); + sb.append(",runState=" + runState); + sb.append(",controller=" + controller); + sb.append(",clientProxy=" + clientProxy); + sb.append(",query=" + query); + sb.append("}"); + return sb.toString(); + } + } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -47,6 +47,7 @@ import com.bigdata.bop.engine.IQueryPeer; import com.bigdata.bop.engine.LocalChunkMessage; import com.bigdata.bop.engine.RunningQuery; +import com.bigdata.bop.fed.shards.MapBindingSetsOverShardsBuffer; import com.bigdata.io.DirectBufferPool; import com.bigdata.io.DirectBufferPoolAllocator.IAllocationContext; import 
com.bigdata.journal.TemporaryStoreFactory; @@ -55,6 +56,7 @@ import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.IBuffer; +import com.bigdata.relation.rule.eval.pipeline.DistributedJoinTask; import com.bigdata.resources.ResourceManager; import com.bigdata.service.IBigdataFederation; import com.bigdata.service.ResourceService; @@ -363,6 +365,14 @@ * {@link ByteBuffer} and notifying the receiving service that there are * intermediate results which it can pull when it is ready to process them. * This pattern allows the receiver to impose flow control on the producer. + * + * @todo Figure out how (or if) we will combine binding set streams emerging + * from concurrent tasks executing on a given node destined for the + * same shard/node. (There is code in the {@link DistributedJoinTask} + * which does this for the same shard, but it does it on the receiver + * side.) Pay attention to the #of threads running in the join, the + * potential concurrency of threads targeting the same (bopId,shardId) + * and how to best combine their data together. */ @Override protected <E> int handleOutputChunk(final int sinkId, @@ -405,20 +415,33 @@ * * @todo Set the capacity of the the "map" buffer to the size of the * data contained in the sink (in fact, we should just process the - * sink data in place). + * sink data in place using an expanded IChunkAccessor interface). + * + * @todo high volume operators will need different capacity + * parameters. + * + * FIXME the chunkSize will limit us to RMI w/ the payload inline + * when it is the same as the threshold for NIO chuck transfers. 
+ * This needs to be adaptive and responsive to the actual data scale + * of the operator's outputs */ @SuppressWarnings("unchecked") final IPredicate<E> pred = ((IShardwisePipelineOp) bop).getPredicate(); final IKeyOrder<E> keyOrder = pred.getKeyOrder(); final long timestamp = pred.getTimestamp(); final int capacity = 1000;// @todo - final int capacity2 = 1000;// @todo + final int chunkOfChunksCapacity = 10;// @todo small queue + final int chunkSize = 100;// @todo modest chunks. final MapBindingSetsOverShardsBuffer<IBindingSet, E> mapper = new MapBindingSetsOverShardsBuffer<IBindingSet, E>( getFederation(), pred, keyOrder, timestamp, capacity) { @Override - IBuffer<IBindingSet[]> newBuffer(final PartitionLocator locator) { - // @todo chunkCapacity and chunkOfChunksCapacity plus timeout stuff. - return new BlockingBuffer<IBindingSet[]>(capacity2); + protected IBuffer<IBindingSet[]> newBuffer(final PartitionLocator locator) { + return new BlockingBuffer<IBindingSet[]>( + chunkOfChunksCapacity,// + chunkSize,// + BlockingBuffer.DEFAULT_CONSUMER_CHUNK_TIMEOUT,// + BlockingBuffer.DEFAULT_CONSUMER_CHUNK_TIMEOUT_UNIT// + ); } }; /* @@ -454,17 +477,11 @@ * * @todo This stage should probably be integrated with the stage * which maps the binding sets over the shards (immediately above) - * to minimize copying or visiting in the data. - * - * FIXME Review the definition of an "output chunk" from the - * perspective of the atomic query termination decision. I think - * that it probably corresponds to a "message" sent to a node. For - * each message sent, we must later observe the evaluate of the - * operator on that node+shard. If the receiver is permitted to - * combine messages, then it must tell us how many messages were - * consumed. + * to minimize copying or visiting in the data. This could be done + * by hooking the method which outputs a chunk to instead directly + * send the IChunkMessage. 
*/ - int nchunksout = 0; + int messageSendCount = 0; for (Map.Entry<PartitionLocator, IBuffer<IBindingSet[]>> e : mapper .getSinks().entrySet()) { @@ -484,11 +501,11 @@ sendChunkMessage(locator.getDataServiceUUID(), sinkId, locator .getPartitionId(), allocationContext, shardSink); - nchunksout++; + messageSendCount++; } - return nchunksout; + return messageSendCount; } case CONTROLLER: { Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/MapBindingSetsOverShardsBuffer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/MapBindingSetsOverShardsBuffer.java 2010-09-16 19:40:48 UTC (rev 3572) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/MapBindingSetsOverShardsBuffer.java 2010-09-16 19:43:08 UTC (rev 3573) @@ -1,499 +0,0 @@ -package com.bigdata.bop.fed; - -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.Map; - -import org.apache.log4j.Logger; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IPredicate; -import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.btree.BytesUtil; -import com.bigdata.btree.IIndex; -import com.bigdata.btree.keys.IKeyBuilder; -import com.bigdata.journal.NoSuchIndexException; -import com.bigdata.journal.TimestampUtility; -import com.bigdata.mdi.IMetadataIndex; -import com.bigdata.mdi.PartitionLocator; -import com.bigdata.relation.IRelation; -import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer; -import com.bigdata.relation.accesspath.IBuffer; -import com.bigdata.relation.rule.eval.pipeline.DistributedJoinTask; -import com.bigdata.service.IBigdataFederation; -import com.bigdata.service.Split; -import com.bigdata.service.ndx.AbstractSplitter; -import com.bigdata.striterator.IKeyOrder; - -/** - * Unsynchronized (non-thread safe) buffer maps the {@link 
IBindingSet}s across - * the index partition(s) associated with an {@link IPredicate} and - * {@link IKeyOrder}. For each source chunk, "as bound" versions of the target - * {@link IPredicate} are constructed and the {@link IBindingSet}s in the chunk - * are reordered based on {@link IKeyOrder#getFromKey(IKeyBuilder, IPredicate)} - * for each asBound predicate. The {@link PartitionLocator}s are discovered for - * each fromKey using an ordered locator scan and the binding sets are output - * onto a shard or node specific {@link IBuffer} created by a concrete subclass. - * The subclass is responsible for getting the binding sets from this node onto - * the node associated with each output buffer. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id: UnsyncDistributedOutputBuffer.java 3448 2010-08-18 20:55:58Z - * thompsonbry $ - * @param <E> - * The generic type of the elements in the buffer. - * @param <F> - * The generic type of the elements in the relation associated with - * the {@link IPredicate}. - * - * @todo This could be refactored such that it no longer implemented - * {@link IBuffer} but instead was a {@link BOp} with binding sets - * streaming in from its source. However, unlike a normal {@link BOp} it - * would have a compound sink and it would have to be tightly integrated - * with the {@link QueryEngine} to be used. - * - * @todo Figure out how we will combine binding set streams emerging from - * concurrent tasks executing on a given node destined for the same - * shard/node. (There is code in the {@link DistributedJoinTask} which - * does this for the same shard, but it does it on the receiver side.) Pay - * attention to the #of threads running in the join, the potential - * concurrency of threads targeting the same (bopId,shardId) and how to - * best combine their data together. 
- * - * @todo Optimize locator lookup by caching in {@link AbstractSplitter} and look - * at the code path for obtaining {@link PartitionLocator}s from the MDI. - * <p> - * For reads, we are permitted to cache the locators just as much as we - * like (but indirection would be introduced by a shared disk - * architecture). - * <p> - * For writes (or in a shard disk architecture) it is possible that the - * target shard will have moved by the time the receiver has notice of the - * intent to write on that shard or once the receiver has accepted the - * binding sets for that shard. The logic which moves the binding sets - * around will have to handle such 'stale locator' exceptions - * automatically. - * - * @todo This is not tracking the #of output chunks or the fanOut (#of - * shards/nodes which will receive binding sets). Given that the query - * engine will be managing the buffers on which the data are written, it - * might also update the appropriate statistics. - */ -public abstract class MapBindingSetsOverShardsBuffer<E extends IBindingSet, F> - extends AbstractUnsynchronizedArrayBuffer<E> { - - private static transient final Logger log = Logger.getLogger(MapBindingSetsOverShardsBuffer.class); - - /** - * The predicate from which we generate the asBound binding sets. This - * predicate and the {@link IKeyOrder} together determine the required - * access path. - */ - private final IPredicate<F> pred; - - /** - * Identifies the index for the access path required by the {@link #pred - * predicate}. - */ - private final IKeyOrder<F> keyOrder; - - /** - * The timestamp associated with the operation on the target access path. If - * the binding sets will be used to read on the shards of the target access - * path, then this is the read timestamp. If they will be used to write on - * the target access path, then this is the write timestamp. 
- */ - private final long timestamp; - - /** - * The {@link IKeyBuilder} for the index associated with the access path - * required by the predicate. - */ - private final IKeyBuilder keyBuilder; - - /** - * Used to efficient assign binding sets to index partitions. - */ - private final Splitter splitter; - -// /** -// */ -// private final BOpStats stats; - - /** - * @param fed - * The federation. - * @param pred - * The predicate associated with the target operator. The - * predicate identifies which variables and/or constants form the - * key for the access path and hence selects the shards on which - * the target operator must read or write. For example, when the - * target operator is a JOIN, this is the {@link IPredicate} - * associated with the right hand operator of the join. - * @param keyOrder - * Identifies the access path for the target predicate. - * @param timestamp - * The timestamp associated with the operation on the target - * access path. If the binding sets will be used to read on the - * shards of the target access path, then this is the read - * timestamp. If they will be used to write on the target access - * path, then this is the write timestamp. - * @param capacity - * The capacity of this buffer. - */ - public MapBindingSetsOverShardsBuffer( - final IBigdataFederation<?> fed,// - final IPredicate<F> pred, // - final IKeyOrder<F> keyOrder,// - final long timestamp,// - final int capacity) { - - super(capacity); - - if (fed == null) - throw new IllegalArgumentException(); - - if (pred == null) - throw new IllegalArgumentException(); - - if (keyOrder == null) - throw new IllegalArgumentException(); - -// this.context = context; - - this.pred = pred; - - this.keyOrder = keyOrder; - - this.timestamp = timestamp; - - /* - * Note: we can use the read view of the relation to get the IKeyBuilder - * even if we will be writing on the relation since the IKeyBuilder - * semantics can not be readily changed once an index has been created. 
- */ - { - - @SuppressWarnings("unchecked") - final IRelation<F> relation = (IRelation<F>) fed - .getResourceLocator().locate(pred.getOnlyRelationName(), - timestamp); - - final IIndex index = relation.getIndex(keyOrder); - - this.keyBuilder = index.getIndexMetadata().getKeyBuilder(); - - } - - /* - * Resolve a scale-out view of the metadata index for the target - * predicate. - */ - { - - final String namespace = pred.getOnlyRelationName(); - - final IMetadataIndex mdi = fed.getMetadataIndex(namespace + "." - + keyOrder.getIndexName(), timestamp); - - if (mdi == null) { - - throw new NoSuchIndexException("name=" + namespace - + ", timestamp=" + TimestampUtility.toString(timestamp)); - - } - - this.splitter = new Splitter(mdi); - - } - -// this.stats = context.getStats(); - - } - - /** - * Helper class efficiently splits an array of sorted keys into groups - * associated with a specific index partition. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ - static private class Splitter extends AbstractSplitter { - - private final IMetadataIndex mdi; - - public Splitter(final IMetadataIndex mdi) { - - if (mdi == null) - throw new IllegalArgumentException(); - - this.mdi = mdi; - - } - - @Override - protected IMetadataIndex getMetadataIndex(long ts) { - - return mdi; - - } - - } - - /** - * Helper class used to place the binding sets into order based on the - * {@link #fromKey} associated with the {@link #asBound} predicate. - * - * @author <a href="mailto:tho...@us...">Bryan - * Thompson</a> - */ - private class Bundle implements Comparable<Bundle> { - - /** The binding set. */ - final IBindingSet bindingSet; - - /** The asBound predicate. */ - final IPredicate<F> asBound; - - /** The fromKey generated from that asBound predicate. 
*/ - final byte[] fromKey; - - public Bundle(final IBindingSet bindingSet) { - - this.bindingSet = bindingSet; - - this.asBound = pred.asBound(bindingSet); - - this.fromKey = keyOrder.getFromKey(keyBuilder, asBound); - - } - - /** - * Imposes an unsigned byte[] order on the {@link #fromKey}. - */ - public int compareTo(final Bundle o) { - - return BytesUtil.compareBytes(this.fromKey, o.fromKey); - - } - - /** - * Implemented to shut up findbugs, but not used. - */ - @SuppressWarnings("unchecked") - public boolean equals(final Object o) { - - if (this == o) - return true; - - if (!(o instanceof MapBindingSetsOverShardsBuffer.Bundle)) - return false; - - final MapBindingSetsOverShardsBuffer.Bundle t = (MapBindingSetsOverShardsBuffer.Bundle) o; - - if (compareTo(t) != 0) - return false; - - if (!bindingSet.equals(t.bindingSet)) - return false; - - if (!asBound.equals(t.asBound)) - return false; - - return true; - - } - - /** - * Implemented to shut up find bugs. - */ - public int hashCode() { - - if (hash == 0) { - - hash = Arrays.hashCode(fromKey); - - } - - return hash; - - } - private int hash = 0; - - } - - /** - * Maps the chunk of {@link IBindingSet}s across the index partition(s) for - * the sink join dimension. - * - * @param a - * A chunk of {@link IBindingSet}s. - */ - protected void handleChunk(final E[] chunk) { - - @SuppressWarnings("unchecked") - final Bundle[] bundles = new MapBindingSetsOverShardsBuffer.Bundle[chunk.length]; - - /* - * Create the asBound version of the predicate and the associated - * fromKey for each bindingSet in the chunk. - */ - for (int i = 0; i < chunk.length; i++) { - - bundles[i] = new Bundle(chunk[i]); - - } - - /* - * Sort the binding sets in the chunk by the fromKey associated with - * each asBound predicate. - */ - Arrays.sort(bundles); - - /* - * Construct a byte[][] out of the sorted fromKeys and then generate - * slices (Splits) which group the binding sets based on the target - * shard. 
- */ - final LinkedList<Split> splits; - { - - final byte[][] keys = new byte[bundles.length][]; - - for (int i = 0; i < bundles.length; i++) { - - keys[i] = bundles[i].fromKey; - - } - - splits = splitter.splitKeys(timestamp, 0/* fromIndex */, - bundles.length/* toIndex */, keys); - - } - - if (log.isTraceEnabled()) - log.trace("nsplits=" + splits.size() + ", pred=" + pred); - - /* - * For each split, write the binding sets in that split onto the - * corresponding buffer. - */ - for (Split split : splits) { - - // Note: pmd is a PartitionLocator, so this cast is valid. - final IBuffer<IBindingSet[]> sink = getBuffer((PartitionLocator) split.pmd); - - final IBindingSet[] slice = new IBindingSet[split.ntuples]; - - for (int j = 0, i = split.fromIndex; i < split.toIndex; i++, j++) { - - final IBindingSet bset = bundles[i].bindingSet; - - slice[j] = bset; - - if (log.isTraceEnabled()) - log - .trace("Mapping: keyOrder=" + keyOrder + ",bset=" - + bset + " onto partitionId=" - + split.pmd.getPartitionId()); - - } - -// for (int i = split.fromIndex; i < split.toIndex; i++) { -// -// final Bundle bundle = bundles[i]; -// -// sink.add(bundle.bindingSet); -// -//// stats.unitsOut.increment(); -// -// } - - sink.add(slice); - - } - - } - - /** - * Extended to flush each buffer which targets a specific index partition as - * well. - * <p> - * {@inheritDoc} - */ - @Override - public long flush() { - - final long n = super.flush(); - - for (IBuffer<IBindingSet[]> sink : sinks.values()) { - - if (!sink.isEmpty()) - sink.flush(); - - } - - return n; - - } - - /** - * The allocated sinks. - * <p> - * Note: Since the collection is not thread-safe, synchronization is - * required when adding to the collection and when visiting the elements of - * the collection. However, the {@link MapBindingSetsOverShardsBuffer} is not - * thread-safe either so this should be Ok. 
- */ - private final LinkedHashMap<PartitionLocator, IBuffer<IBindingSet[]>/* sink */> sinks = new LinkedHashMap<PartitionLocator, IBuffer<IBindingSet[]>>(); - - /** - * An immutable view of the si... [truncated message content] |
From: <mrp...@us...> - 2010-09-20 19:43:55
|
Revision: 3598 http://bigdata.svn.sourceforge.net/bigdata/?rev=3598&view=rev Author: mrpersonick Date: 2010-09-20 19:43:44 +0000 (Mon, 20 Sep 2010) Log Message: ----------- adding Sesame to BOp conversion Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-09-20 19:40:52 UTC (rev 3597) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-09-20 19:43:44 UTC (rev 3598) @@ -41,6 +41,7 @@ import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; import com.bigdata.btree.IRangeQuery; +import com.bigdata.journal.ITx; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.rule.ISolutionExpander; @@ -93,7 +94,7 @@ final String relationName) { this(values, relationName, -1/* partitionId */, false/* optional */, - null/* constraint */, null/* expander */); + null/* constraint */, null/* expander */, ITx.READ_COMMITTED); } @@ -116,7 +117,7 @@ public Predicate(final IVariableOrConstant<?>[] values, final String relationName, final int partitionId, final boolean optional, final IElementFilter<E> constraint, - final ISolutionExpander<E> expander) { + final ISolutionExpander<E> expander, final long timestamp) { 
this(values, NV.asMap(new NV[] {// new NV(Annotations.RELATION_NAME,new String[]{relationName}),// @@ -124,6 +125,7 @@ new NV(Annotations.OPTIONAL,optional),// new NV(Annotations.CONSTRAINT,constraint),// new NV(Annotations.EXPANDER,expander),// + new NV(Annotations.TIMESTAMP, timestamp) })); if (relationName == null) Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-09-20 19:40:52 UTC (rev 3597) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-09-20 19:43:44 UTC (rev 3598) @@ -73,10 +73,10 @@ * * @return */ - public static BindingSetPipelineOp convert(final IStep step) { + public static BindingSetPipelineOp convert(final IStep step, final int startId) { if (step instanceof Rule) - return convert((Rule) step); + return convert((Rule) step, startId); else if (step instanceof Program) return convert((Program) step); @@ -91,9 +91,9 @@ * * @return */ - public static BindingSetPipelineOp convert(final Rule rule) { + public static BindingSetPipelineOp convert(final Rule rule, final int startId) { - int bopId = 1; + int bopId = startId; final BindingSetPipelineOp startOp = new StartOp(new BOp[] {}, NV.asMap(new NV[] {// Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicPredicate.java 2010-09-20 19:40:52 UTC (rev 3597) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/magic/MagicPredicate.java 2010-09-20 19:43:44 UTC (rev 3598) @@ -30,6 +30,7 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.ap.Predicate; +import com.bigdata.journal.ITx; import 
com.bigdata.rdf.internal.IV; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.rule.ISolutionExpander; @@ -120,7 +121,7 @@ IVariableOrConstant<IV>... terms// ) { - super(terms, relationName[0], partitionId, false, constraint, expander); + super(terms, relationName[0], partitionId, false, constraint, expander, ITx.READ_COMMITTED); } Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2010-09-20 19:40:52 UTC (rev 3597) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPOPredicate.java 2010-09-20 19:43:44 UTC (rev 3598) @@ -32,6 +32,7 @@ import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.ap.Predicate; +import com.bigdata.journal.ITx; import com.bigdata.rdf.internal.IV; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.rule.ISolutionExpander; @@ -51,19 +52,19 @@ */ private static final long serialVersionUID = 1L; - /** - * The arity is 3 unless the context position was given (as either a - * variable or bound to a constant) in which case it is 4. - * - * @todo rather than having a conditional arity, modify the SPOPredicate - * constructor to pass on either args[3] or args[3] depending on - * whether we are using triples or quads. - */ - public final int arity() { - - return get(3/*c*/) == null ? 3 : 4; - - } +// /** +// * The arity is 3 unless the context position was given (as either a +// * variable or bound to a constant) in which case it is 4. +// * +// * @todo rather than having a conditional arity, modify the SPOPredicate +// * constructor to pass on either args[3] or args[3] depending on +// * whether we are using triples or quads. +// */ +// public final int arity() { +// +// return get(3/*c*/) == null ? 
3 : 4; +// +// } /** * Required shallow copy constructor. @@ -230,12 +231,12 @@ ) { super( -// (c == null ? new IVariableOrConstant[] { s, p, o } -// : new IVariableOrConstant[] { s, p, o, c }), + (c == null ? new IVariableOrConstant[] { s, p, o } + : new IVariableOrConstant[] { s, p, o, c }), - new IVariableOrConstant[] { s, p, o, c }, +// new IVariableOrConstant[] { s, p, o, c }, - relationName[0], partitionId, optional, constraint, expander); + relationName[0], partitionId, optional, constraint, expander, ITx.READ_COMMITTED); // if (relationName == null) // throw new IllegalArgumentException(); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-09-20 19:40:52 UTC (rev 3597) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-09-20 19:43:44 UTC (rev 3598) @@ -49,6 +49,7 @@ import org.openrdf.query.algebra.evaluation.iterator.FilterIterator; import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; import com.bigdata.BigdataStatics; +import com.bigdata.bop.BOpContext; import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.Constant; import com.bigdata.bop.HashBindingSet; @@ -64,7 +65,9 @@ import com.bigdata.bop.constraint.NE; import com.bigdata.bop.constraint.NEConstant; import com.bigdata.bop.constraint.OR; +import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.LocalChunkMessage; +import com.bigdata.bop.engine.MockRunningQuery; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.engine.Rule2BOpUtility; import com.bigdata.bop.engine.RunningQuery; @@ -94,6 +97,7 @@ import com.bigdata.rdf.store.IRawTripleStore; import com.bigdata.relation.accesspath.IAccessPath; import 
com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.IBuffer; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; @@ -1640,14 +1644,13 @@ final IStep step) throws Exception { - final BindingSetPipelineOp query = Rule2BOpUtility.convert(step); + final int startId = 1; + final BindingSetPipelineOp query = Rule2BOpUtility.convert(step, startId); if (log.isInfoEnabled()) { log.info(query); } - final int startId = query.getProperty(Predicate.Annotations.BOP_ID); - final QueryEngine queryEngine = tripleSource.getSail().getQueryEngine(); final UUID queryId = UUID.randomUUID(); @@ -1655,7 +1658,7 @@ new LocalChunkMessage<IBindingSet>(queryEngine, queryId, startId, -1/* partitionId */, newBindingSetIterator(new HashBindingSet()))); - + final IAsynchronousIterator<IBindingSet[]> it1 = runningQuery.iterator(); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java 2010-09-20 19:40:52 UTC (rev 3597) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java 2010-09-20 19:43:44 UTC (rev 3598) @@ -136,24 +136,28 @@ "select * " + "WHERE { " + " ?s rdf:type ns:Person . " + - " ?s ns:likes ?likes . " + - " ?s rdfs:label ?label . " + + " ?s ns:likes ns:RDF . " + +// " ?s rdfs:label ?label . 
" + "}"; final TupleQuery tupleQuery = cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); TupleQueryResult result = tupleQuery.evaluate(); + + while (result.hasNext()) { + System.err.println(result.next()); + } Collection<BindingSet> solution = new LinkedList<BindingSet>(); solution.add(createBindingSet(new Binding[] { new BindingImpl("s", mike), - new BindingImpl("likes", rdf), - new BindingImpl("label", l1) +// new BindingImpl("likes", rdf), +// new BindingImpl("label", l1) })); solution.add(createBindingSet(new Binding[] { new BindingImpl("s", bryan), - new BindingImpl("likes", rdf), - new BindingImpl("label", l2) +// new BindingImpl("likes", rdf), +// new BindingImpl("label", l2) })); compare(result, solution); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-09-20 20:14:36
|
Revision: 3599 http://bigdata.svn.sourceforge.net/bigdata/?rev=3599&view=rev Author: thompsonbry Date: 2010-09-20 20:14:28 +0000 (Mon, 20 Sep 2010) Log Message: ----------- Added a Tee and a DataSetJoin operator for use in named and default graph queries. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyBindingSetOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Union.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestCopyBindingSets.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Tee.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/notes.txt branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/rdf/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/rdf/join/ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/rdf/join/DataSetJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/TestDataSetJoin.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-09-20 19:43:44 UTC (rev 3598) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-09-20 
20:14:28 UTC (rev 3599) @@ -195,7 +195,7 @@ * identifier for the {@link BOp} within the context of its owning * query. */ - String BOP_ID = "bopId"; + String BOP_ID = BOp.class.getName()+".bopId"; /** * The timeout for the operator evaluation (milliseconds). @@ -210,7 +210,7 @@ * be interpreted with respect to the time when the query began to * execute. */ - String TIMEOUT = "timeout"; + String TIMEOUT = BOp.class.getName()+".timeout"; /** * The default timeout for operator evaluation. Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java 2010-09-20 19:43:44 UTC (rev 3598) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BindingSetPipelineOp.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -48,9 +48,16 @@ /** * The value of the annotation is the {@link BOp.Annotations#BOP_ID} of - * the ancestor in the operator tree which serves as an alternative sink - * for binding sets. + * the ancestor in the operator tree which serves as the default sink + * for binding sets (default is the parent). */ + String SINK_REF = BindingSetPipelineOp.class.getName() + ".sinkRef"; + + /** + * The value of the annotation is the {@link BOp.Annotations#BOP_ID} of + * the ancestor in the operator tree which serves as the alternative + * sink for binding sets (default is no alternative sink). 
+ */ String ALT_SINK_REF = BindingSetPipelineOp.class.getName() + ".altSinkRef"; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyBindingSetOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyBindingSetOp.java 2010-09-20 19:43:44 UTC (rev 3598) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/CopyBindingSetOp.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -41,14 +41,13 @@ import com.bigdata.relation.accesspath.IBlockingBuffer; /** - * This operator copies its source to its sink. It is used to feed the first - * join in the pipeline. The operator should have no children but may be - * decorated with annotations as necessary. + * This operator copies its source to its sink. Specializations exist which are + * used to feed the initial set of intermediate results into a pipeline ( + * {@link StartOp}) and which are used to replicate intermediate results to more + * than one sink ({@link Tee}). * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ - * - * @todo unit tests. 
*/ public class CopyBindingSetOp extends BindingSetPipelineOp { @@ -99,8 +98,10 @@ } public Void call() throws Exception { - final IAsynchronousIterator<IBindingSet[]> source = context.getSource(); + final IAsynchronousIterator<IBindingSet[]> source = context + .getSource(); final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); + final IBlockingBuffer<IBindingSet[]> sink2 = context.getSink2(); try { final BOpStats stats = context.getStats(); while (source.hasNext()) { @@ -108,11 +109,17 @@ stats.chunksIn.increment(); stats.unitsIn.add(chunk.length); sink.add(chunk); + if (sink2 != null) + sink2.add(chunk); } sink.flush(); + if (sink2 != null) + sink2.flush(); return null; } finally { sink.close(); + if (sink2 != null) + sink2.close(); source.close(); } } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Tee.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Tee.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Tee.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -0,0 +1,118 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 20, 2010 + */ + +package com.bigdata.bop.bset; + +import java.util.Map; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.rdf.rules.TMUtility; +import com.bigdata.relation.RelationFusedView; +import com.bigdata.relation.rule.Slice; + +/** + * TEE(op):[sinkRef=X; altSinkRef=Y] + * <p> + * Pipeline operator copies its source to both sink and altSink. The sink and + * the altSink must both be ancestors of the operator. The sinkRef MAY be + * omitted when one of the targets is the immediate parent of the TEE. + * Evaluation scope: {@link BOpEvaluationContext#ANY}. + * <p> + * <h2>Example - Truth Maintenance</h2> + * <p> + * In truth maintenance we establish a focus store which is brought to a fixed + * point by applying some rules and a transitive closure operator. Once the + * fixed point is reached, the assertions in the focus store are either inserted + * onto the database or (for retraction) removed from database unless a proof + * can be found that an assertion is still entailed. + * <p> + * The {@link Tee} operator can be used in truth maintenance to read on the + * UNION of the focus store and the database - see {@link TMUtility}. This is + * handled as the "union" of two JOINs using a {@link Tee} as follows: + * + * <pre> + * slice := SLICE( join2 )[bopId=3] + * join2 := JOIN( join1, bar.spo(A,loves,B))[bopId=2] + * join1 := JOIN( tee, foo.spo(A,loves,B))[bopId=1; sinkRef=3] + * tee := TEE( ... )[altSinkRef=2], + * </pre> + * + * The {@link Tee} copies its inputs to both the default sink (its parent, which + * is join1) and the alternate sink (join2). 
join1 routes its outputs around + * join2, sending them directly to their lowest common ancestor. This has the + * effect of creating a union of their outputs at the receiver. In this example, + * a {@link Slice} is used as the target for both of the join operators. Since + * this is a pipeline construction, the joins will be evaluated in parallel as + * intermediate results arrive for those operators. Normally the {@link Tee} + * will be fed by a {@link StartOp} or another {@link PipelineJoin}. + * + * @todo The union of access paths was historically handled by + * {@link RelationFusedView}. That class should be removed once queries + * are rewritten to use the union of joins. + * + * @todo The {@link TMUtility} will have to be updated to use this operator + * rather than specifying multiple source "names" for the relation of the + * predicate. + * + * @todo The FastClosureRuleTask will also need to be updated to use a + * {@link Union} over the joins rather than a {@link RelationFusedView}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class Tee extends CopyBindingSetOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /** + * Deep copy constructor. + * @param op + */ + public Tee(final Tee op) { + super(op); + } + + /** + * Shallow copy constructor. 
+ * @param args + * @param annotations + */ + public Tee(BOp[] args, Map<String, Object> annotations) { + + super(args, annotations); + + getRequiredProperty(BindingSetPipelineOp.Annotations.ALT_SINK_REF); + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Tee.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Union.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Union.java 2010-09-20 19:43:44 UTC (rev 3598) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/Union.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -31,6 +31,7 @@ import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; +import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.IBindingSet; @@ -40,42 +41,29 @@ import com.bigdata.util.concurrent.Haltable; /** - * The union of two or more {@link BindingSetPipelineOp} operators. + * UNION(ops)[maxParallel(default all)] + * <p> + * Executes each of the operands in the union as a subqueries. Each subquery is + * run as a separate query but is linked to the parent query in which the UNION + * is being evaluated. The subqueries do not receive bindings from the parent + * and may be executed independently. By default, the subqueries are run with + * unlimited parallelism. + * <p> + * UNION is useful when independent queries are evaluated and their outputs are + * merged. Outputs from the UNION operator flow to the parent operator and will + * be mapped across shards or nodes as appropriate for the parent. UNION runs on + * the query controller. 
In order to avoid routing intermediate results through + * the controller, the {@link BindingSetPipelineOp.Annotations#SINK_REF} of each + * child operand should be overridden to specify the parent of the UNION + * operator. + * <p> + * UNION can not be used when the intermediate results must be routed into the + * subqueries. However, a {@link Tee} pattern may help in such cases. For + * example, a {@link Tee} may be used to create a union of pipeline joins for + * two access paths during truth maintenance. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ - * - * @todo I have some basic questions about the ability to use a UNION of two - * predicates in scale-out. I think that this might be more accurately - * modeled as the UNION of two joins. That is, rather than: - * - * <pre> - * JOIN( ..., - * UNION( foo.spo(A,loves,B), - * bar.spo(A,loves,B) ) - * ) - * </pre> - * - * using - * - * <pre> - * UNION( JOIN( ..., foo.spo(A,loves,B) ), - * JOIN( ..., bar.spo(A,loves,B) ) - * ) - * </pre> - * - * which would be a binding set union rather than an element union. - * - * @todo The union of access paths was historically handled by - * {@link RelationFusedView}. That class should be removed once queries - * are rewritten to use the union of joins. - * - * @todo The {@link TMUtility} will have to be updated to use this operator - * rather than specifying multiple source "names" for the relation of the - * predicate. - * - * @todo The FastClosureRuleTask will also need to be updated to use a - * {@link Union} over the joins rather than a {@link RelationFusedView}. */ public class Union extends BindingSetPipelineOp { @@ -101,35 +89,35 @@ public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - return new FutureTask<Void>(new UnionTask(this, context)); - +// return new FutureTask<Void>(new UnionTask(this, context)); + throw new UnsupportedOperationException(); } - /** - * Pipeline union impl. 
- * - * FIXME All this does is copy its inputs to its outputs. Since we only run - * one chunk of input at a time, it seems that the easiest way to implement - * a union is to have the operators in the union just target the same sink. - */ - private static class UnionTask extends Haltable<Void> implements Callable<Void> { - - public UnionTask(// - final Union op,// - final BOpContext<IBindingSet> context - ) { - - if (op == null) - throw new IllegalArgumentException(); - if (context == null) - throw new IllegalArgumentException(); - } - - public Void call() throws Exception { - // TODO Auto-generated method stub - throw new UnsupportedOperationException(); - } - - } +// /** +// * Pipeline union impl. +// * +// * FIXME All this does is copy its inputs to its outputs. Since we only run +// * one chunk of input at a time, it seems that the easiest way to implement +// * a union is to have the operators in the union just target the same sink. +// */ +// private static class UnionTask extends Haltable<Void> implements Callable<Void> { +// +// public UnionTask(// +// final Union op,// +// final BOpContext<IBindingSet> context +// ) { +// +// if (op == null) +// throw new IllegalArgumentException(); +// if (context == null) +// throw new IllegalArgumentException(); +// } +// +// public Void call() throws Exception { +// // TODO Auto-generated method stub +// throw new UnsupportedOperationException(); +// } +// +// } } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/notes.txt =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/notes.txt (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/notes.txt 2010-09-20 20:14:28 UTC (rev 3599) @@ -0,0 +1,160 @@ +RunningQuery: + + * FIXME Raise this into an annotation that we can tweak from the unit + * tests and then debug the problem. 
+ * + * FIXME Add an annotation or method to mark operators which must be + * evaluated using operator-at-a-time evaluation. SORT is the main + * example here (it must be operator at a time of necessity) but other + * operators may be implemented with operator at a time assumptions. This + * might be on PipelineOp and could be trinary {Chunked,Blocked,All}. + +Note: Many of the maxParallel annotations related to thread consumption will go +away with Java7 and async file IO. Other annotations, such as the #of 1M buffers +to allocate to an operator, need to be introduced to handle high volume queries. + +Note: UNION, STEPS, and STAR(transitive closure) are all evaluated on the query +controller. + +--- +UNION(ops)[maxParallel(default all)] + +Executes each of the operands in the union as subqueries. Each subquery is run +as a separate RunningQuery but is linked to the parent query in which the UNION +is being evaluated. The subqueries do not receive bindings from the parent and +may be executed independently. + +--- +STEPS(ops)[maxParallel(default 1)] + +The operands are executed as independent subqueries. Unlike UNION, STEPS does +not copy its source binding sets. + +--- + +STAR(op) [maxItr(default all)] + +Evaluate the operand until its mutation count remains unchanged from one round +to the next. The operand must write on a resource. The fixed point is determined +by examining BOPStats.mutationCount. + +Do with INSERT/REMOVE since all involve mutation. + +--- +DataSetJoin([left,var])[graphs={graphIds}; maxParallel=50] + +SPARQL specific join binds var to each of the given graphIds values for each +source binding set. This join operator is useful when the multiplicity of the +graphIds set is modest (between 2 and ~5000). This differs from a pipeline join +by joining against inline data and by being more specialized (it lacks a pred). +An alternative would be to develop an inline access path and then specify a std +predicate which references the data in its annotation. 
That could then generalize +to a predicate which references persistent data, query or tx local data, or inline +data. However, the DataSetJoin is still far simpler since it just binds the var +and sends out the asBound binding set and does not need to worry about internal +parallelism, alternative sinks, or chunking. + +Note: SPARQL default graph queries require us to apply a +distinct {s,p,o} filter to each default graph access path. For scale-out, that +is a distributed distinct access path filter. A DHT is used when the scale is +moderate. A distributed external merge sort SORT is used when the scale is very +large. + +Special cases exist for: + + - Whenever C is a constant, we are guaranteed that the SPO will be distinct and + do not need to apply a distributed distinct filter. + + - The SPOC access path can be optimized because we know that C is strictly + ascending. We can note the last observed {s,p,o} and skip to the next possible + o in the index (o:=o+1) using an advancer pattern (this could also just scan + until o changes). These are the possibly distinct {s,p,o} triples, which can + then be sent to the DHT unless we have a guarantee that S never crosses a + shard boundary (this is trivially true for standalone and this constraint can + be imposed on scale-out, but can cause problems if some subjects are very + highly referenced). + + - ? + +--- +INSERT(op,pred) : insert elements into an index. +DELETE(op,pred) : remove elements from an index. + +The access path mutation operators construct elements from the source binding +sets and the asBound predicates. For each element so constructed, they insert/ +remove the corresponding element into/from the access path. These operators +update a mutation counter IFF the access path was modified for the constructed +element. STAR relies on the mutation operator to detect a fixed point. 
+ +The mutation access paths need to use the appropriate concurrency control to +ensure the constraint on the mutable B+Tree is respected. This is either +the UnisolatedReadWriteIndex or the LockManager/ConcurrencyManager. + +The basic mutation operators write on an access path and may be combined using +STEPS in order to update all of the indices associated with a relation. + + - For incremental TM, we also need to construct an element for the just index + from the rule and assert it onto that index. + + - For the lexicon, we also need to write on the full text index. + + - For SIDs mode, we also need to capture the logic to ground the statements by + binding the SIDs. + + - triggers could be integrated here. perhaps events backed by a queue which + could be either restart safe or query local? + +---- +Parallel distributed closure : TBD. Review notes posted on trak. + +---- +done. TEE(op):[sinkRef=X; altSinkRef=Y] + +Pipeline operator copies its source to both sink and altSink. The sink and the +altSink must both be ancestors of the operator. The sinkRef MAY be omitted when +one of the targets is the immediate parent of the TEE. Evaluation scope: ANY. + +TM rules. JOIN of AP UNION is the same as the UNION of JOINs of the APs. This +gets translated into a pattern of routing in the pipeline such that the two JOINs +appear one after the other and the first join has its default sink _overridden_ +to reference the same target as the second join. This has the effect of creating +a union of their outputs at the receiver and the benefit that the JOINs run in +parallel. + +- We MUST also satisfy the requirement that the source binding sets are seen by +both joins. This can be done using an operator which copies its source binding +sets to both its default and alternative sinks. That would be an ANY scope op. + +This is basically an OR "pattern". + +---- +Lexicon joins - + +==== +Features: + + - operator-at-once evaluation. 
The operator is triggered once its possible + triggers are done. This is just an application of the same utility method + which we use to decide when a query is done. + + - subquery evaluation (linked parent to child). a subquery may be cancelled + by a slice without cancelling the parent. cancelling the parent terminates + all subqueries. whenever a query or subquery is terminated, we need to go + through its operator and query life cycle tear down methods (unit tests). + + - default graph access path using DHT. See DataSetJoin, which has some notes. + + - query and connection local resources: creating, destroying and using resources. + references to query local resources permit reuse of intermediate results across + different. + + CREATE FOO AS TEMP GRAPH ON LOCAL TEMP STORE SPO ONLY SHARED LEXICON + + - "thick" resources which can be sent along with the query or access either by + RMI or copied to the node where the query is running on demand. (This could + be just alternative access path instantiations which are selected by the query + optimizer or defaulted based on the amount of data to be moved to/from the + node if not specified.) + + - The predicate could have fromRevision/toRevision annotations which would be + used for fast computation of the delta between two historical commit points. Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/notes.txt ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/rdf/join/DataSetJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/rdf/join/DataSetJoin.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/rdf/join/DataSetJoin.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -0,0 +1,254 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. 
All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 20, 2010 + */ + +package com.bigdata.bop.rdf.join; + +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.constraint.INConstraint; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.IChunkAccessor; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.rdf.internal.IV; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; + +/** + * DataSetJoin(left,var)[graphs={graphIds}; maxParallel=50] + * <p> + * SPARQL specific join binds <i>var</i> to each of the given graphIds values + * for each source binding set. This join operator is useful when the + * multiplicity of the graphs is small to moderate. 
If there are a very large + * number of graphs, then the operator tree is too cumbersome and you would be + * better off joining against an index (whether temporary or permanent) + * containing the graphs. + * <p> + * The evaluation context is {@link BOpEvaluationContext#ANY}. + * + * @todo An alternative would be to develop an inline access path and then + * specify a standard predicate which references the data in its + * annotation. That could then generalize to a predicate which references + * persistent data, query or tx local data, or inline data. However, the + * DataSetJoin is still far simpler since it just binds the var and sends + * out the asBound binding set and does not need to worry about internal + * parallelism, alternative sinks, or chunking. + * + * @todo SPARQL default graph queries require us to apply a distinct {s,p,o} + * filter to each default graph access path. For scale-out, that is a + * distributed distinct access path filter. A DHT is used when the scale + * is moderate. A distributed external merge sort SORT is used when the + * scale is very large. + * <p> + * Special cases exist for: + * <ul> + * + * <li>Whenever C is a constant, we are guaranteed that the SPO will be + * distinct and do not need to apply a distributed distinct filter.</li> + * <li> + * The SPOC access path can be optimized because we know that C is + * strictly ascending. We can note the last observed {s,p,o} and skip to + * the next possible o in the index (o:=o+1) using an advancer pattern + * (this could also just scan until o changes). 
These are the possibly + * distinct {s,p,o} triples, which can then be sent to the DHT unless we + * have a guarantee that S never crosses a shard boundary (this is + * trivially true for standalone and this constraint can be imposed on + * scale-out, but can cause problems if some subjects are very highly + * referenced).</li> + * <li> + * There will be some cases where we do better by doing a + * {@link PipelineJoin} and filtering using an {@link INConstraint}. + * However, this is probably only true for very small temporary graphs and + * in high volume scale-out joins where a cost analysis shows that it will + * be more efficient to read all the shards with the IN filter.</li> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class DataSetJoin extends BindingSetPipelineOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends BindingSetPipelineOp.Annotations { + + /** + * The variable to be bound. + */ + String VAR = DataSetJoin.class.getName() + ".var"; + + /** + * The {@link IV}s to be bound. This is logically a set and SHOULD NOT + * include duplicates. The elements in this array SHOULD be ordered for + * improved efficiency. + */ + String GRAPHS = DataSetJoin.class.getName() + ".graphs"; + + } + + /** + * Deep copy constructor. + * + * @param op + */ + public DataSetJoin(DataSetJoin op) { + super(op); + } + + /** + * Shallow copy constructor. 
+ * @param args + * @param annotations + */ + public DataSetJoin(BOp[] args, Map<String, Object> annotations) { + super(args, annotations); + getVar(); + getGraphs(); + } + + public IVariable<?> getVar() { + return (IVariable<?>)getRequiredProperty(Annotations.VAR); + } + + public IV[] getGraphs() { + return (IV[]) getRequiredProperty(Annotations.GRAPHS); + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new DataSetJoinTask(this,context)); + + } + + /** + * Copy the source to the sink. + * + * @todo Optimize this. When using an {@link IChunkAccessor} we should be + * able to directly output the same chunk. + */ + static private class DataSetJoinTask implements Callable<Void> { + + private final DataSetJoin op; + + private final BOpContext<IBindingSet> context; + + private final IVariable<?> var; + private final IV[] graphs; + + DataSetJoinTask(final DataSetJoin op, + final BOpContext<IBindingSet> context) { + + this.op = op; + + this.context = context; + + var = op.getVar(); + + graphs = op.getGraphs(); + + } + + /** + * FIXME unit tests. + */ + public Void call() throws Exception { + final IAsynchronousIterator<IBindingSet[]> source = context + .getSource(); + final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); + try { + final BOpStats stats = context.getStats(); + while (source.hasNext()) { + final IBindingSet[] chunk = source.next(); + stats.chunksIn.increment(); + stats.unitsIn.add(chunk.length); + handleChunk_(chunk, sink); + } + sink.flush(); + return null; + } finally { + sink.close(); + source.close(); + } + } + + /** + * Cross product join. For each source binding set and each graph, + * output one binding set in which the variable is bound to that graph. + * + * @param chunk + * A chunk of {@link IBindingSet}s from the source. + * @param sink + * Where to write the data. 
+ * + * @todo Should we choose the nesting order of the loops based on the + * multiplicity of the source chunk size and the #of graphs to be + * bound? That way the inner loop decides the chunk size of the + * output. + * <p> + * Should we always emit an asBound source chunk for a given + * graphId? That will cluster better when the target predicate is + * mapped over CSPO. + */ + private void handleChunk_(final IBindingSet[] chunk, + final IBlockingBuffer<IBindingSet[]> sink) { + + final IBindingSet[] chunkOut = new IBindingSet[chunk.length + * graphs.length]; + + int n = 0; + + for (IBindingSet bset : chunk) { + + for (IV c : graphs) { + + bset = bset.clone(); + + bset.set(var, new Constant<IV>(c)); + + chunkOut[n++] = bset; + + } + + } + + sink.add(chunkOut); + + } + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/rdf/join/DataSetJoin.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java 2010-09-20 19:43:44 UTC (rev 3598) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestAll.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -92,6 +92,9 @@ // join operators. suite.addTest(com.bigdata.bop.join.TestAll.suite()); + // Specialized RDF join operators : @todo move to bigdata-rdf. + suite.addTest(com.bigdata.bop.rdf.join.TestAll.suite()); + // aggregation operators. 
suite.addTest(com.bigdata.bop.solutions.TestAll.suite()); Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestCopyBindingSets.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestCopyBindingSets.java 2010-09-20 19:43:44 UTC (rev 3598) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bset/TestCopyBindingSets.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -176,5 +176,14 @@ assertEquals(1L, stats.chunksOut.get()); } + + /** + * {@link Tee} is just a specialized {@link CopyBindingSetOp} which requires + * that the alternate sink is also specified. Write a unit test of those + * semantics for {@link CopyBindingSetOp}. + */ + public void test_copyToSinkAndAltSink() { + fail("write test"); + } } Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/TestAll.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/TestAll.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -0,0 +1,70 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.bop.rdf; + + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Aggregates test suites into increasing dependency order. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestAll extends TestCase { + + /** + * + */ + public TestAll() { + + } + + /** + * @param arg0 + */ + public TestAll(String arg0) { + + super(arg0); + + } + + /** + * Returns a test that will run each of the implementation specific test + * suites in turn. + */ + public static Test suite() + { + + final TestSuite suite = new TestSuite("RDF operators"); + + suite.addTest(com.bigdata.bop.rdf.join.TestAll.suite()); + + return suite; + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/TestAll.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/TestAll.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/TestAll.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -0,0 +1,70 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2007. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +package com.bigdata.bop.rdf.join; + + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Aggregates test suites into increasing dependency order. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestAll extends TestCase { + + /** + * + */ + public TestAll() { + + } + + /** + * @param arg0 + */ + public TestAll(String arg0) { + + super(arg0); + + } + + /** + * Returns a test that will run each of the implementation specific test + * suites in turn. + */ + public static Test suite() + { + + final TestSuite suite = new TestSuite("RDF join operators"); + + suite.addTestSuite(TestDataSetJoin.class); + + return suite; + + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/TestAll.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/TestDataSetJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/TestDataSetJoin.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/TestDataSetJoin.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -0,0 +1,56 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 20, 2010 + */ + +package com.bigdata.bop.rdf.join; + +import junit.framework.TestCase2; + +/** + * Test {@link DataSetJoin} + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestDataSetJoin extends TestCase2 { + + /** + * + */ + public TestDataSetJoin() { + } + + /** + * @param name + */ + public TestDataSetJoin(String name) { + super(name); + } + + public void test_something() { + fail("write tests"); + } + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/rdf/join/TestDataSetJoin.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java 2010-09-20 19:43:44 UTC (rev 3598) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java 2010-09-20 20:14:28 UTC (rev 3599) @@ -121,6 +121,7 @@ * that go around the sail. 
*/ cxn.flush();//commit(); + cxn.commit();// if (log.isInfoEnabled()) { log.info("\n" + sail.getDatabase().dumpStore()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-09-22 23:37:57
|
Revision: 3614 http://bigdata.svn.sourceforge.net/bigdata/?rev=3614&view=rev Author: mrpersonick Date: 2010-09-22 23:37:50 +0000 (Wed, 22 Sep 2010) Log Message: ----------- adding Sesame to BOp conversion Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-09-22 23:36:12 UTC (rev 3613) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-09-22 23:37:50 UTC (rev 3614) @@ -403,4 +403,13 @@ */ public int hashCode(); + /** + * Sets the {@link com.bigdata.bop.BOp.Annotations#BOP_ID} annotation. + * + * @param bopId + * The bop id. + * + * @return The newly annotated {@link IPredicate}. 
+ */ + public IPredicate<E> setBOpId(int bopId); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-09-22 23:36:12 UTC (rev 3613) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-09-22 23:37:50 UTC (rev 3614) @@ -27,26 +27,34 @@ package com.bigdata.bop.engine; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.log4j.Logger; import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.BOpUtility; import com.bigdata.bop.BindingSetPipelineOp; +import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; -import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; -import com.bigdata.bop.Var; import com.bigdata.bop.ap.E; import com.bigdata.bop.ap.Predicate; -import com.bigdata.bop.bset.CopyBindingSetOp; import com.bigdata.bop.bset.StartOp; import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.journal.ITx; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.relation.rule.IProgram; import com.bigdata.relation.rule.IRule; import com.bigdata.relation.rule.IStep; import com.bigdata.relation.rule.Program; -import com.bigdata.relation.rule.Rule; +import com.bigdata.relation.rule.eval.DefaultEvaluationPlan2; +import com.bigdata.relation.rule.eval.IRangeCountFactory; /** * Utility class converts {@link IRule}s to {@link BOp}s. @@ -63,6 +71,8 @@ */ public class Rule2BOpUtility { + protected static final Logger log = Logger.getLogger(Rule2BOpUtility.class); + /** * Convert an {@link IStep} into an operator tree. 
This should handle * {@link IRule}s and {@link IProgram}s as they are currently implemented @@ -73,12 +83,11 @@ * * @return */ - public static BindingSetPipelineOp convert(final IStep step, final int startId) { + public static BindingSetPipelineOp convert(final IStep step, + final int startId, final QueryEngine queryEngine) { - if (step instanceof Rule) - return convert((Rule) step, startId); - else if (step instanceof Program) - return convert((Program) step); + if (step instanceof IRule) + return convert((IRule) step, startId, queryEngine); throw new UnsupportedOperationException(); @@ -91,7 +100,8 @@ * * @return */ - public static BindingSetPipelineOp convert(final Rule rule, final int startId) { + public static BindingSetPipelineOp convert(final IRule rule, + final int startId, final QueryEngine queryEngine) { int bopId = startId; @@ -100,98 +110,119 @@ new NV(Predicate.Annotations.BOP_ID, bopId++),// })); - Iterator<Predicate> tails = rule.getTail(); + /* + * First put the tails in the correct order based on the logic in + * DefaultEvaluationPlan2. + */ + final BOpContextBase context = new BOpContextBase(queryEngine); + final DefaultEvaluationPlan2 plan = new DefaultEvaluationPlan2( + new IRangeCountFactory() { + public long rangeCount(final IPredicate pred) { + return context.getRelation(pred).getAccessPath(pred) + .rangeCount(false); + } + + }, rule); + + final int[] order = plan.getOrder(); + + /* + * Map the constraints from the variables they use. This way, we can + * properly attach constraints to only the first tail in which the + * variable appears. This way we only run the appropriate constraint + * once, instead of for every tail. 
+ */ + final Map<IVariable<?>, Collection<IConstraint>> constraintsByVar = + new HashMap<IVariable<?>, Collection<IConstraint>>(); + for (int i = 0; i < rule.getConstraintCount(); i++) { + final IConstraint c = rule.getConstraint(i); + + if (log.isDebugEnabled()) { + log.debug(c); + } + + final Set<IVariable<?>> uniqueVars = new HashSet<IVariable<?>>(); + final Iterator<IVariable<?>> vars = BOpUtility.getSpannedVariables(c); + while (vars.hasNext()) { + final IVariable<?> v = vars.next(); + uniqueVars.add(v); + } + + for (IVariable<?> v : uniqueVars) { + + if (log.isDebugEnabled()) { + log.debug(v); + } + + Collection<IConstraint> constraints = constraintsByVar.get(v); + if (constraints == null) { + constraints = new LinkedList<IConstraint>(); + constraintsByVar.put(v, constraints); + } + constraints.add(c); + } + } + BindingSetPipelineOp left = startOp; - while (tails.hasNext()) { - + for (int i = 0; i < order.length; i++) { + final int joinId = bopId++; - final Predicate<?> pred = tails.next().setBOpId(bopId++); + // assign a bop id to the predicate + final IPredicate<?> pred = rule.getTail(order[i]).setBOpId(bopId++); - System.err.println(pred); + /* + * Collect all the constraints for this predicate based on which + * variables make their first appearance in this tail + */ + final Collection<IConstraint> constraints = + new LinkedList<IConstraint>(); + /* + * Peek through the predicate's args to find its variables. Use + * these to attach constraints to the join based on the variables + * that make their first appearance in this tail. + */ + for (BOp arg : pred.args()) { + if (arg instanceof IVariable) { + final IVariable<?> v = (IVariable) arg; + /* + * We do a remove because we don't ever need to run these + * constraints again during subsequent joins once they + * have been run once at the initial appearance of the + * variable. 
+ * + * FIXME revisit this when we dynamically re-order running + * joins + */ + if (constraintsByVar.containsKey(v)) + constraints.addAll(constraintsByVar.remove(v)); + } + } + final BindingSetPipelineOp joinOp = new PipelineJoin<E>(// left, pred,// NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId),// + new NV(BOp.Annotations.BOP_ID, joinId),// + new NV(PipelineJoin.Annotations.CONSTRAINTS, + constraints.size() > 0 ? + constraints.toArray(new IConstraint[constraints.size()]) : null),// + new NV(PipelineJoin.Annotations.OPTIONAL, pred.isOptional()),// })); left = joinOp; } + // just for now while i'm debugging System.err.println(toString(left)); -// test_query_join2(); - return left; } - public static void test_query_join2() { - - final String namespace = "ns"; - final int startId = 1; - final int joinId1 = 2; - final int predId1 = 3; - final int joinId2 = 4; - final int predId2 = 5; - - final BindingSetPipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - })); - - final Predicate<?> pred1Op = new Predicate<E>(new IVariableOrConstant[] { - Var.var("x"), Var.var("y") }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.PARTITION_ID, - Integer.valueOf(-1)),// - new NV(Predicate.Annotations.OPTIONAL, - Boolean.FALSE),// - new NV(Predicate.Annotations.CONSTRAINT, null),// - new NV(Predicate.Annotations.EXPANDER, null),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>(new IVariableOrConstant[] { - Var.var("y"), Var.var("z") }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.PARTITION_ID, - Integer.valueOf(-1)),// - new NV(Predicate.Annotations.OPTIONAL, - Boolean.FALSE),// - new 
NV(Predicate.Annotations.CONSTRAINT, null),// - new NV(Predicate.Annotations.EXPANDER, null),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Predicate.Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final BindingSetPipelineOp join1Op = new PipelineJoin<E>(// - startOp, pred1Op,// - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - })); - - final BindingSetPipelineOp join2Op = new PipelineJoin<E>(// - join1Op, pred2Op,// - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - })); - - final BindingSetPipelineOp query = join2Op; - - System.err.println(toString(query)); - - } - private static String toString(BOp bop) { StringBuilder sb = new StringBuilder(); @@ -218,6 +249,13 @@ for (BOp arg : args) { toString(arg, sb, indent+4); } + IConstraint[] constraints = + bop.getProperty(PipelineJoin.Annotations.CONSTRAINTS); + if (constraints != null) { + for (IConstraint c : constraints) { + toString(c, sb, indent+4); + } + } } } @@ -228,6 +266,8 @@ * @param program * * @return + * + * FIXME What is the pattern for UNION? 
*/ public static BindingSetPipelineOp convert(final Program program) { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-09-22 23:36:12 UTC (rev 3613) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-09-22 23:37:50 UTC (rev 3614) @@ -592,6 +592,11 @@ IStep query = createNativeQuery(join); if (query == null) { + + if (log.isDebugEnabled()) { + log.debug("query == null"); + } + return new EmptyIteration<BindingSet, QueryEvaluationException>(); } @@ -1522,8 +1527,12 @@ result = com.bigdata.bop.Var.var(name); } else { final IV iv = val.getIV(); - if (iv == null) + if (iv == null) { + if (log.isDebugEnabled()) { + log.debug("null IV: " + val); + } return null; + } result = new Constant<IV>(iv); } return result; @@ -1584,6 +1593,7 @@ if (log.isDebugEnabled()) { log.debug("var: " + var); log.debug("constant: " + constant); + log.debug("constant.getIV(): " + constant.getIV()); } if (var == null || constant == null || constant.getIV() == null) { if (log.isDebugEnabled()) { @@ -1644,15 +1654,16 @@ final IStep step) throws Exception { + final QueryEngine queryEngine = tripleSource.getSail().getQueryEngine(); + final int startId = 1; - final BindingSetPipelineOp query = Rule2BOpUtility.convert(step, startId); + final BindingSetPipelineOp query = + Rule2BOpUtility.convert(step, startId, queryEngine); if (log.isInfoEnabled()) { log.info(query); } - final QueryEngine queryEngine = tripleSource.getSail().getQueryEngine(); - final UUID queryId = UUID.randomUUID(); final RunningQuery runningQuery = queryEngine.eval(queryId, query, new LocalChunkMessage<IBindingSet>(queryEngine, queryId, Modified: 
branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java 2010-09-22 23:36:12 UTC (rev 3613) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestBOps.java 2010-09-22 23:37:50 UTC (rev 3614) @@ -29,6 +29,7 @@ import java.util.Collection; import java.util.LinkedList; import java.util.Properties; +import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.openrdf.model.Literal; import org.openrdf.model.URI; @@ -44,6 +45,7 @@ import org.openrdf.query.TupleQueryResult; import org.openrdf.query.impl.BindingImpl; import com.bigdata.rdf.axioms.NoAxioms; +import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.store.BD; import com.bigdata.rdf.vocab.NoVocabulary; @@ -137,27 +139,209 @@ "select * " + "WHERE { " + " ?s rdf:type ns:Person . " + - " ?s ns:likes ns:RDF . " + -// " ?s rdfs:label ?label . " + + " ?s ns:likes ?likes . " + + " ?s rdfs:label ?label . 
" + "}"; final TupleQuery tupleQuery = cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); TupleQueryResult result = tupleQuery.evaluate(); - while (result.hasNext()) { - System.err.println(result.next()); - } +// while (result.hasNext()) { +// System.err.println(result.next()); +// } Collection<BindingSet> solution = new LinkedList<BindingSet>(); solution.add(createBindingSet(new Binding[] { new BindingImpl("s", mike), -// new BindingImpl("likes", rdf), -// new BindingImpl("label", l1) + new BindingImpl("likes", rdf), + new BindingImpl("label", l1) })); solution.add(createBindingSet(new Binding[] { new BindingImpl("s", bryan), -// new BindingImpl("likes", rdf), + new BindingImpl("likes", rdf), + new BindingImpl("label", l2) + })); + + compare(result, solution); + + } + + } finally { + cxn.close(); + sail.__tearDownUnitTest(); + } + + } + + public void testSimpleConstraint() throws Exception { + + final BigdataSail sail = getSail(); + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + final BigdataSailRepositoryConnection cxn = + (BigdataSailRepositoryConnection) repo.getConnection(); + cxn.setAutoCommit(false); + + try { + + final ValueFactory vf = sail.getValueFactory(); + + final String ns = BD.NAMESPACE; + + URI jill = new URIImpl(ns+"Jill"); + URI jane = new URIImpl(ns+"Jane"); + URI person = new URIImpl(ns+"Person"); + URI age = new URIImpl(ns+"age"); + URI IQ = new URIImpl(ns+"IQ"); + Literal l1 = new LiteralImpl("Jill"); + Literal l2 = new LiteralImpl("Jane"); + Literal age1 = vf.createLiteral(20); + Literal age2 = vf.createLiteral(30); + Literal IQ1 = vf.createLiteral(130); + Literal IQ2 = vf.createLiteral(140); +/**/ + cxn.setNamespace("ns", ns); + + cxn.add(jill, RDF.TYPE, person); + cxn.add(jill, RDFS.LABEL, l1); + cxn.add(jill, age, age1); + cxn.add(jill, IQ, IQ1); + cxn.add(jane, RDF.TYPE, person); + cxn.add(jane, RDFS.LABEL, l2); + cxn.add(jane, age, age2); + cxn.add(jane, IQ, IQ2); + + /* + * Note: The either 
flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.flush();//commit(); + cxn.commit();// + + if (log.isInfoEnabled()) { + log.info("\n" + sail.getDatabase().dumpStore()); + } + + { + + String query = + "PREFIX rdf: <"+RDF.NAMESPACE+"> " + + "PREFIX rdfs: <"+RDFS.NAMESPACE+"> " + + "PREFIX ns: <"+ns+"> " + + + "select * " + + "WHERE { " + + " ?s rdf:type ns:Person . " + + " ?s ns:age ?age . " + + " ?s ns:IQ ?iq . " + + " ?s rdfs:label ?label . " + + " FILTER( ?age < 25 && ?iq > 125 ) . " + + "}"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + TupleQueryResult result = tupleQuery.evaluate(); + +// while (result.hasNext()) { +// System.err.println(result.next()); +// } + + Collection<BindingSet> solution = new LinkedList<BindingSet>(); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", jill), + new BindingImpl("age", age1), + new BindingImpl("iq", IQ1), + new BindingImpl("label", l1) + })); + + compare(result, solution); + + } + + } finally { + cxn.close(); + sail.__tearDownUnitTest(); + } + + } + + public void testSimpleOptional() throws Exception { + + final BigdataSail sail = getSail(); + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + final BigdataSailRepositoryConnection cxn = + (BigdataSailRepositoryConnection) repo.getConnection(); + cxn.setAutoCommit(false); + + try { + + final ValueFactory vf = sail.getValueFactory(); + + final String ns = BD.NAMESPACE; + + URI mike = new URIImpl(ns+"Mike"); + URI bryan = new URIImpl(ns+"Bryan"); + URI person = new URIImpl(ns+"Person"); + URI likes = new URIImpl(ns+"likes"); + URI rdf = new URIImpl(ns+"RDF"); + Literal l1 = new LiteralImpl("Mike"); + Literal l2 = new LiteralImpl("Bryan"); +/**/ + cxn.setNamespace("ns", ns); + + cxn.add(mike, RDF.TYPE, person); + cxn.add(mike, likes, rdf); + cxn.add(mike, RDFS.LABEL, 
l1); + cxn.add(bryan, RDF.TYPE, person); + cxn.add(bryan, likes, rdf); +// cxn.add(bryan, RDFS.LABEL, l2); + + /* + * Note: The either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.flush();//commit(); + cxn.commit();// + + if (log.isInfoEnabled()) { + log.info("\n" + sail.getDatabase().dumpStore()); + } + + { + + String query = + "PREFIX rdf: <"+RDF.NAMESPACE+"> " + + "PREFIX rdfs: <"+RDFS.NAMESPACE+"> " + + "PREFIX ns: <"+ns+"> " + + + "select * " + + "WHERE { " + + " ?s rdf:type ns:Person . " + + " ?s ns:likes ?likes . " + + " OPTIONAL { ?s rdfs:label ?label . } " + + "}"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + TupleQueryResult result = tupleQuery.evaluate(); + +// while (result.hasNext()) { +// System.err.println(result.next()); +// } + + Collection<BindingSet> solution = new LinkedList<BindingSet>(); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", mike), + new BindingImpl("likes", rdf), + new BindingImpl("label", l1) + })); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", bryan), + new BindingImpl("likes", rdf), // new BindingImpl("label", l2) })); @@ -172,4 +356,96 @@ } + public void testOrEquals() throws Exception { + + final BigdataSail sail = getSail(); + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + final BigdataSailRepositoryConnection cxn = + (BigdataSailRepositoryConnection) repo.getConnection(); + cxn.setAutoCommit(false); + + try { + + final ValueFactory vf = sail.getValueFactory(); + + final LexiconRelation lex = sail.getDatabase().getLexiconRelation(); + + final String ns = BD.NAMESPACE; + + URI mike = new URIImpl(ns+"Mike"); + URI bryan = new URIImpl(ns+"Bryan"); + URI martyn = new URIImpl(ns+"Martyn"); + URI person = new URIImpl(ns+"Person"); + URI p = new URIImpl(ns+"p"); + Literal l1 = new 
LiteralImpl("Mike"); + Literal l2 = new LiteralImpl("Bryan"); + Literal l3 = new LiteralImpl("Martyn"); +/**/ + cxn.setNamespace("ns", ns); + + cxn.add(mike, RDF.TYPE, person); + cxn.add(mike, RDFS.LABEL, l1); + cxn.add(bryan, RDF.TYPE, person); + cxn.add(bryan, RDFS.COMMENT, l2); + cxn.add(martyn, RDF.TYPE, person); + cxn.add(martyn, p, l3); + + /* + * Note: The either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.flush();//commit(); + cxn.commit();// + + if (log.isInfoEnabled()) { + log.info("\n" + sail.getDatabase().dumpStore()); + } + + { + + String query = + "PREFIX rdf: <"+RDF.NAMESPACE+"> " + + "PREFIX rdfs: <"+RDFS.NAMESPACE+"> " + + "PREFIX ns: <"+ns+"> " + + + "select * " + + "WHERE { " + + " ?s rdf:type ns:Person . " + + " ?s ?p ?label . " + + " FILTER ( ?p = rdfs:label || ?p = rdfs:comment ) . " + + "}"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + TupleQueryResult result = tupleQuery.evaluate(); + +// while (result.hasNext()) { +// System.err.println(result.next()); +// } + + Collection<BindingSet> solution = new LinkedList<BindingSet>(); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", mike), + new BindingImpl("p", RDFS.LABEL), + new BindingImpl("label", l1) + })); + solution.add(createBindingSet(new Binding[] { + new BindingImpl("s", bryan), + new BindingImpl("p", RDFS.COMMENT), + new BindingImpl("label", l2) + })); + + compare(result, solution); + + } + + } finally { + cxn.close(); + sail.__tearDownUnitTest(); + } + + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-09-25 00:31:41
|
Revision: 3630 http://bigdata.svn.sourceforge.net/bigdata/?rev=3630&view=rev Author: thompsonbry Date: 2010-09-25 00:31:32 +0000 (Sat, 25 Sep 2010) Log Message: ----------- Added support for RMI access paths. All operators now assume "ANY" as their evaluation context. This means that you must explicitly override the evaluation context for scale-out JOINS in order to use shard-partitioned access paths versus remote access paths. The default of "ANY" is not valid for some operators. Such operators now check in their shallow copy constructor to verify that a legal evaluation context was explicitly set. At this point we have the tools to evaluate a default graph query in scale-out. The Rule2BOpUtility needs to be modified to become aware of standalone versus scale-out and to use the appropriate operator patterns for standalone and scale-out. The correct operator pattern depends on a number of factors. I will be working up a cost model and pulling the logic out of DefaultGraphSolutionExpander and NamedGraphSolutionExpander. 
Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpEvaluationContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ChunkedOrderedIteratorOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/StartOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/notes.txt branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/eval/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ndx/AbstractSampleIndex.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/MemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/ap/TestPredicateAccessPath.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestPipelineUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestRunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestMemorySortOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/solutions/TestSliceOp.java branches/QUADS_QUERY_BRANCH/bigdata-jini/src/test/com/bigdata/bop/fed/jini/TestJiniFederatedQueryEngine.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestRemoteAccessPath.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/AbstractChunkedOrderedIteratorOp.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -90,13 +90,13 @@ } - protected int getFullyBufferedReadThreshold() { +// protected int getFullyBufferedReadThreshold() { +// +// return getProperty(Annotations.FULLY_BUFFERED_READ_THRESHOLD, +// Annotations.DEFAULT_FULLY_BUFFERED_READ_THRESHOLD); +// +// } - return getProperty(Annotations.FULLY_BUFFERED_READ_THRESHOLD, - Annotations.DEFAULT_FULLY_BUFFERED_READ_THRESHOLD); - - } - protected long getChunkTimeout() { return getProperty(Annotations.CHUNK_TIMEOUT, Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-09-24 
19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOp.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -161,10 +161,8 @@ int getId(); /** - * Return the evaluation context for the operator. The default is - * {@link BOpEvaluationContext#ANY}. Operators which must be mapped against - * shards, mapped against nodes, or evaluated on the query controller must - * override this method. + * Return the evaluation context for the operator as specified by + * {@link Annotations#EVALUATION_CONTEXT}. */ BOpEvaluationContext getEvaluationContext(); @@ -245,6 +243,14 @@ String TIMESTAMP = BOp.class.getName() + ".timestamp"; /** + * This annotation determines where an operator will be evaluated + * (default {@value #DEFAULT_EVALUATION_CONTEXT}). + */ + String EVALUATION_CONTEXT = BOp.class.getName() + ".evaluationContext"; + + BOpEvaluationContext DEFAULT_EVALUATION_CONTEXT = BOpEvaluationContext.ANY; + + /** * For hash partitioned operators, this is the set of the member nodes * for the operator. * <p> Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -362,16 +362,11 @@ } - /** - * The default implementation returns {@link BOpEvaluationContext#ANY} and - * must be overridden by operators which have a different {@link BOpEvaluationContext}. 
- * <p> - * {@inheritDoc} - */ - public BOpEvaluationContext getEvaluationContext() { - - return BOpEvaluationContext.ANY; - + final public BOpEvaluationContext getEvaluationContext() { + + return getProperty(Annotations.EVALUATION_CONTEXT, + Annotations.DEFAULT_EVALUATION_CONTEXT); + } public final boolean isMutation() { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -38,6 +38,7 @@ import com.bigdata.btree.IRangeQuery; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.TimestampUtility; +import com.bigdata.relation.AbstractRelation; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.IAccessPath; @@ -50,7 +51,7 @@ import com.bigdata.striterator.IKeyOrder; /** - * The evaluation context for the operator (NOT serializable). + * Base class for the bigdata operation evaluation context (NOT serializable). * * @param <E> * The generic type of the objects processed by the operator. @@ -147,12 +148,13 @@ * order to support mutation operator we will also have to pass in the * {@link #writeTimestamp} or differentiate this in the method name. */ - public IRelation getRelation(final IPredicate<?> pred) { + @SuppressWarnings("unchecked") + public <E> IRelation<E> getRelation(final IPredicate<E> pred) { /* * Note: This uses the federation as the index manager when locating a - * resource for scale-out. However, s/o reads must use the local index - * manager when actually obtaining the index view for the relation. 
+ * resource for scale-out since that lets us look up the relation in + * the global row store, which is being used as a catalog. */ final IIndexManager tmp = getFederation() == null ? getIndexManager() : getFederation(); @@ -160,7 +162,7 @@ final long timestamp = (Long) pred .getRequiredProperty(BOp.Annotations.TIMESTAMP); - return (IRelation<?>) tmp.getResourceLocator().locate( + return (IRelation<E>) tmp.getResourceLocator().locate( pred.getOnlyRelationName(), timestamp); } @@ -194,11 +196,25 @@ * Obtain an access path reading from relation for the specified predicate * (from the tail of some rule). * <p> - * Note that passing in the {@link IRelation} is important since it - * otherwise must be discovered using the {@link IResourceLocator}. By - * requiring the caller to resolve it before hand and pass it into this - * method the contention and demand on the {@link IResourceLocator} cache is - * reduced. + * Note: Passing in the {@link IRelation} is important since it otherwise + * must be discovered using the {@link IResourceLocator}. By requiring the + * caller to resolve it before hand and pass it into this method the + * contention and demand on the {@link IResourceLocator} cache is reduced. + * <p> + * <h2>Scale-Out</h2> + * <p> + * Note: You MUST be extremely careful when using expanders with a local + * access path for a shard-partitioned or hash-partitioned index. Only + * expanders whose semantics remain valid with a partial view of the index + * will behave as expected. Here are some examples that DO NOT work: + * <ul> + * <li>"DISTINCT" on a partitioned local access path is not coherent</li> + * <li>Expanders which generate reads against keys not found on that shard + * are not coherent.</li> + * </ul> + * If you have requirements such as these, then either use a remote access + * path or change your query plan design more radically to take advantage of + * efficient shard-wise scans in scale-out. * * @param relation * The relation. 
@@ -211,19 +227,39 @@ * * @todo replaces * {@link IJoinNexus#getTailAccessPath(IRelation, IPredicate)}. + * + * @todo Reconcile with IRelation#getAccessPath(IPredicate) once the bop + * conversion is done. It has much of the same logic (this also + * handles remote access paths now). + * + * @todo Support mutable relation views. */ - @SuppressWarnings("unchecked") - public IAccessPath<?> getAccessPath(final IRelation<?> relation, - final IPredicate<?> predicate) { +// @SuppressWarnings("unchecked") + public <E> IAccessPath<E> getAccessPath(final IRelation<E> relation, + final IPredicate<E> predicate) { if (relation == null) throw new IllegalArgumentException(); if (predicate == null) throw new IllegalArgumentException(); - // FIXME This should be as assigned by the query planner so the query is fully declarative. - final IKeyOrder keyOrder = relation.getKeyOrder((IPredicate) predicate); + /* + * FIXME This should be as assigned by the query planner so the query is + * fully declarative. + */ + final IKeyOrder<E> keyOrder; + { + final IKeyOrder<E> tmp = predicate.getKeyOrder(); + if (tmp != null) { + // use the specified index. + keyOrder = tmp; + } else { + // ask the relation for the best index. + keyOrder = relation.getKeyOrder(predicate); + } + } + if (keyOrder == null) throw new RuntimeException("No access path: " + predicate); @@ -233,26 +269,24 @@ .getRequiredProperty(BOp.Annotations.TIMESTAMP); final int flags = predicate.getProperty( - PipelineOp.Annotations.FLAGS, - PipelineOp.Annotations.DEFAULT_FLAGS) + IPredicate.Annotations.FLAGS, + IPredicate.Annotations.DEFAULT_FLAGS) | (TimestampUtility.isReadOnly(timestamp) ? 
IRangeQuery.READONLY : 0); final int chunkOfChunksCapacity = predicate.getProperty( - PipelineOp.Annotations.CHUNK_OF_CHUNKS_CAPACITY, - PipelineOp.Annotations.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY); + BufferAnnotations.CHUNK_OF_CHUNKS_CAPACITY, + BufferAnnotations.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY); final int chunkCapacity = predicate.getProperty( - PipelineOp.Annotations.CHUNK_CAPACITY, - PipelineOp.Annotations.DEFAULT_CHUNK_CAPACITY); + BufferAnnotations.CHUNK_CAPACITY, + BufferAnnotations.DEFAULT_CHUNK_CAPACITY); final int fullyBufferedReadThreshold = predicate.getProperty( - PipelineOp.Annotations.FULLY_BUFFERED_READ_THRESHOLD, - PipelineOp.Annotations.DEFAULT_FULLY_BUFFERED_READ_THRESHOLD); + IPredicate.Annotations.FULLY_BUFFERED_READ_THRESHOLD, + IPredicate.Annotations.DEFAULT_FULLY_BUFFERED_READ_THRESHOLD); - final IIndexManager indexManager = getIndexManager(); - - if (predicate.getPartitionId() != -1) { + if (partitionId != -1) { /* * Note: This handles a read against a local index partition. For @@ -269,12 +303,14 @@ // return ((AbstractRelation<?>) relation) // .getAccessPathForIndexPartition(indexManager, // (IPredicate) predicate); + /* - * @todo This condition should probably be an error since the expander - * will be ignored. + * @todo This is an error since expanders are currently ignored on + * shard-wise access paths. While it is possible to enable expanders + * for shard-wise access paths. 
*/ -// if (predicate.getSolutionExpander() != null) -// throw new IllegalArgumentException(); + if (predicate.getSolutionExpander() != null) + throw new IllegalArgumentException(); final String namespace = relation.getNamespace();//predicate.getOnlyRelationName(); @@ -286,60 +322,70 @@ final ILocalBTreeView ndx = (ILocalBTreeView) indexManager .getIndex(name, timestamp); - return new AccessPath(relation, indexManager, timestamp, + return new AccessPath<E>(relation, indexManager, timestamp, predicate, keyOrder, ndx, flags, chunkOfChunksCapacity, chunkCapacity, fullyBufferedReadThreshold).init(); } - /* - * Find the best access path for the predicate for that relation. - * - * @todo Replace this with IRelation#getAccessPath(IPredicate) once the - * bop conversion is done. It is the same logic. - */ - IAccessPath accessPath; - { - // accessPath = relation.getAccessPath((IPredicate) predicate); - final IIndex ndx = relation.getIndex(keyOrder); + // Decide on a local or remote view of the index. + final IIndexManager indexManager; + if (predicate.isRemoteAccessPath()) { + // use federation in scale-out for a remote access path. + indexManager = fed != null ? fed : this.indexManager; + } else { + indexManager = this.indexManager; + } - if (ndx == null) { - - throw new IllegalArgumentException("no index? relation=" - + relation.getNamespace() + ", timestamp=" - + timestamp + ", keyOrder=" + keyOrder + ", pred=" - + predicate + ", indexManager=" + getIndexManager()); + // Obtain the index. + final String fqn = AbstractRelation.getFQN(relation, keyOrder); + final IIndex ndx = AbstractRelation.getIndex(indexManager, fqn, timestamp); - } + if (ndx == null) { - accessPath = new AccessPath((IRelation) relation, indexManager, - timestamp, (IPredicate) predicate, - (IKeyOrder) keyOrder, ndx, flags, chunkOfChunksCapacity, - chunkCapacity, fullyBufferedReadThreshold).init(); + throw new IllegalArgumentException("no index? 
relation=" + + relation.getNamespace() + ", timestamp=" + timestamp + + ", keyOrder=" + keyOrder + ", pred=" + predicate + + ", indexManager=" + getIndexManager()); } - - /* - * @todo No expander's for bops, at least not right now. They could be - * added in easily enough, which would support additional features for - * standalone query evaluation (runtime materialization of some - * entailments). - * - * FIXME temporarily enabled expanders (mikep) - */ - final ISolutionExpander<?> expander = predicate.getSolutionExpander(); - - if (expander != null) { - - // allow the predicate to wrap the access path - accessPath = expander.getAccessPath(accessPath); - - } - // return that access path. + // Obtain the access path for that relation and index. + final IAccessPath<E> accessPath = new AccessPath<E>( + relation, indexManager, timestamp, + predicate, keyOrder, ndx, flags, + chunkOfChunksCapacity, chunkCapacity, + fullyBufferedReadThreshold).init(); + + // optionally wrap with an expander pattern. + return expander(predicate, accessPath); + + } + + /** + * Optionally wrap with an expander pattern. 
+ * + * @param predicate + * @param accessPath + * @return + * @param <E> + */ + private <E> IAccessPath<E> expander(final IPredicate<E> predicate, + final IAccessPath<E> accessPath) { + + final ISolutionExpander<E> expander = predicate.getSolutionExpander(); + + if (expander != null) { + + // allow the predicate to wrap the access path + return expander.getAccessPath(accessPath); + + } + return accessPath; + } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpEvaluationContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpEvaluationContext.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpEvaluationContext.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -22,7 +22,11 @@ * The operator may be evaluated anywhere, including piecewise evaluation on * any node of the cluster where its inputs are available. This is used for * operators which do not need to concentrate or coordinate their inputs - * such as {@link ConditionalRoutingOp}. + * such as {@link ConditionalRoutingOp}. It may also be used in combination + * with a remote access path to impose a DISTINCT filter across one or more + * shards or nodes. + * + * @see IPredicate.Annotations#REMOTE_ACCESS_PATH */ ANY, /** Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -0,0 +1,84 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Sep 24, 2010 + */ + +package com.bigdata.bop; + +import com.bigdata.relation.accesspath.BlockingBuffer; +import com.bigdata.relation.accesspath.IBuffer; + +/** + * Annotations for {@link BlockingBuffer} as used by various kinds of operators. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface BufferAnnotations { + + /** + * The maximum #of chunks that can be buffered before an the producer would + * block (default {@value #DEFAULT_CHUNK_OF_CHUNKS_CAPACITY}). Note that + * partial chunks may be combined into full chunks whose nominal capacity is + * specified by {@link #CHUNK_CAPACITY}. + */ + String CHUNK_OF_CHUNKS_CAPACITY = BlockingBuffer.class.getName() + + ".chunkOfChunksCapacity"; + + /** + * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} + */ + int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 100; + + /** + * Sets the capacity of the {@link IBuffer}s used to accumulate a chunk of + * {@link IBindingSet}s (default {@value #CHUNK_CAPACITY}). Partial chunks + * may be automatically combined into full chunks. 
+ * + * @see #CHUNK_OF_CHUNKS_CAPACITY + */ + String CHUNK_CAPACITY = IBuffer.class.getName() + ".chunkCapacity"; + + /** + * Default for {@link #CHUNK_CAPACITY} + */ + int DEFAULT_CHUNK_CAPACITY = 100; + + /** + * The timeout in milliseconds that the {@link BlockingBuffer} will wait for + * another chunk to combine with the current chunk before returning the + * current chunk (default {@value #DEFAULT_CHUNK_TIMEOUT}). This may be ZERO + * (0) to disable the chunk combiner. + */ + String CHUNK_TIMEOUT = BlockingBuffer.class.getName() + ".chunkTimeout"; + + /** + * The default for {@link #CHUNK_TIMEOUT}. + * + * @todo this is probably much larger than we want. Try 10ms. + */ + int DEFAULT_CHUNK_TIMEOUT = 20; + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BufferAnnotations.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ChunkedOrderedIteratorOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ChunkedOrderedIteratorOp.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ChunkedOrderedIteratorOp.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -1,17 +1,7 @@ package com.bigdata.bop; -import com.bigdata.btree.ILocalBTreeView; -import com.bigdata.journal.IIndexManager; -import com.bigdata.rawstore.Bytes; -import com.bigdata.relation.accesspath.AccessPath; -import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAccessPath; -import com.bigdata.relation.accesspath.IBuffer; -import com.bigdata.relation.rule.eval.IJoinNexus; -import com.bigdata.service.IBigdataFederation; -import com.bigdata.service.IDataService; import com.bigdata.striterator.IChunkedOrderedIterator; -import com.bigdata.striterator.ICloseableIterator; 
/** * Interface for evaluating operations producing chunks of elements (tuples @@ -25,96 +15,11 @@ public interface ChunkedOrderedIteratorOp<E> extends BOp { /** - * Well known annotations pertaining to the binding set pipeline. + * Well known annotations. */ - public interface Annotations extends BOp.Annotations { + public interface Annotations extends BOp.Annotations, BufferAnnotations { - /** - * The maximum #of chunks that can be buffered before an the producer - * would block (default {@value #DEFAULT_CHUNK_OF_CHUNKS_CAPACITY}). - * Note that partial chunks may be combined into full chunks whose - * nominal capacity is specified by {@link #CHUNK_CAPACITY}. - */ - String CHUNK_OF_CHUNKS_CAPACITY = BlockingBuffer.class.getName() - + ".chunkOfChunksCapacity"; - /** - * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} - */ - int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 1000; - - /** - * Sets the capacity of the {@link IBuffer}s used to accumulate a chunk - * of {@link IBindingSet}s (default {@value #CHUNK_CAPACITY}). Partial - * chunks may be automatically combined into full chunks. - * - * @see #CHUNK_OF_CHUNKS_CAPACITY - */ - String CHUNK_CAPACITY = IBuffer.class.getName() + ".chunkCapacity"; - - /** - * Default for {@link #CHUNK_CAPACITY} - */ - int DEFAULT_CHUNK_CAPACITY = 100; - - /** - * The timeout in milliseconds that the {@link BlockingBuffer} will wait - * for another chunk to combine with the current chunk before returning - * the current chunk (default {@value #DEFAULT_CHUNK_TIMEOUT}). This may - * be ZERO (0) to disable the chunk combiner. - */ - String CHUNK_TIMEOUT = BlockingBuffer.class.getName() + ".chunkTimeout"; - - /** - * The default for {@link #CHUNK_TIMEOUT}. - * - * @todo this is probably much larger than we want. Try 10ms. - */ - int DEFAULT_CHUNK_TIMEOUT = 1000; - - /** - * If the estimated rangeCount for an - * {@link AccessPath#iterator()} is LTE this threshold then use - * a fully buffered (synchronous) iterator. 
Otherwise use an - * asynchronous iterator whose capacity is governed by - * {@link #CHUNK_OF_CHUNKS_CAPACITY}. - */ - String FULLY_BUFFERED_READ_THRESHOLD = AccessPath.class - .getName() - + ".fullyBufferedReadThreadshold"; - - /** - * Default for {@link #FULLY_BUFFERED_READ_THRESHOLD} - */ - int DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = 20*Bytes.kilobyte32; - } - /** - * Execute the operator, returning an iterator from which the element may be - * read. Operator evaluation may be halted using - * {@link ICloseableIterator#close()}. - * - * @param fed - * The {@link IBigdataFederation} IFF the operator is being - * evaluated on an {@link IBigdataFederation}. When evaluating - * operations against an {@link IBigdataFederation}, this - * reference provides access to the scale-out view of the indices - * and to other bigdata services. - * @param joinNexus - * An evaluation context with hooks for the <em>local</em> - * execution environment. When evaluating operators against an - * {@link IBigdataFederation} the {@link IJoinNexus} MUST be - * formulated with the {@link IIndexManager} of the local - * {@link IDataService} order perform efficient reads against the - * shards views as {@link ILocalBTreeView}s. It is an error if - * the {@link IJoinNexus#getIndexManager()} returns the - * {@link IBigdataFederation} since each read would use RMI. This - * condition should be checked by the operator implementation. - * - * @return An iterator from which the elements may be read. 
- */ - IChunkedOrderedIterator<E> eval(IBigdataFederation<?> fed, - IJoinNexus joinNexus); - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IPredicate.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -30,6 +30,7 @@ import java.io.Serializable; +import com.bigdata.btree.IRangeQuery; import com.bigdata.mdi.PartitionLocator; import com.bigdata.relation.IMutableRelation; import com.bigdata.relation.IRelation; @@ -61,7 +62,7 @@ /** * Interface declaring well known annotations. */ - public interface Annotations extends BOp.Annotations { + public interface Annotations extends BOp.Annotations, BufferAnnotations { /** * The name of the relation on which the predicate will read. @@ -104,6 +105,85 @@ * not address a specific shard. */ String PARTITION_ID = "partitionId"; + + int DEFAULT_PARTITION_ID = -1; + + /** + * Boolean option determines whether the predicate will use a local + * access path or a remote access path (default + * {@value #DEFAULT_REMOTE_ACCESS_PATH}). + * <p> + * <em>Local access paths</em> are much more efficient and should be + * used for most purposes. However, it is not possible to impose certain + * kinds of filters on a sharded or hash partitioned operations across + * local access paths. In particular, a DISTINCT filter can not be + * imposed using sharded or hash partitioned. + * <p> + * When the access path is local, the parent operator must be annotated + * to use a {@link BOpEvaluationContext#SHARDED shard wise} or + * {@link BOpEvaluationContext#HASHED node-wise} mapping of the binding + * sets. + * <p> + * <em>Remote access paths</em> use a scale-out index view. 
This view + * makes the scale-out index appear as if it were monolithic rather than + * sharded or hash partitioned. The monolithic view of a scale-out index + * can be used to impose a DISTINCT filter since all tuples will flow + * back to the caller. + * <p> + * When the access path is remote, the parent operator should use + * {@link BOpEvaluationContext#ANY} to prevent the binding sets from + * being moved around when the access path is remote. + * + * @see BOpEvaluationContext + */ + String REMOTE_ACCESS_PATH = "remoteAccessPath"; + + boolean DEFAULT_REMOTE_ACCESS_PATH = false; + + /** + * If the estimated rangeCount for an {@link AccessPath#iterator()} is + * LTE this threshold then use a fully buffered (synchronous) iterator. + * Otherwise use an asynchronous iterator whose capacity is governed by + * {@link #CHUNK_OF_CHUNKS_CAPACITY}. + * + * @see #DEFAULT_FULLY_BUFFERED_READ_THRESHOLD + */ + String FULLY_BUFFERED_READ_THRESHOLD = PipelineOp.class.getName() + + ".fullyBufferedReadThreshold"; + + /** + * Default for {@link #FULLY_BUFFERED_READ_THRESHOLD}. + * + * @todo Experiment with this. It should probably be something close to + * the branching factor, e.g., 100. + */ + int DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = 100; + + /** + * Flags for the iterator ({@link IRangeQuery#KEYS}, + * {@link IRangeQuery#VALS}, {@link IRangeQuery#PARALLEL}). + * <p> + * Note: The {@link IRangeQuery#PARALLEL} flag here is an indication + * that the iterator may run in parallel across the index partitions. + * This only effects scale-out and only for simple triple patterns since + * the pipeline join does something different (it runs inside the index + * partition using the local index, not the client's view of a + * distributed index). + * + * @see #DEFAULT_FLAGS + */ + String FLAGS = PipelineOp.class.getName() + ".flags"; + + /** + * The default flags will visit the keys and values of the non-deleted + * tuples and allows parallelism in the iterator (when supported). 
+ * + * @todo consider making parallelism something that the query planner + * must specify explicitly. + */ + final int DEFAULT_FLAGS = IRangeQuery.KEYS | IRangeQuery.VALS + | IRangeQuery.PARALLEL; + } /** @@ -275,6 +355,13 @@ public int getVariableCount(IKeyOrder<E> keyOrder); /** + * Return <code>true</code> if this is a remote access path. + * + * @see Annotations#REMOTE_ACCESS_PATH + */ + public boolean isRemoteAccessPath(); + + /** * Return the variable or constant at the specified index. * * @param index Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -30,16 +30,10 @@ import java.util.Map; import java.util.concurrent.TimeUnit; -import org.apache.log4j.Level; -import org.apache.log4j.Priority; - import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.btree.IRangeQuery; -import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IBlockingBuffer; -import com.bigdata.relation.accesspath.IBuffer; /** * An pipeline operator reads from a source and writes on a sink. This is an @@ -62,103 +56,8 @@ /** * Well known annotations pertaining to the binding set pipeline. */ - public interface Annotations extends BOp.Annotations { + public interface Annotations extends BOp.Annotations, BufferAnnotations { - /** - * The maximum #of chunks that can be buffered before an the producer - * would block (default {@value #DEFAULT_CHUNK_OF_CHUNKS_CAPACITY}). - * Note that partial chunks may be combined into full chunks whose - * nominal capacity is specified by {@link #CHUNK_CAPACITY}. 
- * - * @see #DEFAULT_CHUNK_OF_CHUNKS_CAPACITY - */ - String CHUNK_OF_CHUNKS_CAPACITY = PipelineOp.class.getName() - + ".chunkOfChunksCapacity"; - - /** - * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} - * - * @todo was 100. dialed down to reduce heap consumption for arrays. - * test performance @ 100 and 1000. - */ - int DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = 100; - - /** - * Sets the capacity of the {@link IBuffer}s used to accumulate a chunk - * of {@link IBindingSet}s (default {@value #CHUNK_CAPACITY}). Partial - * chunks may be automatically combined into full chunks. - * - * @see #DEFAULT_CHUNK_CAPACITY - * @see #CHUNK_OF_CHUNKS_CAPACITY - */ - String CHUNK_CAPACITY = PipelineOp.class.getName() + ".chunkCapacity"; - - /** - * Default for {@link #CHUNK_CAPACITY} - */ - int DEFAULT_CHUNK_CAPACITY = 100; - - /** - * The timeout in milliseconds that the {@link BlockingBuffer} will wait - * for another chunk to combine with the current chunk before returning - * the current chunk (default {@value #DEFAULT_CHUNK_TIMEOUT}). This may - * be ZERO (0) to disable the chunk combiner. - * - * @see #DEFAULT_CHUNK_TIMEOUT - */ - String CHUNK_TIMEOUT = PipelineOp.class.getName() + ".chunkTimeout"; - - /** - * The default for {@link #CHUNK_TIMEOUT}. - * - * @todo Experiment with values for this. Low values will push chunks - * through quickly. High values will cause chunks to be combined - * and move larger chunks around. [But if we factor BlockingBuffer - * out of the query engine then this will go away]. - */ - int DEFAULT_CHUNK_TIMEOUT = 20; - - /** - * If the estimated rangeCount for an {@link AccessPath#iterator()} is - * LTE this threshold then use a fully buffered (synchronous) iterator. - * Otherwise use an asynchronous iterator whose capacity is governed by - * {@link #CHUNK_OF_CHUNKS_CAPACITY}. 
- * - * @see #DEFAULT_FULLY_BUFFERED_READ_THRESHOLD - */ - String FULLY_BUFFERED_READ_THRESHOLD = PipelineOp.class.getName() - + ".fullyBufferedReadThreshold"; - - /** - * Default for {@link #FULLY_BUFFERED_READ_THRESHOLD}. - * - * @todo Experiment with this. It should probably be something close to - * the branching factor, e.g., 100. - */ - int DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = 100; - - /** - * Flags for the iterator ({@link IRangeQuery#KEYS}, - * {@link IRangeQuery#VALS}, {@link IRangeQuery#PARALLEL}). - * <p> - * Note: The {@link IRangeQuery#PARALLEL} flag here is an indication - * that the iterator may run in parallel across the index partitions. - * This only effects scale-out and only for simple triple patterns since - * the pipeline join does something different (it runs inside the index - * partition using the local index, not the client's view of a - * distributed index). - * - * @see #DEFAULT_FLAGS - */ - String FLAGS = PipelineOp.class.getName() + ".flags"; - - /** - * The default flags will visit the keys and values of the non-deleted - * tuples and allows parallelism in the iterator (when supported). 
- */ - final int DEFAULT_FLAGS = IRangeQuery.KEYS | IRangeQuery.VALS - | IRangeQuery.PARALLEL; - } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/ap/Predicate.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -42,14 +42,9 @@ import com.bigdata.bop.IVariable; import com.bigdata.bop.IVariableOrConstant; import com.bigdata.bop.NV; -import com.bigdata.btree.IRangeQuery; import com.bigdata.journal.ITx; -import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.rule.ISolutionExpander; -import com.bigdata.relation.rule.eval.IJoinNexus; -import com.bigdata.service.IBigdataFederation; -import com.bigdata.striterator.IChunkedOrderedIterator; import com.bigdata.striterator.IKeyOrder; /** @@ -183,8 +178,9 @@ public int getPartitionId() { - return (Integer)annotations.get(Annotations.PARTITION_ID); - + return (Integer) getProperty(Annotations.PARTITION_ID, + Annotations.DEFAULT_PARTITION_ID); + } @SuppressWarnings("unchecked") @@ -256,6 +252,11 @@ } return nunbound; } + + final public boolean isRemoteAccessPath() { + return getProperty(Annotations.REMOTE_ACCESS_PATH, + Annotations.DEFAULT_REMOTE_ACCESS_PATH); + } public Predicate<E> asBound(final IBindingSet bindingSet) { @@ -493,27 +494,4 @@ */ private int hash = 0; - /** - * @todo This does not allow us to override the iterator behavior based on - * the annotations. It also provides expander logic for scaleup and - * handles reading on a shard. It ignores the {@link IKeyOrder} - * associated with the {@link IPredicate} and there is no way to - * specify the {@link IRangeQuery} flags. 
- */ - @SuppressWarnings("unchecked") - public IChunkedOrderedIterator<E> eval(final IBigdataFederation<?> fed, - final IJoinNexus joinNexus) { - - // Resolve the relation name to the IRelation object. - final IRelation<E> relation = (IRelation<E>) joinNexus - .getTailRelationView(this/* predicate */); - - if (relation == null) - throw new RuntimeException("Not found: " + getOnlyRelationName()); - - return joinNexus.getTailAccessPath(relation, this/* predicate */) - .iterator(); - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/StartOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/StartOp.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bset/StartOp.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -3,7 +3,6 @@ import java.util.Map; import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpEvaluationContext; /** * A version of {@link CopyBindingSetOp} which is always evaluated on the query @@ -21,11 +20,18 @@ } public StartOp(BOp[] args, Map<String, Object> annotations) { - super(args, annotations); + + super(args, annotations); + + switch (getEvaluationContext()) { + case CONTROLLER: + break; + default: + throw new UnsupportedOperationException( + Annotations.EVALUATION_CONTEXT + "=" + + getEvaluationContext()); + } + } - final public BOpEvaluationContext getEvaluationContext() { - return BOpEvaluationContext.CONTROLLER; - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -38,6 +38,7 @@ import 
org.apache.log4j.Logger; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContextBase; +import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.IConstraint; @@ -48,6 +49,7 @@ import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.bset.StartOp; import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.solutions.SliceOp; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.relation.rule.IProgram; import com.bigdata.relation.rule.IRule; @@ -108,6 +110,8 @@ final BindingSetPipelineOp startOp = new StartOp(new BOp[] {}, NV.asMap(new NV[] {// new NV(Predicate.Annotations.BOP_ID, bopId++),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// })); /* @@ -210,6 +214,9 @@ constraints.size() > 0 ? constraints.toArray(new IConstraint[constraints.size()]) : null),// new NV(PipelineJoin.Annotations.OPTIONAL, pred.isOptional()),// + // Note: shard-partitioned joins! + new NV( Predicate.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.SHARDED),// })); left = joinOp; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/notes.txt =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/notes.txt 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/notes.txt 2010-09-25 00:31:32 UTC (rev 3630) @@ -1,15 +1,37 @@ RunningQuery: -- FIXME Raise this into an annotation that we can tweak from the unit - tests and then debug the problem. [Write unit tests at the - RunState level.] - -- FIXME Add an annotation or method to mark operators which must be - evaluated using operator-at-a-time evaluation. SORT is the - main example here (it must be operator at a time of necessity) - but other operators may implemented with operator at a time - assumptions. 
Add a unit tests for sort on the query engine. + - TestJiniFederatedQueryEngine. + - Join=ANY, Predicate=RMI. + + - Unit tests of the default and named graph access path patterns. + + - Cost model for the default graph and named graph access path + patterns so we can choose the right one for each query. + + - Subqueries {Union,Steps,Star}. Implement subquery support. (We + can't test Star until we have the mutation API in place.) + + - PipelineType {Vectored,OneShot}. + + A vectored operator processes its inputs in chunks, producing + output chunks each time it runs. + + A one shot operator runs exactly once for a given query and + must wait for all of its inputs to become available before it + can begin. For example, SORT is a one shot operator. + + - Mutation {Insert, Delete}. Concurrency control. Scale-out + mutation operators must go through the ConcurrencyManager in order + to respect the isolation levels imposed within AbstractTask. For + standalone, we can use either UnisolatedReadWriteIndex or the + ConcurrencyManager as appropriate (but how can we tell which is + appropriate!?!). + + - Mutation {Create, Destroy}. This gets into resource management, + so defer for the moment but tackle in the context of RDFS closure + using STAR. + - MemoryType {Chunked,Blocked}. Blocked operators need to inherit some interface which @@ -56,16 +78,6 @@ */ Blocked, - - - PipelineType {Vectored,OneShot}. - - A vectored operator processes its inputs in chunks, producing - output chunks each time it runs. - - An one shot operator runs exactly once for a given query and - must wait for all of its inputs to become available before it - can begin. For example, SORT is a one shot operator. - Note: Many of the maxParallel annotations related to thread consumption will go away with Java7 and async file IO. Other annotations, such as the #of 1M buffers to allocate to an operator, @@ -210,8 +222,12 @@ unbound and then applies an IN filter.
This works better because we can handle the reads on the SPOC index with C unbound very efficiently in scale-out by using a multi-block iterator on the - index segments. + index segments. [However, we must still impose DISTINCT on the + access path.] + - For very high volume operations we could do distributed merge + sorts to impose distinct and do operator at a time processing. + - @todo Add annotation to Predicate to indicate the use of an RMI access path in scale-out. Modify PipelineJoin such that it can be used as an ANY operator -or- a SHARDED operator. For default graph @@ -318,9 +334,6 @@ ==== Features: - - (***) Fix termination problems in RunningQuery relating to binding - set chunk / chunk message multiplicity. - - operator-at-once evaluation. The operator is triggered once its possible triggers are done. This is just an application of the same utility method which we use to decide when a query is done. @@ -350,7 +363,6 @@ used for fast computation of the delta between two historical commit points. - * FIXME Unit tests for non-distinct {@link IElementFilter}s on an * {@link IPredicate}, unit tests for distinct element filter on an * {@link IPredicate} which is capable of distributed operations. 
Do not use Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/eval/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/eval/JoinGraph.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/eval/JoinGraph.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -34,9 +34,7 @@ import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; -import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; -import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; @@ -189,6 +187,15 @@ if (sampleSize <= 0) throw new IllegalArgumentException(); + switch (getEvaluationContext()) { + case CONTROLLER: + break; + default: + throw new UnsupportedOperationException( + Annotations.EVALUATION_CONTEXT + "=" + + getEvaluationContext()); + } + V = new Vertex[v.length]; for (int i = 0; i < v.length; i++) { @@ -263,14 +270,4 @@ } - /** - * This operator must be evaluated on the query controller. - */ - @Override - public BOpEvaluationContext getEvaluationContext() { - - return BOpEvaluationContext.CONTROLLER; - - } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -87,6 +87,12 @@ * Note: In order to support pipelining, query plans need to be arranged in a * "left-deep" manner and there may not be intervening operators between the * pipeline join operator and the {@link IPredicate} on which it will read. 
+ * <p> + * Note: In scale-out, the {@link PipelineJoin} is generally annotated as a + * {@link BOpEvaluationContext#SHARDED} or {@link BOpEvaluationContext#HASHED} + * operator and the {@link IPredicate} is annotated for local access paths. If + * you need to use remote access paths, then the {@link PipelineJoin} should be + * annotated as a {@link BOpEvaluationContext#ANY} operator. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ @@ -278,15 +284,15 @@ } - /** - * Returns {@link BOpEvaluationContext#SHARDED} - */ - @Override - final public BOpEvaluationContext getEvaluationContext() { - - return BOpEvaluationContext.SHARDED; - - } + // /** + // * Returns {@link BOpEvaluationContext#SHARDED} + // */ + // @Override + // final public BOpEvaluationContext getEvaluationContext() { + // + // return BOpEvaluationContext.SHARDED; + // + // } public IPredicate<E> getPredicate() { @@ -339,14 +345,14 @@ public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { - return new FutureTask<Void>(new JoinTask(this, context)); + return new FutureTask<Void>(new JoinTask<E>(this, context)); } /** * Pipeline join impl. */ - private static class JoinTask extends Haltable<Void> implements Callable<Void> { + private static class JoinTask<E> extends Haltable<Void> implements Callable<Void> { /** * The join that is being executed. @@ -394,12 +400,12 @@ /** * The source for the elements to be joined. */ - final private IPredicate<?> right; + final private IPredicate<E> right; /** * The relation associated with the {@link #right} operand. */ - final private IRelation<?> relation; + final private IRelation<E> relation; /** * The partition identifier -or- <code>-1</code> if we are not reading @@ -476,7 +482,7 @@ * @param context */ public JoinTask(// - final PipelineJoin<?> joinOp,// + final PipelineJoin<E> joinOp,// final BOpContext<IBindingSet> context ) { @@ -851,7 +857,7 @@ * Aggregate the source bindingSets that license the same * asBound predicate. 
*/ - final Map<IPredicate<?>, Collection<IBindingSet>> map = combineBindingSets(chunk); + final Map<IPredicate<E>, Collection<IBindingSet>> map = combineBindingSets(chunk); /* * Generate an AccessPathTask from each distinct asBound @@ -898,7 +904,7 @@ final IBindingSet bindingSet = chunk[0]; // constrain the predicate to the given bindings. - IPredicate<?> predicate = right.asBound(bindingSet); + IPredicate<E> predicate = right.asBound(bindingSet); if (partitionId != -1) { @@ -917,7 +923,8 @@ } - new AccessPathTask(predicate,Arrays.asList(chunk)).call(); + new JoinTask.AccessPathTask(predicate, Arrays.asList(chunk)) + .call(); } @@ -937,13 +944,13 @@ * bindingSets in the chunk from which the predicate was * generated. */ - protected Map<IPredicate<?>, Collection<IBindingSet>> combineBindingSets( + protected Map<IPredicate<E>, Collection<IBindingSet>> combineBindingSets( final IBindingSet[] chunk) { if (log.isDebugEnabled()) log.debug("chunkSize=" + chunk.length); - final Map<IPredicate<?>, Collection<IBindingSet>> map = new LinkedHashMap<IPredicate<?>, Collection<IBindingSet>>( + final Map<IPredicate<E>, Collection<IBindingSet>> map = new LinkedHashMap<IPredicate<E>, Collection<IBindingSet>>( chunk.length); for (IBindingSet bindingSet : chunk) { @@ -951,7 +958,7 @@ halted(); // constrain the predicate to the given bindings. 
- IPredicate<?> predicate = right.asBound(bindingSet); + IPredicate<E> predicate = right.asBound(bindingSet); if (partitionId != -1) { @@ -1025,16 +1032,16 @@ * @throws Exception */ protected AccessPathTask[] getAccessPathTasks( - final Map<IPredicate<?>, Collection<IBindingSet>> map) { + final Map<IPredicate<E>, Collection<IBindingSet>> map) { final int n = map.size(); if (log.isDebugEnabled()) log.debug("#distinct predicates=" + n); - final AccessPathTask[] tasks = new AccessPathTask[n]; + final AccessPathTask[] tasks = new JoinTask.AccessPathTask[n]; - final Iterator<Map.Entry<IPredicate<?>, Collection<IBindingSet>>> itr = map + final Iterator<Map.Entry<IPredicate<E>, Collection<IBindingSet>>> itr = map .entrySet().iterator(); int i = 0; @@ -1043,7 +1050,7 @@ halted(); - final Map.Entry<IPredicate<?>, Collection<IBindingSet>> entry = itr + final Map.Entry<IPredicate<E>, Collection<IBindingSet>> entry = itr .next(); tasks[i++] = new AccessPathTask(entry.getKey(), entry @@ -1203,7 +1210,7 @@ * {@link IPredicate} for this join dimension. The asBound * {@link IPredicate} is {@link IAccessPath#getPredicate()}. */ - final private IAccessPath<?> accessPath; + final private IAccessPath<E> accessPath; /** * Return the <em>fromKey</em> for the {@link IAccessPath} generated @@ -1215,7 +1222,7 @@ */ protected byte[] getFromKey() { - return ((AccessPath<?>) accessPath).getFromKey(); + return ((AccessPath<E>) accessPath).getFromKey(); } @@ -1239,7 +1246,7 @@ if (this == o) return true; - if (!(o instanceof AccessPathTask)) + if (!(o instanceof JoinTask.AccessPathTask)) return false; return accessPath.getPredicate().equals( @@ -1262,7 +1269,7 @@ * join dimension that all result in the same asBound * {@link IPredicate}. 
*/ - public AccessPathTask(final IPredicate<?> predicate, + public AccessPathTask(final IPredicate<E> predicate, final Collection<IBindingSet> bindingSets) { if (predicate == null) Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java 2010-09-24 19:37:50 UTC (rev 3629) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/mutation/InsertOp.java 2010-09-25 00:31:32 UTC (rev 3630) @@ -33,7 +33,6 @@ import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; -import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IPredicate; @@ -44,7 +43,6 @@ import com.bigdata.btree.UnisolatedReadWriteIndex; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.journal.IIndexManager; -import com.bigdata.journal.ITx; import com.bigdata.relation.IRelation; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.relation.accesspath.IBlockingBuffer; @@ -293,6 +291,9 @@ * * FIXME This must obtain the appropriate lock for the mutable * index in scale-out. + * + * FIXME Allow remote writes as well if a remote access path is + * marked on the {@link IPredicate}. */ public <T> ILocalBTreeView getMu... [truncated message content] |
From: <tho...@us...> - 2010-09-28 09:48:09
|
Revision: 3647 http://bigdata.svn.sourceforge.net/bigdata/?rev=3647&view=rev Author: thompsonbry Date: 2010-09-28 09:48:02 +0000 (Tue, 28 Sep 2010) Log Message: ----------- Removed references to the test class "E" and to MockRunningQuery from the main code. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-09-28 00:25:49 UTC (rev 3646) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/Rule2BOpUtility.java 2010-09-28 09:48:02 UTC (rev 3647) @@ -45,7 +45,6 @@ import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; -import com.bigdata.bop.ap.E; import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.bset.StartOp; import com.bigdata.bop.join.PipelineJoin; @@ -206,7 +205,7 @@ } } - final BindingSetPipelineOp joinOp = new PipelineJoin<E>(// + final BindingSetPipelineOp joinOp = new PipelineJoin(// left, pred,// NV.asMap(new NV[] {// new NV(BOp.Annotations.BOP_ID, joinId),// @@ -256,8 +255,8 @@ for (BOp arg : args) { toString(arg, sb, indent+4); } - IConstraint[] constraints = - bop.getProperty(PipelineJoin.Annotations.CONSTRAINTS); + IConstraint[] constraints = (IConstraint[]) bop + .getProperty(PipelineJoin.Annotations.CONSTRAINTS); if (constraints != null) { for (IConstraint c : constraints) { toString(c, sb, indent+4); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-09-28 00:25:49 UTC (rev 3646) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2010-09-28 09:48:02 UTC (rev 3647) @@ -2,6 +2,7 @@ import info.aduna.iteration.CloseableIteration; import info.aduna.iteration.EmptyIteration; + import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -15,6 +16,7 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; + import org.apache.log4j.Logger; import org.openrdf.model.Literal; import org.openrdf.model.URI; @@ -48,8 +50,8 @@ import org.openrdf.query.algebra.evaluation.impl.EvaluationStrategyImpl; import org.openrdf.query.algebra.evaluation.iterator.FilterIterator; import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; + import com.bigdata.BigdataStatics; -import com.bigdata.bop.BOpContext; import com.bigdata.bop.BindingSetPipelineOp; import com.bigdata.bop.Constant; import com.bigdata.bop.HashBindingSet; @@ -65,9 +67,7 @@ import com.bigdata.bop.constraint.NE; import com.bigdata.bop.constraint.NEConstant; import com.bigdata.bop.constraint.OR; -import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.LocalChunkMessage; -import com.bigdata.bop.engine.MockRunningQuery; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.engine.Rule2BOpUtility; import com.bigdata.bop.engine.RunningQuery; @@ -97,7 +97,6 @@ import com.bigdata.rdf.store.IRawTripleStore; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IAsynchronousIterator; -import com.bigdata.relation.accesspath.IBlockingBuffer; import com.bigdata.relation.accesspath.IBuffer; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.relation.accesspath.ThickAsynchronousIterator; This was sent by the SourceForge.net collaborative development platform, the world's 
largest Open Source development site. |
From: <tho...@us...> - 2010-09-29 20:59:31
|
Revision: 3686 http://bigdata.svn.sourceforge.net/bigdata/?rev=3686&view=rev Author: thompsonbry Date: 2010-09-29 20:59:24 +0000 (Wed, 29 Sep 2010) Log Message: ----------- Javadoc and dropped unused class. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/DefaultSolutionExpander.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2010-09-29 19:55:30 UTC (rev 3685) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/AbstractRelation.java 2010-09-29 20:59:24 UTC (rev 3686) @@ -235,9 +235,11 @@ } /** - * Core impl. + * Core implementation. This makes it possible to substitute a different + * {@link IAccessPath} implementation. * * @param relation + * (optional). * @param indexManager * @param timestamp * @param predicate Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/DefaultSolutionExpander.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/DefaultSolutionExpander.java 2010-09-29 19:55:30 UTC (rev 3685) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/rule/DefaultSolutionExpander.java 2010-09-29 20:59:24 UTC (rev 3686) @@ -1,94 +0,0 @@ -/* - -Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... 
- -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -*/ -/* - * Created on Sep 3, 2008 - */ - -package com.bigdata.relation.rule; - -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IPredicate; -import com.bigdata.relation.accesspath.IAccessPath; -import com.bigdata.relation.rule.eval.IJoinNexus; - -/** - * A base class for {@link ISolutionExpander} implementations. The base class - * provides various helper methods designed to make it easier to override the - * evaluation behavior of an {@link IPredicate} during {@link IRule} evaluation. - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ - */ -public class DefaultSolutionExpander implements ISolutionExpander { - - /** - * - */ - private static final long serialVersionUID = -9174057768088016404L; - - public void expand(IJoinNexus joinNexus, IBindingSet bindingSet, - IPredicate predicate, boolean isSolution) { - - throw new UnsupportedOperationException(); - - } - - /** - * Returns the given {@link IAccessPath}. - */ - public IAccessPath getAccessPath(IAccessPath accessPath) { - - return accessPath; - - } - - /** - * Returns the approximate range count for the given {@link IAccessPath}. - */ - public long rangeCount(IAccessPath accessPath) { - - return accessPath.rangeCount(false/*exact*/); - - } - - /** - * Default to true for backchaining. 
- */ - public boolean backchain() { - - return true; - - } - - /** - * Default to false for run first. - */ - public boolean runFirst() { - - return false; - - } - -} Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2010-09-29 19:55:30 UTC (rev 3685) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/LexiconRelation.java 2010-09-29 20:59:24 UTC (rev 3686) @@ -703,7 +703,7 @@ * A factory returning the softly held singleton for the * {@link FullTextIndex}. * - * @see Options#TEXT_INDEX + * @see AbstractTripleStore.Options#TEXT_INDEX * * @todo replace with the use of the {@link IResourceLocator} since it * already imposes a canonicalizing mapping within for the index name Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-09-29 19:55:30 UTC (rev 3685) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-09-29 20:59:24 UTC (rev 3686) @@ -551,6 +551,8 @@ * Boolean option (default <code>true</code>) enables support for a * full text index that may be used to lookup literals by tokens found * in the text of those literals. + * + * @see #TEXT_INDEXER_CLASS */ String TEXT_INDEX = AbstractTripleStore.class.getName() + ".textIndex"; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |