This list is closed; nobody may subscribe to it.
2010 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(139) |
Aug
(94) |
Sep
(232) |
Oct
(143) |
Nov
(138) |
Dec
(55) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2011 |
Jan
(127) |
Feb
(90) |
Mar
(101) |
Apr
(74) |
May
(148) |
Jun
(241) |
Jul
(169) |
Aug
(121) |
Sep
(157) |
Oct
(199) |
Nov
(281) |
Dec
(75) |
2012 |
Jan
(107) |
Feb
(122) |
Mar
(184) |
Apr
(73) |
May
(14) |
Jun
(49) |
Jul
(26) |
Aug
(103) |
Sep
(133) |
Oct
(61) |
Nov
(51) |
Dec
(55) |
2013 |
Jan
(59) |
Feb
(72) |
Mar
(99) |
Apr
(62) |
May
(92) |
Jun
(19) |
Jul
(31) |
Aug
(138) |
Sep
(47) |
Oct
(83) |
Nov
(95) |
Dec
(111) |
2014 |
Jan
(125) |
Feb
(60) |
Mar
(119) |
Apr
(136) |
May
(270) |
Jun
(83) |
Jul
(88) |
Aug
(30) |
Sep
(47) |
Oct
(27) |
Nov
(23) |
Dec
|
2015 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(3) |
Oct
|
Nov
|
Dec
|
2016 |
Jan
|
Feb
|
Mar
(4) |
Apr
(1) |
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: <tho...@us...> - 2011-01-04 00:23:48
|
Revision: 4049 http://bigdata.svn.sourceforge.net/bigdata/?rev=4049&view=rev Author: thompsonbry Date: 2011-01-04 00:23:40 +0000 (Tue, 04 Jan 2011) Log Message: ----------- Removed the push()/pop() support on IBindingSet in order to reduce the data on the heap when we have a bunch of binding sets during query processing. The push()/pop() logic did not work out for the optional join groups, so it is just overhead. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/bindingSet/TestIBindingSet.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/mutation/TestInsert.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 
=================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -425,56 +425,56 @@ return Collections.unmodifiableMap(map); } - /** - * Lookup the first operator in the specified conditional binding group and - * return its bopId. - * - * @param query - * The query plan. - * @param groupId - * The identifier for the desired conditional binding group. - * - * @return The bopId of the first operator in that conditional binding group - * -or- <code>null</code> if the specified conditional binding group - * does not exist in the query plan. - * - * @throws IllegalArgumentException - * if either argument is <code>null</code>. - * - * @see PipelineOp.Annotations#CONDITIONAL_GROUP - * @see PipelineOp.Annotations#ALT_SINK_GROUP - */ - static public Integer getFirstBOpIdForConditionalGroup(final BOp query, - final Integer groupId) { - if (query == null) - throw new IllegalArgumentException(); - if (groupId == null) - throw new IllegalArgumentException(); - final Iterator<BOp> itr = postOrderIterator(query); - while (itr.hasNext()) { - final BOp t = itr.next(); - final Object x = t.getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - if (x != null) { - if (!(x instanceof Integer)) { - throw new BadConditionalGroupIdTypeException( - "Must be Integer, not: " + x.getClass() + ": " - + PipelineOp.Annotations.CONDITIONAL_GROUP); - } - final Integer id = (Integer) t - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - if(id.equals(groupId)) { - /* - * Return the BOpId associated with the first operator in - * the pre-order traversal of the query plan which has the - * specified groupId. - */ - return t.getId(); - } - } - } - // No such groupId in the query plan. 
- return null; - } +// /** +// * Lookup the first operator in the specified conditional binding group and +// * return its bopId. +// * +// * @param query +// * The query plan. +// * @param groupId +// * The identifier for the desired conditional binding group. +// * +// * @return The bopId of the first operator in that conditional binding group +// * -or- <code>null</code> if the specified conditional binding group +// * does not exist in the query plan. +// * +// * @throws IllegalArgumentException +// * if either argument is <code>null</code>. +// * +// * @see PipelineOp.Annotations#CONDITIONAL_GROUP +// * @see PipelineOp.Annotations#ALT_SINK_GROUP +// */ +// static public Integer getFirstBOpIdForConditionalGroup(final BOp query, +// final Integer groupId) { +// if (query == null) +// throw new IllegalArgumentException(); +// if (groupId == null) +// throw new IllegalArgumentException(); +// final Iterator<BOp> itr = postOrderIterator(query); +// while (itr.hasNext()) { +// final BOp t = itr.next(); +// final Object x = t.getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// if (x != null) { +// if (!(x instanceof Integer)) { +// throw new BadConditionalGroupIdTypeException( +// "Must be Integer, not: " + x.getClass() + ": " +// + PipelineOp.Annotations.CONDITIONAL_GROUP); +// } +// final Integer id = (Integer) t +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// if(id.equals(groupId)) { +// /* +// * Return the BOpId associated with the first operator in +// * the pre-order traversal of the query plan which has the +// * specified groupId. +// */ +// return t.getId(); +// } +// } +// } +// // No such groupId in the query plan. 
+// return null; +// } /** * Return the parent of the operator in the operator tree (this does not Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/IBindingSet.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -157,38 +157,38 @@ */ public int hashCode(); - /** - * Make a copy of the current symbol table (aka current variable bindings) - * and push it onto onto the stack. Variable bindings will be made against - * the current symbol table. The symbol table stack is propagated by - * {@link #clone()} and {@link #copy(IVariable[])}. Symbols tables may be - * used to propagate conditional bindings through a data flow until a - * decision point is reached, at which point they may be either discarded or - * committed. This mechanism may be used to support SPARQL style optional - * join groups. - * - * @throws UnsupportedOperationException - * if the {@link IBindingSet} is not mutable. - * - * @see #pop(boolean) - */ - public void push(); +// /** +// * Make a copy of the current symbol table (aka current variable bindings) +// * and push it onto onto the stack. Variable bindings will be made against +// * the current symbol table. The symbol table stack is propagated by +// * {@link #clone()} and {@link #copy(IVariable[])}. Symbols tables may be +// * used to propagate conditional bindings through a data flow until a +// * decision point is reached, at which point they may be either discarded or +// * committed. This mechanism may be used to support SPARQL style optional +// * join groups. +// * +// * @throws UnsupportedOperationException +// * if the {@link IBindingSet} is not mutable. 
+// * +// * @see #pop(boolean) +// */ +// public void push(); +// +// /** +// * Pop the current symbol table off of the stack. +// * +// * @param save +// * When <code>true</code>, the bindings on the current symbol +// * table are copied to the parent symbol table before the current +// * symbol table is popped off of the stack. If <code>false</code> +// * , any bindings associated with that symbol table are +// * discarded. +// * +// * @throws IllegalStateException +// * if there is no nested symbol table. +// * +// * @see #push() +// */ +// public void pop(boolean save); - /** - * Pop the current symbol table off of the stack. - * - * @param save - * When <code>true</code>, the bindings on the current symbol - * table are copied to the parent symbol table before the current - * symbol table is popped off of the stack. If <code>false</code> - * , any bindings associated with that symbol table are - * discarded. - * - * @throws IllegalStateException - * if there is no nested symbol table. - * - * @see #push() - */ - public void pop(boolean save); - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -33,7 +33,6 @@ import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.solutions.SliceOp; /** * Abstract base class for pipeline operators where the data moving along the @@ -106,70 +105,70 @@ */ String JOIN_GRAPH = PipelineOp.class.getName() + ".joinGraph"; - /** - * Annotation used to mark a set of operators belonging to a conditional - * binding group. Bindings within with the group will be discarded if - * any required operator in the group fails. 
For example, if a binding - * set exits via the alternative sink for a required join then any - * conditional bindings within the group will be discarded. - * <p> - * Together with {@link #ALT_SINK_GROUP}, the {@link #CONDITIONAL_GROUP} - * annotation provides the information necessary in order to decide the - * re-entry point in the query plan when a join within an conditional - * binding group fails. - * <p> - * The {@link #CONDITIONAL_GROUP} annotation controls the - * {@link IBindingSet#push()} and {@link IBindingSet#pop(boolean)} of - * individual solutions as they propagate through the pipeline. When a - * pipeline starts, the {@link IBindingSet} stack contains only the top - * level symbol table (i.e., name/value bindings). When an intermediate - * solution enters a {@link PipelineOp} marked as belonging to a - * {@link #CONDITIONAL_GROUP}, a new symbol table is - * {@link IBindingSet#push() pushed} onto the stack for that solution. - * If the solution leaves the optional join group via the default sink, - * then the symbol table is "saved" when it is - * {@link IBindingSet#pop(boolean) popped} off of the stack. If the - * solution leaves the join group via the alternative sink, then the - * symbol table is discarded when it is {@link IBindingSet#pop(boolean) - * popped} off of the stack. This provides for conditional binding of - * variables within the operators of the group. - * <p> - * The value of the {@link #CONDITIONAL_GROUP} is an {@link Integer} - * which uniquely identifies the group within the query. - * - * @deprecated The binding set stack push/pop mechanisms are not - * sufficient to support optional join groups. This - * annotation will be removed unless it proves valuable for - * marking the elements of a join group, in which case the - * javadoc needs to be updated. 
- */ - String CONDITIONAL_GROUP = PipelineOp.class.getName() + ".conditionalGroup"; +// /** +// * Annotation used to mark a set of operators belonging to a conditional +// * binding group. Bindings within with the group will be discarded if +// * any required operator in the group fails. For example, if a binding +// * set exits via the alternative sink for a required join then any +// * conditional bindings within the group will be discarded. +// * <p> +// * Together with {@link #ALT_SINK_GROUP}, the {@link #CONDITIONAL_GROUP} +// * annotation provides the information necessary in order to decide the +// * re-entry point in the query plan when a join within an conditional +// * binding group fails. +// * <p> +// * The {@link #CONDITIONAL_GROUP} annotation controls the +// * {@link IBindingSet#push()} and {@link IBindingSet#pop(boolean)} of +// * individual solutions as they propagate through the pipeline. When a +// * pipeline starts, the {@link IBindingSet} stack contains only the top +// * level symbol table (i.e., name/value bindings). When an intermediate +// * solution enters a {@link PipelineOp} marked as belonging to a +// * {@link #CONDITIONAL_GROUP}, a new symbol table is +// * {@link IBindingSet#push() pushed} onto the stack for that solution. +// * If the solution leaves the optional join group via the default sink, +// * then the symbol table is "saved" when it is +// * {@link IBindingSet#pop(boolean) popped} off of the stack. If the +// * solution leaves the join group via the alternative sink, then the +// * symbol table is discarded when it is {@link IBindingSet#pop(boolean) +// * popped} off of the stack. This provides for conditional binding of +// * variables within the operators of the group. +// * <p> +// * The value of the {@link #CONDITIONAL_GROUP} is an {@link Integer} +// * which uniquely identifies the group within the query. 
+// * +// * @deprecated The binding set stack push/pop mechanisms are not +// * sufficient to support optional join groups. This +// * annotation will be removed unless it proves valuable for +// * marking the elements of a join group, in which case the +// * javadoc needs to be updated. +// */ +// String CONDITIONAL_GROUP = PipelineOp.class.getName() + ".conditionalGroup"; - /** - * Annotation used to designate the target when a required operator - * within an {@link #CONDITIONAL_GROUP} fails. The value of this - * annotation must be the {@link #CONDITIONAL_GROUP} identifier - * corresponding to the next conditional binding group in the query - * plan. If there is no such group, then the {@link #ALT_SINK_REF} - * should be used instead to specify the target operator in the - * pipeline, e.g., a {@link SliceOp}. - * <p> - * The target {@link #CONDITIONAL_GROUP} is specified (rather than the - * bopId of the target join) since the non-optional joins in the target - * {@link #CONDITIONAL_GROUP} be reordered by the query optimizer. The - * entry point for solutions redirected to the {@link #ALT_SINK_GROUP} - * is therefore the first operator in the target - * {@link #CONDITIONAL_GROUP}. This decouples the routing decisions from - * the join ordering decisions. - * - * @see #CONDITIONAL_GROUP - * @see #ALT_SINK_REF - * - * @deprecated The binding set stack push/pop mechanisms are not - * sufficient to support optional join groups. This - * annotation will be removed. - */ - String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; +// /** +// * Annotation used to designate the target when a required operator +// * within an {@link #CONDITIONAL_GROUP} fails. The value of this +// * annotation must be the {@link #CONDITIONAL_GROUP} identifier +// * corresponding to the next conditional binding group in the query +// * plan. 
If there is no such group, then the {@link #ALT_SINK_REF} +// * should be used instead to specify the target operator in the +// * pipeline, e.g., a {@link SliceOp}. +// * <p> +// * The target {@link #CONDITIONAL_GROUP} is specified (rather than the +// * bopId of the target join) since the non-optional joins in the target +// * {@link #CONDITIONAL_GROUP} be reordered by the query optimizer. The +// * entry point for solutions redirected to the {@link #ALT_SINK_GROUP} +// * is therefore the first operator in the target +// * {@link #CONDITIONAL_GROUP}. This decouples the routing decisions from +// * the join ordering decisions. +// * +// * @see #CONDITIONAL_GROUP +// * @see #ALT_SINK_REF +// * +// * @deprecated The binding set stack push/pop mechanisms are not +// * sufficient to support optional join groups. This +// * annotation will be removed. +// */ +// String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ArrayBindingSet.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -33,7 +33,6 @@ import java.util.Collections; import java.util.Iterator; import java.util.Map; -import java.util.Stack; import java.util.Map.Entry; import com.bigdata.bop.IBindingSet; @@ -206,60 +205,62 @@ * pushed onto the stack or popped off of the stack, but the stack MAY NOT * become empty. */ - private final Stack<ST> stack; +// private final Stack<ST> stack; + private final ST current; /** * Return the symbol table on the top of the stack. 
*/ private ST current() { - return stack.peek(); + return current; +// return stack.peek(); } - public void push() { +// public void push() { +// +// // The current symbol table. +// final ST cur = current(); +// +// // Create a new symbol table. +// final ST tmp = new ST(cur.nbound, cur.vars.clone(), cur.vals.clone()); +// +// // Push the new symbol table onto the stack. +// stack.push(tmp); +// +// } +// +// public void pop(final boolean save) { +// +// if (stack.size() < 2) { +// /* +// * The stack may never become empty. Therefore there must be at +// * least two symbol tables on the stack for a pop() request. +// */ +// throw new IllegalArgumentException(); +// } +// +// // Pop the symbol table off of the top of the stack. +// final ST old = stack.pop(); +// +// if (save) { +// +// // discard the current symbol table. +// stack.pop(); +// +// // replacing it with the symbol table which we popped off the stack. +// stack.push(old); +// +// } else { +// +// // clear the hash code. +// hash = 0; +// +// } +// +// } - // The current symbol table. - final ST cur = current(); - - // Create a new symbol table. - final ST tmp = new ST(cur.nbound, cur.vars.clone(), cur.vals.clone()); - - // Push the new symbol table onto the stack. - stack.push(tmp); - - } - - public void pop(final boolean save) { - - if (stack.size() < 2) { - /* - * The stack may never become empty. Therefore there must be at - * least two symbol tables on the stack for a pop() request. - */ - throw new IllegalArgumentException(); - } - - // Pop the symbol table off of the top of the stack. - final ST old = stack.pop(); - - if (save) { - - // discard the current symbol table. - stack.pop(); - - // replacing it with the symbol table which we popped off the stack. - stack.push(old); - - } else { - - // clear the hash code. - hash = 0; - - } - - } - /** * Copy constructor (used by clone, copy). 
* @@ -272,30 +273,32 @@ protected ArrayBindingSet(final ArrayBindingSet src, final IVariable[] variablesToKeep) { - stack = new Stack<ST>(); +// stack = new Stack<ST>(); +// +// final int stackSize = src.stack.size(); +// +// int depth = 1; +// +// for (ST srcLst : src.stack) { +// +// /* +// * Copy the source bindings. +// * +// * Note: If a restriction exists on the variables to be copied, then +// * it is applied onto the the top level of the stack. If the symbol +// * table is saved when it is pop()'d, then the modified bindings +// * will replace the parent symbol table on the stack. +// */ +// final ST tmp = copy(srcLst, +// depth == stackSize ? variablesToKeep : null); +// +// // Push onto the stack. +// stack.push(tmp); +// +// } - final int stackSize = src.stack.size(); - - int depth = 1; + current = copy(src.current, variablesToKeep); - for (ST srcLst : src.stack) { - - /* - * Copy the source bindings. - * - * Note: If a restriction exists on the variables to be copied, then - * it is applied onto the the top level of the stack. If the symbol - * table is saved when it is pop()'d, then the modified bindings - * will replace the parent symbol table on the stack. - */ - final ST tmp = copy(srcLst, - depth == stackSize ? variablesToKeep : null); - - // Push onto the stack. 
- stack.push(tmp); - - } - } /** @@ -458,9 +461,11 @@ if(vars.length != vals.length) throw new IllegalArgumentException(); - stack = new Stack<ST>(); +// stack = new Stack<ST>(); +// +// stack.push(new ST(vars.length, vars, vals)); - stack.push(new ST(vars.length, vars, vals)); + current = new ST(vars.length, vars, vals); } @@ -478,11 +483,14 @@ if (capacity < 0) throw new IllegalArgumentException(); - stack = new Stack<ST>(); +// stack = new Stack<ST>(); +// +// stack.push(new ST(0/* nbound */, new IVariable[capacity], +// new IConstant[capacity])); - stack.push(new ST(0/* nbound */, new IVariable[capacity], - new IConstant[capacity])); - + current = new ST(0/* nbound */, new IVariable[capacity], + new IConstant[capacity]); + } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/EmptyBindingSet.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -161,12 +161,12 @@ } - public void push() { - throw new IllegalStateException(); - } - - public void pop(boolean save) { - throw new UnsupportedOperationException(); - } +// public void push() { +// throw new IllegalStateException(); +// } +// +// public void pop(boolean save) { +// throw new UnsupportedOperationException(); +// } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/HashBindingSet.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -32,7 +32,6 @@ import java.util.Iterator; import 
java.util.LinkedHashMap; import java.util.Map; -import java.util.Stack; import java.util.Map.Entry; import com.bigdata.bop.IBindingSet; @@ -60,92 +59,98 @@ // */ // private final LinkedHashMap<IVariable, IConstant> map; - /** - * The stack of symbol tables. Each symbol table is a mapping from an - * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. - * The stack is initialized with an empty symbol table. Symbol tables may be - * pushed onto the stack or popped off of the stack, but the stack MAY NOT - * become empty. - */ - private final Stack<LinkedHashMap<IVariable, IConstant>> stack; +// /** +// * The stack of symbol tables. Each symbol table is a mapping from an +// * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. +// * The stack is initialized with an empty symbol table. Symbol tables may be +// * pushed onto the stack or popped off of the stack, but the stack MAY NOT +// * become empty. +// */ +// private final Stack<LinkedHashMap<IVariable, IConstant>> stack; + final private LinkedHashMap<IVariable,IConstant> current; + /** * Return the symbol table on the top of the stack. */ private LinkedHashMap<IVariable, IConstant> current() { - return stack.peek(); + return current; + +// return stack.peek(); } - public void push() { +// public void push() { +// +// // The current symbol table. +// final LinkedHashMap<IVariable, IConstant> cur = current(); +// +// // Create a new symbol table. +// final LinkedHashMap<IVariable, IConstant> tmp = new LinkedHashMap<IVariable, IConstant>( +// cur.size()); +// +// // Push the new symbol table onto the stack. +// stack.push(tmp); +// +// /* +// * Make a copy of each entry in the symbol table which was on the top of +// * the stack when we entered this method, inserting the entries into the +// * new symbol table as we go. 
This avoids side effects of mutation on +// * the nested symbol tables and also ensures that we do not need to read +// * through to the nested symbol tables when answering a query about the +// * current symbol table. The only down side of this is that naive +// * serialization is that much less compact. +// */ +// for (Map.Entry<IVariable, IConstant> e : cur.entrySet()) { +// +// tmp.put(e.getKey(), e.getValue()); +// +// } +// +// } - // The current symbol table. - final LinkedHashMap<IVariable, IConstant> cur = current(); +// public void pop(final boolean save) { +// +// if (stack.size() < 2) { +// /* +// * The stack may never become empty. Therefore there must be at +// * least two symbol tables on the stack for a pop() request. +// */ +// throw new IllegalArgumentException(); +// } +// +// // Pop the symbol table off of the top of the stack. +// final LinkedHashMap<IVariable,IConstant> old = stack.pop(); +// +// if (save) { +// +// // discard the current symbol table. +// stack.pop(); +// +// // replacing it with the symbol table which we popped off the stack. +// stack.push(old); +// +// } else { +// +// // clear the hash code. +// hash = 0; +// +// } +// +// } - // Create a new symbol table. - final LinkedHashMap<IVariable, IConstant> tmp = new LinkedHashMap<IVariable, IConstant>( - cur.size()); - - // Push the new symbol table onto the stack. - stack.push(tmp); - - /* - * Make a copy of each entry in the symbol table which was on the top of - * the stack when we entered this method, inserting the entries into the - * new symbol table as we go. This avoids side effects of mutation on - * the nested symbol tables and also ensures that we do not need to read - * through to the nested symbol tables when answering a query about the - * current symbol table. The only down side of this is that naive - * serialization is that much less compact. 
- */ - for (Map.Entry<IVariable, IConstant> e : cur.entrySet()) { - - tmp.put(e.getKey(), e.getValue()); - - } - - } - - public void pop(final boolean save) { - - if (stack.size() < 2) { - /* - * The stack may never become empty. Therefore there must be at - * least two symbol tables on the stack for a pop() request. - */ - throw new IllegalArgumentException(); - } - - // Pop the symbol table off of the top of the stack. - final LinkedHashMap<IVariable,IConstant> old = stack.pop(); - - if (save) { - - // discard the current symbol table. - stack.pop(); - - // replacing it with the symbol table which we popped off the stack. - stack.push(old); - - } else { - - // clear the hash code. - hash = 0; - - } - - } - /** * New empty binding set. */ public HashBindingSet() { - stack = new Stack<LinkedHashMap<IVariable, IConstant>>(); - - stack.push(new LinkedHashMap<IVariable, IConstant>()); +// stack = new Stack<LinkedHashMap<IVariable, IConstant>>(); +// +// stack.push(new LinkedHashMap<IVariable, IConstant>()); + current = new LinkedHashMap<IVariable, IConstant>(); + } /** @@ -155,30 +160,32 @@ */ protected HashBindingSet(final HashBindingSet src, final IVariable[] variablesToKeep) { - stack = new Stack<LinkedHashMap<IVariable,IConstant>>(); +// stack = new Stack<LinkedHashMap<IVariable,IConstant>>(); +// +// final int stackSize = src.stack.size(); +// +// int depth = 1; +// +// for (LinkedHashMap<IVariable, IConstant> srcLst : src.stack) { +// +// /* +// * Copy the source bindings. +// * +// * Note: If a restriction exists on the variables to be copied, then +// * it is applied onto the the top level of the stack. If the symbol +// * table is saved when it is pop()'d, then the modified bindings +// * will replace the parent symbol table on the stack. +// */ +// final LinkedHashMap<IVariable,IConstant> tmp = copy(srcLst, +// depth == stackSize ? variablesToKeep : null); +// +// // Push onto the stack. 
+// stack.push(tmp); +// +// } - final int stackSize = src.stack.size(); + current = copy(src.current, variablesToKeep); - int depth = 1; - - for (LinkedHashMap<IVariable, IConstant> srcLst : src.stack) { - - /* - * Copy the source bindings. - * - * Note: If a restriction exists on the variables to be copied, then - * it is applied onto the the top level of the stack. If the symbol - * table is saved when it is pop()'d, then the modified bindings - * will replace the parent symbol table on the stack. - */ - final LinkedHashMap<IVariable,IConstant> tmp = copy(srcLst, - depth == stackSize ? variablesToKeep : null); - - // Push onto the stack. - stack.push(tmp); - - } - } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/bindingSet/ListBindingSet.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -6,7 +6,6 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Stack; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; @@ -82,90 +81,94 @@ } }; - /** - * The stack of symbol tables. Each symbol table is a mapping from an - * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. - * The stack is initialized with an empty symbol table. Symbol tables may be - * pushed onto the stack or popped off of the stack, but the stack MAY NOT - * become empty. - */ - private final Stack<List<E>> stack; +// /** +// * The stack of symbol tables. Each symbol table is a mapping from an +// * {@link IVariable} onto its non-<code>null</code> bound {@link IConstant}. +// * The stack is initialized with an empty symbol table. 
Symbol tables may be +// * pushed onto the stack or popped off of the stack, but the stack MAY NOT +// * become empty. +// */ +// private final Stack<List<E>> stack; + private final List<E> current; /** * Return the symbol table on the top of the stack. */ - private List<E> current() { + final private List<E> current() { - return stack.peek(); + return current; +// return stack.peek(); } - public void push() { +// public void push() { +// +// // The current symbol table. +// final List<E> cur = current(); +// +// // Create a new symbol table. +// final List<E> tmp = new LinkedList<E>(); +// +// // Push the new symbol table onto the stack. +// stack.push(tmp); +// +// /* +// * Make a copy of each entry in the symbol table which was on the top of +// * the stack when we entered this method, inserting the entries into the +// * new symbol table as we go. This avoids side effects of mutation on +// * the nested symbol tables and also ensures that we do not need to read +// * through to the nested symbol tables when answering a query about the +// * current symbol table. The only down side of this is that naive +// * serialization is that much less compact. +// */ +// for (E e : cur) { +// +// tmp.add(new E(e.var, e.val)); +// +// } +// +// } +// +// public void pop(final boolean save) { +// +// if (stack.size() < 2) { +// /* +// * The stack may never become empty. Therefore there must be at +// * least two symbol tables on the stack for a pop() request. +// */ +// throw new IllegalArgumentException(); +// } +// +// // Pop the symbol table off of the top of the stack. +// final List<E> old = stack.pop(); +// +// if (save) { +// +// // discard the current symbol table. +// stack.pop(); +// +// // replacing it with the symbol table which we popped off the stack. +// stack.push(old); +// +// } else { +// +// // clear the hash code. +// hash = 0; +// +// } +// +// } - // The current symbol table. - final List<E> cur = current(); - - // Create a new symbol table. 
- final List<E> tmp = new LinkedList<E>(); - - // Push the new symbol table onto the stack. - stack.push(tmp); - - /* - * Make a copy of each entry in the symbol table which was on the top of - * the stack when we entered this method, inserting the entries into the - * new symbol table as we go. This avoids side effects of mutation on - * the nested symbol tables and also ensures that we do not need to read - * through to the nested symbol tables when answering a query about the - * current symbol table. The only down side of this is that naive - * serialization is that much less compact. - */ - for (E e : cur) { - - tmp.add(new E(e.var, e.val)); - - } - - } - - public void pop(final boolean save) { - - if (stack.size() < 2) { - /* - * The stack may never become empty. Therefore there must be at - * least two symbol tables on the stack for a pop() request. - */ - throw new IllegalArgumentException(); - } - - // Pop the symbol table off of the top of the stack. - final List<E> old = stack.pop(); - - if (save) { - - // discard the current symbol table. - stack.pop(); - - // replacing it with the symbol table which we popped off the stack. - stack.push(old); - - } else { - - // clear the hash code. - hash = 0; - - } - - } - /** * Create an empty binding set. */ public ListBindingSet() { - stack = new Stack<List<E>>(); - - stack.push(new LinkedList<E>()); +// stack = new Stack<List<E>>(); +// +// stack.push(new LinkedList<E>()); + + current = new LinkedList<E>(); } @@ -207,30 +210,32 @@ protected ListBindingSet(final ListBindingSet src, final IVariable[] variablesToKeep) { - stack = new Stack<List<E>>(); +// stack = new Stack<List<E>>(); +// +// final int stackSize = src.stack.size(); +// +// int depth = 1; +// +// for (List<E> srcLst : src.stack) { +// +// /* +// * Copy the source bindings. +// * +// * Note: If a restriction exists on the variables to be copied, then +// * it is applied onto the the top level of the stack. 
If the symbol +// * table is saved when it is pop()'d, then the modified bindings +// * will replace the parent symbol table on the stack. +// */ +// final List<E> tmp = copy(srcLst, +// depth == stackSize ? variablesToKeep : null); +// +// // Push onto the stack. +// stack.push(tmp); +// +// } - final int stackSize = src.stack.size(); + current = copy(src.current, variablesToKeep); - int depth = 1; - - for (List<E> srcLst : src.stack) { - - /* - * Copy the source bindings. - * - * Note: If a restriction exists on the variables to be copied, then - * it is applied onto the the top level of the stack. If the symbol - * table is saved when it is pop()'d, then the modified bindings - * will replace the parent symbol table on the stack. - */ - final List<E> tmp = copy(srcLst, - depth == stackSize ? variablesToKeep : null); - - // Push onto the stack. - stack.push(tmp); - - } - } /** Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -1070,30 +1070,33 @@ */ sinkId = BOpUtility.getEffectiveDefaultSink(bop, p); - { - // altSink (null when not specified). 
- final Integer altSinkId = (Integer) op - .getProperty(PipelineOp.Annotations.ALT_SINK_REF); - final Integer altSinkGroup = (Integer) op - .getProperty(PipelineOp.Annotations.ALT_SINK_GROUP); - if (altSinkId != null && altSinkGroup != null) - throw new RuntimeException( - "Annotations are mutually exclusive: " - + PipelineOp.Annotations.ALT_SINK_REF - + " and " - + PipelineOp.Annotations.ALT_SINK_GROUP); - if (altSinkGroup != null) { - /* - * Lookup the first pipeline op in the conditional binding - * group and use its bopId as the altSinkId. - */ - this.altSinkId = BOpUtility.getFirstBOpIdForConditionalGroup( - getQuery(), altSinkGroup); - } else { - // MAY be null. - this.altSinkId = altSinkId; - } - } + // altSink (null when not specified). + altSinkId = (Integer) op + .getProperty(PipelineOp.Annotations.ALT_SINK_REF); +// { +// // altSink (null when not specified). +// final Integer altSinkId = (Integer) op +// .getProperty(PipelineOp.Annotations.ALT_SINK_REF); +// final Integer altSinkGroup = (Integer) op +// .getProperty(PipelineOp.Annotations.ALT_SINK_GROUP); +// if (altSinkId != null && altSinkGroup != null) +// throw new RuntimeException( +// "Annotations are mutually exclusive: " +// + PipelineOp.Annotations.ALT_SINK_REF +// + " and " +// + PipelineOp.Annotations.ALT_SINK_GROUP); +// if (altSinkGroup != null) { +// /* +// * Lookup the first pipeline op in the conditional binding +// * group and use its bopId as the altSinkId. +// */ +// this.altSinkId = BOpUtility.getFirstBOpIdForConditionalGroup( +// getQuery(), altSinkGroup); +// } else { +// // MAY be null. +// this.altSinkId = altSinkId; +// } +// } if (altSinkId != null && !getBOpIndex().containsKey(altSinkId)) throw new NoSuchBOpException(altSinkId); @@ -1131,18 +1134,19 @@ } assert stats != null; - // The groupId (if any) for this operator. - final Integer fromGroupId = (Integer) op - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// // The groupId (if any) for this operator. 
+// final Integer fromGroupId = (Integer) op +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); if (p == null) { sink = getQueryBuffer(); } else { - final BOp targetOp = getBOpIndex().get(sinkId); - final Integer toGroupId = (Integer) targetOp - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - sink = newBuffer(op, sinkId, new SinkTransitionMetadata( - fromGroupId, toGroupId, true/* isSink */), +// final BOp targetOp = getBOpIndex().get(sinkId); +// final Integer toGroupId = (Integer) targetOp +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// final SinkTransitionMetadata stm = new SinkTransitionMetadata( +// fromGroupId, toGroupId, true/* isSink */); + sink = newBuffer(op, sinkId, //null/*stm*/, sinkMessagesOut, stats); } @@ -1156,11 +1160,12 @@ */ // altSink = sink; } else { - final BOp targetOp = getBOpIndex().get(altSinkId); - final Integer toGroupId = (Integer) targetOp - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - altSink = newBuffer(op, altSinkId, new SinkTransitionMetadata( - fromGroupId, toGroupId, false/* isSink */), +// final BOp targetOp = getBOpIndex().get(altSinkId); +// final Integer toGroupId = (Integer) targetOp +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// final SinkTransitionMetadata stm = new SinkTransitionMetadata( +// fromGroupId, toGroupId, false/* isSink */); + altSink = newBuffer(op, altSinkId, //null/* stm */, altSinkMessagesOut, stats); } @@ -1192,7 +1197,7 @@ */ private IBlockingBuffer<IBindingSet[]> newBuffer(final PipelineOp op, final int sinkId, - final SinkTransitionMetadata sinkTransitionMetadata, +// final SinkTransitionMetadata sinkTransitionMetadata, final AtomicInteger sinkMessagesOut, final BOpStats stats) { // final MultiplexBlockingBuffer<IBindingSet[]> factory = inputBufferMap == null ? 
null @@ -1218,10 +1223,14 @@ // .getChunkTimeout(), // BufferAnnotations.chunkTimeoutUnit); - return new SinkTransitionBuffer(new HandleChunkBuffer( + return +// new SinkTransitionBuffer( + new HandleChunkBuffer( ChunkedRunningQuery.this, bopId, sinkId, op - .getChunkCapacity(), sinkMessagesOut, stats), - sinkTransitionMetadata); + .getChunkCapacity(), sinkMessagesOut, stats) +// , +// sinkTransitionMetadata) + ; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -52,7 +52,7 @@ import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.bop.fed.QueryEngineFactory; import com.bigdata.btree.BTree; import com.bigdata.btree.IndexSegment; @@ -798,7 +798,7 @@ return eval(queryId, (PipelineOp) op, new LocalChunkMessage<IBindingSet>(this/* queryEngine */, queryId, startId, -1 /* partitionId */, - newBindingSetIterator(new HashBindingSet()))); + newBindingSetIterator(new ListBindingSet()))); } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionBuffer.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionBuffer.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionBuffer.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -34,6 +34,8 @@ /** * Delegation pattern handles the {@link SinkTransitionMetadata}. 
+ * + * @deprecated along with {@link SinkTransitionMetadata} */ class SinkTransitionBuffer implements IBlockingBuffer<IBindingSet[]> { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionMetadata.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionMetadata.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionMetadata.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -53,10 +53,10 @@ * * @todo Unit tests of this class in isolation. * - * @todo It appears that this design can not be made to satisfy SPARQL optional - * group semantics. Therefore, we may be able to drop this class, support - * for it in the {@link ChunkedRunningQuery} and support for the symbol - * table stack in {@link IBindingSet}. + * @deprecated It appears that this design can not be made to satisfy SPARQL + * optional group semantics. Therefore, we may be able to drop this + * class, support for it in the {@link ChunkedRunningQuery} and + * support for the symbol table stack in {@link IBindingSet}. */ class SinkTransitionMetadata { @@ -91,24 +91,25 @@ * The binding set. */ public void handleBindingSet(final IBindingSet bset) { - if (fromGroupId == null) { - if (toGroupId == null) - return; - // Transition from no group to some group. - bset.push(); - return; - } else { - if (toGroupId == null) - // Transition from a group to no group. - bset.pop(isSink/* save */); - else if (toGroupId.equals(fromGroupId)) { - // NOP (transition to the same group) - } else { - // Transition to a different group. - bset.pop(isSink/* save */); - bset.push(); - } - } +// if (fromGroupId == null) { +// if (toGroupId == null) +// return; +// // Transition from no group to some group. +// bset.push(); +// return; +// } else { +// if (toGroupId == null) +// // Transition from a group to no group. 
+// bset.pop(isSink/* save */); +// else if (toGroupId.equals(fromGroupId)) { +// // NOP (transition to the same group) +// } else { +// // Transition to a different group. +// bset.pop(isSink/* save */); +// bset.push(); +// } +// } + throw new UnsupportedOperationException(); } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -622,30 +622,33 @@ */ sinkId = BOpUtility.getEffectiveDefaultSink(bop, p); - { - // altSink (null when not specified). - final Integer altSinkId = (Integer) op - .getProperty(PipelineOp.Annotations.ALT_SINK_REF); - final Integer altSinkGroup = (Integer) op - .getProperty(PipelineOp.Annotations.ALT_SINK_GROUP); - if (altSinkId != null && altSinkGroup != null) - throw new RuntimeException( - "Annotations are mutually exclusive: " - + PipelineOp.Annotations.ALT_SINK_REF - + " and " - + PipelineOp.Annotations.ALT_SINK_GROUP); - if (altSinkGroup != null) { - /* - * Lookup the first pipeline op in the conditional binding - * group and use its bopId as the altSinkId. - */ - this.altSinkId = BOpUtility.getFirstBOpIdForConditionalGroup( - getQuery(), altSinkGroup); - } else { - // MAY be null. - this.altSinkId = altSinkId; - } - } + // altSink (null when not specified). + altSinkId = (Integer) op + .getProperty(PipelineOp.Annotations.ALT_SINK_REF); +// { +// // altSink (null when not specified). 
+// final Integer altSinkId = (Integer) op +// .getProperty(PipelineOp.Annotations.ALT_SINK_REF); +// final Integer altSinkGroup = (Integer) op +// .getProperty(PipelineOp.Annotations.ALT_SINK_GROUP); +// if (altSinkId != null && altSinkGroup != null) +// throw new RuntimeException( +// "Annotations are mutually exclusive: " +// + PipelineOp.Annotations.ALT_SINK_REF +// + " and " +// + PipelineOp.Annotations.ALT_SINK_GROUP); +// if (altSinkGroup != null) { +// /* +// * Lookup the first pipeline op in the conditional binding +// * group and use its bopId as the altSinkId. +// */ +// this.altSinkId = BOpUtility.getFirstBOpIdForConditionalGroup( +// getQuery(), altSinkGroup); +// } else { +// // MAY be null. +// this.altSinkId = altSinkId; +// } +// } if (altSinkId != null && !getBOpIndex().containsKey(altSinkId)) throw new NoSuchBOpException(altSinkId); @@ -683,19 +686,20 @@ } assert stats != null; - // The groupId (if any) for this operator. - final Integer fromGroupId = (Integer) op - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// // The groupId (if any) for this operator. 
+// final Integer fromGroupId = (Integer) op +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); if (p == null) { sink = getQueryBuffer(); } else { - final BOp targetOp = getBOpIndex().get(sinkId); - final Integer toGroupId = (Integer) targetOp - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - sink = newBuffer(op, sinkId, new SinkTransitionMetadata( - fromGroupId, toGroupId, true/* isSink */), - /*sinkMessagesOut,*/ stats); +// final BOp targetOp = getBOpIndex().get(sinkId); +// final Integer toGroupId = (Integer) targetOp +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// final SinkTransitionMetadata stm = new SinkTransitionMetadata( +// fromGroupId, toGroupId, true/* isSink */); + sink = newBuffer(op, sinkId, //null/* stm */, + /* sinkMessagesOut, */stats); } if (altSinkId == null) { @@ -708,11 +712,12 @@ */ // altSink = sink; } else { - final BOp targetOp = getBOpIndex().get(altSinkId); - final Integer toGroupId = (Integer) targetOp - .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - altSink = newBuffer(op, altSinkId, new SinkTransitionMetadata( - fromGroupId, toGroupId, false/* isSink */), +// final BOp targetOp = getBOpIndex().get(altSinkId); +// final Integer toGroupId = (Integer) targetOp +// .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); +// final SinkTransitionMetadata stm = new SinkTransitionMetadata( +// fromGroupId, toGroupId, false/* isSink */); + altSink = newBuffer(op, altSinkId, //null/*stm*/, /*altSinkMessagesOut,*/ stats); } @@ -744,7 +749,7 @@ */ private IBlockingBuffer<IBindingSet[]> newBuffer(final PipelineOp op, final int sinkId, - final SinkTransitionMetadata sinkTransitionMetadata, +// final SinkTransitionMetadata sinkTransitionMetadata, /* final AtomicInteger sinkMessagesOut, */final BOpStats stats) { final MultiplexBlockingBuffer<IBindingSet[]> factory = operatorQueues @@ -760,9 +765,11 @@ * operator writing on this sink. Wrap that to handle the sink * transition metadata. 
*/ - return new SinkTransitionBuffer( + return //new SinkTransitionBuffer( new OutputStatsBuffer<IBindingSet[]>(factory.newInstance(), - stats), sinkTransitionMetadata); + stats) + //, sinkTransitionMetadata) + ; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2011-01-03 18:25:34 UTC (rev 4048) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2011-01-04 00:23:40 UTC (rev 4049) @@ -586,119 +586,119 @@ } - /** - * A conditional join group: - * - * <pre> - * (a b) - * optional { - * (b c) - * (c d) - * } - * </pre> - * - * where the groupId for the optional join group is ONE (1). The test should - * locate the first {@link PipelineJoin} in that join group, which is the - * one reading on the <code>(b c)</code> access path. - */ - public void test_getFirstBOpIdForConditionalGroup() { - - final String namespace = "kb"; - - final int startId = 1; // - final int joinId1 = 2; // : base join group. 
- final int predId1 = 3; // (a b) - final int joinId2 = 4; // : joinGroup1 - final int predId2 = 5; // (b c) - final int joinId3 = 6; // : joinGroup1 - final int predId3 = 7; // (c d) - final int sliceId = 8; // - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - - final Integer joinGroup1 = Integer.valueOf(1); - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { join1Op },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, 
pred2Op),// - // join is optional. - new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp join3Op = ... [truncated message content] |
From: <tho...@us...> - 2011-01-03 18:25:42
|
Revision: 4048 http://bigdata.svn.sourceforge.net/bigdata/?rev=4048&view=rev Author: thompsonbry Date: 2011-01-03 18:25:34 +0000 (Mon, 03 Jan 2011) Log Message: ----------- Added try/finally to ensure that the subquery is cancelled if the query is interrupted. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java 2011-01-03 14:41:36 UTC (rev 4047) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java 2011-01-03 18:25:34 UTC (rev 4048) @@ -51,13 +51,8 @@ * solutions produced by the subquery are copied to the default sink. If no * solutions are produced, then the original binding set is copied to the * default sink (optional join semantics). Each subquery is run as a separate - * query but is linked to the parent query in the operator is being evaluated. + * query but will be cancelled if the parent query is cancelled. * - * FIXME Is this true?: "This operator must on the query controller." For an - * optional join group in scale-out, we need to concentrate the solutions back - * to the controller if this is true. If it is not a requirement, then we can - * just issue the subquery from ANY node. - * * FIXME Parallel evaluation of subqueries is not implemented. What is the * appropriate parallelism for this operator? More parallelism should reduce * latency but could increase the memory burden. 
Review this decision once we @@ -241,22 +236,33 @@ final IAsynchronousIterator<IBindingSet[]> sitr = context .getSource(); - // @todo test for interrupt/halted query? while(sitr.hasNext()) { final IBindingSet[] chunk = sitr.next(); for(IBindingSet bset : chunk) { - final FutureTask<IRunningQuery> ft = new FutureTask<IRunningQuery>( + FutureTask<IRunningQuery> ft = new FutureTask<IRunningQuery>( new SubqueryTask(bset, subquery, context)); // run the subquery. executor.execute(ft); + + try { + + // wait for the outcome. + ft.get(); + + } finally { + + /* + * Ensure that the inner task is cancelled if the + * outer task is interrupted. + */ + ft.cancel(true/* mayInterruptIfRunning */); + + } - // wait for the outcome. - ft.get(); - } } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-03 14:41:36 UTC (rev 4047) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-03 18:25:34 UTC (rev 4048) @@ -28,8 +28,6 @@ import junit.framework.TestCase; import junit.framework.TestSuite; -import com.bigdata.bop.controller.JoinGraph; -import com.bigdata.relation.rule.IRule; import com.bigdata.relation.rule.eval.DefaultEvaluationPlan2; /** @@ -60,11 +58,9 @@ * Returns a test that will run each of the implementation specific test * suites in turn. * - * @todo Test the static optimization approach based on - * {@link DefaultEvaluationPlan2}, which will have to be reworked to - * remove its dependencies on the {@link IRule} model. - * - * @todo Test runtime optimization based on {@link JoinGraph}s. + * @todo If the static optimization is reworked as a controller operator + * based on {@link DefaultEvaluationPlan2}, then add a test suite for + * that operator here. 
*/ public static Test suite() { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java 2011-01-03 14:41:36 UTC (rev 4047) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestAll.java 2011-01-03 18:25:34 UTC (rev 4048) @@ -75,9 +75,6 @@ // stress test for SliceOp. suite.addTestSuite(TestQueryEngine_Slice.class); - // test suite for optional join groups. - suite.addTestSuite(TestQueryEngineOptionalJoins.class); - // @todo test suite for query evaluation (DISTINCT, ORDER BY, GROUP BY). // suite.addTestSuite(TestQueryEngine2.class); Deleted: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2011-01-03 14:41:36 UTC (rev 4047) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2011-01-03 18:25:34 UTC (rev 4048) @@ -1,1077 +0,0 @@ -/** - -Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. - -Contact: - SYSTAP, LLC - 4501 Tower Road - Greensboro, NC 27410 - lic...@bi... - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -/* - * Created on Aug 23, 2010 - */ - -package com.bigdata.bop.engine; - -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicInteger; - -import junit.framework.TestCase2; - -import com.bigdata.bop.BOp; -import com.bigdata.bop.BOpEvaluationContext; -import com.bigdata.bop.Constant; -import com.bigdata.bop.IBindingSet; -import com.bigdata.bop.IConstant; -import com.bigdata.bop.IConstraint; -import com.bigdata.bop.IVariable; -import com.bigdata.bop.IVariableOrConstant; -import com.bigdata.bop.NV; -import com.bigdata.bop.PipelineOp; -import com.bigdata.bop.Var; -import com.bigdata.bop.IPredicate.Annotations; -import com.bigdata.bop.ap.E; -import com.bigdata.bop.ap.Predicate; -import com.bigdata.bop.ap.R; -import com.bigdata.bop.bindingSet.ArrayBindingSet; -import com.bigdata.bop.bindingSet.HashBindingSet; -import com.bigdata.bop.bset.ConditionalRoutingOp; -import com.bigdata.bop.bset.StartOp; -import com.bigdata.bop.constraint.EQConstant; -import com.bigdata.bop.constraint.NEConstant; -import com.bigdata.bop.join.PipelineJoin; -import com.bigdata.bop.solutions.SliceOp; -import com.bigdata.journal.BufferMode; -import com.bigdata.journal.ITx; -import com.bigdata.journal.Journal; -import com.bigdata.relation.accesspath.IAsynchronousIterator; -import com.bigdata.relation.accesspath.ThickAsynchronousIterator; -import com.bigdata.striterator.ChunkedArrayIterator; -import com.bigdata.striterator.Dechunkerator; -import com.bigdata.striterator.ICloseableIterator; - -/** - * Test suite for handling of optional join groups during query evaluation - * against a local database instance. 
Optional join groups are handled using - * {@link IBindingSet#push()} when entering the join group and - * {@link IBindingSet#pop(boolean)} when exiting the join group. If the join - * group was successful for a given binding set, then <code>save:=true</code> is - * specified for {@link IBindingSet#pop(boolean)} and the applied bindings will - * be visible to the downstream consumer. Otherwise the bindings applied during - * the join group are simply discarded. - * - * <pre> - * -Dlog4j.configuration=bigdata/src/resources/logging/log4j.properties - * </pre> - * - * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id: TestQueryEngine.java 3950 2010-11-17 02:14:08Z thompsonbry $ - * - * @deprecated This test suite has been moved to the com.bigdata.bop.engine - * package and should be removed from this package. - */ -public class TestQueryEngineOptionalJoins extends TestCase2 { - - /** - * - */ - public TestQueryEngineOptionalJoins() { - } - - /** - * @param name - */ - public TestQueryEngineOptionalJoins(String name) { - super(name); - } - - @Override - public Properties getProperties() { - - final Properties p = new Properties(super.getProperties()); - - p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient - .toString()); - - return p; - - } - - static private final String namespace = "ns"; - Journal jnl; - QueryEngine queryEngine; - - public void setUp() throws Exception { - - jnl = new Journal(getProperties()); - - loadData(jnl); - - queryEngine = new QueryEngine(jnl); - - queryEngine.init(); - - } - - /** - * Create and populate relation in the {@link #namespace}. - */ - private void loadData(final Journal store) { - - // create the relation. - final R rel = new R(store, namespace, ITx.UNISOLATED, new Properties()); - rel.create(); - - // data to insert (in key order for convenience). 
- final E[] a = {// - new E("Paul", "Mary"),// [0] - new E("Paul", "Brad"),// [1] - - new E("John", "Mary"),// [2] - new E("John", "Brad"),// [3] - - new E("Mary", "Brad"),// [4] - - new E("Brad", "Fred"),// [5] - new E("Brad", "Leon"),// [6] - }; - - // insert data (the records are not pre-sorted). - rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); - - // Do commit since not scale-out. - store.commit(); - - } - - public void tearDown() throws Exception { - - if (queryEngine != null) { - queryEngine.shutdownNow(); - queryEngine = null; - } - - if (jnl != null) { - jnl.destroy(); - jnl = null; - } - - } - - /** - * Return an {@link IAsynchronousIterator} that will read a single, - * empty {@link IBindingSet}. - * - * @param bindingSet - * the binding set. - */ - protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( - final IBindingSet bindingSet) { - - return new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { new IBindingSet[] { bindingSet } }); - - } - - /** - * Return an {@link IAsynchronousIterator} that will read a single, chunk - * containing all of the specified {@link IBindingSet}s. - * - * @param bindingSets - * the binding sets. - */ - protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( - final IBindingSet[] bindingSets) { - - return new ThickAsynchronousIterator<IBindingSet[]>( - new IBindingSet[][] { bindingSets }); - - } - - /** - * Return an {@link IAsynchronousIterator} that will read a single, chunk - * containing all of the specified {@link IBindingSet}s. - * - * @param bindingSetChunks - * the chunks of binding sets. - */ - protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( - final IBindingSet[][] bindingSetChunks) { - - return new ThickAsynchronousIterator<IBindingSet[]>(bindingSetChunks); - - } - - /** - * Unit test for optional join group. Three joins are used and target a - * {@link SliceOp}. 
The 2nd and 3rd joins are an optional join group. - * Intermediate results which do not succeed on the optional join are - * forwarded to the {@link SliceOp} which is the target specified by the - * {@link PipelineOp.Annotations#ALT_SINK_REF}. - * - * The optional join group takes the form: - * - * <pre> - * (a b) - * optional { - * (b c) - * (c d) - * } - * </pre> - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be four solutions that - * succeed the optional join group: - * - * <pre> - * (paul mary brad fred) - * (paul mary brad leon) - * (john mary brad fred) - * (john mary brad leon) - * </pre> - * - * and five more that don't succeed the optional join group: - * - * <pre> - * (paul brad) * - * (john brad) * - * (mary brad) * - * (brad fred) - * (brad leon) - * </pre> - * - * In this cases marked with a <code>*</code>, ?c will become temporarily - * bound to fred and leon (since brad knows fred and leon), but the (c d) - * tail will fail since fred and leon don't know anyone else. At this point, - * the ?c binding must be removed from the solution. - */ - public void test_query_join2_optionals() throws Exception { - - final int startId = 1; // - final int joinId1 = 2; // : base join group. 
- final int predId1 = 3; // (a b) - final int joinId2 = 4; // : joinGroup1 - final int predId2 = 5; // (b c) - final int joinId3 = 6; // : joinGroup1 - final int predId3 = 7; // (c d) - final int sliceId = 8; // - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - - final Integer joinGroup1 = Integer.valueOf(1); - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { join1Op },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, 
pred2Op),// - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp join3Op = new PipelineJoin<E>(// - new BOp[] { join2Op },// - new NV(Predicate.Annotations.BOP_ID, joinId3),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp sliceOp = new SliceOp(// - new BOp[]{join3Op}, - NV.asMap(new NV[] {// - new NV(BOp.Annotations.BOP_ID, sliceId),// - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final PipelineOp query = sliceOp; - - // start the query. - final UUID queryId = UUID.randomUUID(); - final IChunkMessage<IBindingSet> initialChunkMessage; - { - - final IBindingSet initialBindings = new HashBindingSet(); - -// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); - - initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, startId,// - -1, // partitionId - newBindingSetIterator(initialBindings)); - } - final IRunningQuery runningQuery = queryEngine.eval(queryId, query, - initialChunkMessage); - - // verify solutions. - { - - // the expected solutions. - final IBindingSet[] expected = new IBindingSet[] {// - // four solutions where the optional join succeeds. 
- new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ), - // plus anything we read from the first access path which did not - // pass the optional join - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Mary"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ) - }; - - /* - * junit.framework.AssertionFailedError: Iterator will deliver too - * many objects: reminder(3)=[{ a=John, b=Brad }, { a=Mary, b=Brad - * }, { a=Paul, b=Brad }]. 
- */ - assertSameSolutionsAnyOrder(expected, - new Dechunkerator<IBindingSet>(runningQuery.iterator())); - - } - - // Wait until the query is done. - runningQuery.get(); - final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); - { - // validate the stats map. - assertNotNull(statsMap); - assertEquals(5, statsMap.size()); - if (log.isInfoEnabled()) - log.info(statsMap.toString()); - } - - } - - /** - * Unit test for optional join group with a filter. Three joins are used and - * target a {@link SliceOp}. The 2nd and 3rd joins are an optional join - * group. Intermediate results which do not succeed on the optional join are - * forwarded to the {@link SliceOp} which is the target specified by the - * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group - * contains a filter. - * <p> - * The optional join group takes the form: - * - * <pre> - * (a b) - * optional { - * (b c) - * (c d) - * filter(d != Leon) - * } - * </pre> - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be two solutions that - * succeed the optional join group: - * - * <pre> - * (paul mary brad fred) - * (john mary brad fred) - * </pre> - * - * and five more that don't succeed the optional join group: - * - * <pre> - * (paul brad) * - * (john brad) * - * (mary brad) * - * (brad fred) - * (brad leon) - * </pre> - * - * In the cases marked with a <code>*</code>, ?c will become temporarily - * bound to fred and leon (since brad knows fred and leon), but the (c d) - * tail will fail since fred and leon don't know anyone else. At this point, - * the ?c binding must be removed from the solution. - * <p> - * The filter (d != Leon) will prune the two solutions: - * - * <pre> - * (paul mary brad leon) - * (john mary brad leon) - * </pre> - * - * since ?d is bound to Leon in those cases. 
- */ - public void test_query_optionals_filter() throws Exception { - - final int startId = 1; - final int joinId1 = 2; // - final int predId1 = 3; // (a,b) - final int joinId2 = 4; // : group1 - final int predId2 = 5; // (b,c) - final int joinId3 = 6; // : group1 - final int predId3 = 7; // (c,d) - final int sliceId = 8; - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - - final Integer joinGroup1 = Integer.valueOf(1); - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { join1Op },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - new 
NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp join3Op = new PipelineJoin<E>(// - new BOp[] { join2Op },// - new NV(Predicate.Annotations.BOP_ID, joinId3),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// - // constraint d != Leon - new NV(PipelineJoin.Annotations.CONSTRAINTS, - new IConstraint[] { new NEConstant(d, new Constant<String>("Leon")) }), - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp sliceOp = new SliceOp(// - new BOp[]{join3Op}, - NV.asMap(new NV[] {// - new NV(BOp.Annotations.BOP_ID, sliceId),// - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final PipelineOp query = sliceOp; - - // start the query. - final UUID queryId = UUID.randomUUID(); - final IChunkMessage<IBindingSet> initialChunkMessage; - { - - final IBindingSet initialBindings = new HashBindingSet(); - -// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); - - initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, startId,// - -1, // partitionId - newBindingSetIterator(initialBindings)); - } - final IRunningQuery runningQuery = queryEngine.eval(queryId, query, - initialChunkMessage); - - // verify solutions. - { - - // the expected solutions. - final IBindingSet[] expected = new IBindingSet[] {// - // two solutions where the optional join succeeds. 
- new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - // plus anything we read from the first access path which did not - // pass the optional join - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Mary"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ) - }; - - assertSameSolutionsAnyOrder(expected, - new Dechunkerator<IBindingSet>(runningQuery.iterator())); - - } - - // Wait until the query is done. - runningQuery.get(); - final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); - { - // validate the stats map. - assertNotNull(statsMap); - assertEquals(5, statsMap.size()); - if (log.isInfoEnabled()) - log.info(statsMap.toString()); - } - - } - - /** - * Unit test for optional join group with a filter on a variable outside the - * optional join group. Three joins are used and target a {@link SliceOp}. - * The 2nd and 3rd joins are an optional join group. 
Intermediate results - * which do not succeed on the optional join are forwarded to the - * {@link SliceOp} which is the target specified by the - * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group - * contains a filter that uses a variable outside the optional join group. - * <P> - * The query takes the form: - * - * <pre> - * (a b) - * optional { - * (b c) - * (c d) - * filter(a != Paul) - * } - * </pre> - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be two solutions that - * succeed the optional join group: - * - * <pre> - * (john mary brad fred) - * (john mary brad leon) - * </pre> - * - * and six more that don't succeed the optional join group: - * - * <pre> - * (paul mary) * - * (paul brad) * - * (john brad) - * (mary brad) - * (brad fred) - * (brad leon) - * </pre> - * - * In the cases marked with a <code>*</code>, ?a is bound to Paul even - * though there is a filter that specifically prohibits a = Paul. This is - * because the filter is inside the optional join group, which means that - * solutions can still include a = Paul, but the optional join group should - * not run in that case. - */ - public void test_query_optionals_filter2() throws Exception { - - final int startId = 1; - final int joinId1 = 2; - final int predId1 = 3; // (a,b) - final int condId = 4; // (a != Paul) - final int joinId2 = 5; // : group1 - final int predId2 = 6; // (b,c) - final int joinId3 = 7; // : group1 - final int predId3 = 8; // (c,d) - final int sliceId = 9; - - final IVariable<?> a = Var.var("a"); - final IVariable<?> b = Var.var("b"); - final IVariable<?> c = Var.var("c"); - final IVariable<?> d = Var.var("d"); - - final Integer joinGroup1 = Integer.valueOf(1); - - /* - * Not quite sure how to write this one. I think it probably goes - * something like this: - * - * 1. startOp - * 2. join1Op(a b) - * 3. conditionalRoutingOp( if a = Paul then goto sliceOp ) - * 4. 
join2Op(b c) - * 5. join3Op(c d) - * 6. sliceOp - */ - - final PipelineOp startOp = new StartOp(new BOp[] {}, - NV.asMap(new NV[] {// - new NV(Predicate.Annotations.BOP_ID, startId),// - new NV(SliceOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final Predicate<?> pred1Op = new Predicate<E>( - new IVariableOrConstant[] { a, b }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId1),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred2Op = new Predicate<E>( - new IVariableOrConstant[] { b, c }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId2),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final Predicate<?> pred3Op = new Predicate<E>( - new IVariableOrConstant[] { c, d }, NV - .asMap(new NV[] {// - new NV(Predicate.Annotations.RELATION_NAME, - new String[] { namespace }),// - new NV(Predicate.Annotations.BOP_ID, predId3),// - new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// - })); - - final PipelineOp join1Op = new PipelineJoin<E>(// - new BOp[]{startOp},// - new NV(Predicate.Annotations.BOP_ID, joinId1),// - new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); - - final IConstraint condition = new EQConstant(a, new Constant<String>("Paul")); - - final ConditionalRoutingOp condOp = new ConditionalRoutingOp(new BOp[]{join1Op}, - NV.asMap(new NV[]{// - new NV(BOp.Annotations.BOP_ID,condId), - new NV(PipelineOp.Annotations.SINK_REF, sliceId), // a == Paul - new NV(PipelineOp.Annotations.ALT_SINK_REF, joinId2), // a != Paul - new NV(ConditionalRoutingOp.Annotations.CONDITION, condition), - })); - - final PipelineOp join2Op = new PipelineJoin<E>(// - new BOp[] { condOp },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// - new 
NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp join3Op = new PipelineJoin<E>(// - new BOp[] { join2Op },// - new NV(Predicate.Annotations.BOP_ID, joinId3),// - new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// - new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// - // join is optional. -// new NV(PipelineJoin.Annotations.OPTIONAL, true),// - // optional target is the same as the default target. - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); - - final PipelineOp sliceOp = new SliceOp(// - new BOp[]{join3Op}, - NV.asMap(new NV[] {// - new NV(BOp.Annotations.BOP_ID, sliceId),// - new NV(BOp.Annotations.EVALUATION_CONTEXT, - BOpEvaluationContext.CONTROLLER),// - })); - - final PipelineOp query = sliceOp; - - // start the query. - final UUID queryId = UUID.randomUUID(); - final IChunkMessage<IBindingSet> initialChunkMessage; - { - - final IBindingSet initialBindings = new HashBindingSet(); - -// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); - - initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, - queryId, startId,// - -1, // partitionId - newBindingSetIterator(initialBindings)); - } - final IRunningQuery runningQuery = queryEngine.eval(queryId, query, - initialChunkMessage); - - // verify solutions. - { - - // the expected solutions. - final IBindingSet[] expected = new IBindingSet[] {// - // two solutions where the optional join succeeds. 
- new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b, c, d },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Mary"), - new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - // plus anything we read from the first access path which did not - // pass the optional join - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Mary") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Paul"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("John"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Mary"), - new Constant<String>("Brad") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Fred") }// - ), - new ArrayBindingSet(// - new IVariable[] { a, b },// - new IConstant[] { new Constant<String>("Brad"), - new Constant<String>("Leon") }// - ) - }; - - assertSameSolutionsAnyOrder(expected, - new Dechunkerator<IBindingSet>(runningQuery.iterator())); - - } - - // Wait until the query is done. - runningQuery.get(); - final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); - { - // validate the stats map. - assertNotNull(statsMap); - assertEquals(6, statsMap.size()); - if (log.isInfoEnabled()) - log.info(statsMap.toString()); - } - - } - - /** - * Verify the expected solutions. 
- * - * @param expected - * @param itr - */ - static public void assertSameSolutions(final IBindingSet[] expected, - final IAsynchronousIterator<IBindingSet[]> itr) { - try { - int n = 0; - while (itr.hasNext()) { - final IBindingSet[] e = itr.next(); - if (log.isInfoEnabled()) - log.info(n + " : chunkSize=" + e.length); - for (int i = 0; i < e.length; i++) { - if (log.isInfoEnabled()) - log.info(n + " : " + e[i]); - if (n >= expected.length) { - fail("Willing to deliver too many solutions: n=" + n - + " : " + e[i]); - } - if (!expected[n].equals(e[i])) { - fail("n=" + n + ", expected=" + expected[n] - + ", actual=" + e[i]); - } - n++; - } - } - assertEquals("Wrong number of solutions", expected.length, n); - } finally { - itr.close(); - } - } - - /** - * Verifies that the iterator visits the specified objects in some arbitrary - * ordering and that the iterator is exhausted once all expected objects - * have been visited. The implementation uses a selection without - * replacement "pattern". - * <p> - * Note: If the objects being visited do not correctly implement hashCode() - * and equals() then this can fail even if the desired objects would be - * visited. When this happens, fix the implementation classes. - */ - static public <T> void assertSameSolutionsAnyOrder(final T[] expected, - final Iterator<T> actual) { - - assertSameSolutionsAnyOrder("", expected, actual); - - } - - /** - * Verifies that the iterator visits the specified objects in some arbitrary - * ordering and that the iterator is exhausted once all expected objects - * have been visited. The implementation uses a selection without - * replacement "pattern". - * <p> - * Note: If the objects being visited do not correctly implement hashCode() - * and equals() then this can fail even if the desired objects would be - * visited. When this happens, fix the implementation classes. 
- */ - static public <T> void assertSameSolutionsAnyOrder(final String msg, - final T[] expected, final Iterator<T> actual) { - - try { - - /* - * Populate a map that we will use to realize the match and - * selection without replacement logic. The map uses counters to - * handle duplicate keys. This makes it possible to write tests in - * which two or more binding sets which are "equal" appear. - */ - - final int nrange = expected.length; - - final java.util.Map<T, AtomicInteger> range = new java.util.LinkedHashMap<T, AtomicInteger>(); - - for (int j = 0; j < nrange; j++) { - - AtomicInteger count = range.get(expected[j]); - - if (count == null) { - - count = new AtomicInteger(); - - } - - range.put(expected[j], count); - - count.incrementAndGet(); - - } - - // Do selection without replacement for the objects visited by - // iterator. - - for (int j = 0; j < nrange; j++) { - - if (!actual.hasNext()) { - - fail(msg - + ": Iterator exhausted while expecting more object(s)" - + ": index=" + j); - - } - - final T actualObject = actual.next(); - - if (log.isInfoEnabled()) - log.info("visting: " + actualObject); - - AtomicInteger counter = range.get(actualObject); - - if (counter == null || counter.get() == 0) { - - fail("Object not expected" + ": index=" + j + ", object=" - + actualObject); - - } - - counter.decrementAndGet(); - - } - - if (actual.hasNext()) { - - final List<T> remainder = new LinkedList<T>(); - - while(actual.hasNext()) { - remainder.add(actual.next()); - } - - fail("Iterator will deliver too many objects: reminder(" - + remainder.size() + ")=" + remainder); - - } - - } finally { - - if (actual instanceof ICloseableIterator<?>) { - - ((ICloseableIterator<T>) actual).close(); - - } - - } - - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-01-03 14:41:44
|
Revision: 4047 http://bigdata.svn.sourceforge.net/bigdata/?rev=4047&view=rev Author: thompsonbry Date: 2011-01-03 14:41:36 +0000 (Mon, 03 Jan 2011) Log Message: ----------- Added an OptionalJoinGroup operator. It issues a subquery for each binding set presented to the operator. If the subquery produces any solutions, then they are copied to the default sink. Otherwise the binding set presented to the operator is copied to the default sink. This provides optional semantics for the group. Modified the optional join group test suite to use the OptionalJoinGroup operator. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-01-02 22:49:27 UTC (rev 4046) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2011-01-03 14:41:36 UTC (rev 4047) @@ -136,6 +136,12 @@ * <p> * The value of the {@link #CONDITIONAL_GROUP} is an {@link Integer} * which uniquely identifies the group within the query. + * + * @deprecated The binding set stack push/pop mechanisms are not + * sufficient to support optional join groups. This + * annotation will be removed unless it proves valuable for + * marking the elements of a join group, in which case the + * javadoc needs to be updated. 
*/ String CONDITIONAL_GROUP = PipelineOp.class.getName() + ".conditionalGroup"; @@ -158,6 +164,10 @@ * * @see #CONDITIONAL_GROUP * @see #ALT_SINK_REF + * + * @deprecated The binding set stack push/pop mechanisms are not + * sufficient to support optional join groups. This + * annotation will be removed. */ String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-02 22:49:27 UTC (rev 4046) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-03 14:41:36 UTC (rev 4047) @@ -55,8 +55,9 @@ * executed independently. By default, the subqueries are run with unlimited * parallelism. * <p> - * Note: This operator must on the query controller. The - * {@link PipelineOp.Annotations#SINK_REF} of each child operand should be + * Note: This operator must execute on the query controller. + * <p> + * The {@link PipelineOp.Annotations#SINK_REF} of each child operand should be * overridden to specify the parent of the this operator. If you fail to do * this, then the intermediate results of the subqueries will be routed to this * operator, which DOES NOT pass them on. This may cause unnecessary network @@ -193,10 +194,10 @@ this.latch = new CountDownLatch(controllerOp.arity()); /* - * Create FutureTasks for each subquery. The futures are submitted - * to the Executor yet. That happens in call(). By deferring the - * evaluation until call() we gain the ability to cancel all - * subqueries if any subquery fails. + * Create FutureTasks for each subquery. The futures are not + * submitted to the Executor yet. That happens in call(). 
By + * deferring the evaluation until call() we gain the ability to + * cancel all subqueries if any subquery fails. */ for (BOp op : controllerOp.args()) { Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java 2011-01-03 14:41:36 UTC (rev 4047) @@ -0,0 +1,418 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 18, 2010 + */ + +package com.bigdata.bop.controller; + +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.Executor; +import java.util.concurrent.FutureTask; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.LocalChunkMessage; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; +import com.bigdata.util.concurrent.LatchedExecutor; + +/** + * For each binding set presented, this operator executes a subquery. Any + * solutions produced by the subquery are copied to the default sink. If no + * solutions are produced, then the original binding set is copied to the + * default sink (optional join semantics). Each subquery is run as a separate + * query but is linked to the parent query in the operator is being evaluated. + * + * FIXME Is this true?: "This operator must on the query controller." For an + * optional join group in scale-out, we need to concentrate the solutions back + * to the controller if this is true. If it is not a requirement, then we can + * just issue the subquery from ANY node. + * + * FIXME Parallel evaluation of subqueries is not implemented. What is the + * appropriate parallelism for this operator? More parallelism should reduce + * latency but could increase the memory burden. Review this decision once we + * have the RWStore operating as a binding set buffer on the Java process heap. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class OptionalJoinGroup extends PipelineOp { + + /** + * + */ + private static final long serialVersionUID = 1L; + + public interface Annotations extends PipelineOp.Annotations { + + /** + * The subquery to be evaluated for each binding sets presented to the + * {@link OptionalJoinGroup} (required). This should be a + * {@link PipelineOp}. + */ + String SUBQUERY = OptionalJoinGroup.class.getName() + ".subquery"; + + /** + * The maximum parallelism with which the subqueries will be evaluated + * (default {@value #DEFAULT_MAX_PARALLEL}). + */ + String MAX_PARALLEL = OptionalJoinGroup.class.getName() + + ".maxParallel"; + + int DEFAULT_MAX_PARALLEL = 1; + + } + + /** + * @see Annotations#MAX_PARALLEL + */ + public int getMaxParallel() { + return getProperty(Annotations.MAX_PARALLEL, + Annotations.DEFAULT_MAX_PARALLEL); + } + + /** + * Deep copy constructor. + */ + public OptionalJoinGroup(final OptionalJoinGroup op) { + super(op); + } + + /** + * Shallow copy constructor. + * + * @param args + * @param annotations + */ + public OptionalJoinGroup(final BOp[] args, + final Map<String, Object> annotations) { + + super(args, annotations); + +// if (!getEvaluationContext().equals(BOpEvaluationContext.CONTROLLER)) +// throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT +// + "=" + getEvaluationContext()); + + getRequiredProperty(Annotations.SUBQUERY); + + if (!getProperty(Annotations.CONTROLLER, Annotations.DEFAULT_CONTROLLER)) + throw new IllegalArgumentException(Annotations.CONTROLLER); + +// // The id of this operator (if any). 
+// final Integer thisId = (Integer)getProperty(Annotations.BOP_ID); +// +// for(BOp op : args) { +// +// final Integer sinkId = (Integer) op +// .getRequiredProperty(Annotations.SINK_REF); +// +// if(sinkId.equals(thisId)) +// throw new RuntimeException("Operand may not target ") +// +// } + + } + + public OptionalJoinGroup(final BOp[] args, NV... annotations) { + + this(args, NV.asMap(annotations)); + + } + + public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { + + return new FutureTask<Void>(new ControllerTask(this, context)); + + } + + /** + * Evaluates the arguments of the operator as subqueries. The arguments are + * evaluated in order. An {@link Executor} with limited parallelism to + * evaluate the arguments. If the controller operator is interrupted, then + * the subqueries are cancelled. If a subquery fails, then all subqueries + * are cancelled. + */ + private static class ControllerTask implements Callable<Void> { + + private final OptionalJoinGroup controllerOp; + private final BOpContext<IBindingSet> context; +// private final List<FutureTask<IRunningQuery>> tasks = new LinkedList<FutureTask<IRunningQuery>>(); +// private final CountDownLatch latch; + private final int nparallel; + private final PipelineOp subquery; + private final Executor executor; + + public ControllerTask(final OptionalJoinGroup controllerOp, final BOpContext<IBindingSet> context) { + + if (controllerOp == null) + throw new IllegalArgumentException(); + + if (context == null) + throw new IllegalArgumentException(); + + this.controllerOp = controllerOp; + + this.context = context; + + this.nparallel = controllerOp.getProperty(Annotations.MAX_PARALLEL, + Annotations.DEFAULT_MAX_PARALLEL); + + this.subquery = (PipelineOp) controllerOp + .getRequiredProperty(Annotations.SUBQUERY); + + this.executor = new LatchedExecutor(context.getIndexManager() + .getExecutorService(), nparallel); + +// this.latch = new CountDownLatch(controllerOp.arity()); + +// /* +// * Create 
FutureTasks for each subquery. The futures are submitted +// * to the Executor yet. That happens in call(). By deferring the +// * evaluation until call() we gain the ability to cancel all +// * subqueries if any subquery fails. +// */ +// for (BOp op : controllerOp.args()) { +// +// /* +// * Task runs subquery and cancels all subqueries in [tasks] if +// * it fails. +// */ +// tasks.add(new FutureTask<IRunningQuery>(new SubqueryTask(op, +// context)) { +// /* +// * Hook future to count down the latch when the task is +// * done. +// */ +// public void run() { +// try { +// super.run(); +// } finally { +// latch.countDown(); +// } +// } +// }); +// +// } + + } + + /** + * Evaluate the subquery. + * + * @todo Support limited parallelism for each binding set read from the + * source. We will need to keep track of the running subqueries in + * order to wait on them before returning from this method and in + * order to cancel them if something goes wrong. + */ + public Void call() throws Exception { + + try { + + final IAsynchronousIterator<IBindingSet[]> sitr = context + .getSource(); + + // @todo test for interrupt/halted query? + while(sitr.hasNext()) { + + final IBindingSet[] chunk = sitr.next(); + + for(IBindingSet bset : chunk) { + + final FutureTask<IRunningQuery> ft = new FutureTask<IRunningQuery>( + new SubqueryTask(bset, subquery, context)); + + // run the subquery. + executor.execute(ft); + + // wait for the outcome. + ft.get(); + + } + + } + +// /* +// * Run subqueries with limited parallelism. +// */ +// for (FutureTask<IRunningQuery> ft : tasks) { +// executor.execute(ft); +// } +// +// /* +// * Wait for all subqueries to complete. +// */ +// latch.await(); +// +// /* +// * Get the futures, throwing out any errors. +// */ +// for (FutureTask<IRunningQuery> ft : tasks) +// ft.get(); + + // Now that we know the subqueries ran Ok, flush the sink. + context.getSink().flush(); + + // Done. 
+ return null; + + } finally { + +// // Cancel any tasks which are still running. +// for (FutureTask<IRunningQuery> ft : tasks) +// ft.cancel(true/* mayInterruptIfRunning */); + + context.getSource().close(); + + context.getSink().close(); + + if (context.getSink2() != null) + context.getSink2().close(); + + } + + } + + /** + * Run a subquery. + * + * @author <a href="mailto:tho...@us...">Bryan + * Thompson</a> + */ + private class SubqueryTask implements Callable<IRunningQuery> { + + /** + * The evaluation context for the parent query. + */ + private final BOpContext<IBindingSet> parentContext; + + /** + * The source binding set. This will be copied to the output if + * there are no solutions for the subquery (optional join + * semantics). + */ + private final IBindingSet bset; + + /** + * The root operator for the subquery. + */ + private final BOp subQueryOp; + + public SubqueryTask(final IBindingSet bset, final BOp subQuery, + final BOpContext<IBindingSet> parentContext) { + + this.bset = bset; + + this.subQueryOp = subQuery; + + this.parentContext = parentContext; + + } + + public IRunningQuery call() throws Exception { + + IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; + try { + + final QueryEngine queryEngine = parentContext.getRunningQuery() + .getQueryEngine(); + +// final IRunningQuery runningQuery = queryEngine +// .eval(subQueryOp); + + final BOp startOp = BOpUtility.getPipelineStart(subQueryOp); + + final int startId = startOp.getId(); + + final UUID queryId = UUID.randomUUID(); + + // execute the subquery, passing in the source binding set. + final IRunningQuery runningQuery = queryEngine + .eval( + queryId, + (PipelineOp) subQueryOp, + new LocalChunkMessage<IBindingSet>( + queryEngine, + queryId, + startId, + -1 /* partitionId */, + new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { bset } }))); + + // Iterator visiting the subquery solutions. 
+ subquerySolutionItr = runningQuery.iterator(); + + // Copy solutions from the subquery to the query. + final long ncopied = BOpUtility.copy(subquerySolutionItr, + parentContext.getSink(), null/* sink2 */, + null/* constraints */, null/* stats */); + + // wait for the subquery. + runningQuery.get(); + + if (ncopied == 0L) { + + /* + * Since there were no solutions for the subquery, copy + * the original binding set to the default sink. + */ + parentContext.getSink().add(new IBindingSet[]{bset}); + + } + + // done. + return runningQuery; + + } catch (Throwable t) { + + /* + * If a subquery fails, then propagate the error to the + * parent and rethrow the first cause error out of the + * subquery. + */ + throw new RuntimeException(ControllerTask.this.context + .getRunningQuery().halt(t)); + + } finally { + + if (subquerySolutionItr != null) + subquerySolutionItr.close(); + + } + + } + + } // SubqueryTask + + } // ControllerTask + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/OptionalJoinGroup.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-02 22:49:27 UTC (rev 4046) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestAll.java 2011-01-03 14:41:36 UTC (rev 4047) @@ -77,6 +77,8 @@ // test STEPS // suite.addTestSuite(TestUnion.class); + suite.addTestSuite(TestOptionalJoinGroup.class); + // @todo test STAR (transitive closure). 
// suite.addTestSuite(TestStar.class); Added: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestOptionalJoinGroup.java 2011-01-03 14:41:36 UTC (rev 4047) @@ -0,0 +1,1138 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Aug 23, 2010 + */ + +package com.bigdata.bop.controller; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; + +import junit.framework.TestCase2; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.Constant; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.IConstant; +import com.bigdata.bop.IConstraint; +import com.bigdata.bop.IVariable; +import com.bigdata.bop.IVariableOrConstant; +import com.bigdata.bop.NV; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.Var; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.ap.E; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.ap.R; +import com.bigdata.bop.bindingSet.ArrayBindingSet; +import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.bset.ConditionalRoutingOp; +import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.constraint.NEConstant; +import com.bigdata.bop.engine.BOpStats; +import com.bigdata.bop.engine.IChunkMessage; +import com.bigdata.bop.engine.IRunningQuery; +import com.bigdata.bop.engine.LocalChunkMessage; +import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.journal.BufferMode; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.ThickAsynchronousIterator; +import com.bigdata.striterator.ChunkedArrayIterator; +import com.bigdata.striterator.Dechunkerator; +import 
com.bigdata.striterator.ICloseableIterator; + +/** + * Test suite for handling of optional join groups during query evaluation + * against a local database instance. Optional join groups are handled using + * {@link IBindingSet#push()} when entering the join group and + * {@link IBindingSet#pop(boolean)} when exiting the join group. If the join + * group was successful for a given binding set, then <code>save:=true</code> is + * specified for {@link IBindingSet#pop(boolean)} and the applied bindings will + * be visible to the downstream consumer. Otherwise the bindings applied during + * the join group are simply discarded. + * + * <pre> + * -Dlog4j.configuration=bigdata/src/resources/logging/log4j.properties + * </pre> + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class TestOptionalJoinGroup extends TestCase2 { + + /** + * + */ + public TestOptionalJoinGroup() { + } + + /** + * @param name + */ + public TestOptionalJoinGroup(String name) { + super(name); + } + + @Override + public Properties getProperties() { + + final Properties p = new Properties(super.getProperties()); + + p.setProperty(Journal.Options.BUFFER_MODE, BufferMode.Transient + .toString()); + + return p; + + } + + static private final String namespace = "ns"; + private Journal jnl; + private QueryEngine queryEngine; + + public void setUp() throws Exception { + + jnl = new Journal(getProperties()); + + loadData(jnl); + + queryEngine = new QueryEngine(jnl); + + queryEngine.init(); + + } + + /** + * Create and populate relation in the {@link #namespace}. + */ + private void loadData(final Journal store) { + + // create the relation. + final R rel = new R(store, namespace, ITx.UNISOLATED, new Properties()); + rel.create(); + + // data to insert (in key order for convenience). 
+ final E[] a = {// + new E("Paul", "Mary"),// [0] + new E("Paul", "Brad"),// [1] + + new E("John", "Mary"),// [2] + new E("John", "Brad"),// [3] + + new E("Mary", "Brad"),// [4] + + new E("Brad", "Fred"),// [5] + new E("Brad", "Leon"),// [6] + }; + + // insert data (the records are not pre-sorted). + rel.insert(new ChunkedArrayIterator<E>(a.length, a, null/* keyOrder */)); + + // Do commit since not scale-out. + store.commit(); + + } + + public void tearDown() throws Exception { + + if (queryEngine != null) { + queryEngine.shutdownNow(); + queryEngine = null; + } + + if (jnl != null) { + jnl.destroy(); + jnl = null; + } + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, + * empty {@link IBindingSet}. + * + * @param bindingSet + * the binding set. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet bindingSet) { + + return new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { new IBindingSet[] { bindingSet } }); + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, chunk + * containing all of the specified {@link IBindingSet}s. + * + * @param bindingSets + * the binding sets. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet[] bindingSets) { + + return new ThickAsynchronousIterator<IBindingSet[]>( + new IBindingSet[][] { bindingSets }); + + } + + /** + * Return an {@link IAsynchronousIterator} that will read a single, chunk + * containing all of the specified {@link IBindingSet}s. + * + * @param bindingSetChunks + * the chunks of binding sets. + */ + protected ThickAsynchronousIterator<IBindingSet[]> newBindingSetIterator( + final IBindingSet[][] bindingSetChunks) { + + return new ThickAsynchronousIterator<IBindingSet[]>(bindingSetChunks); + + } + + /** + * Unit test for optional join group. Three joins are used and target a + * {@link SliceOp}. 
The 2nd and 3rd joins are embedded in an + * {@link OptionalJoinGroup}. + * <P> + * The optional join group takes the form: + * + * <pre> + * (a b) + * optional { + * (b c) + * (c d) + * } + * </pre> + * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be four solutions that + * succeed the optional join group: + * + * <pre> + * (paul mary brad fred) + * (paul mary brad leon) + * (john mary brad fred) + * (john mary brad leon) + * </pre> + * + * and five more that don't succeed the optional join group: + * + * <pre> + * (paul brad) * + * (john brad) * + * (mary brad) * + * (brad fred) + * (brad leon) + * </pre> + * + * In the cases marked with a <code>*</code>, ?c will become temporarily + * bound to fred and leon (since brad knows fred and leon), but the (c d) + * tail will fail since fred and leon don't know anyone else. At this point, + * the ?c binding must be removed from the solution. + */ + public void test_query_join2_optionals() throws Exception { + + // main query + final int startId = 1; // + final int joinId1 = 2; // : base join group. 
+ final int predId1 = 3; // (a b) + final int joinGroup1 = 9; + final int sliceId = 8; // + + // subquery + final int joinId2 = 4; // : joinGroup1 + final int predId2 = 5; // (b c) + final int joinId3 = 6; // : joinGroup1 + final int predId3 = 7; // (c d) + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp subQuery; + { + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { /*join1Op*/ },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new 
NV(PipelineJoin.Annotations.PREDICATE, pred2Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + subQuery = join3Op; + } + + final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{join1Op}, + new NV(Predicate.Annotations.BOP_ID, joinGroup1),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery),// + new NV(BOp.Annotations.CONTROLLER,true)// +// new NV(BOp.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{joinGroup1Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. 
+ final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final IRunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // four solutions where the optional join succeeds. + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ), + // plus anything we read from the first access path which did not + // pass the optional join + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new 
Constant<String>("Mary"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ) + }; + + /* + * junit.framework.AssertionFailedError: Iterator will deliver too + * many objects: reminder(3)=[{ a=John, b=Brad }, { a=Mary, b=Brad + * }, { a=Paul, b=Brad }]. + */ + assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + assertEquals(4, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + } + + /** + * Unit test for optional join group with a filter. Three joins are used and + * target a {@link SliceOp}. The 2nd and 3rd joins are embedded in an + * optional join group. The optional join group contains a filter. + * <p> + * The optional join group takes the form: + * + * <pre> + * (a b) + * optional { + * (b c) + * (c d) + * filter(d != Leon) + * } + * </pre> + * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be two solutions that + * succeed the optional join group: + * + * <pre> + * (paul mary brad fred) + * (john mary brad fred) + * </pre> + * + * and five more that don't succeed the optional join group: + * + * <pre> + * (paul brad) * + * (john brad) * + * (mary brad) * + * (brad fred) + * (brad leon) + * </pre> + * + * In the cases marked with a <code>*</code>, ?c will become temporarily + * bound to fred and leon (since brad knows fred and leon), but the (c d) + * tail will fail since fred and leon don't know anyone else. 
At this point, + * the ?c binding must be removed from the solution. + * <p> + * The filter (d != Leon) will prune the two solutions: + * + * <pre> + * (paul mary brad leon) + * (john mary brad leon) + * </pre> + * + * since ?d is bound to Leon in those cases. + */ + public void test_query_optionals_filter() throws Exception { + + // main query + final int startId = 1; + final int joinId1 = 2; // + final int predId1 = 3; // (a,b) + final int joinGroup1 = 9; + final int sliceId = 8; + + // subquery + final int joinId2 = 4; // : group1 + final int predId2 = 5; // (b,c) + final int joinId3 = 6; // : group1 + final int predId3 = 7; // (c,d) + + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new 
PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp subQuery; + { + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { /*join1Op*/ },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// + // constraint d != Leon + new NV(PipelineJoin.Annotations.CONSTRAINTS, + new IConstraint[] { new NEConstant(d, new Constant<String>("Leon")) }) +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + subQuery = join3Op; + } + + final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{join1Op}, + new NV(Predicate.Annotations.BOP_ID, joinGroup1),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery),// + new NV(BOp.Annotations.CONTROLLER,true)// +// new NV(BOp.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. 
+// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{joinGroup1Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. + final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final IRunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // two solutions where the optional join succeeds. 
+ new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + // plus anything we read from the first access path which did not + // pass the optional join + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ) + }; + + assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + assertEquals(4, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + } + + /** + * Unit test for optional join group with a filter on a variable outside the + * optional join group. Three joins are used and target a {@link SliceOp}. + * The 2nd and 3rd joins are embedded in an {@link OptionalJoinGroup}. The + * optional join group contains a filter that uses a variable outside the + * optional join group. 
+ * <P> + * The query takes the form: + * + * <pre> + * (a b) + * optional { + * (b c) + * (c d) + * filter(a != Paul) + * } + * </pre> + * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be two solutions that + * succeed the optional join group: + * + * <pre> + * (john mary brad fred) + * (john mary brad leon) + * </pre> + * + * and six more that don't succeed the optional join group: + * + * <pre> + * (paul mary) * + * (paul brad) * + * (john brad) + * (mary brad) + * (brad fred) + * (brad leon) + * </pre> + * + * In the cases marked with a <code>*</code>, ?a is bound to Paul even + * though there is a filter that specifically prohibits a = Paul. This is + * because the filter is inside the optional join group, which means that + * solutions can still include a = Paul, but the optional join group should + * not run in that case. + */ + public void test_query_optionals_filter2() throws Exception { + + // main query + final int startId = 1; + final int joinId1 = 2; + final int predId1 = 3; // (a,b) + final int condId = 4; // (a != Paul) + final int joinGroup1 = 10; + final int sliceId = 9; + + // subquery (iff condition is satisfied) + final int joinId2 = 5; // : group1 + final int predId2 = 6; // (b,c) + final int joinId3 = 7; // : group1 + final int predId3 = 8; // (c,d) + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + +// final Integer joinGroup1 = Integer.valueOf(1); + + /* + * Not quite sure how to write this one. I think it probably goes + * something like this: + * + * 1. startOp + * 2. join1Op(a b) + * 3. conditionalRoutingOp( if a = Paul then goto sliceOp ) + * 4. join2Op(b c) + * 5. join3Op(c d) + * 6. 
sliceOp + */ + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final IConstraint condition = new NEConstant(a, new Constant<String>("Paul")); + + final ConditionalRoutingOp condOp = new ConditionalRoutingOp(new BOp[]{join1Op}, + NV.asMap(new NV[]{// + new NV(BOp.Annotations.BOP_ID,condId), + new NV(PipelineOp.Annotations.SINK_REF, joinGroup1), // a != Paul + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId), // a == Paul + new NV(ConditionalRoutingOp.Annotations.CONDITION, condition), + })); + + final PipelineOp subQuery; + { + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { /*condOp*/ },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// +// new 
NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred2Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + subQuery = join3Op; + } + + final PipelineOp joinGroup1Op = new OptionalJoinGroup(new BOp[]{condOp}, + new NV(Predicate.Annotations.BOP_ID, joinGroup1),// +// new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(OptionalJoinGroup.Annotations.SUBQUERY, subQuery),// + new NV(BOp.Annotations.CONTROLLER,true)// +// new NV(BOp.Annotations.EVALUATION_CONTEXT, +// BOpEvaluationContext.CONTROLLER)// +// // join is optional. +// new NV(PipelineJoin.Annotations.OPTIONAL, true),// +// // optional target is the same as the default target. +// new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId) + ); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{joinGroup1Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // start the query. 
+ final UUID queryId = UUID.randomUUID(); + final IChunkMessage<IBindingSet> initialChunkMessage; + { + + final IBindingSet initialBindings = new HashBindingSet(); + +// initialBindings.set(Var.var("x"), new Constant<String>("Mary")); + + initialChunkMessage = new LocalChunkMessage<IBindingSet>(queryEngine, + queryId, startId,// + -1, // partitionId + newBindingSetIterator(initialBindings)); + } + final IRunningQuery runningQuery = queryEngine.eval(queryId, query, + initialChunkMessage); + + // verify solutions. + { + + // the expected solutions. + final IBindingSet[] expected = new IBindingSet[] {// + // two solutions where the optional join succeeds. + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Leon") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b, c, d },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Mary"), + new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + // plus anything we read from the first access path which did not + // pass the optional join + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Mary") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Paul"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("John"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Mary"), + new Constant<String>("Brad") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new Constant<String>("Brad"), + new Constant<String>("Fred") }// + ), + new ArrayBindingSet(// + new IVariable[] { a, b },// + new IConstant[] { new 
Constant<String>("Brad"), + new Constant<String>("Leon") }// + ) + }; + + assertSameSolutionsAnyOrder(expected, + new Dechunkerator<IBindingSet>(runningQuery.iterator())); + + } + + // Wait until the query is done. + runningQuery.get(); + final Map<Integer, BOpStats> statsMap = runningQuery.getStats(); + { + // validate the stats map. + assertNotNull(statsMap); + assertEquals(5, statsMap.size()); + if (log.isInfoEnabled()) + log.info(statsMap.toString()); + } + + } + + /** + * Verify the expected solutions. + * + * @param expected + * @param itr + */ + static public void assertSameSolutions(final IBindingSet[] expected, + final IAsynchronousIterator<IBindingSet[]> itr) { + try { + int n = 0; + while (itr.hasNext()) { + final IBindingSet[... [truncated message content] |
From: <tho...@us...> - 2011-01-02 22:49:33
|
Revision: 4046 http://bigdata.svn.sourceforge.net/bigdata/?rev=4046&view=rev Author: thompsonbry Date: 2011-01-02 22:49:27 +0000 (Sun, 02 Jan 2011) Log Message: ----------- Backed out the runtime optimizer query hint since it was not correctly integrated. I've added some more inline comments on how the integration should be developed. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-02 21:40:00 UTC (rev 4045) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-02 22:49:27 UTC (rev 4046) @@ -66,8 +66,6 @@ import com.bigdata.bop.bset.StartOp; import com.bigdata.bop.controller.Steps; import com.bigdata.bop.controller.Union; -import com.bigdata.bop.controller.JoinGraph.JGraph; -import com.bigdata.bop.controller.JoinGraph.Path; import com.bigdata.bop.cost.ScanCostReport; import com.bigdata.bop.cost.SubqueryCostReport; import com.bigdata.bop.engine.QueryEngine; @@ -443,6 +441,16 @@ * which optimizes the join graph and then evaluates it rather than * explicitly doing the optimization and evaluation steps here. * + * FIXME The runtime query optimizer can not be run against an + * IPredicate[] extracted from the IRule, even for triples, because + * those IPredicates lack some critical annotations, such as the + * bopId, which are only added in the logic below this point. Thus, + * while we can run the static optimizer first, the runtime + * optimizer needs to be run after we convert to bops (or as a bop + * at runtime). 
[This all runs into trouble because we are creating + * the JOIN operators in the code below rather than inferring the + * correct JOIN annotations based on the IPredicates.] + * * @todo Make sure that a summary of the information collected by * the runtime query optimizer is attached as an annotation to the * query. @@ -450,35 +458,38 @@ * @todo query hints for [limit] and [nedges]. */ - // The initial sampling limit. - final int limit = 100; +// // The initial sampling limit. +// final int limit = 100; +// +// // The #of edges considered for the initial paths. +// final int nedges = 2; +// +// // isolate/extract the join graph. +// final IPredicate[] preds = new IPredicate[rule.getTailCount()]; +// for (int i = 0; i < preds.length; i++) { +// preds[i] = rule.getTail(i); +// } +// +// final JGraph g = new JGraph(preds); +// +// final Path p; +// try { +// p = g.runtimeOptimizer(queryEngine, limit, nedges); +// } catch (Exception e) { +// throw new RuntimeException(e); +// } +// +// // the permutation order. +// order = g.getOrder(p); +// +// keyOrder = null; +// +// cardinality = null; +// +// break; - // The #of edges considered for the initial paths. - final int nedges = 2; + throw new UnsupportedOperationException("Runtime optimizer is not supported yet."); - // isolate/extract the join graph. - final IPredicate[] preds = new IPredicate[rule.getTailCount()]; - for (int i = 0; i < preds.length; i++) { - preds[i] = rule.getTail(i); - } - - final JGraph g = new JGraph(preds); - - final Path p; - try { - p = g.runtimeOptimizer(queryEngine, limit, nedges); - } catch (Exception e) { - throw new RuntimeException(e); - } - - // the permutation order. - order = g.getOrder(p); - - keyOrder = null; - - cardinality = null; - - break; } default: throw new AssertionError("Unknown option: " + optimizer); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-01-02 21:40:08
|
Revision: 4045 http://bigdata.svn.sourceforge.net/bigdata/?rev=4045&view=rev Author: thompsonbry Date: 2011-01-02 21:40:00 +0000 (Sun, 02 Jan 2011) Log Message: ----------- Hooked up the query hints into the sail. Moved the declaration of the query hints namespace into a QueryHints interface in the sail package. Added a query hint to select the join optimizer (Static, Runtime, None). The Runtime query optimizer can only be used for plain triples without optionals right now. Adding support for optionals is easy enough (MikeP is signed up for this). Adding support for quads and scale-out is trickier since we place various annotations on the joins (not just the predicates) when configuring for named or default graph queries (e.g., when to use a REMOTE access path). Since the runtime query optimizer works directly with the IPredicates, the annotations for the joins probably need to be inferred directly from the annotations for the predicates in order for this to be compatible with the JoinGraph optimizer. Also, the DataSetJoin needs to be replaced by a standard join against an inline access path comprising the default or named graph data set. This requires some changes to the AccessPath class. PipelineJoin lacked a deep copy constructor. 
Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpBase.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -360,8 +360,31 @@ if (!annotations.containsKey(name)) return defaultValue; - 
return (T) annotations.get(name); + final Object val = annotations.get(name); + if (defaultValue != null && val.getClass() != defaultValue.getClass()) { + + /* + * Attempt to convert to the correct target type. + */ + + if (defaultValue.getClass() == Integer.class) { + return (T) Integer.valueOf("" + val); + } + if (defaultValue.getClass() == Long.class) { + return (T) Long.valueOf("" + val); + } + if (defaultValue.getClass() == Float.class) { + return (T) Float.valueOf("" + val); + } + if (defaultValue.getClass() == Double.class) { + return (T) Double.valueOf("" + val); + } + + } + + return (T) val; + } // @SuppressWarnings("unchecked") Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -1416,7 +1416,7 @@ return preds; } - + /** * Return the {@link BOp} identifiers of the predicates associated with * each vertex in path order. @@ -1851,6 +1851,55 @@ } + /** + * Return a permutation vector which may be used to reorder the given + * {@link IPredicate}[] into the evaluation order selected by the + * runtime query optimizer. + * + * @throws IllegalArgumentException + * if the argument is <code>null</code>. + * @throws IllegalArgumentException + * if the given {@link Path} does not cover all vertices in + * the join graph. 
+ */ + public int[] getOrder(final Path p) { + + if(p == null) + throw new IllegalArgumentException(); + + final IPredicate[] path = p.getPredicates(); + + if (path.length != V.length) { + throw new IllegalArgumentException( + "Wrong path length: #vertices=" + V.length + + ", but path.length=" + path.length); + } + + final int[] order = new int[V.length]; + + for (int i = 0; i < order.length; i++) { + + boolean found = false; + for (int j = 0; j < order.length; j++) { + + if (path[i].getId() == V[j].pred.getId()) { + order[i] = j; + found = true; + break; + } + + } + + if (!found) + throw new RuntimeException("No such vertex: id=" + + path[i].getId()); + + } + + return order; + + } + /** * Choose the starting vertices. * Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -378,6 +378,15 @@ } /** + * Deep copy constructor. + * + * @param op + */ + public PipelineJoin(final PipelineJoin<E> op) { + super(op); + } + + /** * Shallow copy vararg constructor. * * @param args @@ -637,6 +646,9 @@ this.predicate = joinOp.getPredicate(); this.constraints = joinOp.constraints(); this.maxParallel = joinOp.getMaxParallel(); + if (maxParallel < 0) + throw new IllegalArgumentException(Annotations.MAX_PARALLEL + + "=" + maxParallel); if (maxParallel > 0) { // shared service. 
service = new LatchedExecutor(context.getIndexManager() Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -110,7 +110,7 @@ * Deep Copy constructor. * @param op */ - public SliceOp(SliceOp op) { + public SliceOp(final SliceOp op) { super(op); @@ -122,7 +122,7 @@ * @param args * @param annotations */ - public SliceOp(BOp[] args, Map<String, Object> annotations) { + public SliceOp(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -52,6 +52,7 @@ import org.openrdf.model.Value; import org.openrdf.model.impl.URIImpl; + /** * A vocabulary for bigdata specific extensions. * @@ -64,15 +65,6 @@ * The namespace used for bigdata specific extensions. */ String NAMESPACE = "http://www.bigdata.com/rdf#"; - - /** - * The namespace prefix used in SPARQL queries to signify query hints. 
You - * can embed query hints into a SPARQL query as follows: - * <code> - * PREFIX BIGDATA_QUERY_HINTS: <http://www.bigdata.com/queryHints#com.bigdata.relation.rule.eval.DefaultRuleTaskFactory.nestedSubquery=true&com.bigdata.fullScanTreshold=1000> - * </code> - */ - String QUERY_HINTS_NAMESPACE = "BIGDATA_QUERY_HINTS"; /** * The name of a per-statement attribute whose value is recognized in Modified: branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -230,7 +230,7 @@ final Properties properties = getProperties(); final File file; - if (false) { + if (true) { /* * Use a persistent file that is generated once and then reused by * each test run. @@ -899,6 +899,8 @@ // System.err.println(getName() + " : runtime optimizer join order " // + Arrays.toString(Path.getVertexIds(p.edges))); + System.err.println(getName() + " : order[]=" + Arrays.toString(g.getOrder(p))); + return p.getPredicates(); } finally { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -366,7 +366,7 @@ * This is the top-level method called by the SAIL to evaluate a query. * The TupleExpr parameter here is guaranteed to be the root of the operator * tree for the query. 
Query hints are parsed by the SAIL from the - * namespaces in the original query. See {@link BD#QUERY_HINTS_NAMESPACE}. + * namespaces in the original query. See {@link QueryHints#NAMESPACE}. */ public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( TupleExpr expr, BindingSet bindings, Properties queryHints) @@ -1673,7 +1673,7 @@ final QueryEngine queryEngine = tripleSource.getSail().getQueryEngine(); - final int startId = 1; +// final int startId = 1; final PipelineOp query; { @@ -1686,7 +1686,7 @@ // Convert the step to a bigdata operator tree. query = Rule2BOpUtility.convert(step, idFactory, database, - queryEngine); + queryEngine, queryHints); if (log.isInfoEnabled()) log.info(query); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -3117,7 +3117,7 @@ * {@link Options#QUERY_TIME_EXPANDER}, but not on a per-query basis. * <p> * QueryHints are a set of properties that are parsed from a SPARQL - * query. See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * query. See {@link QueryHints#NAMESPACE} for more information. 
* * @todo The [bindings] are supposed to be inputs to the query * evaluation, but I am still not quite clear what the role of the Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailBooleanQuery.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -12,14 +12,13 @@ import org.openrdf.sail.SailConnection; import org.openrdf.sail.SailException; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; -import com.bigdata.rdf.store.BD; public class BigdataSailBooleanQuery extends SailBooleanQuery implements BigdataSailQuery { /** * Query hints are embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ private final Properties queryHints; @@ -32,7 +31,7 @@ /** * Overriden to use query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. 
*/ @Override public boolean evaluate() throws QueryEvaluationException { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailGraphQuery.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -42,14 +42,13 @@ import org.openrdf.repository.sail.SailRepositoryConnection; import org.openrdf.sail.SailException; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; -import com.bigdata.rdf.store.BD; public class BigdataSailGraphQuery extends SailGraphQuery implements BigdataSailQuery { /** * Query hints are embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ private final Properties queryHints; @@ -222,7 +221,7 @@ /** * Overriden to use query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. 
*/ @Override public GraphQueryResult evaluate() throws QueryEvaluationException { Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailRepositoryConnection.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -29,7 +29,6 @@ import com.bigdata.rdf.sail.sparql.PrefixDeclProcessor; import com.bigdata.rdf.sail.sparql.StringEscapesProcessor; import com.bigdata.rdf.store.AbstractTripleStore; -import com.bigdata.rdf.store.BD; public class BigdataSailRepositoryConnection extends SailRepositoryConnection { @@ -52,7 +51,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public SailGraphQuery prepareGraphQuery(final QueryLanguage ql, @@ -72,7 +71,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public SailTupleQuery prepareTupleQuery(final QueryLanguage ql, @@ -89,7 +88,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. See - * {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * {@link QueryHints#NAMESPACE} for more information. */ @Override public SailBooleanQuery prepareBooleanQuery(final QueryLanguage ql, @@ -106,7 +105,7 @@ * <p> * Overridden to capture query hints from SPARQL queries. 
Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public SailQuery prepareQuery(final QueryLanguage ql, final String qs, @@ -251,7 +250,7 @@ /** * Parse query hints from a query string. Query hints are embedded in the * query string via special namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ private Properties parseQueryHints(QueryLanguage ql, String queryString, String baseURI) @@ -270,7 +269,7 @@ for (Map.Entry<String, String> prefix : prefixes.entrySet()) { // if we see one that matches the magic namespace, try // to parse it - if (prefix.getKey().equalsIgnoreCase(BD.QUERY_HINTS_NAMESPACE)) { + if (prefix.getKey().equalsIgnoreCase(QueryHints.NAMESPACE)) { String hints = prefix.getValue(); // has to have a # and it can't be at the end int i = hints.indexOf('#'); Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSailTupleQuery.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -14,14 +14,13 @@ import org.openrdf.sail.SailConnection; import org.openrdf.sail.SailException; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; -import com.bigdata.rdf.store.BD; public class BigdataSailTupleQuery extends SailTupleQuery implements BigdataSailQuery { /** * Query hints are embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. 
*/ private final Properties queryHints; @@ -34,7 +33,7 @@ /** * Overriden to use query hints from SPARQL queries. Query hints are * embedded in query strings as namespaces. - * See {@link BD#QUERY_HINTS_NAMESPACE} for more information. + * See {@link QueryHints#NAMESPACE} for more information. */ @Override public TupleQueryResult evaluate() throws QueryEvaluationException { Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -0,0 +1,66 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Jan 2, 2011 + */ + +package com.bigdata.rdf.sail; + +import com.bigdata.bop.BOp; + +/** + * Query hint directives understood by a bigdata SPARQL end point. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface QueryHints { + + /** + * The namespace prefix used in SPARQL queries to signify query hints. 
You + * can embed query hints into a SPARQL query as follows: + * + * <pre> + * PREFIX BIGDATA_QUERY_HINTS: <http://www.bigdata.com/queryHints#name1=value1&name2=value2> + * </pre> + * + * where <i>name</i> is the name of a query hint and <i>value</i> is the + * value associated with that query hint. Multiple query hints can be + * specified (as shown in this example) using a <code>&</code> character + * to separate each name=value pair. + * <p> + * Query hints are either directives understood by the SPARQL end point or + * {@link BOp.Annotations}. A list of the known directives is declared by + * this interface. + */ + String NAMESPACE = "BIGDATA_QUERY_HINTS"; + + /** + * Specify the query optimizer. + * + * @see QueryOptimizerEnum + */ + String OPTIMIZER = QueryHints.class.getName() + ".optimizer"; + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryHints.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Added: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -0,0 +1,69 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2011. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Jan 2, 2011 + */ +package com.bigdata.rdf.sail; + +/** + * The known query optimizers. + * + * @see QueryHints#OPTIMIZER + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public enum QueryOptimizerEnum { + /** + * The query optimizer is disabled. The joins in the query will be evaluated + * in the order in which they are given. This may be used to compensate when + * the static query optimizer produces an inefficient join ordering. + */ + None, + /** + * A query optimizer based on a static analysis of the query which relies on + * fast range counts for the basic graph patterns to estimate the + * cardinality of the different access paths. This optimizer is fast but it + * can fail to order joins correctly as the error in the estimated + * cardinality of joins can grow exponentially in the number of joins in the + * query. + */ + Static, + /** + * A runtime query optimizer based on sampling. The runtime query optimizer + * samples each of the access paths and each of the joins and builds out + * join paths in a breadth first manner until it finds a join ordering which + * is known to dominate the other possible join orderings. The runtime query + * optimizer takes into account the actual cardinality and correlation in + * the query and the data selected by that query. The runtime query + * optimizer can have slightly more overhead than the static query + * optimizer, but it never produces a bad join ordering and often identifies + * the <em>best</em> join ordering. 
For cases where the <code>static</code> + * query optimizer produces a bad join ordering, the runtime query optimizer + * can find join orderings which are orders of magnitude more efficient (10x + * or 100x). For long running joins, this can translate into a savings of + * minutes or hours. + */ + Runtime; +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/QueryOptimizerEnum.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/Rule2BOpUtility.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -30,12 +30,14 @@ import java.io.Serializable; import java.util.Arrays; import java.util.Collection; +import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -64,6 +66,8 @@ import com.bigdata.bop.bset.StartOp; import com.bigdata.bop.controller.Steps; import com.bigdata.bop.controller.Union; +import com.bigdata.bop.controller.JoinGraph.JGraph; +import com.bigdata.bop.controller.JoinGraph.Path; import com.bigdata.bop.cost.ScanCostReport; import com.bigdata.bop.cost.SubqueryCostReport; import com.bigdata.bop.engine.QueryEngine; @@ -243,13 +247,13 @@ */ public static PipelineOp convert(final IStep step, final AtomicInteger idFactory, final AbstractTripleStore db, - final QueryEngine queryEngine) { + final QueryEngine queryEngine, final Properties queryHints) { if (step
instanceof IRule<?>) { // Convert the step to a bigdata operator tree. PipelineOp tmp = convert((IRule<?>) step, idFactory, db, - queryEngine); + queryEngine, queryHints); if (!tmp.getEvaluationContext().equals( BOpEvaluationContext.CONTROLLER)) { @@ -265,15 +269,55 @@ } - return tmp; + return applyQueryHints(tmp, queryHints); } - return convert((IProgram) step, idFactory, db, queryEngine); + return convert((IProgram) step, idFactory, db, queryEngine, queryHints); } /** + * Apply any query hints to the operator as annotations of that operator. + * + * @param op + * The operator. + * @param queryHints + * The query hints. + * + * @return A copy of that operator to which the query hints (if any) have + * been applied. If there are no query hints then the original + * operator is returned. + * + * @todo It would be nice if this would only apply those query hints to an + * operator which are known to be annotations understood by that + * operator. This information is basically available from the inner + * Annotation interface for a given operator class, but that is not + * really all that accessible. + */ + private static PipelineOp applyQueryHints(PipelineOp op, + Properties queryHints) { + + final Enumeration<?> pnames = queryHints.propertyNames(); + + while (pnames.hasMoreElements()) { + + final String name = (String) pnames.nextElement(); + + final String value = queryHints.getProperty(name); + + if (log.isInfoEnabled()) + log.info("Query hint: [" + name + "=" + value + "]"); + + op = (PipelineOp) op.setProperty(name, value); + + } + + return op; + + } + + /** * Convert a rule into an operator tree. * * @param rule @@ -282,52 +326,164 @@ */ public static PipelineOp convert(final IRule<?> rule, final AtomicInteger idFactory, final AbstractTripleStore db, - final QueryEngine queryEngine) { + final QueryEngine queryEngine, final Properties queryHints) { // // true iff the database is in quads mode. 
// final boolean isQuadsQuery = db.isQuads(); - final PipelineOp startOp = new StartOp(new BOp[] {}, + final PipelineOp startOp = applyQueryHints(new StartOp(new BOp[] {}, NV.asMap(new NV[] {// new NV(Predicate.Annotations.BOP_ID, idFactory .incrementAndGet()),// new NV(SliceOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// - })); - + })),queryHints); + /* * First put the tails in the correct order based on the logic in * DefaultEvaluationPlan2. + * + * @todo Consider making order[] disappear such that all of the arrays + * (preds[], cardinality[], keyOrder[]) are indexed directly by the + * array index rather than by order[i]. Alternatively, make sure that + * the runtime query optimizer reports the permutation array (order[]) + * so we can maintain information about the relationship between the + * given joins and the evaluation order. */ final BOpContextBase context = new BOpContextBase(queryEngine); - final DefaultEvaluationPlan2 plan = new DefaultEvaluationPlan2( - new IRangeCountFactory() { + + final QueryOptimizerEnum optimizer = QueryOptimizerEnum + .valueOf(queryHints.getProperty(QueryHints.OPTIMIZER, + QueryOptimizerEnum.Static.toString())); - public long rangeCount(final IPredicate pred) { - return context.getRelation(pred).getAccessPath(pred) - .rangeCount(false); + // The evaluation plan order. + final int[] order; + // The estimated cardinality of each tail (if the optimizer provides it) + final long[] cardinality; + // The index assigned to each tail of the rule by static analysis. + final IKeyOrder[] keyOrder; + + switch(optimizer) { + case None: { + /* + * Do not run the join optimizer. + * + * @todo Do we need to move any of the joins to the front, e.g., + * magic search, or should everything just be left the way it is? + */ + order = new int[rule.getTailCount()]; + for (int i = 0; i < order.length; i++) { + order[i] = i; } + cardinality = null; + keyOrder = null; + break; + } + case Static: { + /* + * Static query optimizer. 
+ */ + final DefaultEvaluationPlan2 plan = new DefaultEvaluationPlan2( + new IRangeCountFactory() { + + public long rangeCount(final IPredicate pred) { + return context.getRelation(pred) + .getAccessPath(pred).rangeCount(false); + } + + }, rule); + + order = plan.getOrder(); + + /* + * The index assigned to each tail of the rule by static analysis + * (this is often not the index which is actually used when we + * evaluate a given predicate since we always choose the best index + * and that can depend on whether or not we are binding the context + * position for a default or named graph query. When optional joins + * are involved, some variables may not become bound for some + * solutions. A different index will often be chosen for access + * paths using the unbound variable. + */ + + // the #of variables in each tail of the rule (set by side-effect). + final int[] nvars = new int[rule.getTailCount()]; + + cardinality = new long[rule.getTailCount()]; + for (int i = 0; i < cardinality.length; i++) { + cardinality[i] = plan.cardinality(i); + } + + keyOrder = computeKeyOrderForEachTail(rule, context, order, nvars); + + break; + + } + case Runtime: { + /* + * The runtime query optimizer. + * + * FIXME MikeP: I have modified the JoinGraph so that it can report + * the permutation order. However, the code here needs to isolate + * the join graph rather than running against all predicates in the + * tail. As it is, it will reorder optionals. + * + * FIXME We can not optimize quads here using the runtime query + * optimizer since we have not yet generated the full query plan. In + * order to get the runtime query optimizer working for quads we + * need to replace the DataSetJoin with a PipelineJoin against an + * inline "relation" containing the named or default graphs IVs. The + * runtime query optimizer does not accept the JOIN operators so the + * annotations which are being applied there will be lost which is + * another problem, especially in scale-out. 
Both of these issues + * need to be resolved before quads can be used with the runtime + * query optimizer. + * + * @todo In fact, we should be able to write in a JoinGraph operator + * which optimizes the join graph and then evaluates it rather than + * explicitly doing the optimization and evaluation steps here. + * + * @todo Make sure that a summary of the information collected by + * the runtime query optimizer is attached as an annotation to the + * query. + * + * @todo query hints for [limit] and [nedges]. + */ - }, rule); - - // evaluation plan order. - final int[] order = plan.getOrder(); - - // the #of variables in each tail of the rule. - final int[] nvars = new int[rule.getTailCount()]; + // The initial sampling limit. + final int limit = 100; - /* - * The index assigned to each tail of the rule by static analysis (this - * is often not the index which is actually used when we evaluate a - * given predicate since we always choose the best index and that can - * depend on whether or not we are binding the context position for a - * default or named graph query. When optional joins are involved, some - * variables may not become bound for some solutions. A different index - * will often be chosen for access paths using the unbound variable. - */ - final IKeyOrder[] keyOrder = computeKeyOrderForEachTail(rule, context, - order, nvars); + // The #of edges considered for the initial paths. + final int nedges = 2; + // isolate/extract the join graph. + final IPredicate[] preds = new IPredicate[rule.getTailCount()]; + for (int i = 0; i < preds.length; i++) { + preds[i] = rule.getTail(i); + } + + final JGraph g = new JGraph(preds); + + final Path p; + try { + p = g.runtimeOptimizer(queryEngine, limit, nedges); + } catch (Exception e) { + throw new RuntimeException(e); + } + + // the permutation order. 
+ order = g.getOrder(p); + + keyOrder = null; + + cardinality = null; + + break; + } + default: + throw new AssertionError("Unknown option: " + optimizer); + } + // the variables to be retained for each join. final IVariable<?>[][] selectVars = RuleState .computeRequiredVarsForEachTail(rule, order); @@ -379,15 +535,22 @@ Predicate<?> pred = (Predicate<?>) rule.getTail(order[i]).setBOpId( idFactory.incrementAndGet()); - // decorate the predicate with the assigned index. -// pred = pred.setKeyOrder(keyOrder[order[i]]); - pred = (Predicate<?>) pred.setProperty(Annotations.ORIGINAL_INDEX, - keyOrder[order[i]]); + /* + * Decorate the predicate with the assigned index (this is purely + * informative). + */ + if (keyOrder != null && keyOrder[order[i]] != null) { + // pred = pred.setKeyOrder(keyOrder[order[i]]); + pred = (Predicate<?>) pred.setProperty( + Annotations.ORIGINAL_INDEX, keyOrder[order[i]]); + } // decorate the predicate with the cardinality estimate. - pred = (Predicate<?>) pred.setProperty( - Annotations.ESTIMATED_CARDINALITY, plan - .cardinality(order[i])); + if (cardinality != null) { + pred = (Predicate<?>) pred.setProperty( + Annotations.ESTIMATED_CARDINALITY, + cardinality[order[i]]); + } /* * Collect all the constraints for this predicate based on which @@ -468,11 +631,11 @@ switch (scope) { case NAMED_CONTEXTS: left = namedGraphJoin(queryEngine, context, idFactory, - left, anns, pred, dataset); + left, anns, pred, dataset, queryHints); break; case DEFAULT_CONTEXTS: left = defaultGraphJoin(queryEngine, context, idFactory, - left, anns, pred, dataset); + left, anns, pred, dataset, queryHints); break; default: throw new AssertionError(); @@ -494,10 +657,10 @@ BOpEvaluationContext.ANY)); anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - - left = new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + left = applyQueryHints(new PipelineJoin(new BOp[] { left }, + anns.toArray(new NV[anns.size()])), queryHints); + } } else { 
@@ -506,7 +669,7 @@ * Triples or provenance mode. */ - left = triplesModeJoin(queryEngine, left, anns, pred); + left = triplesModeJoin(queryEngine, left, anns, pred, queryHints); } @@ -533,7 +696,8 @@ * @return The join operator. */ private static PipelineOp triplesModeJoin(final QueryEngine queryEngine, - final PipelineOp left, final List<NV> anns, Predicate<?> pred) { + final PipelineOp left, final List<NV> anns, Predicate<?> pred, + final Properties queryHints) { final boolean scaleOut = queryEngine.isScaleOut(); if (scaleOut) { @@ -551,8 +715,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -578,7 +742,7 @@ private static PipelineOp namedGraphJoin(final QueryEngine queryEngine, final BOpContextBase context, final AtomicInteger idFactory, final PipelineOp left, final List<NV> anns, Predicate<?> pred, - final Dataset dataset) { + final Dataset dataset, final Properties queryHints) { final boolean scaleOut = queryEngine.isScaleOut(); if (scaleOut) { @@ -603,8 +767,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -616,8 +780,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -646,8 +810,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new 
NV[anns.size()])), queryHints); } @@ -662,8 +826,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -714,8 +878,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } else { @@ -762,8 +926,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { dataSetJoin }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { dataSetJoin }, + anns.toArray(new NV[anns.size()])), queryHints); } @@ -786,7 +950,7 @@ private static PipelineOp defaultGraphJoin(final QueryEngine queryEngine, final BOpContextBase context, final AtomicInteger idFactory, final PipelineOp left, final List<NV> anns, Predicate<?> pred, - final Dataset dataset) { + final Dataset dataset, final Properties queryHints) { /* * @todo raise this into the caller and do one per rule rather than once @@ -813,8 +977,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -842,8 +1006,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])), queryHints); } @@ -911,8 +1075,8 @@ // // } // -// return new PipelineJoin(new BOp[] { left, pred }, anns -// .toArray(new NV[anns.size()])); +// return applyQueryHints(new 
PipelineJoin(new BOp[] { left, pred }, anns +// .toArray(new NV[anns.size()])),queryHints); // // } @@ -987,8 +1151,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])),queryHints); } else { @@ -1037,8 +1201,8 @@ anns.add(new NV(PipelineJoin.Annotations.PREDICATE,pred)); - return new PipelineJoin(new BOp[] { left }, anns - .toArray(new NV[anns.size()])); + return applyQueryHints(new PipelineJoin(new BOp[] { left }, anns + .toArray(new NV[anns.size()])),queryHints); } @@ -1059,7 +1223,7 @@ */ public static PipelineOp convert(final IProgram program, final AtomicInteger idFactory, final AbstractTripleStore db, - final QueryEngine queryEngine) { + final QueryEngine queryEngine, final Properties queryHints) { // When parallel, the program is translated to a UNION. Else STEPS. final boolean isParallel = program.isParallel(); @@ -1076,7 +1240,8 @@ for (int i = 0; i < arity; i++) { // convert the child IStep - BOpBase tmp = convert(steps[i], idFactory, db, queryEngine); + final BOpBase tmp = convert(steps[i], idFactory, db, queryEngine, + queryHints); /* * @todo Route binding sets around the UNION/STEPS operator. 
We need Modified: branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java 2011-01-02 00:41:31 UTC (rev 4044) +++ branches/QUADS_QUERY_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestQueryHints.java 2011-01-02 21:40:00 UTC (rev 4045) @@ -28,22 +28,17 @@ import java.util.Collection; import java.util.LinkedList; -import org.openrdf.model.BNode; -import org.openrdf.model.Literal; -import org.openrdf.model.Resource; + import org.openrdf.model.URI; -import org.openrdf.model.impl.BNodeImpl; -import org.openrdf.model.impl.LiteralImpl; import org.openrdf.model.impl.URIImpl; -import org.openrdf.model.vocabulary.RDF; -import org.openrdf.model.vocabulary.RDFS; import org.openrdf.query.BindingSet; import org.openrdf.query.QueryLanguage; import org.openrdf.query.TupleQuery; import org.openrdf.query.TupleQueryResult; import org.openrdf.query.impl.BindingImpl; -import com.bigdata.rdf.store.BD; +import com.bigdata.bop.join.PipelineJoin; + /** * Unit tests the query hints aspect of the {@link BigdataSail} implementation. * @@ -68,7 +63,11 @@ /** * Tests adding query hints in SPARQL. * - * @throws Exception + * @throws Exception + * + * @todo Unfortunately, this does not really _test_ anything since the query + * should be answered correctly regardless of the query hint(s) + * specified. 
*/ public void testQueryHints() throws Exception { @@ -102,20 +101,22 @@ { - String query = - "PREFIX "+BD.QUERY_HINTS_NAMESPACE+": " + - " <http://www.bigdata.com/queryOption#com.bigdata.relation.rule.eval.DefaultRuleTaskFactory.nestedSubquery=true&com.bigdata.fullScanTreshold=1000> " + - "SELECT * " + - "WHERE { " + - " <"+a+"> ?p ?o " + - "}"; - + final String query = "PREFIX " + QueryHints.NAMESPACE + + ": " + "<http://www.bigdata.com/queryOption#" + // + PipelineJoin.Annotations.MAX_PARALLEL + "=-5" // + + "&" + "com.bigdata.fullScanTreshold=1000" // + + ">\n"// + + "SELECT * " + // + "WHERE { " + // + " <" + a + "> ?p ?o " + // + "}"; + final TupleQuery tupleQuery = cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); tupleQuery.setIncludeInferred(true /* includeInferred */); - TupleQueryResult result = tupleQuery.evaluate(); + final TupleQueryResult result = tupleQuery.evaluate(); - Collection<BindingSet> answer = new LinkedList<BindingSet>(); + final Collection<BindingSet> answer = new LinkedList<BindingSet>(); answer.add(createBindingSet( new BindingImpl("p", b), new BindingImpl("o", c) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2011-01-02 00:41:41
|
Revision: 4044 http://bigdata.svn.sourceforge.net/bigdata/?rev=4044&view=rev Author: thompsonbry Date: 2011-01-02 00:41:31 +0000 (Sun, 02 Jan 2011) Log Message: ----------- Added a new IRunningQuery implementation based on chaining together operators using a blocking queue in front of each operator. The new implementation is conditionally enabled by an annotation. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IChunkHandler.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/IRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryEngineTestAnnotations.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/QueryResultIterator.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunState.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChunkHandler.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/CancelQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederatedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/fed/FederationChunkHandler.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/solutions/SliceOp.java 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/relation/accesspath/MultiplexBlockingBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/util/config/LogUtil.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/controller/TestUnion.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/MockRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngine_Slice.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/fed/TestFederatedQueryEngine.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnBarData.java branches/QUADS_QUERY_BRANCH/bigdata-rdf/src/test/com/bigdata/bop/rdf/joinGraph/TestJoinGraphOnLubm.java branches/QUADS_QUERY_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/OutputStatsBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionBuffer.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/SinkTransitionMetadata.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/StandaloneChainedRunningQuery.java Removed Paths: ------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-12-22 17:32:36 UTC (rev 4043) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -413,40 +413,4 @@ // // } -// /** -// * Copy data from the source to the sink. The sink will be flushed and -// * closed. The source will be closed. -// */ -// public void copySourceToSink() { -// -// // source. -// final IAsynchronousIterator<IBindingSet[]> source = (IAsynchronousIterator) getSource(); -// -// // default sink -// final IBlockingBuffer<IBindingSet[]> sink = (IBlockingBuffer) getSink(); -// -// final BOpStats stats = getStats(); -// -// try { -// -// // copy binding sets from the source. -// BOpUtility.copy(source, sink, null/* sink2 */, -// null/* constraints */, stats); -// -// // flush the sink. -// sink.flush(); -// -// } finally { -// -// sink.close(); -// -// if (sink2 != null) -// sink2.close(); -// -// source.close(); -// -// } -// -// } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java 2010-12-22 17:32:36 UTC (rev 4043) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContextBase.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -233,13 +233,8 @@ * @todo replaces * {@link IJoinNexus#getTailAccessPath(IRelation, IPredicate)}. * - * @todo Reconcile with IRelation#getAccessPath(IPredicate) once the bop - * conversion is done. It has much of the same logic (this also - * handles remote access paths now). - * * @todo Support mutable relation views (no - just fix truth maintenance). 
*/ -// @SuppressWarnings("unchecked") public <E> IAccessPath<E> getAccessPath(final IRelation<E> relation, final IPredicate<E> predicate) { @@ -252,146 +247,6 @@ return relation.getAccessPath(indexManager/* localIndexManager */, relation.getKeyOrder(predicate), predicate); -// /* -// * Note: ALWAYS use the "perfect" index. -// */ -// final IKeyOrder<E> keyOrder = relation.getKeyOrder(predicate); -//// { -//// final IKeyOrder<E> tmp = predicate.getKeyOrder(); -//// if (tmp != null) { -//// // use the specified index. -//// keyOrder = tmp; -//// } else { -//// // ask the relation for the best index. -//// keyOrder = relation.getKeyOrder(predicate); -//// } -//// } -//// -//// if (keyOrder == null) -//// throw new RuntimeException("No access path: " + predicate); -// -// final int partitionId = predicate.getPartitionId(); -// -// final long timestamp = (Long) predicate -// .getRequiredProperty(BOp.Annotations.TIMESTAMP); -// -// final int flags = predicate.getProperty( -// IPredicate.Annotations.FLAGS, -// IPredicate.Annotations.DEFAULT_FLAGS) -// | (TimestampUtility.isReadOnly(timestamp) ? IRangeQuery.READONLY -// : 0); -// -// final int chunkOfChunksCapacity = predicate.getProperty( -// BufferAnnotations.CHUNK_OF_CHUNKS_CAPACITY, -// BufferAnnotations.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY); -// -// final int chunkCapacity = predicate.getProperty( -// BufferAnnotations.CHUNK_CAPACITY, -// BufferAnnotations.DEFAULT_CHUNK_CAPACITY); -// -// final int fullyBufferedReadThreshold = predicate.getProperty( -// IPredicate.Annotations.FULLY_BUFFERED_READ_THRESHOLD, -// IPredicate.Annotations.DEFAULT_FULLY_BUFFERED_READ_THRESHOLD); -// -// if (partitionId != -1) { -// -// /* -// * Note: This handles a read against a local index partition. For -// * scale-out, the [indexManager] will be the data service's local -// * index manager. -// * -// * Note: Expanders ARE NOT applied in this code path. 
Expanders -// * require a total view of the relation, which is not available -// * during scale-out pipeline joins. Likewise, the [backchain] -// * property will be ignored since it is handled by an expander. -// * -// * @todo Replace this with IRelation#getAccessPathForIndexPartition() -// */ -//// return ((AbstractRelation<?>) relation) -//// .getAccessPathForIndexPartition(indexManager, -//// (IPredicate) predicate); -// -// /* -// * @todo This is an error since expanders are currently ignored on -// * shard-wise access paths. While it is possible to enable expanders -// * for shard-wise access paths. -// */ -// if (predicate.getSolutionExpander() != null) -// throw new IllegalArgumentException(); -// -// final String namespace = relation.getNamespace();//predicate.getOnlyRelationName(); -// -// // The name of the desired index partition. -// final String name = DataService.getIndexPartitionName(namespace -// + "." + keyOrder.getIndexName(), partitionId); -// -// // MUST be a local index view. -// final ILocalBTreeView ndx = (ILocalBTreeView) indexManager -// .getIndex(name, timestamp); -// -// return new AccessPath<E>(relation, indexManager, timestamp, -// predicate, keyOrder, ndx, flags, chunkOfChunksCapacity, -// chunkCapacity, fullyBufferedReadThreshold).init(); -// -// } -// -//// accessPath = relation.getAccessPath((IPredicate) predicate); -// -// // Decide on a local or remote view of the index. -// final IIndexManager indexManager; -// if (predicate.isRemoteAccessPath()) { -// // use federation in scale-out for a remote access path. -// indexManager = fed != null ? fed : this.indexManager; -// } else { -// indexManager = this.indexManager; -// } -// -// // Obtain the index. -// final String fqn = AbstractRelation.getFQN(relation, keyOrder); -// final IIndex ndx = AbstractRelation.getIndex(indexManager, fqn, timestamp); -// -// if (ndx == null) { -// -// throw new IllegalArgumentException("no index? 
relation=" -// + relation.getNamespace() + ", timestamp=" + timestamp -// + ", keyOrder=" + keyOrder + ", pred=" + predicate -// + ", indexManager=" + getIndexManager()); -// -// } -// -// // Obtain the access path for that relation and index. -// final IAccessPath<E> accessPath = ((AbstractRelation<E>) relation) -// .newAccessPath(relation, indexManager, timestamp, predicate, -// keyOrder, ndx, flags, chunkOfChunksCapacity, -// chunkCapacity, fullyBufferedReadThreshold); -// -// // optionally wrap with an expander pattern. -// return expander(predicate, accessPath); - } -// /** -// * Optionally wrap with an expander pattern. -// * -// * @param predicate -// * @param accessPath -// * @return -// * @param <E> -// */ -// private <E> IAccessPath<E> expander(final IPredicate<E> predicate, -// final IAccessPath<E> accessPath) { -// -// final ISolutionExpander<E> expander = predicate.getSolutionExpander(); -// -// if (expander != null) { -// -// // allow the predicate to wrap the access path -// return expander.getAccessPath(accessPath); -// -// } -// -// return accessPath; -// -// } - } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2010-12-22 17:32:36 UTC (rev 4043) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/AbstractSubqueryOp.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -42,8 +42,8 @@ import com.bigdata.bop.IBindingSet; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.bset.Tee; +import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.engine.RunningQuery; import com.bigdata.relation.accesspath.IAsynchronousIterator; import com.bigdata.util.concurrent.LatchedExecutor; @@ -167,7 +167,7 @@ private final AbstractSubqueryOp controllerOp; private final 
BOpContext<IBindingSet> context; - private final List<FutureTask<RunningQuery>> tasks = new LinkedList<FutureTask<RunningQuery>>(); + private final List<FutureTask<IRunningQuery>> tasks = new LinkedList<FutureTask<IRunningQuery>>(); private final CountDownLatch latch; private final int nparallel; private final Executor executor; @@ -204,7 +204,7 @@ * Task runs subquery and cancels all subqueries in [tasks] if * it fails. */ - tasks.add(new FutureTask<RunningQuery>(new SubqueryTask(op, + tasks.add(new FutureTask<IRunningQuery>(new SubqueryTask(op, context)) { /* * Hook future to count down the latch when the task is @@ -233,7 +233,7 @@ /* * Run subqueries with limited parallelism. */ - for (FutureTask<RunningQuery> ft : tasks) { + for (FutureTask<IRunningQuery> ft : tasks) { executor.execute(ft); } @@ -251,7 +251,7 @@ /* * Get the futures, throwing out any errors. */ - for (FutureTask<RunningQuery> ft : tasks) + for (FutureTask<IRunningQuery> ft : tasks) ft.get(); // Now that we know the subqueries ran Ok, flush the sink. @@ -263,7 +263,7 @@ } finally { // Cancel any tasks which are still running. - for (FutureTask<RunningQuery> ft : tasks) + for (FutureTask<IRunningQuery> ft : tasks) ft.cancel(true/* mayInterruptIfRunning */); context.getSink().close(); @@ -281,7 +281,7 @@ * @author <a href="mailto:tho...@us...">Bryan * Thompson</a> */ - private class SubqueryTask implements Callable<RunningQuery> { + private class SubqueryTask implements Callable<IRunningQuery> { /** * The evaluation context for the parent query. @@ -302,7 +302,7 @@ } - public RunningQuery call() throws Exception { + public IRunningQuery call() throws Exception { IAsynchronousIterator<IBindingSet[]> subquerySolutionItr = null; try { @@ -310,7 +310,7 @@ final QueryEngine queryEngine = parentContext.getRunningQuery() .getQueryEngine(); - final RunningQuery runningQuery = queryEngine + final IRunningQuery runningQuery = queryEngine .eval(subQueryOp); // Iterator visiting the subquery solutions. 
Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2010-12-22 17:32:36 UTC (rev 4043) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/controller/JoinGraph.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -61,9 +61,9 @@ import com.bigdata.bop.PipelineOp; import com.bigdata.bop.ap.SampleIndex; import com.bigdata.bop.bindingSet.HashBindingSet; +import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.LocalChunkMessage; import com.bigdata.bop.engine.QueryEngine; -import com.bigdata.bop.engine.RunningQuery; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.join.PipelineJoin.PipelineJoinStats; import com.bigdata.bop.rdf.join.DataSetJoin; @@ -1061,7 +1061,7 @@ // run the cutoff sampling of the edge. final UUID queryId = UUID.randomUUID(); - final RunningQuery runningQuery = queryEngine.eval(queryId, + final IRunningQuery runningQuery = queryEngine.eval(queryId, queryOp, new LocalChunkMessage<IBindingSet>(queryEngine, queryId, joinOp.getId()/* startId */, -1 /* partitionId */, @@ -2834,7 +2834,7 @@ final QueryEngine queryEngine = parentContext.getRunningQuery() .getQueryEngine(); - final RunningQuery runningQuery = queryEngine + final IRunningQuery runningQuery = queryEngine .eval( queryId, queryOp, Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -0,0 +1,1000 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. 
+ +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * Created on Dec 30, 2010 + */ + +package com.bigdata.bop.engine; + +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpEvaluationContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.PipelineOp; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.journal.IIndexManager; +import com.bigdata.journal.ITx; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.service.IBigdataFederation; +import com.bigdata.util.InnerCause; +import com.bigdata.util.concurrent.Haltable; + +/** + * Abstract base class for various {@link IRunningQuery} implementations. 
The + * purpose of this class is to isolate aspects common to different designs for + * managing resources for a running query and make it easier to realize + * different strategies for managing the resources allocated to a running query. + * <p> + * There are common requirements for the {@link IRunningQuery}, but a variety of + * ways in which those requirements can be met. Among the common requirements + * are a means to manage tradeoffs in the allocation of various resources to the + * operators in each query. Some of the more important tradeoffs are the #of + * threads to allocate to each operator (threads bounds IO for Java 6 since we + * are using a synchronous IO model) and the amount of RAM allocated to each + * operator (including RAM on the JVM heap and RAM on the native Java process + * heap). If the #of threads is too restrictive, then queries will progress + * slowly due to insufficient IO level parallelism. If the query buffers too + * much data on the JVM heap, then it can cause GC overhead problems that can + * drastically reduce the responsiveness and throughput of the JVM. Data can be + * moved off of the JVM heap onto the Java process heap by serializing it into + * <em>direct</em> {@link ByteBuffer}s. This can be very efficient in + * combination with hash joins at the expense of increasing the latency to the + * first result when compared with pipelined evaluation. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +abstract public class AbstractRunningQuery implements IRunningQuery { + + /** + * Error message used when an operation which must be performed on the query + * controller is attempted on some other {@link IQueryPeer}. + */ + protected static final String ERR_NOT_CONTROLLER = "Operator only permitted on the query controller"; + + /** + * Error message used when a request is made after the query has stopped + * executing. 
+ */ + protected static final String ERR_QUERY_DONE = "Query is no longer running"; + + /** + * Error message used when a request is addressed to an operator other than + * the head of the pipeline in a context where the request must be addressed + * to the operator at the head of the pipeline (e.g., when presenting the + * initial binding sets to get the query moving.) + */ + protected static final String ERR_NOT_PIPELINE_START = "Not pipeline start"; + + /** + * Error message used when no operator can be found for a given + * {@link BOp.Annotations#BOP_ID}. + */ + protected static final String ERR_NO_SUCH_BOP = "No such bop: id="; + + /** + * Error message used when two operators have the same + * {@link BOp.Annotations#BOP_ID}. + */ + protected static final String ERR_DUPLICATE_IDENTIFIER = "Duplicate identifier: id="; + + private final static transient Logger log = Logger + .getLogger(AbstractRunningQuery.class); + + /** + * The class executing the query on this node. + */ + final private QueryEngine queryEngine; + + /** The unique identifier for this query. */ + final private UUID queryId; + + /** + * The query deadline. The value is the system clock time in milliseconds + * when the query is due and {@link Long#MAX_VALUE} if there is no deadline. + * In order to have a guarantee of a consistent clock, the deadline is + * interpreted by the query controller. + */ + final private AtomicLong deadline = new AtomicLong(Long.MAX_VALUE); + + /** + * The timestamp (ms) when the query begins to execute. + */ + final private AtomicLong startTime = new AtomicLong(System + .currentTimeMillis()); + + /** + * The timestamp (ms) when the query is done executing and ZERO (0L) if the + * query is not done. + */ + final private AtomicLong doneTime = new AtomicLong(0L); + + /** + * <code>true</code> iff the outer {@link QueryEngine} is the controller for + * this query. 
+ */ + final private boolean controller; + + /** + * The client executing this query (aka the query controller). + * <p> + * Note: The proxy is primarily for light weight RMI messages used to + * coordinate the distributed query evaluation. Ideally, all large objects + * will be transferred among the nodes of the cluster using NIO buffers. + */ + final private IQueryClient clientProxy; + + /** The query. */ + final private PipelineOp query; + + /** + * An index from the {@link BOp.Annotations#BOP_ID} to the {@link BOp}. This + * index is generated by the constructor. It is immutable and thread-safe. + */ + private final Map<Integer, BOp> bopIndex; + + /** + * The run state of the query and the result of the computation iff it + * completes execution normally (without being interrupted, cancelled, etc). + */ + final private Haltable<Void> future = new Haltable<Void>(); + + /** + * The {@link Future} of this query. + * <p> + * Note: This is exposed to the {@link QueryEngine} to let it cache the + * {@link Future} for recently finished queries. + */ + final protected Future<Void> getFuture() { + + return future; + + } + + /** + * The runtime statistics for each {@link BOp} in the query and + * <code>null</code> unless this is the query controller. + */ + final private ConcurrentHashMap<Integer/* bopId */, BOpStats> statsMap; + + /** + * The buffer used for the overall output of the query pipeline. + * <p> + * Note: This only exists on the query controller, and then only when the + * top-level operator is not a mutation. In order to ensure that the results + * are transferred to the query controller in scale-out, the top-level + * operator in the query plan must specify + * {@link BOpEvaluationContext#CONTROLLER}. For example, {@link SliceOp} + * uses this {@link BOpEvaluationContext}.
+ */ + final private IBlockingBuffer<IBindingSet[]> queryBuffer; + + /** + * The iterator draining the {@link #queryBuffer} and <code>null</code> iff + * the {@link #queryBuffer} is <code>null</code>. + */ + final private IAsynchronousIterator<IBindingSet[]> queryIterator; + + /** + * A lock guarding various state changes. This guards changes to the + * internal state of the {@link #runState} object. It is also used to + * serialize requests to {@link #acceptChunk(IChunkMessage)} and + * {@link #cancel(boolean)} and make atomic decision concerning whether to + * attach a new {@link IChunkMessage} to an operator task which is already + * running or to start a new task for that message. + * + * @see RunState + */ + protected final ReentrantLock lock = new ReentrantLock(); + + /** + * The run state of this query and <code>null</code> unless this is the + * query controller. + */ + final private RunState runState; + + /** + * Flag used to prevent retriggering of {@link #lifeCycleTearDownQuery()}. + */ + private final AtomicBoolean didQueryTearDown = new AtomicBoolean(false); + + /** + * Set the query deadline. The query will be cancelled when the deadline is + * passed. If the deadline is passed, the query is immediately cancelled. + * + * @param deadline + * The deadline. + * @throws IllegalArgumentException + * if the deadline is non-positive. + * @throws IllegalStateException + * if the deadline was already set. + * @throws UnsupportedOperationException + * unless node is the query controller. + */ + final public void setDeadline(final long deadline) { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (deadline <= 0) + throw new IllegalArgumentException(); + + // set the deadline. + if (!this.deadline + .compareAndSet(Long.MAX_VALUE/* expect */, deadline/* update */)) { + + // the deadline is already set. 
+ throw new IllegalStateException(); + + } + + if (deadline < System.currentTimeMillis()) { + + // deadline has already expired. + halt(new TimeoutException()); + + } + + } + + final public long getDeadline() { + + return deadline.get(); + + } + + final public long getStartTime() { + + return startTime.get(); + + } + + final public long getDoneTime() { + + return doneTime.get(); + + } + + final public long getElapsed() { + + long mark = doneTime.get(); + + if (mark == 0L) + mark = System.currentTimeMillis(); + + return mark - startTime.get(); + + } + + /** + * Return the buffer used for the overall output of the query pipeline and + * <code>null</code> if this is not the query controller. + */ + final protected IBlockingBuffer<IBindingSet[]> getQueryBuffer() { + + return queryBuffer; + + } + + public QueryEngine getQueryEngine() { + + return queryEngine; + + } + + /** + * The client executing this query (aka the query controller). + * <p> + * Note: The proxy is primarily for light weight RMI messages used to + * coordinate the distributed query evaluation. Ideally, all large objects + * will be transferred among the nodes of the cluster using NIO buffers. + */ + final public IQueryClient getQueryController() { + + return clientProxy; + + } + + /** + * The unique identifier for this query. + */ + final public UUID getQueryId() { + + return queryId; + + } + + /** + * Return the operator tree for this query. + */ + final public PipelineOp getQuery() { + + return query; + + } + + /** + * Return <code>true</code> iff this is the query controller. + */ + final public boolean isController() { + + return controller; + + } + + final public Map<Integer/* bopId */, BOpStats> getStats() { + + return Collections.unmodifiableMap(statsMap); + + } + + /** + * Return the {@link BOpStats} instance associated with the given + * {@link BOp} identifier. + * + * @param bopId + * The {@link BOp} identifier.
+ * + * @return The associated {@link BOpStats} object -or- <code>null</code> if + * there is no entry for that {@link BOp} identifier. + * + * @throws IllegalArgumentException + * if the argument is <code>null</code>. + */ + final public BOpStats getStats(final Integer bopId) { + + if (bopId == null) + throw new IllegalArgumentException(); + + return statsMap.get(bopId); + + } + + final public Map<Integer, BOp> getBOpIndex() { + + return bopIndex; + + } + + /** + * @param queryEngine + * The {@link QueryEngine} on which the query is running. In + * scale-out, a query is typically instantiated on many + * {@link QueryEngine}s. + * @param queryId + * The identifier for that query. + * @param controller + * <code>true</code> iff the {@link QueryEngine} is the query + * controller for this query (the {@link QueryEngine} which will + * coordinate the query evaluation). + * @param clientProxy + * The query controller. In standalone, this is the same as the + * <i>queryEngine</i>. In scale-out, this is an RMI proxy for the + * query controller whenever the query is instantiated on a node + * other than the query controller itself. + * @param query + * The query. + * + * @throws IllegalArgumentException + * if any argument is <code>null</code>. + * @throws IllegalArgumentException + * if the <i>readTimestamp</i> is {@link ITx#UNISOLATED} + * (queries may not read on the unisolated indices). + * @throws IllegalArgumentException + * if the <i>writeTimestamp</i> is neither + * {@link ITx#UNISOLATED} nor a read-write transaction + * identifier. 
+ */ + public AbstractRunningQuery(final QueryEngine queryEngine, + final UUID queryId, final boolean controller, + final IQueryClient clientProxy, final PipelineOp query) { + + if (queryEngine == null) + throw new IllegalArgumentException(); + + if (queryId == null) + throw new IllegalArgumentException(); + + if (clientProxy == null) + throw new IllegalArgumentException(); + + if (query == null) + throw new IllegalArgumentException(); + + this.queryEngine = queryEngine; + + this.queryId = queryId; + + this.controller = controller; + + this.clientProxy = clientProxy; + + this.query = query; + + this.bopIndex = BOpUtility.getIndex(query); + + /* + * Setup the BOpStats object for each pipeline operator in the query. + */ + if (controller) { + + runState = new RunState(this); + + statsMap = new ConcurrentHashMap<Integer, BOpStats>(); + + populateStatsMap(query); + + /* + * FIXME Review the concept of mutation queries. It used to be that + * queries could only either read or write. Now we have access paths + * which either read or write and each query could use zero or more + * such access paths. + */ + if (true/* !query.isMutation() */) { + + // read-only query. + + final BOpStats queryStats = statsMap.get(query.getId()); + + queryBuffer = new BlockingBufferWithStats<IBindingSet[]>(query, + queryStats); + + queryIterator = new QueryResultIterator<IBindingSet[]>(this, + queryBuffer.iterator()); + + // } else { + // + // // Note: Not used for mutation queries. + // queryBuffer = null; + // queryIterator = null; + + } + + } else { + + runState = null; // Note: only on the query controller. + statsMap = null; // Note: only on the query controller. + queryBuffer = null; // Note: only on the query controller. + queryIterator = null; // Note: only when queryBuffer is defined. + + } + + } + + /** + * Pre-populate a map with {@link BOpStats} objects for the query. Only the + * child operands are visited. 
Operators in subqueries are not visited since + * they will be assigned {@link BOpStats} objects when they are run as a + * subquery. + * + * @see BOp.Annotations#CONTROLLER + */ + private void populateStatsMap(final BOp op) { + + if (!(op instanceof PipelineOp)) + return; + + final PipelineOp bop = (PipelineOp) op; + + final int bopId = bop.getId(); + + statsMap.put(bopId, bop.newStats()); + + if (!op.getProperty(BOp.Annotations.CONTROLLER, + BOp.Annotations.DEFAULT_CONTROLLER)) { + /* + * Visit children, but not if this is a CONTROLLER operator since + * its children belong to a subquery. + */ + for (BOp t : op.args()) { + // visit children (recursion) + populateStatsMap(t); + } + } + + } + + /** + * Message provides notice that the query has started execution and will + * consume some specific number of binding set chunks. + * + * @param msg + * The initial message presented to the query. The message is + * used to update the query {@link RunState}. However, the + * message will not be consumed until it is presented to + * {@link #acceptChunk(IChunkMessage)} by the {@link QueryEngine} + * . + * + * @throws UnsupportedOperationException + * If this node is not the query coordinator. + */ + final protected void startQuery(final IChunkMessage<IBindingSet> msg) { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (msg == null) + throw new IllegalArgumentException(); + + if (!queryId.equals(msg.getQueryId())) + throw new IllegalArgumentException(); + + lock.lock(); + + try { + + runState.startQuery(msg); + + lifeCycleSetUpQuery(); + + } catch (TimeoutException ex) { + + halt(ex); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Message provides notice that the operator has started execution and will + * consume some specific number of binding set chunks. + * + * @param msg + * The {@link StartOpMessage}. + * + * @throws UnsupportedOperationException + * If this node is not the query coordinator. 
+ */ + final protected void startOp(final StartOpMessage msg) { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (msg == null) + throw new IllegalArgumentException(); + + if (!queryId.equals(msg.queryId)) + throw new IllegalArgumentException(); + + lock.lock(); + + try { + + if (runState.startOp(msg)) + lifeCycleSetUpOperator(msg.bopId); + + } catch (TimeoutException ex) { + + halt(ex); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Message provides notice that the operator has ended execution. The + * termination conditions for the query are checked. (For scale-out, the + * node controlling the query needs to be involved for each operator + * start/stop in order to make the termination decision atomic). + * + * @param msg + * The {@link HaltOpMessage} + * + * @throws UnsupportedOperationException + * If this node is not the query coordinator. + */ + final protected void haltOp(final HaltOpMessage msg) { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (msg == null) + throw new IllegalArgumentException(); + + if (!queryId.equals(msg.queryId)) + throw new IllegalArgumentException(); + + lock.lock(); + + try { + + // update per-operator statistics. + final BOpStats tmp = statsMap.putIfAbsent(msg.bopId, msg.taskStats); + + // combine stats, but do not combine a stats object with itself. + if (tmp != null && tmp != msg.taskStats) { + tmp.add(msg.taskStats); + } + + if (runState.haltOp(msg)) { + + /* + * No more chunks can appear for this operator so invoke its end + * of life cycle hook. + */ + + lifeCycleTearDownOperator(msg.bopId); + + if (runState.isAllDone()) { + + // Normal termination. + halt(); + + } + + } + + } catch (Throwable t) { + + halt(t); + + } finally { + + lock.unlock(); + + } + + } + + /** + * Hook invoked the first time the given operator is evaluated for the + * query.
This may be used to set up life cycle resources for the operator, + * such as a distributed hash table on a set of nodes identified by + * annotations of the operator. + * + * @param bopId + * The operator identifier. + */ + protected void lifeCycleSetUpOperator(final int bopId) { + + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId + ", bopId=" + bopId); + + } + + /** + * Hook invoked after the given operator has been evaluated for the + * query for what is known to be the last time. This may be used to tear + * down life cycle resources for the operator, such as a distributed hash + * table on a set of nodes identified by annotations of the operator. + * + * @param bopId + * The operator identifier. + */ + protected void lifeCycleTearDownOperator(final int bopId) { + + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId + ", bopId=" + bopId); + + } + + /** + * Hook invoked before any operator is evaluated for the query. This may + * be used to set up life cycle resources for the query. + */ + protected void lifeCycleSetUpQuery() { + + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId); + + } + + /** + * Hook invoked when the query terminates. This may be used to tear down + * life cycle resources for the query. + */ + protected void lifeCycleTearDownQuery() { + + if (log.isTraceEnabled()) + log.trace("queryId=" + queryId); + + } + + /** + * Make a chunk of binding sets available for consumption by the query. + * <p> + * Note: this is invoked by {@link QueryEngine#acceptChunk(IChunkMessage)} + * + * @param msg + * The chunk. + * + * @return <code>true</code> if the message was accepted. + * + * @todo Reconcile {@link #acceptChunk(IChunkMessage)} and + * {@link #consumeChunk()}. Why {@link #consumeChunk()} is also used + * by the {@link QueryEngine}.
+ */ + abstract protected boolean acceptChunk(final IChunkMessage<IBindingSet> msg); + + /** + * Instruct the {@link IRunningQuery} to consume an {@link IChunkMessage} + * already on its input queue. + */ + abstract protected void consumeChunk(); + + final public IAsynchronousIterator<IBindingSet[]> iterator() { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (queryIterator == null) + throw new UnsupportedOperationException(); + + return queryIterator; + + } + + public void halt() { + + lock.lock(); + + try { + + // signal normal completion. + future.halt((Void) null); + + // interrupt anything which is running. + cancel(true/* mayInterruptIfRunning */); + + } finally { + + lock.unlock(); + + } + + } + + public Throwable halt(final Throwable t) { + + if (t == null) + throw new IllegalArgumentException(); + + lock.lock(); + + try { + + if (!InnerCause.isInnerCause(t, InterruptedException.class)) + log.error(toString(), t); + + try { + + // signal error condition. + return future.halt(t); + + } finally { + + // interrupt anything which is running. + cancel(true/* mayInterruptIfRunning */); + + } + + } finally { + + lock.unlock(); + + } + + } + + /** + * {@inheritDoc} + * <p> + * Cancelled queries : + * <ul> + * <li>must reject new chunks</li> + * <li>must cancel any running operators</li> + * <li>must not begin to evaluate operators</li> + * <li>must release all of their resources</li> + * <li>must not cause the solutions to be discarded before the client can + * consume them.</li> + * </ul> + */ + final public boolean cancel(final boolean mayInterruptIfRunning) { + lock.lock(); + try { + // halt the query. + boolean cancelled = future.cancel(mayInterruptIfRunning); + if (didQueryTearDown + .compareAndSet(false/* expect */, true/* update */)) { + /* + * Do additional cleanup exactly once. + */ + // cancel any running operators for this query on this node. 
+ cancelled |= cancelRunningOperators(mayInterruptIfRunning); + if (controller) { + // cancel query on other peers. + cancelled |= cancelQueryOnPeers(future.getCause()); + } + if (queryBuffer != null) { + /* + * Close the query buffer so the iterator draining the query + * results will recognize that no new results will become + * available. + */ + queryBuffer.close(); + } + // life cycle hook for the end of the query. + lifeCycleTearDownQuery(); + // mark done time. + doneTime.set(System.currentTimeMillis()); + // log summary statistics for the query. + if (isController()) + QueryLog.log(this); + } + // remove from the collection of running queries. + queryEngine.halt(this); + // true iff we cancelled something. + return cancelled; + } finally { + lock.unlock(); + } + } + + /** + * Cancel any running operators for this query on this node (internal API). + * <p> + * Note: This will wind up invoking the tear down methods for each operator + * which was running or which could have been re-triggered. + * + * @return <code>true</code> if any operators were cancelled. + */ + abstract protected boolean cancelRunningOperators( + final boolean mayInterruptIfRunning); + + // { + // boolean cancelled = false; + // + // final Iterator<ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>> fitr = + // operatorFutures.values().iterator(); + // + // while (fitr.hasNext()) { + // + // final ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask> set = + // fitr.next(); + // + // for(ChunkFutureTask f : set.keySet()) { + // + // if (f.cancel(mayInterruptIfRunning)) + // cancelled = true; + // + // } + // + // } + // + // return cancelled; + // + // } + + /** + * Cancel the query on each node where it is known to be running. + * <p> + * Note: The default implementation verifies that the caller is holding the + * {@link #lock} but is otherwise a NOP. This is overridden for scale-out. + * + * @param cause + * When non-<code>null</code>, the cause. 
+ * + * @return <code>true</code> iff something was cancelled. + * + * @throws IllegalMonitorStateException + * unless the {@link #lock} is held by the current thread. + * @throws UnsupportedOperationException + * unless this is the query controller. + */ + protected boolean cancelQueryOnPeers(final Throwable cause) { + + if (!controller) + throw new UnsupportedOperationException(ERR_NOT_CONTROLLER); + + if (!lock.isHeldByCurrentThread()) + throw new IllegalMonitorStateException(); + + return false; + + } + + final public Void get() throws InterruptedException, ExecutionException { + + return future.get(); + + } + + final public Void get(long arg0, TimeUnit arg1) + throws InterruptedException, ExecutionException, TimeoutException { + + return future.get(arg0, arg1); + + } + + final public boolean isCancelled() { + + return future.isCancelled(); + + } + + final public boolean isDone() { + + return future.isDone(); + + } + + final public Throwable getCause() { + + return future.getCause(); + + } + + public IBigdataFederation<?> getFederation() { + + return queryEngine.getFederation(); + + } + + public IIndexManager getIndexManager() { + + return queryEngine.getIndexManager(); + + } + + public String toString() { + final StringBuilder sb = new StringBuilder(getClass().getName()); + sb.append("{queryId=" + queryId); + sb.append(",deadline=" + deadline.get()); + sb.append(",isDone=" + isDone()); + sb.append(",isCancelled=" + isCancelled()); + sb.append(",runState=" + runState); + sb.append(",controller=" + controller); + sb.append(",clientProxy=" + clientProxy); + sb.append(",query=" + query); + sb.append("}"); + return sb.toString(); + } + + // abstract protected IChunkHandler getChunkHandler(); + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/AbstractRunningQuery.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: 
branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java 2010-12-22 17:32:36 UTC (rev 4043) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/BlockingBufferWithStats.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -39,7 +39,11 @@ * to the buffer. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> - * @version $Id$ + * @version $Id: BlockingBufferWithStats.java 3838 2010-10-22 19:45:33Z + * thompsonbry $ + * + * @todo replace with {@link OutputStatsBuffer}? (It is still used by the + * {@link ChunkedRunningQuery} and by the query output buffer.) */ public class BlockingBufferWithStats<E> extends BlockingBuffer<E> { Copied: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java (from rev 4039, branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java) =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/ChunkedRunningQuery.java 2011-01-02 00:41:31 UTC (rev 4044) @@ -0,0 +1,1592 @@ +/* + +Copyright (C) SYSTAP, LLC 2006-2008. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + */ +/* + * Created on Aug 31, 2010 + */ +package com.bigdata.bop.engine; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Future; +import java.util.concurrent.FutureTask; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.log4j.Logger; + +import com.bigdata.bop.BOp; +import com.bigdata.bop.BOpContext; +import com.bigdata.bop.BOpUtility; +import com.bigdata.bop.IBindingSet; +import com.bigdata.bop.NoSuchBOpException; +import com.bigdata.bop.PipelineOp; +import com.bigdata.journal.ITx; +import com.bigdata.journal.Journal; +import com.bigdata.relation.accesspath.BufferClosedException; +import com.bigdata.relation.accesspath.IAsynchronousIterator; +import com.bigdata.relation.accesspath.IBlockingBuffer; +import com.bigdata.relation.accesspath.IMultiSourceAsynchronousIterator; +import com.bigdata.relation.accesspath.MultiSourceSequentialAsynchronousIterator; +import com.bigdata.service.IBigdataFederation; +import com.bigdata.util.InnerCause; +import com.bigdata.util.concurrent.Memoizer; + +/** + * {@link IRunningQuery} implementation based on the assignment of + * {@link IChunkMessage}(s) to an operator task. Operators (other than those + * with "operator-at-once" evaluation semantics) will typically be executed + * multiple times, consuming at least one {@link IChunkMessage} each time they + * are evaluated. {@link IChunkMessage}s target a specific operator (bopId) and + * shard (shardId).
In scale-out, binding sets will be mapped across the target + * access path and may be replicated to one or more nodes depending on the + * distribution of the shards. This evaluation strategy is compatible with both + * the {@link Journal} (aka standalone) and the {@link IBigdataFederation} (aka + * clustered or scale-out). + * + * @todo The challenge with this implementation is managing the amount of data + * buffered on the JVM heap without introducing control structures which + * can result in deadlock or starvation. One way to manage this is to move + * the data off of the JVM heap onto direct ByteBuffers and then + * potentially spilling blocks to disk, e.g., using an RWStore based cache + * pattern. + */ +public class ChunkedRunningQuery extends AbstractRunningQuery { + + private final static transient Logger log = Logger + .getLogger(ChunkedRunningQuery.class); + + /** + * Logger for the {@link ChunkTask}. + */ + private final static Logger chunkTaskLog = Logger + .getLogger(ChunkTask.class); + +// /** +// * The maximum number of operator tasks which may be concurrently executed +// * for a given (bopId,shardId). +// * +// * @see QueryEngineTestAnnotations#MAX_CONCURRENT_TASKS_PER_OPERATOR_AND_SHARD +// */ +// final private int maxConcurrentTasksPerOperatorAndShard; + +// /** +// * The maximum #of concurrent tasks for this query across all operators and +// * shards. +// * +// * Note: This is not a safe option and MUST be removed. It is possible for +// * N-1 tasks to backup with the Nth task not running due to concurrent +// * execution of some of the N-t tasks. +// */ +// final private int maxConcurrentTasks = 10; + + /* + * FIXME Explore the use of this semaphore to limit the maximum #of messages + * further. (Note that placing a limit on messages would allow us to buffer + * potentially many chunks. 
That could be solved by making LocalChunkMessage + * transparent in terms of the #of chunks or _binding_sets_ which it is + * carrying, but let's take this one step at a time). + * + * The first issue is ensuring that the query continues to make progress when + * a semaphore with a limited #of permits is introduced. This is because the + * ChunkFutureTask only attempts to schedule the next task for a given + * (bopId,shardId) but we could have failed to accept outstanding work for + * any of a number of operator/shard combinations. Likewise, the QueryEngine + * tells the RunningQuery to schedule work each time a message is dropped + * onto the QueryEngine, but the signal to execute more work is lost if the + * permits were not available immediately. + * + * One possibility would be to have a delayed retry. Another would be to + * have ChunkFutureTask try to run *any* messages, not just messages for the + * same (bopId,shardId). + * + * Also, when scheduling work, there needs to be some bias towards the + * downstream operators in the query plan in order to ensure that they get a + * chance to clear work from upstream operators. This suggests that we might + * carry an order[] and use it to scan the work queue -- or make the work + * queue a priority heap using the order[] to place a primary sort over the + * bopIds in terms of the evaluation order and letting the shardIds fall in + * increasing shard order so we have a total order for the priority heap (a + * total order may also require a tie breaker, but I think that the priority + * heap allows ties). + * + * This concept of memory overhead and permits would be associated with the + * workload waiting on a given node for processing. (In scale-out, we do not + * care how much data is moving in the cluster, only how much data is + * challenging an individual machine). + * + * This emphasizes again why we need to get the data off of the Java heap. + * + * The same concept should apply for chained buffers. 
Maybe one way to do + * this is to allocate a fixed budget to each query for the Java heap and + * the C heap and then the query blocks or goes to disk. + */ +// /** +// * The maximum number of binding sets which may be outstanding before a task +// * which is producing binding sets will block. This value may be used to +// * limit the memory demand of a query in which some operators produce +// * binding sets faster than other operators can consume them. +// * +// * @todo This could be generalized to consider the Java heap separately from +// * the native heap as we get into the use of native ByteBuffers to +// * buffer intermediate results. +// * +// * @todo This is expressed in terms of messages and not {@link IBindingSet}s +// * because the {@link LocalChunkMessage} does not self-report the #of +// * {@link IBindingSet}s (or chunks). [It should really be bytes on the +// * heap even if we can count binding sets and #s of bindings, but we +// * do not serialize all binding sets so we have to have one measure +// * for serialized and one measure for live objects.] +// */ +// final private int maxOutstandingMessageCount = 100; +// +// /** +// * A counting semaphore used to limit the #of outstanding binding set chunks +// * which may be buffered before a producer will block when trying to emit +// * another chunk. +// * +// * @see HandleChunkBuffer#outputChunk(IBindingSet[]) +// * @see #scheduleNext(BSBundle) +// * +// * @see #maxOutstandingMessageCount +// */ +// final private Semaphore outstandingMessageSemaphore = new Semaphore(maxOutstandingMessageCount); + + /** + * A collection of (bopId,partitionId) keys mapped onto a collection of + * operator task evaluation contexts for currently executing operators for + * this query. + */ + private final ConcurrentHashMap<BSBundle, ConcurrentHashMap<ChunkFutureTask,ChunkFutureTask>> operatorFutures; + + /** + * A map of unbounded work queues for each (bopId,partitionId). Empty queues + * are removed from the map. 
+ * <p> + * The map is guarded by the {@link #lock}. + */ + private final Map<BSBundle, BlockingQueue<IChunkMessage<IBindingSet>>> operatorQueues; + +// /** +// * When running in stand alone, we can chain together the operators and have +// * much higher throughput. Each operator has an {@link BlockingBuffer} which +// * is essentially its input queue. The operator will drain its input queue +// * using {@link BlockingBuffer#iterator()}. +// * <p> +// * Each operator closes its {@link IBlockingBuffer} sink(s) once its own +// * source has been closed and it has finished processing that source. Since +// * multiple producers can target the same operator, we need a means to +// * ensure that the source for the target operator is not closed until each +// * producer which targets that operator has closed its corresponding sink. +// * <p> +// * In order to support this many-to-one producer/consumer pattern, we wrap +// * the input queue (a {@link BlockingBuffer}) for each operator having +// * multiple sources wi... [truncated message content] |
From: <tho...@us...> - 2010-12-22 17:32:42
|
Revision: 4043 http://bigdata.svn.sourceforge.net/bigdata/?rev=4043&view=rev Author: thompsonbry Date: 2010-12-22 17:32:36 +0000 (Wed, 22 Dec 2010) Log Message: ----------- Added test for the RWStore to BTree.removeAll() so that it will now cause the tuples to be removed and the nodes and leaves recycled. However, this is still not optimal for the RWStore per the comments in BTree.removeAll(). Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/BTree.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/BTree.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/BTree.java 2010-12-22 17:22:01 UTC (rev 4042) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/BTree.java 2010-12-22 17:32:36 UTC (rev 4043) @@ -39,6 +39,7 @@ import com.bigdata.journal.ICommitter; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.Name2Addr; +import com.bigdata.journal.RWStrategy; import com.bigdata.journal.Name2Addr.Entry; import com.bigdata.mdi.IResourceMetadata; import com.bigdata.mdi.JournalMetadata; @@ -1173,8 +1174,21 @@ assertNotReadOnly(); - if (getIndexMetadata().getDeleteMarkers()) { - + /* + * FIXME Per https://sourceforge.net/apps/trac/bigdata/ticket/221, we + * should special case this for the RWStore when delete markers are not + * enabled and just issue deletes against each node and leaf in the + * BTree. This could be done using a post-order traversal of the nodes + * and leaves such that the parent is not removed from the store until + * its children have been removed. The deletes should be low-level + * IRawStore#delete(addr) invocations without maintenance to the B+Tree + * data structures. Afterwards replaceRootWithEmptyLeaf() should be + * invoked to discard the hard reference ring buffer and associate a new + * root leaf with the B+Tree. 
+ */ + if (getIndexMetadata().getDeleteMarkers() + || getStore() instanceof RWStrategy) { + /* * Write deletion markers for each non-deleted entry. When the * transaction commits, those delete markers will have to validate This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-22 17:22:07
|
Revision: 4042 http://bigdata.svn.sourceforge.net/bigdata/?rev=4042&view=rev Author: thompsonbry Date: 2010-12-22 17:22:01 +0000 (Wed, 22 Dec 2010) Log Message: ----------- Added logic to remove the justification chains (when present) to BigdataSail.removeAllEntailments() Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2010-12-22 12:51:06 UTC (rev 4041) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2010-12-22 17:22:01 UTC (rev 4042) @@ -112,6 +112,7 @@ import org.openrdf.sail.SailConnectionListener; import org.openrdf.sail.SailException; +import com.bigdata.btree.IRangeQuery; import com.bigdata.journal.IIndexManager; import com.bigdata.journal.ITransactionService; import com.bigdata.journal.ITx; @@ -2848,10 +2849,11 @@ getInferenceEngine().computeClosure(null/* focusStore */); } - + /** - * Removes all "inferred" statements from the database (does NOT commit - * the database). + * Removes all "inferred" statements from the database and the proof + * chains (if any) associated with those inferences (does NOT commit the + * database). */ public synchronized void removeAllEntailments() throws SailException { @@ -2871,6 +2873,15 @@ database .getAccessPath(NULL, NULL, NULL, InferredSPOFilter.INSTANCE) .removeAll(); + + if (database.isJustify()) { + database + .getSPORelation() + .getJustificationIndex() + .rangeIterator(null/* fromKey */, null/* toKey */, + 0/* capacity */, + IRangeQuery.REMOVEALL/* flags */, null/* filterCtor */); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-22 12:51:13
|
Revision: 4041 http://bigdata.svn.sourceforge.net/bigdata/?rev=4041&view=rev Author: thompsonbry Date: 2010-12-22 12:51:06 +0000 (Wed, 22 Dec 2010) Log Message: ----------- Edits to the 2nd and 3rd unit tests in this class to mark the conditional binding groups; to correct the conditional routing on the 3rd test; and to correct one of the expected results which passes the optional join group (per the javadoc, the 4th binding should have been Leon, not John). Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2010-12-22 09:49:57 UTC (rev 4040) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2010-12-22 12:51:06 UTC (rev 4041) @@ -143,13 +143,13 @@ new E("Paul", "Mary"),// [0] new E("Paul", "Brad"),// [1] - new E("John", "Mary"),// [0] - new E("John", "Brad"),// [1] + new E("John", "Mary"),// [2] + new E("John", "Brad"),// [3] - new E("Mary", "Brad"),// [1] + new E("Mary", "Brad"),// [4] - new E("Brad", "Fred"),// [1] - new E("Brad", "Leon"),// [1] + new E("Brad", "Fred"),// [5] + new E("Brad", "Leon"),// [6] }; // insert data (the records are not pre-sorted). 
@@ -277,7 +277,7 @@ final IVariable<?> c = Var.var("c"); final IVariable<?> d = Var.var("d"); - final Object joinGroup1 = Integer.valueOf(1); + final Integer joinGroup1 = Integer.valueOf(1); final PipelineOp startOp = new StartOp(new BOp[] {}, NV.asMap(new NV[] {// @@ -489,7 +489,7 @@ * (brad leon) * </pre> * - * In this cases marked with a <code>*</code>, ?c will become temporarily + * In the cases marked with a <code>*</code>, ?c will become temporarily * bound to fred and leon (since brad knows fred and leon), but the (c d) * tail will fail since fred and leon don't know anyone else. At this point, * the ?c binding must be removed from the solution. @@ -506,19 +506,21 @@ public void test_query_optionals_filter() throws Exception { final int startId = 1; - final int joinId1 = 2; - final int predId1 = 3; - final int joinId2 = 4; - final int predId2 = 5; - final int joinId3 = 6; - final int predId3 = 7; + final int joinId1 = 2; // + final int predId1 = 3; // (a,b) + final int joinId2 = 4; // : group1 + final int predId2 = 5; // (b,c) + final int joinId3 = 6; // : group1 + final int predId3 = 7; // (c,d) final int sliceId = 8; final IVariable<?> a = Var.var("a"); final IVariable<?> b = Var.var("b"); final IVariable<?> c = Var.var("c"); final IVariable<?> d = Var.var("d"); - + + final Integer joinGroup1 = Integer.valueOf(1); + final PipelineOp startOp = new StartOp(new BOp[] {}, NV.asMap(new NV[] {// new NV(Predicate.Annotations.BOP_ID, startId),// @@ -561,6 +563,7 @@ final PipelineOp join2Op = new PipelineJoin<E>(// new BOp[] { join1Op },// new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// // join is optional. 
new NV(PipelineJoin.Annotations.OPTIONAL, true),// @@ -570,6 +573,7 @@ final PipelineOp join3Op = new PipelineJoin<E>(// new BOp[] { join2Op },// new NV(Predicate.Annotations.BOP_ID, joinId3),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// // constraint d != Leon new NV(PipelineJoin.Annotations.CONSTRAINTS, @@ -713,7 +717,7 @@ * (brad leon) * </pre> * - * In this cases marked with a <code>*</code>, ?a is bound to Paul even + * In the cases marked with a <code>*</code>, ?a is bound to Paul even * though there is a filter that specifically prohibits a = Paul. This is * because the filter is inside the optional join group, which means that * solutions can still include a = Paul, but the optional join group should @@ -723,19 +727,21 @@ final int startId = 1; final int joinId1 = 2; - final int predId1 = 3; - final int condId = 4; - final int joinId2 = 5; - final int predId2 = 6; - final int joinId3 = 7; - final int predId3 = 8; + final int predId1 = 3; // (a,b) + final int condId = 4; // (a != Paul) + final int joinId2 = 5; // : group1 + final int predId2 = 6; // (b,c) + final int joinId3 = 7; // : group1 + final int predId3 = 8; // (c,d) final int sliceId = 9; final IVariable<?> a = Var.var("a"); final IVariable<?> b = Var.var("b"); final IVariable<?> c = Var.var("c"); final IVariable<?> d = Var.var("d"); - + + final Integer joinGroup1 = Integer.valueOf(1); + /* * Not quite sure how to write this one. 
I think it probably goes * something like this: @@ -792,14 +798,15 @@ final ConditionalRoutingOp condOp = new ConditionalRoutingOp(new BOp[]{join1Op}, NV.asMap(new NV[]{// new NV(BOp.Annotations.BOP_ID,condId), - new NV(PipelineOp.Annotations.SINK_REF, joinId2), - new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId), + new NV(PipelineOp.Annotations.SINK_REF, sliceId), // a == Paul + new NV(PipelineOp.Annotations.ALT_SINK_REF, joinId2), // a != Paul new NV(ConditionalRoutingOp.Annotations.CONDITION, condition), })); final PipelineOp join2Op = new PipelineJoin<E>(// new BOp[] { condOp },// new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// // join is optional. new NV(PipelineJoin.Annotations.OPTIONAL, true),// @@ -809,6 +816,7 @@ final PipelineOp join3Op = new PipelineJoin<E>(// new BOp[] { join2Op },// new NV(Predicate.Annotations.BOP_ID, joinId3),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// // join is optional. new NV(PipelineJoin.Annotations.OPTIONAL, true),// @@ -853,7 +861,7 @@ new IConstant[] { new Constant<String>("John"), new Constant<String>("Mary"), new Constant<String>("Brad"), - new Constant<String>("John") }// + new Constant<String>("Leon") }// ), new ArrayBindingSet(// new IVariable[] { a, b, c, d },// This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mar...@us...> - 2010-12-22 09:50:03
|
Revision: 4040 http://bigdata.svn.sourceforge.net/bigdata/?rev=4040&view=rev Author: martyncutcher Date: 2010-12-22 09:49:57 +0000 (Wed, 22 Dec 2010) Log Message: ----------- 1) Fixes freeList management associated with releaseSession 2) Fixes AllocationContext freelist associations with FixedAllocators Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java 2010-12-21 23:07:08 UTC (rev 4039) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/AllocBlock.java 2010-12-22 09:49:57 UTC (rev 4040) @@ -313,8 +313,11 @@ * but not in the recalculated transient. Tested with new &= ~old; * * @param cache + * @return the number of allocations released */ - public void releaseSession(RWWriteCacheService cache) { + public int releaseSession(RWWriteCacheService cache) { + int freebits = 0; + if (m_addr != 0) { // check active! 
for (int i = 0; i < m_live.length; i++) { int chkbits = m_transients[i]; @@ -332,11 +335,15 @@ log.trace("releasing address: " + clr); cache.clearWrite(clr); + + freebits++; } } } } } + + return freebits; } public String show() { Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2010-12-21 23:07:08 UTC (rev 4039) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/FixedAllocator.java 2010-12-22 09:49:57 UTC (rev 4040) @@ -517,21 +517,8 @@ if (((AllocBlock) m_allocBlocks.get(block)) .freeBit(offset % nbits, m_sessionActive && !overideSession)) { // bit adjust - // Only add back to the free list this is a DirectFixedAllocator - // or the freeBits exceed the cDefaultFreeBitsThreshold - // If a DirectFixedAllocator then also ensure it is added to the - // front of the free list - if (m_freeBits++ == 0 && this instanceof DirectFixedAllocator) { - m_freeWaiting = false; - m_freeList.add(0, this); - } else if (m_freeWaiting && m_freeBits == m_store.cDefaultFreeBitsThreshold) { - m_freeWaiting = false; - - if (log.isDebugEnabled()) - log.debug("Returning Allocator to FreeList - " + m_size); - - m_freeList.add(this); - } + m_freeBits++; + checkFreeList(); } else { m_freeTransients++; } @@ -557,6 +544,22 @@ return false; } + private void checkFreeList() { + if (m_freeWaiting) { + if (m_freeBits > 0 && this instanceof DirectFixedAllocator) { + m_freeWaiting = false; + m_freeList.add(0, this); + } else if (m_freeBits >= m_store.cDefaultFreeBitsThreshold) { + m_freeWaiting = false; + + if (log.isDebugEnabled()) + log.debug("Returning Allocator to FreeList - " + m_size); + + m_freeList.add(this); + } + } + } + /** * The introduction of IAllocationContexts has added some complexity to * the older concept of a free list. 
With AllocationContexts it is @@ -803,9 +806,17 @@ if (this.m_sessionActive) { if (log.isTraceEnabled()) log.trace("Allocator: #" + m_index + " releasing session protection"); + + int releasedAllocations = 0; for (AllocBlock ab : m_allocBlocks) { - ab.releaseSession(cache); + releasedAllocations += ab.releaseSession(cache); } + + m_freeBits += releasedAllocations; + m_freeTransients -= releasedAllocations; + + checkFreeList(); + } } } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-12-21 23:07:08 UTC (rev 4039) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/rwstore/RWStore.java 2010-12-22 09:49:57 UTC (rev 4040) @@ -3556,6 +3556,10 @@ } + /** + * Must return the shadowed allocators to the parent/global + * environment, resetting the freeList association. + */ void release() { final ArrayList<FixedAllocator> freeFixed[] = m_parent != null ? m_parent.m_freeFixed : m_store.m_freeFixed; @@ -3565,6 +3569,7 @@ for (FixedAllocator f : m_allFixed) { f.setAllocationContext(pcontext); + f.setFreeList(freeFixed[m_store.fixedAllocatorIndex(f.m_size)]); } for (int i = 0; i < m_freeFixed.length; i++) { @@ -3597,6 +3602,7 @@ if (free.size() == 0) { final FixedAllocator falloc = establishFixedAllocator(i); falloc.setAllocationContext(m_context); + falloc.setFreeList(free); free.add(falloc); m_allFixed.add(falloc); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-21 23:07:15
|
Revision: 4039 http://bigdata.svn.sourceforge.net/bigdata/?rev=4039&view=rev Author: thompsonbry Date: 2010-12-21 23:07:08 +0000 (Tue, 21 Dec 2010) Log Message: ----------- Modified the QueryEngine (in RunningQuery) to support push/pop of binding sets when transitioning into or out of a conditional join group. I've updated the first of the unit tests developed by MikeP to show how to annotate the query plan in order to trigger the conditional binding mechanisms. It is clear that the conditional binding is working (solutions where the 3rd join fail discard the results from the 2nd join), but the query plan is overgenerating. Modified Paths: -------------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java Added Paths: ----------- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadConditionalGroupIdTypeException.java Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpContext.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -131,6 +131,7 @@ * join group. 
* * @see PipelineOp.Annotations#ALT_SINK_REF + * @see PipelineOp.Annotations#ALT_SINK_GROUP */ public final IBlockingBuffer<E[]> getSink2() { return sink2; Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BOpUtility.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -426,6 +426,57 @@ } /** + * Lookup the first operator in the specified conditional binding group and + * return its bopId. + * + * @param query + * The query plan. + * @param groupId + * The identifier for the desired conditional binding group. + * + * @return The bopId of the first operator in that conditional binding group + * -or- <code>null</code> if the specified conditional binding group + * does not exist in the query plan. + * + * @throws IllegalArgumentException + * if either argument is <code>null</code>. 
+ * + * @see PipelineOp.Annotations#CONDITIONAL_GROUP + * @see PipelineOp.Annotations#ALT_SINK_GROUP + */ + static public Integer getFirstBOpIdForConditionalGroup(final BOp query, + final Integer groupId) { + if (query == null) + throw new IllegalArgumentException(); + if (groupId == null) + throw new IllegalArgumentException(); + final Iterator<BOp> itr = postOrderIterator(query); + while (itr.hasNext()) { + final BOp t = itr.next(); + final Object x = t.getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); + if (x != null) { + if (!(x instanceof Integer)) { + throw new BadConditionalGroupIdTypeException( + "Must be Integer, not: " + x.getClass() + ": " + + PipelineOp.Annotations.CONDITIONAL_GROUP); + } + final Integer id = (Integer) t + .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); + if(id.equals(groupId)) { + /* + * Return the BOpId associated with the first operator in + * the pre-order traversal of the query plan which has the + * specified groupId. + */ + return t.getId(); + } + } + } + // No such groupId in the query plan. + return null; + } + + /** * Return the parent of the operator in the operator tree (this does not * search the annotations, just the children). * <p> Added: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadConditionalGroupIdTypeException.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadConditionalGroupIdTypeException.java (rev 0) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadConditionalGroupIdTypeException.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -0,0 +1,22 @@ +package com.bigdata.bop; + +/** + * Exception thrown when a {@link PipelineOp.Annotations#CONDITIONAL_GROUP} is + * not an {@link Integer}. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id: BadBOpIdTypeException.java 3466 2010-08-27 14:28:04Z + * thompsonbry $ + */ +public class BadConditionalGroupIdTypeException extends RuntimeException { + + /** + * @param msg + */ + public BadConditionalGroupIdTypeException(String msg) { + super(msg); + } + + private static final long serialVersionUID = 1L; + +} Property changes on: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/BadConditionalGroupIdTypeException.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/PipelineOp.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -33,10 +33,18 @@ import com.bigdata.bop.engine.BOpStats; import com.bigdata.bop.engine.QueryEngine; +import com.bigdata.bop.solutions.SliceOp; /** * Abstract base class for pipeline operators where the data moving along the * pipeline is chunks of {@link IBindingSet}s. + * <p> + * The top-level of a query plan is composed of a required + * {@link Annotations#JOIN_GRAPH}s followed by a mixture of optional joins and + * {@link Annotations#CONDITIONAL_GROUP}s. A + * {@link Annotations#CONDITIONAL_GROUP} will have at least one required join + * (in a {@link Annotations#JOIN_GRAPH}) followed by zero or more optional + * joins. * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ @@ -61,6 +69,8 @@ * The value of the annotation is the {@link BOp.Annotations#BOP_ID} of * the ancestor in the operator tree which serves as the alternative * sink for binding sets (default is no alternative sink). 
+ * + * @see #ALT_SINK_GROUP */ String ALT_SINK_REF = PipelineOp.class.getName() + ".altSinkRef"; @@ -82,46 +92,73 @@ boolean DEFAULT_SHARED_STATE = false; - /** - * Annotation used to mark the set of non-optional joins which may be - * input to either the static or runtime query optimizer. Joins within a - * join graph may be freely reordered by the query optimizer in order to - * minimize the amount of work required to compute the solutions. - * <p> - * Note: Optional joins MAY NOT appear within the a join graph. Optional - * joins SHOULD be evaluated as part of the "tail plan" following the - * join graph, but before operations such as SORT, DISTINCT, etc. - * - * @todo We should be able to automatically apply the static or runtime - * query optimizers to an operator tree using this annotation to - * identify the join graphs. - */ + /** + * Annotation used to mark a set of (non-optional) joins which may be + * freely reordered by the query optimizer in order to minimize the + * amount of work required to compute the solutions. + * <p> + * Note: Optional joins MAY NOT appear within a join graph. Optional + * joins SHOULD be evaluated as part of the "tail plan" following the + * join graph, but before operations such as SORT, DISTINCT, etc. When + * the query plan includes {@link #CONDITIONAL_GROUP}s, those groups + * include a leading {@link #JOIN_GRAPH} (required joins) followed by + * zero or more optional joins. + */ String JOIN_GRAPH = PipelineOp.class.getName() + ".joinGraph"; - /** - * Annotation marks a high level join group, which may include optional - * joins. Join groups are marked in order to decide the re-entry point - * in the query plan when a join within an optional join group fails. - * Also, the top-level join group is not marked -- only nested join - * groups are marked. 
This is used by the decision rule to handle do - * {@link IBindingSet#push()} when entering a - * <p> - * This is different from a {@link #JOIN_GRAPH} primarily in that the - * latter may not include optional joins. - */ - String JOIN_GROUP = PipelineOp.class.getName() + ".joinGroup"; + /** + * Annotation used to mark a set of operators belonging to a conditional + * binding group. Bindings within the group will be discarded if + * any required operator in the group fails. For example, if a binding + * set exits via the alternative sink for a required join then any + * conditional bindings within the group will be discarded. + * <p> + * Together with {@link #ALT_SINK_GROUP}, the {@link #CONDITIONAL_GROUP} + * annotation provides the information necessary in order to decide the + * re-entry point in the query plan when a join within a conditional + * binding group fails. + * <p> + * The {@link #CONDITIONAL_GROUP} annotation controls the + * {@link IBindingSet#push()} and {@link IBindingSet#pop(boolean)} of + * individual solutions as they propagate through the pipeline. When a + * pipeline starts, the {@link IBindingSet} stack contains only the top + * level symbol table (i.e., name/value bindings). When an intermediate + * solution enters a {@link PipelineOp} marked as belonging to a + * {@link #CONDITIONAL_GROUP}, a new symbol table is + * {@link IBindingSet#push() pushed} onto the stack for that solution. + * If the solution leaves the optional join group via the default sink, + * then the symbol table is "saved" when it is + * {@link IBindingSet#pop(boolean) popped} off of the stack. If the + * solution leaves the join group via the alternative sink, then the + * symbol table is discarded when it is {@link IBindingSet#pop(boolean) + * popped} off of the stack. This provides for conditional binding of + * variables within the operators of the group. 
+ * <p> + * The value of the {@link #CONDITIONAL_GROUP} is an {@link Integer} + * which uniquely identifies the group within the query. + */ + String CONDITIONAL_GROUP = PipelineOp.class.getName() + ".conditionalGroup"; - /** - * Annotation is used to designate the target when a join within an - * optional join group fails. The value of this annotation must be the - * {@link #JOIN_GROUP} identifier corresponding to the next join group - * in the query plan. The target join group identifier is specified - * (rather than the bopId of the target join) since the joins in the - * target join group may be reordered by the query optimizer. The entry - * point for solutions redirected to the {@link #ALT_SINK_GROUP} is - * therefore the first operator in the target {@link #JOIN_GROUP}. This - * decouples the routing decisions from the join ordering decisions. - */ + /** + * Annotation used to designate the target when a required operator + * within an {@link #CONDITIONAL_GROUP} fails. The value of this + * annotation must be the {@link #CONDITIONAL_GROUP} identifier + * corresponding to the next conditional binding group in the query + * plan. If there is no such group, then the {@link #ALT_SINK_REF} + * should be used instead to specify the target operator in the + * pipeline, e.g., a {@link SliceOp}. + * <p> + * The target {@link #CONDITIONAL_GROUP} is specified (rather than the + * bopId of the target join) since the non-optional joins in the target + * {@link #CONDITIONAL_GROUP} be reordered by the query optimizer. The + * entry point for solutions redirected to the {@link #ALT_SINK_GROUP} + * is therefore the first operator in the target + * {@link #CONDITIONAL_GROUP}. This decouples the routing decisions from + * the join ordering decisions. 
+ * + * @see #CONDITIONAL_GROUP + * @see #ALT_SINK_REF + */ String ALT_SINK_GROUP = PipelineOp.class.getName() + ".altSinkGroup"; } Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/engine/RunningQuery.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -41,7 +41,6 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; @@ -1568,8 +1567,9 @@ /** * The index of the operator which is the alternative sink for outputs * generated by this evaluation. This is <code>null</code> unless the - * operator explicitly specifies an alternative sink using - * {@link PipelineOp.Annotations#ALT_SINK_REF}. + * operator explicitly specifies an alternative sink using either + * {@link PipelineOp.Annotations#ALT_SINK_REF} or + * {@link PipelineOp.Annotations#ALT_SINK_GROUP}. */ private final Integer altSinkId; @@ -1674,9 +1674,30 @@ */ sinkId = BOpUtility.getEffectiveDefaultSink(bop, p); - // altSink (null when not specified). - altSinkId = (Integer) op - .getProperty(PipelineOp.Annotations.ALT_SINK_REF); + { + // altSink (null when not specified). 
+ final Integer altSinkId = (Integer) op + .getProperty(PipelineOp.Annotations.ALT_SINK_REF); + final Integer altSinkGroup = (Integer) op + .getProperty(PipelineOp.Annotations.ALT_SINK_GROUP); + if (altSinkId != null && altSinkGroup != null) + throw new RuntimeException( + "Annotations are mutually exclusive: " + + PipelineOp.Annotations.ALT_SINK_REF + + " and " + + PipelineOp.Annotations.ALT_SINK_GROUP); + if (altSinkGroup != null) { + /* + * Lookup the first pipeline op in the conditional binding + * group and use its bopId as the altSinkId. + */ + this.altSinkId = BOpUtility.getFirstBOpIdForConditionalGroup( + query, altSinkGroup); + } else { + // MAY be null. + this.altSinkId = altSinkId; + } + } if (altSinkId != null && !bopIndex.containsKey(altSinkId)) throw new NoSuchBOpException(altSinkId); @@ -1714,13 +1735,39 @@ } assert stats != null; - sink = (p == null ? queryBuffer : newBuffer(op, sinkId, - sinkMessagesOut, stats)); + // The groupId (if any) for this operator. + final Integer fromGroupId = (Integer) op + .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); - altSink = altSinkId == null ? null - : altSinkId.equals(sinkId) ? sink : newBuffer(op, - altSinkId, altSinkMessagesOut, stats); + if (p == null) { + sink = queryBuffer; + } else { + final BOp targetOp = bopIndex.get(sinkId); + final Integer toGroupId = (Integer) targetOp + .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); + sink = newBuffer(op, sinkId, new SinkTransitionMetadata( + fromGroupId, toGroupId, true/* isSink */), + sinkMessagesOut, stats); + } + if (altSinkId == null) { + altSink = null; + // } else if(altSinkId.equals(sinkId)){ + /* + * @todo Note: The optimization when altSink:=sink is now only + * possible when the groupId is not changing during the + * transition. 
+ */ + // altSink = sink; + } else { + final BOp targetOp = bopIndex.get(altSinkId); + final Integer toGroupId = (Integer) targetOp + .getProperty(PipelineOp.Annotations.CONDITIONAL_GROUP); + altSink = newBuffer(op, altSinkId, new SinkTransitionMetadata( + fromGroupId, toGroupId, false/* isSink */), + altSinkMessagesOut, stats); + } + // context : @todo pass in IChunkMessage or IChunkAccessor context = new BOpContext<IBindingSet>(RunningQuery.this, partitionId, stats, src, sink, altSink); @@ -1748,7 +1795,9 @@ * target that sink. */ private IBlockingBuffer<IBindingSet[]> newBuffer(final PipelineOp op, - final int sinkId, final AtomicInteger sinkMessagesOut, final BOpStats stats) { + final int sinkId, + final SinkTransitionMetadata sinkTransitionMetadata, + final AtomicInteger sinkMessagesOut, final BOpStats stats) { // final MultiplexBlockingBuffer<IBindingSet[]> factory = inputBufferMap == null ? null // : inputBufferMap.get(sinkId); @@ -1774,7 +1823,8 @@ // BufferAnnotations.chunkTimeoutUnit); return new HandleChunkBuffer(RunningQuery.this, bopId, sinkId, op - .getChunkCapacity(), sinkMessagesOut, stats); + .getChunkCapacity(), sinkTransitionMetadata, sinkMessagesOut, + stats); } @@ -1814,7 +1864,115 @@ } // call() } // class ChunkTask + + /** + * In order to setup the push/pop of the sink and altSink we need to specify + * certain metadata about the source groupId, the target groupId, and + * whether the transition is via the sink or the altSink. The groupId for + * the source and target operators MAY be null, in which case the operator + * is understood to be outside of any conditional binding group. + * <p> + * The action to be taken when the binding set is written to the sink or the + * altSink is determined by a simple decision matrix. 
+ * + * <pre> + * | toGroup + * fromGroup + null + newGroup + sameGroup + * null | NOP | Push | n/a + * group | Pop | Pop+Push | NOP + * </pre> + * + * The value of the [boolean:save] flag for pop is decided based on whether + * the transition is via the default sink (save:=true) or the altSink + * (save:=false). + * + * @see PipelineOp.Annotations#CONDITIONAL_GROUP + */ + private static class SinkTransitionMetadata { + + private final Integer fromGroupId; + private final Integer toGroupId; + private final boolean isSink; + + public String toString() { + + return getClass().getSimpleName() + "{from=" + fromGroupId + ",to=" + + toGroupId + ",isSink=" + isSink + "}"; + + } + + public SinkTransitionMetadata(final Integer fromGroupId, + final Integer toGroupId, final boolean isSink) { + + this.fromGroupId = fromGroupId; + + this.toGroupId = toGroupId; + + this.isSink = isSink; + + } + + /** + * Apply the appropriate action(s) to the binding set. + * + * @param bset + * The binding set. + */ + public void handleBindingSet(final IBindingSet bset) { + if (fromGroupId == null) { + if (toGroupId == null) + return; + // Transition from no group to some group. + bset.push(); + return; + } else { + if (toGroupId == null) + // Transition from a group to no group. + bset.pop(isSink/* save */); + else if (toGroupId.equals(fromGroupId)) { + // NOP (transition to the same group) + } else { + // Transition to a different group. + bset.pop(isSink/* save */); + bset.push(); + } + } + } + + } +// /** +// * Type safe enumeration for after action on a generated binding set used to +// * manage exit from a conditional binding group via the defaultSink and the +// * altSink. +// * +// * @author <a href="mailto:tho...@us...">Bryan +// * Thompson</a> +// */ +// static private enum AfterActionEnum { +// /** +// * NOP +// */ +// None, +// /** +// * Use {@link IBindingSet#pop(boolean)} to discard the symbol table on +// * the top of the stack. 
+// */ +// Discard, +// /** +// * Use {@link IBindingSet#pop(boolean)} to save the symbol table on the +// * top of the stack. +// */ +// Save, +// /** +// * Use {@link IBindingSet#push()} to push a symbol table on the top of +// * the stack. Bindings made against that symbol table will be +// * conditional until they are either {@link #Discard discarded} or +// * {@link #Save saved}. +// */ +// Push; +// } + /** * Class traps {@link #add(IBindingSet[])} to handle the IBindingSet[] * chunks as they are generated by the running operator task, invoking @@ -1841,6 +1999,8 @@ */ private final int chunkCapacity; + private final SinkTransitionMetadata sinkTransitionMetadata; + private final AtomicInteger sinkMessagesOut; private final BOpStats stats; @@ -1869,11 +2029,13 @@ */ public HandleChunkBuffer(final RunningQuery q, final int bopId, final int sinkId, final int chunkCapacity, + final SinkTransitionMetadata sinkTransitionMetadata, final AtomicInteger sinkMessagesOut, final BOpStats stats) { this.q = q; this.bopId = bopId; this.sinkId = sinkId; this.chunkCapacity = chunkCapacity; + this.sinkTransitionMetadata = sinkTransitionMetadata; this.sinkMessagesOut = sinkMessagesOut; this.stats = stats; } @@ -1892,6 +2054,10 @@ if(!open) throw new BufferClosedException(); + for (IBindingSet bset : e) { + sinkTransitionMetadata.handleBindingSet(bset); + } + // if (chunkCapacity != 0 && e.length < (chunkCapacity >> 1)) { // /* // * The caller's array is significantly smaller than the target Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/java/com/bigdata/bop/join/PipelineJoin.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -137,7 +137,8 @@ /** * Marks the join as "optional" in the SPARQL sense. 
Binding sets which * fail the join will be routed to the alternative sink as specified by - * {@link PipelineOp.Annotations#ALT_SINK_REF}. + * either {@link PipelineOp.Annotations#ALT_SINK_REF} or + * {@link PipelineOp.Annotations#ALT_SINK_GROUP}. * * @see #DEFAULT_OPTIONAL */ Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/TestBOpUtility.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -31,6 +31,14 @@ import java.util.Map; import java.util.concurrent.FutureTask; +import com.bigdata.bop.IPredicate.Annotations; +import com.bigdata.bop.ap.E; +import com.bigdata.bop.ap.Predicate; +import com.bigdata.bop.bset.StartOp; +import com.bigdata.bop.join.PipelineJoin; +import com.bigdata.bop.solutions.SliceOp; +import com.bigdata.journal.ITx; + import junit.framework.TestCase2; /** @@ -579,6 +587,120 @@ } /** + * A conditional join group: + * + * <pre> + * (a b) + * optional { + * (b c) + * (c d) + * } + * </pre> + * + * where the groupId for the optional join group is ONE (1). The test should + * locate the first {@link PipelineJoin} in that join group, which is the + * one reading on the <code>(b c)</code> access path. + */ + public void test_getFirstBOpIdForConditionalGroup() { + + final String namespace = "kb"; + + final int startId = 1; // + final int joinId1 = 2; // : base join group. 
+ final int predId1 = 3; // (a b) + final int joinId2 = 4; // : joinGroup1 + final int predId2 = 5; // (b c) + final int joinId3 = 6; // : joinGroup1 + final int predId3 = 7; // (c d) + final int sliceId = 8; // + + final IVariable<?> a = Var.var("a"); + final IVariable<?> b = Var.var("b"); + final IVariable<?> c = Var.var("c"); + final IVariable<?> d = Var.var("d"); + + final Integer joinGroup1 = Integer.valueOf(1); + + final PipelineOp startOp = new StartOp(new BOp[] {}, + NV.asMap(new NV[] {// + new NV(Predicate.Annotations.BOP_ID, startId),// + new NV(SliceOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final Predicate<?> pred1Op = new Predicate<E>( + new IVariableOrConstant[] { a, b }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId1),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred2Op = new Predicate<E>( + new IVariableOrConstant[] { b, c }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId2),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final Predicate<?> pred3Op = new Predicate<E>( + new IVariableOrConstant[] { c, d }, NV + .asMap(new NV[] {// + new NV(Predicate.Annotations.RELATION_NAME, + new String[] { namespace }),// + new NV(Predicate.Annotations.BOP_ID, predId3),// + new NV(Annotations.TIMESTAMP, ITx.READ_COMMITTED),// + })); + + final PipelineOp join1Op = new PipelineJoin<E>(// + new BOp[]{startOp},// + new NV(Predicate.Annotations.BOP_ID, joinId1),// + new NV(PipelineJoin.Annotations.PREDICATE,pred1Op)); + + final PipelineOp join2Op = new PipelineJoin<E>(// + new BOp[] { join1Op },// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, 
pred2Op),// + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp join3Op = new PipelineJoin<E>(// + new BOp[] { join2Op },// + new NV(Predicate.Annotations.BOP_ID, joinId3),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// + new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// + // join is optional. + new NV(PipelineJoin.Annotations.OPTIONAL, true),// + // optional target is the same as the default target. + new NV(PipelineOp.Annotations.ALT_SINK_REF, sliceId)); + + final PipelineOp sliceOp = new SliceOp(// + new BOp[]{join3Op}, + NV.asMap(new NV[] {// + new NV(BOp.Annotations.BOP_ID, sliceId),// + new NV(BOp.Annotations.EVALUATION_CONTEXT, + BOpEvaluationContext.CONTROLLER),// + })); + + final PipelineOp query = sliceOp; + + // verify found. + assertEquals(Integer.valueOf(joinId2), BOpUtility + .getFirstBOpIdForConditionalGroup(query, joinGroup1)); + + // verify not-found. + assertEquals(null, BOpUtility.getFirstBOpIdForConditionalGroup(query, + Integer.valueOf(2)/* groupId */)); + + } + + /** * Unit test for {@link BOpUtility#getParent(BOp, BOp)}. 
*/ public void test_getParent() { Modified: branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java =================================================================== --- branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2010-12-21 22:21:54 UTC (rev 4038) +++ branches/QUADS_QUERY_BRANCH/bigdata/src/test/com/bigdata/bop/engine/TestQueryEngineOptionalJoins.java 2010-12-21 23:07:08 UTC (rev 4039) @@ -28,6 +28,8 @@ package com.bigdata.bop.engine; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.Properties; import java.util.UUID; @@ -218,39 +220,46 @@ /** * Unit test for optional join group. Three joins are used and target a - * {@link SliceOp}. The 2nd and 3rd joins are an optional join group. - * Intermediate results which do not succeed on the optional join are + * {@link SliceOp}. The 2nd and 3rd joins are an optional join group. + * Intermediate results which do not succeed on the optional join are * forwarded to the {@link SliceOp} which is the target specified by the * {@link PipelineOp.Annotations#ALT_SINK_REF}. * * The optional join group takes the form: + * + * <pre> * (a b) * optional { * (b c) * (c d) * } - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be four solutions - * that succeed the optional join group: + * </pre> * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. 
There should be four solutions that + * succeed the optional join group: + * + * <pre> * (paul mary brad fred) * (paul mary brad leon) * (john mary brad fred) * (john mary brad leon) + * </pre> * * and five more that don't succeed the optional join group: * + * <pre> * (paul brad) * * (john brad) * * (mary brad) * * (brad fred) * (brad leon) + * </pre> * - * In this cases marked with a *, ?c will become temporarily bound to fred - * and leon (since brad knows fred and leon), but the (c d) tail will fail - * since fred and leon don't know anyone else. At this point, the ?c binding - * must be removed from the solution. + * In this cases marked with a <code>*</code>, ?c will become temporarily + * bound to fred and leon (since brad knows fred and leon), but the (c d) + * tail will fail since fred and leon don't know anyone else. At this point, + * the ?c binding must be removed from the solution. */ public void test_query_join2_optionals() throws Exception { @@ -267,6 +276,8 @@ final IVariable<?> b = Var.var("b"); final IVariable<?> c = Var.var("c"); final IVariable<?> d = Var.var("d"); + + final Object joinGroup1 = Integer.valueOf(1); final PipelineOp startOp = new StartOp(new BOp[] {}, NV.asMap(new NV[] {// @@ -309,7 +320,8 @@ final PipelineOp join2Op = new PipelineJoin<E>(// new BOp[] { join1Op },// - new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(Predicate.Annotations.BOP_ID, joinId2),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// new NV(PipelineJoin.Annotations.PREDICATE, pred2Op),// // join is optional. new NV(PipelineJoin.Annotations.OPTIONAL, true),// @@ -319,6 +331,7 @@ final PipelineOp join3Op = new PipelineJoin<E>(// new BOp[] { join2Op },// new NV(Predicate.Annotations.BOP_ID, joinId3),// + new NV(PipelineOp.Annotations.CONDITIONAL_GROUP, joinGroup1),// new NV(PipelineJoin.Annotations.PREDICATE, pred3Op),// // join is optional. 
new NV(PipelineJoin.Annotations.OPTIONAL, true),// @@ -415,6 +428,11 @@ ) }; + /* + * junit.framework.AssertionFailedError: Iterator will deliver too + * many objects: reminder(3)=[{ a=John, b=Brad }, { a=Mary, b=Brad + * }, { a=Paul, b=Brad }]. + */ assertSameSolutionsAnyOrder(expected, new Dechunkerator<IBindingSet>(runningQuery.iterator())); @@ -434,45 +452,54 @@ } /** - * Unit test for optional join group with a filter. Three joins are used - * and target a {@link SliceOp}. The 2nd and 3rd joins are an optional join - * group. Intermediate results which do not succeed on the optional join are + * Unit test for optional join group with a filter. Three joins are used and + * target a {@link SliceOp}. The 2nd and 3rd joins are an optional join + * group. Intermediate results which do not succeed on the optional join are * forwarded to the {@link SliceOp} which is the target specified by the - * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group + * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group * contains a filter. + * <p> + * The optional join group takes the form: * - * The optional join group takes the form: + * <pre> * (a b) * optional { * (b c) * (c d) * filter(d != Leon) * } - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be two solutions - * that succeed the optional join group: + * </pre> * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. 
There should be two solutions that + * succeed the optional join group: + * + * <pre> * (paul mary brad fred) * (john mary brad fred) + * </pre> * * and five more that don't succeed the optional join group: * + * <pre> * (paul brad) * * (john brad) * * (mary brad) * * (brad fred) * (brad leon) + * </pre> * - * In this cases marked with a *, ?c will become temporarily bound to fred - * and leon (since brad knows fred and leon), but the (c d) tail will fail - * since fred and leon don't know anyone else. At this point, the ?c binding - * must be removed from the solution. - * + * In this cases marked with a <code>*</code>, ?c will become temporarily + * bound to fred and leon (since brad knows fred and leon), but the (c d) + * tail will fail since fred and leon don't know anyone else. At this point, + * the ?c binding must be removed from the solution. + * <p> * The filter (d != Leon) will prune the two solutions: * + * <pre> * (paul mary brad leon) * (john mary brad leon) + * </pre> * * since ?d is bound to Leon in those cases. */ @@ -647,43 +674,50 @@ } /** - * Unit test for optional join group with a filter on a variable outside - * the optional join group. Three joins are used and target a - * {@link SliceOp}. The 2nd and 3rd joins are an optional join - * group. Intermediate results which do not succeed on the optional join are - * forwarded to the {@link SliceOp} which is the target specified by the - * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group + * Unit test for optional join group with a filter on a variable outside the + * optional join group. Three joins are used and target a {@link SliceOp}. + * The 2nd and 3rd joins are an optional join group. Intermediate results + * which do not succeed on the optional join are forwarded to the + * {@link SliceOp} which is the target specified by the + * {@link PipelineOp.Annotations#ALT_SINK_REF}. The optional join group * contains a filter that uses a variable outside the optional join group. 
+ * <P> + * The query takes the form: * - * The query takes the form: + * <pre> * (a b) * optional { * (b c) * (c d) * filter(a != Paul) * } - * - * The (a b) tail will match everything in the knowledge base. The join - * group takes us two hops out from ?b. There should be two solutions - * that succeed the optional join group: + * </pre> * + * The (a b) tail will match everything in the knowledge base. The join + * group takes us two hops out from ?b. There should be two solutions that + * succeed the optional join group: + * + * <pre> * (john mary brad fred) * (john mary brad leon) + * </pre> * * and six more that don't succeed the optional join group: - * + * + * <pre> * (paul mary) * * (paul brad) * * (john brad) * (mary brad) * (brad fred) * (brad leon) + * </pre> * - * In this cases marked with a *, ?a is bound to Paul even though there is - * a filter that specifically prohibits a = Paul. This is because the filter - * is inside the optional join group, which means that solutions can still - * include a = Paul, but the optional join group should not run in that - * case. + * In this cases marked with a <code>*</code>, ?a is bound to Paul even + * though there is a filter that specifically prohibits a = Paul. This is + * because the filter is inside the optional join group, which means that + * solutions can still include a = Paul, but the optional join group should + * not run in that case. */ public void test_query_optionals_filter2() throws Exception { @@ -1006,8 +1040,15 @@ if (actual.hasNext()) { - fail("Iterator will deliver too many objects."); + final List<T> remainder = new LinkedList<T>(); + + while(actual.hasNext()) { + remainder.add(actual.next()); + } + fail("Iterator will deliver too many objects: reminder(" + + remainder.size() + ")=" + remainder); + } } finally { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-21 22:22:00
|
Revision: 4038 http://bigdata.svn.sourceforge.net/bigdata/?rev=4038&view=rev Author: thompsonbry Date: 2010-12-21 22:21:54 +0000 (Tue, 21 Dec 2010) Log Message: ----------- Turned off the commit before each round of closure for the RWStore. This provides a significant improvement in both performance and size on the disk. For example, a file which took 45G on the disk now occupies only 14G. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/ProgramTask.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/ProgramTask.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/ProgramTask.java 2010-12-21 21:10:00 UTC (rev 4037) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/relation/rule/eval/ProgramTask.java 2010-12-21 22:21:54 UTC (rev 4038) @@ -471,7 +471,7 @@ joinNexusFactory.setReadTimestamp(TimestampUtility .asHistoricalRead(lastCommitTime)); - } else if (indexManager instanceof Journal + } else if (false && indexManager instanceof Journal && ((Journal) indexManager).getBufferStrategy() .getBufferMode() == BufferMode.DiskRW) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-21 21:10:06
|
Revision: 4037 http://bigdata.svn.sourceforge.net/bigdata/?rev=4037&view=rev Author: thompsonbry Date: 2010-12-21 21:10:00 +0000 (Tue, 21 Dec 2010) Log Message: ----------- Javadoc Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-12-21 20:02:17 UTC (rev 4036) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-12-21 21:10:00 UTC (rev 4037) @@ -28,6 +28,8 @@ package com.bigdata.search; +import info.aduna.i18n.languagetag.IanaLanguageTag; + import java.io.IOException; import java.io.Reader; import java.io.StringReader; @@ -346,7 +348,12 @@ /** * The name of the {@link IAnalyzerFactory} class which will be used to * obtain analyzers when tokenizing documents and queries (default - * {@value #DEFAULT_ANALYZER_FACTORY_CLASS}). + * {@value #DEFAULT_ANALYZER_FACTORY_CLASS}). The specified class MUST + * implement {@link IAnalyzerFactory} and MUST have a constructor with + * the following signature: + * <pre> + * public MyAnalyzerFactory(FullTextIndexer indexer) + * </pre> */ String ANALYZER_FACTORY_CLASS = FullTextIndex.class.getName() + ".analyzerFactoryClass"; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-21 20:02:23
|
Revision: 4036 http://bigdata.svn.sourceforge.net/bigdata/?rev=4036&view=rev Author: thompsonbry Date: 2010-12-21 20:02:17 +0000 (Tue, 21 Dec 2010) Log Message: ----------- The distinctTermScan needed to explicitly specify IV.class rather than permitting the generic type of the backing array to be decided at runtime by inspection of the first element to be inserted into that array. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java 2010-12-21 20:01:29 UTC (rev 4035) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/spo/SPORelation.java 2010-12-21 20:02:17 UTC (rev 4036) @@ -1354,8 +1354,9 @@ }); - return new ChunkedWrappedIterator<IV>(itr); - + return new ChunkedWrappedIterator<IV>(itr, + IChunkedIterator.DEFAULT_CHUNK_SIZE, IV.class); + } /** * Efficient scan of the distinct term identifiers that appear in the first This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-21 20:01:36
|
Revision: 4035 http://bigdata.svn.sourceforge.net/bigdata/?rev=4035&view=rev Author: thompsonbry Date: 2010-12-21 20:01:29 +0000 (Tue, 21 Dec 2010) Log Message: ----------- Added interface, option, and default implementation to permit applications to override the factory returning the appropriate Lucene analyzer for tokenizing documents and queries based on a languageCode. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java Added Paths: ----------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/IAnalyzerFactory.java Added: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java (rev 0) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java 2010-12-21 20:01:29 UTC (rev 4035) @@ -0,0 +1,346 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 21, 2010 + */ + +package com.bigdata.search; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.br.BrazilianAnalyzer; +import org.apache.lucene.analysis.cjk.CJKAnalyzer; +import org.apache.lucene.analysis.cn.ChineseAnalyzer; +import org.apache.lucene.analysis.cz.CzechAnalyzer; +import org.apache.lucene.analysis.de.GermanAnalyzer; +import org.apache.lucene.analysis.el.GreekAnalyzer; +import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.apache.lucene.analysis.nl.DutchAnalyzer; +import org.apache.lucene.analysis.ru.RussianAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.th.ThaiAnalyzer; +import org.apache.lucene.util.Version; + +import com.bigdata.btree.keys.IKeyBuilder; +import com.bigdata.btree.keys.KeyBuilder; + +/** + * Default implementation registers a bunch of {@link Analyzer}s for various + * language codes and then serves the appropriate {@link Analyzer} based on + * the specified language code. 
+ * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public class DefaultAnalyzerFactory implements IAnalyzerFactory { + + private final FullTextIndex fullTextIndex; + + public DefaultAnalyzerFactory(final FullTextIndex fullTextIndex) { + + if (fullTextIndex == null) + throw new IllegalArgumentException(); + + this.fullTextIndex = fullTextIndex; + + } + + public Analyzer getAnalyzer(final String languageCode) { + + final IKeyBuilder keyBuilder = fullTextIndex.getKeyBuilder(); + + Map<String, AnalyzerConstructor> map = getAnalyzers(); + + AnalyzerConstructor ctor = null; + + if (languageCode == null) { + + if (keyBuilder.isUnicodeSupported()) { + + // The configured local for the database. + final Locale locale = ((KeyBuilder) keyBuilder) + .getSortKeyGenerator().getLocale(); + + // The analyzer for that locale. + Analyzer a = getAnalyzer(locale.getLanguage()); + + if (a != null) + return a; + + } + + // fall through + + } else { + + /* + * Check the declared analyzers. We first check the three letter + * language code. If we do not have a match there then we check the + * 2 letter language code. + */ + + String code = languageCode; + + if (code.length() > 3) { + + code = code.substring(0, 2); + + ctor = map.get(languageCode); + + } + + if (ctor == null && code.length() > 2) { + + code = code.substring(0, 1); + + ctor = map.get(languageCode); + + } + + } + + if (ctor == null) { + + // request the default analyzer. + + ctor = map.get(""); + + if (ctor == null) { + + throw new IllegalStateException("No entry for empty string?"); + + } + + } + + Analyzer a = ctor.newInstance(); + + return a; + + } + + abstract private static class AnalyzerConstructor { + + abstract public Analyzer newInstance(); + + } + + /** + * A map containing instances of the various kinds of analyzers that we know + * about. + * <p> + * Note: There MUST be an entry under the empty string (""). 
This entry will + * be requested when there is no entry for the specified language code. + */ + private Map<String,AnalyzerConstructor> analyzers; + + /** + * Initializes the various kinds of analyzers that we know about. + * <p> + * Note: Each {@link Analyzer} is registered under both the 3 letter and the + * 2 letter language codes. See <a + * href="http://www.loc.gov/standards/iso639-2/php/code_list.php">ISO 639-2</a>. + * + * @todo get some informed advice on which {@link Analyzer}s map onto which + * language codes. + * + * @todo thread safety? Analyzers produce token processors so maybe there is + * no problem here once things are initialized. If so, maybe this + * could be static. + * + * @todo configuration. Could be configured by a file containing a class + * name and a list of codes that are handled by that class. + * + * @todo strip language code down to 2/3 characters during lookup. + * + * @todo There are a lot of pidgins based on french, english, and other + * languages that are not being assigned here. + */ + synchronized private Map<String,AnalyzerConstructor> getAnalyzers() { + + if (analyzers != null) { + + return analyzers; + + } + + analyzers = new HashMap<String, AnalyzerConstructor>(); + + { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new BrazilianAnalyzer(Version.LUCENE_CURRENT); + } + }; + analyzers.put("por", a); + analyzers.put("pt", a); + } + + /* + * Claims to handle Chinese. Does single character extraction. Claims to + * produce smaller indices as a result. + * + * Note: you can not tokenize with the Chinese analyzer and the do + * search using the CJK analyzer and visa versa. + * + * Note: I have no idea whether this would work for Japanese and Korean + * as well. I expect so, but no real clue. 
+ */ + { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new ChineseAnalyzer(); + } + }; + analyzers.put("zho", a); + analyzers.put("chi", a); + analyzers.put("zh", a); + } + + /* + * Claims to handle Chinese, Japanese, Korean. Does double character + * extraction with overlap. + */ + { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new CJKAnalyzer(Version.LUCENE_CURRENT); + } + }; +// analyzers.put("zho", a); +// analyzers.put("chi", a); +// analyzers.put("zh", a); + analyzers.put("jpn", a); + analyzers.put("ja", a); + analyzers.put("jpn", a); + analyzers.put("kor",a); + analyzers.put("ko",a); + } + + { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new CzechAnalyzer(Version.LUCENE_CURRENT); + } + }; + analyzers.put("ces",a); + analyzers.put("cze",a); + analyzers.put("cs",a); + } + + { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new DutchAnalyzer(Version.LUCENE_CURRENT); + } + }; + analyzers.put("dut",a); + analyzers.put("nld",a); + analyzers.put("nl",a); + } + + { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new FrenchAnalyzer(Version.LUCENE_CURRENT); + } + }; + analyzers.put("fra",a); + analyzers.put("fre",a); + analyzers.put("fr",a); + } + + /* + * Note: There are a lot of language codes for German variants that + * might be useful here. + */ + { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new GermanAnalyzer(Version.LUCENE_CURRENT); + } + }; + analyzers.put("deu",a); + analyzers.put("ger",a); + analyzers.put("de",a); + } + + // Note: ancient greek has a different code (grc). 
+ { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new GreekAnalyzer(Version.LUCENE_CURRENT); + } + }; + analyzers.put("gre",a); + analyzers.put("ell",a); + analyzers.put("el",a); + } + + // @todo what about other Cyrillic scripts? + { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new RussianAnalyzer(Version.LUCENE_CURRENT); + } + }; + analyzers.put("rus",a); + analyzers.put("ru",a); + } + + { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new ThaiAnalyzer(Version.LUCENE_CURRENT); + } + }; + analyzers.put("tha",a); + analyzers.put("th",a); + } + + // English + { + AnalyzerConstructor a = new AnalyzerConstructor() { + public Analyzer newInstance() { + return new StandardAnalyzer(Version.LUCENE_CURRENT); + } + }; + analyzers.put("eng", a); + analyzers.put("en", a); + /* + * Note: There MUST be an entry under the empty string (""). This + * entry will be requested when there is no entry for the specified + * language code. 
+ */ + analyzers.put("", a); + } + + return analyzers; + + } + +} Property changes on: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/DefaultAnalyzerFactory.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-12-21 19:30:51 UTC (rev 4034) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-12-21 20:01:29 UTC (rev 4035) @@ -31,13 +31,12 @@ import java.io.IOException; import java.io.Reader; import java.io.StringReader; +import java.lang.reflect.Constructor; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.UUID; @@ -50,19 +49,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.br.BrazilianAnalyzer; -import org.apache.lucene.analysis.cjk.CJKAnalyzer; -import org.apache.lucene.analysis.cn.ChineseAnalyzer; -import org.apache.lucene.analysis.cz.CzechAnalyzer; -import org.apache.lucene.analysis.de.GermanAnalyzer; -import org.apache.lucene.analysis.el.GreekAnalyzer; -import org.apache.lucene.analysis.fr.FrenchAnalyzer; -import org.apache.lucene.analysis.nl.DutchAnalyzer; -import org.apache.lucene.analysis.ru.RussianAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.analysis.th.ThaiAnalyzer; import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import org.apache.lucene.util.Version; import com.bigdata.btree.BytesUtil; 
import com.bigdata.btree.IIndex; @@ -309,7 +296,7 @@ * for an RDF database since the set of terms only grows and each term * is immutable. */ - String OVERWRITE = "indexer.overwrite"; + String OVERWRITE = FullTextIndex.class.getName() + ".overwrite"; String DEFAULT_OVERWRITE = "true"; @@ -328,7 +315,8 @@ * @todo consider modifying the default system so that defaults can be * made on a per-index, per-application, or per-namespace basis. */ - String INDEXER_COLLATOR_STRENGTH = "indexer.collator.strength"; + String INDEXER_COLLATOR_STRENGTH = FullTextIndex.class.getName() + + ".collator.strength"; String DEFAULT_INDEXER_COLLATOR_STRENGTH = StrengthEnum.Primary.toString(); @@ -340,8 +328,8 @@ * the timeout expires before all tasks complete then the search results * will only reflect partial information. */ - String INDEXER_TIMEOUT = "indexer.timeout"; - + String INDEXER_TIMEOUT = FullTextIndex.class.getName() + ".timeout"; + String DEFAULT_INDEXER_TIMEOUT = "1000"; /** @@ -354,6 +342,16 @@ + ".fieldsEnabled"; String DEFAULT_FIELDS_ENABLED = "true"; + + /** + * The name of the {@link IAnalyzerFactory} class which will be used to + * obtain analyzers when tokenizing documents and queries (default + * {@value #DEFAULT_ANALYZER_FACTORY_CLASS}). + */ + String ANALYZER_FACTORY_CLASS = FullTextIndex.class.getName() + + ".analyzerFactoryClass"; + + String DEFAULT_ANALYZER_FACTORY_CLASS = DefaultAnalyzerFactory.class.getName(); } @@ -390,6 +388,11 @@ return fieldsEnabled; } + + /** + * @see Options#ANALYZER_FACTORY_CLASS + */ + private final IAnalyzerFactory analyzerFactory; /** * The basename of the search index. 
@@ -437,7 +440,6 @@ super(indexManager, namespace, timestamp, properties); - // indexer.overwrite { overwrite = Boolean.parseBoolean(properties.getProperty( @@ -448,7 +450,6 @@ } - // indexer.timeout { timeout = Long.parseLong(properties.getProperty( @@ -469,6 +470,41 @@ } + { + + final String className = getProperty( + Options.ANALYZER_FACTORY_CLASS, + Options.DEFAULT_ANALYZER_FACTORY_CLASS); + + final Class<IAnalyzerFactory> cls; + try { + cls = (Class<IAnalyzerFactory>) Class.forName(className); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Bad option: " + + Options.ANALYZER_FACTORY_CLASS, e); + } + + if (!IAnalyzerFactory.class.isAssignableFrom(cls)) { + throw new RuntimeException(Options.ANALYZER_FACTORY_CLASS + + ": Must extend: " + IAnalyzerFactory.class.getName()); + } + + try { + + final Constructor<? extends IAnalyzerFactory> ctor = cls + .getConstructor(new Class[] { FullTextIndex.class }); + + // save reference. + analyzerFactory = ctor.newInstance(new Object[] { this }); + + } catch (Exception ex) { + + throw new RuntimeException(ex); + + } + + } + /* * Note: defer resolution of the index. */ @@ -565,279 +601,10 @@ */ protected Analyzer getAnalyzer(final String languageCode) { - final IKeyBuilder keyBuilder = getKeyBuilder(); - - Map<String, AnalyzerConstructor> map = getAnalyzers(); + return analyzerFactory.getAnalyzer(languageCode); - AnalyzerConstructor ctor = null; - - if (languageCode == null) { - - if (keyBuilder.isUnicodeSupported()) { - - // The configured local for the database. - final Locale locale = ((KeyBuilder) keyBuilder) - .getSortKeyGenerator().getLocale(); - - // The analyzer for that locale. - Analyzer a = getAnalyzer(locale.getLanguage()); - - if (a != null) - return a; - - } - - // fall through - - } else { - - /* - * Check the declared analyzers. We first check the three letter - * language code. If we do not have a match there then we check the - * 2 letter language code. 
- */ - - String code = languageCode; - - if (code.length() > 3) { - - code = code.substring(0, 2); - - ctor = map.get(languageCode); - - } - - if (ctor == null && code.length() > 2) { - - code = code.substring(0, 1); - - ctor = map.get(languageCode); - - } - - } - - if (ctor == null) { - - // request the default analyzer. - - ctor = map.get(""); - - if (ctor == null) { - - throw new IllegalStateException("No entry for empty string?"); - - } - - } - - Analyzer a = ctor.newInstance(); - - return a; - } - abstract private static class AnalyzerConstructor { - - abstract public Analyzer newInstance(); - - } - - /** - * A map containing instances of the various kinds of analyzers that we know - * about. - * <p> - * Note: There MUST be an entry under the empty string (""). This entry will - * be requested when there is no entry for the specified language code. - */ - private Map<String,AnalyzerConstructor> analyzers; - - /** - * Initializes the various kinds of analyzers that we know about. - * <p> - * Note: Each {@link Analyzer} is registered under both the 3 letter and the - * 2 letter language codes. See <a - * href="http://www.loc.gov/standards/iso639-2/php/code_list.php">ISO 639-2</a>. - * - * @todo get some informed advice on which {@link Analyzer}s map onto which - * language codes. - * - * @todo thread safety? Analyzers produce token processors so maybe there is - * no problem here once things are initialized. If so, maybe this - * could be static. - * - * @todo configuration. Could be configured by a file containing a class - * name and a list of codes that are handled by that class. - * - * @todo strip language code down to 2/3 characters during lookup. - * - * @todo There are a lot of pidgins based on french, english, and other - * languages that are not being assigned here. 
- */ - synchronized private Map<String,AnalyzerConstructor> getAnalyzers() { - - if (analyzers != null) { - - return analyzers; - - } - - analyzers = new HashMap<String, AnalyzerConstructor>(); - - { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new BrazilianAnalyzer(Version.LUCENE_CURRENT); - } - }; - analyzers.put("por", a); - analyzers.put("pt", a); - } - - /* - * Claims to handle Chinese. Does single character extraction. Claims to - * produce smaller indices as a result. - * - * Note: you can not tokenize with the Chinese analyzer and the do - * search using the CJK analyzer and visa versa. - * - * Note: I have no idea whether this would work for Japanese and Korean - * as well. I expect so, but no real clue. - */ - { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new ChineseAnalyzer(); - } - }; - analyzers.put("zho", a); - analyzers.put("chi", a); - analyzers.put("zh", a); - } - - /* - * Claims to handle Chinese, Japanese, Korean. Does double character - * extraction with overlap. 
- */ - { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new CJKAnalyzer(Version.LUCENE_CURRENT); - } - }; -// analyzers.put("zho", a); -// analyzers.put("chi", a); -// analyzers.put("zh", a); - analyzers.put("jpn", a); - analyzers.put("ja", a); - analyzers.put("jpn", a); - analyzers.put("kor",a); - analyzers.put("ko",a); - } - - { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new CzechAnalyzer(Version.LUCENE_CURRENT); - } - }; - analyzers.put("ces",a); - analyzers.put("cze",a); - analyzers.put("cs",a); - } - - { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new DutchAnalyzer(Version.LUCENE_CURRENT); - } - }; - analyzers.put("dut",a); - analyzers.put("nld",a); - analyzers.put("nl",a); - } - - { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new FrenchAnalyzer(Version.LUCENE_CURRENT); - } - }; - analyzers.put("fra",a); - analyzers.put("fre",a); - analyzers.put("fr",a); - } - - /* - * Note: There are a lot of language codes for German variants that - * might be useful here. - */ - { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new GermanAnalyzer(Version.LUCENE_CURRENT); - } - }; - analyzers.put("deu",a); - analyzers.put("ger",a); - analyzers.put("de",a); - } - - // Note: ancient greek has a different code (grc). - { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new GreekAnalyzer(Version.LUCENE_CURRENT); - } - }; - analyzers.put("gre",a); - analyzers.put("ell",a); - analyzers.put("el",a); - } - - // @todo what about other Cyrillic scripts? 
- { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new RussianAnalyzer(Version.LUCENE_CURRENT); - } - }; - analyzers.put("rus",a); - analyzers.put("ru",a); - } - - { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new ThaiAnalyzer(Version.LUCENE_CURRENT); - } - }; - analyzers.put("tha",a); - analyzers.put("th",a); - } - - // English - { - AnalyzerConstructor a = new AnalyzerConstructor() { - public Analyzer newInstance() { - return new StandardAnalyzer(Version.LUCENE_CURRENT); - } - }; - analyzers.put("eng", a); - analyzers.put("en", a); - /* - * Note: There MUST be an entry under the empty string (""). This - * entry will be requested when there is no entry for the specified - * language code. - */ - analyzers.put("", a); - } - - return analyzers; - - } - - /* * thread-local key builder. */ @@ -1202,7 +969,13 @@ * @param maxRank * The upper bound on the #of hits in the result set. * @param prefixMatch - * <strong>Option is not implemented yet</strong> + * When <code>true</code>, the matches will be on tokens which + * include the query tokens as a prefix. This includes exact + * matches as a special case when the prefix is the entire token, + * but it also allows longer matches. For example, + * <code>free</code> will be an exact match on <code>free</code> + * but a partial match on <code>freedom</code>. When + * <code>false</code>, only exact matches will be made. * @param timeout * The timeout -or- ZERO (0) for NO timeout (this is equivalent * to using {@link Long#MAX_VALUE}). 
Added: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/IAnalyzerFactory.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/IAnalyzerFactory.java (rev 0) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/IAnalyzerFactory.java 2010-12-21 20:01:29 UTC (rev 4035) @@ -0,0 +1,53 @@ +/** + +Copyright (C) SYSTAP, LLC 2006-2010. All rights reserved. + +Contact: + SYSTAP, LLC + 4501 Tower Road + Greensboro, NC 27410 + lic...@bi... + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Created on Dec 21, 2010 + */ + +package com.bigdata.search; + +import java.util.Locale; + +import org.apache.lucene.analysis.Analyzer; + +/** + * Factory interface for obtaining an {@link Analyzer}. + * + * @author <a href="mailto:tho...@us...">Bryan Thompson</a> + * @version $Id$ + */ +public interface IAnalyzerFactory { + + /** + * Return the token analyzer to be used for the given language code. + * + * @param languageCode + * The language code or <code>null</code> to use the default + * {@link Locale}. + * + * @return The token analyzer best suited to the indicated language family. 
+ */ + Analyzer getAnalyzer(final String languageCode); + +} Property changes on: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/IAnalyzerFactory.java ___________________________________________________________________ Added: svn:keywords + Id Date Revision Author HeadURL This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-12-21 19:30:57
|
Revision: 4034 http://bigdata.svn.sourceforge.net/bigdata/?rev=4034&view=rev Author: mrpersonick Date: 2010-12-21 19:30:51 +0000 (Tue, 21 Dec 2010) Log Message: ----------- added support for queries specifying text search maxHits and minRelevance Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java 2010-12-21 16:25:29 UTC (rev 4033) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java 2010-12-21 19:30:51 UTC (rev 4034) @@ -794,9 +794,10 @@ final StatementPattern sp = it.next().getKey(); final Value s = sp.getSubjectVar().getValue(); final Value p = sp.getPredicateVar().getValue(); - final Value o = sp.getObjectVar().getValue(); - if (s == null && p != null && o == null) { - if (BD.RELEVANCE.equals(p)) { + if (s == null && p != null) { + if (BD.RELEVANCE.equals(p) || + BD.MAX_HITS.equals(p) || + BD.MIN_RELEVANCE.equals(p)) { final Var sVar = sp.getSubjectVar(); Set<StatementPattern> metadata = searchMetadata2.get(sVar); if (metadata != null) { @@ -1507,15 +1508,14 @@ throw new IllegalArgumentException("not a valid magic search: " + sp); } - final ISolutionExpander expander = - new FreeTextSearchExpander(database, (Literal) objValue); - final Var subjVar = sp.getSubjectVar(); final IVariableOrConstant<IV> search = com.bigdata.relation.rule.Var.var(subjVar.getName()); IVariableOrConstant<IV> relevance = new Constant(DummyIV.INSTANCE); + Literal 
maxHits = null; + Literal minRelevance = null; for (StatementPattern meta : metadata) { if (!meta.getSubjectVar().equals(subjVar)) { @@ -1523,14 +1523,32 @@ } final Value pVal = meta.getPredicateVar().getValue(); final Var oVar = meta.getObjectVar(); - if (pVal == null || oVar.hasValue()) { + final Value oVal = oVar.getValue(); + if (pVal == null) { throw new IllegalArgumentException("illegal metadata: " + meta); } if (BD.RELEVANCE.equals(pVal)) { + if (oVar.hasValue()) { + throw new IllegalArgumentException("illegal metadata: " + meta); + } relevance = com.bigdata.relation.rule.Var.var(oVar.getName()); - } + } else if (BD.MAX_HITS.equals(pVal)) { + if (oVal == null || !(oVal instanceof Literal)) { + throw new IllegalArgumentException("illegal metadata: " + meta); + } + maxHits = (Literal) oVal; + } else if (BD.MIN_RELEVANCE.equals(pVal)) { + if (oVal == null || !(oVal instanceof Literal)) { + throw new IllegalArgumentException("illegal metadata: " + meta); + } + minRelevance = (Literal) oVal; + } } + final ISolutionExpander expander = + new FreeTextSearchExpander(database, (Literal) objValue, + maxHits, minRelevance); + return new SPOPredicate( new String[] { database.getSPORelation().getNamespace() }, -1, // partitionId Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java 2010-12-21 16:25:29 UTC (rev 4033) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java 2010-12-21 19:30:51 UTC (rev 4034) @@ -56,12 +56,18 @@ private final AbstractTripleStore database; - private final Literal query; + private final Literal query, maxHits, minRelevance; private Set<URI> graphs; public FreeTextSearchExpander(final AbstractTripleStore database, final Literal query) { + this(database, query, null, null); + } 
+ + public FreeTextSearchExpander(final AbstractTripleStore database, + final Literal query, final Literal maxHits, + final Literal minRelevance) { if (database == null) throw new IllegalArgumentException(); @@ -73,6 +79,10 @@ this.query = query; + this.maxHits = maxHits; + + this.minRelevance = minRelevance; + } public boolean backchain() { @@ -135,8 +145,10 @@ // final long begin = System.nanoTime(); hiterator = database.getLexiconRelation() .getSearchEngine().search(query.getLabel(), - query.getLanguage(), false/* prefixMatch */, - 0d/* minCosine */, 10000/* maxRank */, + query.getLanguage(), + false/* prefixMatch */, + minRelevance == null ? 0d : minRelevance.doubleValue()/* minCosine */, + maxHits == null ? 10000 : maxHits.intValue()+1/* maxRank */, 1000L/* timeout */, TimeUnit.MILLISECONDS); // hiterator = database.getSearchEngine().search // ( query.getLabel(), Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2010-12-21 16:25:29 UTC (rev 4033) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2010-12-21 19:30:51 UTC (rev 4034) @@ -677,7 +677,7 @@ } - public void testWithRelevance() throws Exception { + public void testWithMetadata() throws Exception { final BigdataSail sail = getSail(); try { @@ -755,16 +755,19 @@ " ?s <"+RDFS.LABEL+"> ?o . " + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + " ?o <"+BD.RELEVANCE+"> ?score . 
" + - "}"; + "} " + + "order by desc(?score)"; final TupleQuery tupleQuery = cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); tupleQuery.setIncludeInferred(true /* includeInferred */); TupleQueryResult result = tupleQuery.evaluate(); + int i = 0; while (result.hasNext()) { - System.err.println(result.next()); + System.err.println(i++ + ": " + result.next().toString()); } + assertTrue("wrong # of results", i == 7); result = tupleQuery.evaluate(); @@ -777,7 +780,7 @@ null, // languageCode false, // prefixMatch 0d, // minCosine - 10000, // maxRank + 10000, // maxRank (=maxResults + 1) 1000L, // timeout TimeUnit.MILLISECONDS // unit ); @@ -788,16 +791,140 @@ final Literal score = vf.createLiteral(hit.getCosine()); final URI s = uris.get(id); final Literal o = literals.get(id); - answer.add(createBindingSet( + final BindingSet bs = createBindingSet( new BindingImpl("s", s), new BindingImpl("o", o), - new BindingImpl("score", score))); + new BindingImpl("score", score)); + System.err.println(bs); + answer.add(bs); } compare(result, answer); } + { + final String searchQuery = "how now brown cow"; + final int maxHits = 5; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + +// " ?o <"+BD.MIN_RELEVANCE+"> \"0.6\" . " + + " ?o <"+BD.MAX_HITS+"> \""+maxHits+"\" . 
" + + "} " + + "order by desc(?score)"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + System.err.println(i++ + ": " + result.next().toString()); + } + assertTrue("wrong # of results", i == 5); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + false, // prefixMatch + 0d, // minCosine + maxHits+1, // maxRank (=maxResults + 1) + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + System.err.println(bs); + answer.add(bs); + } + + compare(result, answer); + + } + + { + final String searchQuery = "how now brown cow"; + final double minRelevance = 0.6d; + + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + + " ?o <"+BD.MIN_RELEVANCE+"> \""+minRelevance+"\" . " + +// " ?o <"+BD.MAX_HITS+"> \"5\" . 
" + + "} " + + "order by desc(?score)"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + int i = 0; + while (result.hasNext()) { + System.err.println(i++ + ": " + result.next().toString()); + } + assertTrue("wrong # of results", i == 3); + + result = tupleQuery.evaluate(); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + false, // prefixMatch + minRelevance, // minCosine + 10000, // maxRank (=maxResults + 1) + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + final BindingSet bs = createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score)); + System.err.println(bs); + answer.add(bs); + } + + compare(result, answer); + + } + } finally { cxn.close(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-12-21 16:25:35
|
Revision: 4033 http://bigdata.svn.sourceforge.net/bigdata/?rev=4033&view=rev Author: mrpersonick Date: 2010-12-21 16:25:29 +0000 (Tue, 21 Dec 2010) Log Message: ----------- magic predicates for maxHits and minRelevance for free text search query Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2010-12-21 16:02:41 UTC (rev 4032) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2010-12-21 16:25:29 UTC (rev 4033) @@ -187,12 +187,12 @@ * select ?s ?rank * where { * ?s bd:search "scale-out RDF triplestore" . - * ?s bd:rank ?rank . + * ?s bd:maxHits "5"^^xsd:int . * } * * </pre> */ - final URI RANK = new URIImpl(SEARCH_NAMESPACE+"rank"); + final URI MAX_HITS = new URIImpl(SEARCH_NAMESPACE+"maxHits"); /** * Magic predicate used to query for free text search metadata. Use @@ -203,13 +203,13 @@ * select ?s ?matched * where { * ?s bd:search "scale-out RDF triplestore" . - * ?s bd:numMatchedTokens ?matched . + * ?s bd:minRelevance "0.5"^^xsd:double . * } * * </pre> */ - final URI NUM_MATCHED_TOKENS = new URIImpl(SEARCH_NAMESPACE+"numMatchedTokens"); - + final URI MIN_RELEVANCE = new URIImpl(SEARCH_NAMESPACE+"minRelevance"); + /** * Sesame has the notion of a "null" graph. Any time you insert a statement * into a quad store and the context position is not specified, it is This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-12-21 16:02:48
|
Revision: 4032 http://bigdata.svn.sourceforge.net/bigdata/?rev=4032&view=rev Author: mrpersonick Date: 2010-12-21 16:02:41 +0000 (Tue, 21 Dec 2010) Log Message: ----------- finished the test case Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2010-12-21 15:22:47 UTC (rev 4031) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2010-12-21 16:02:41 UTC (rev 4032) @@ -32,10 +32,13 @@ import java.io.IOException; import java.util.Collection; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; +import java.util.Map; import java.util.Properties; import java.util.Set; +import java.util.concurrent.TimeUnit; import org.openrdf.model.BNode; import org.openrdf.model.Graph; @@ -70,9 +73,18 @@ import org.openrdf.sail.SailException; import com.bigdata.journal.BufferMode; +import com.bigdata.rdf.internal.IV; +import com.bigdata.rdf.internal.TermId; +import com.bigdata.rdf.internal.VTE; +import com.bigdata.rdf.internal.XSDDoubleIV; +import com.bigdata.rdf.lexicon.ITextIndexer; +import com.bigdata.rdf.model.BigdataValue; +import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.rio.StatementBuffer; import com.bigdata.rdf.sail.BigdataSail.Options; import com.bigdata.rdf.store.BD; +import com.bigdata.search.Hiterator; +import com.bigdata.search.IHit; /** * Test suite for high-level query against a graph containing statements about @@ -702,7 +714,7 @@ cxn.add(s5, RDFS.LABEL, l5); cxn.add(s6, RDFS.LABEL, l6); cxn.add(s7, RDFS.LABEL, l7); - + /* * Note: The either flush() or commit() is required to flush the * statement 
buffers to the database before executing any operations @@ -710,18 +722,38 @@ */ cxn.commit(); + final Map<IV, Literal> literals = new LinkedHashMap<IV, Literal>(); + literals.put(((BigdataValue)l1).getIV(), l1); + literals.put(((BigdataValue)l2).getIV(), l2); + literals.put(((BigdataValue)l3).getIV(), l3); + literals.put(((BigdataValue)l4).getIV(), l4); + literals.put(((BigdataValue)l5).getIV(), l5); + literals.put(((BigdataValue)l6).getIV(), l6); + literals.put(((BigdataValue)l7).getIV(), l7); + + final Map<IV, URI> uris = new LinkedHashMap<IV, URI>(); + uris.put(((BigdataValue)l1).getIV(), s1); + uris.put(((BigdataValue)l2).getIV(), s2); + uris.put(((BigdataValue)l3).getIV(), s3); + uris.put(((BigdataValue)l4).getIV(), s4); + uris.put(((BigdataValue)l5).getIV(), s5); + uris.put(((BigdataValue)l6).getIV(), s6); + uris.put(((BigdataValue)l7).getIV(), s7); + /**/ if (log.isInfoEnabled()) { log.info("\n" + sail.getDatabase().dumpStore()); } - { // run the query with no graphs specified + { + final String searchQuery = "how now brown cow"; + final String query = "select ?s ?o ?score " + "where " + "{ " + " ?s <"+RDFS.LABEL+"> ?o . " + - " ?o <"+BD.SEARCH+"> \"how now brown cow\" . " + + " ?o <"+BD.SEARCH+"> \""+searchQuery+"\" . " + " ?o <"+BD.RELEVANCE+"> ?score . 
" + "}"; @@ -735,10 +767,35 @@ } result = tupleQuery.evaluate(); -// Collection<BindingSet> answer = new LinkedList<BindingSet>(); -// answer.add(createBindingSet(new BindingImpl("s", alice))); -// -// compare(result, answer); + + Collection<BindingSet> answer = new LinkedList<BindingSet>(); + + final ITextIndexer search = + sail.getDatabase().getLexiconRelation().getSearchEngine(); + final Hiterator<IHit> hits = + search.search(searchQuery, + null, // languageCode + false, // prefixMatch + 0d, // minCosine + 10000, // maxRank + 1000L, // timeout + TimeUnit.MILLISECONDS // unit + ); + + while (hits.hasNext()) { + final IHit hit = hits.next(); + final IV id = new TermId(VTE.LITERAL, hit.getDocId()); + final Literal score = vf.createLiteral(hit.getCosine()); + final URI s = uris.get(id); + final Literal o = literals.get(id); + answer.add(createBindingSet( + new BindingImpl("s", s), + new BindingImpl("o", o), + new BindingImpl("score", score))); + } + + compare(result, answer); + } } finally { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-21 15:22:53
|
Revision: 4031 http://bigdata.svn.sourceforge.net/bigdata/?rev=4031&view=rev Author: thompsonbry Date: 2010-12-21 15:22:47 +0000 (Tue, 21 Dec 2010) Log Message: ----------- Removed an unused field declared by BD.java (ATOMIC_UPDATE_SEPARATOR_KEY). Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2010-12-21 15:17:52 UTC (rev 4030) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2010-12-21 15:22:47 UTC (rev 4031) @@ -225,14 +225,14 @@ */ URI NULL_GRAPH = new URIImpl(NAMESPACE + "nullGraph"); - /** - * We need the abiltiy to do atomic add+drop in one operation via the - * remoting interface. Thus we need the ability to place - * statements to add and to delete in the same serialized document sent - * across the wire. This separator key, when included in a comment, will - * mark the separation point between statements to drop (above the - * separator) and statements to add (below the separator). - */ - URI ATOMIC_UPDATE_SEPARATOR_KEY = new URIImpl(NAMESPACE + "atomicUpdateSeparatorKey"); +// /** +// * We need the ability to do atomic add+drop in one operation via the +// * remoting interface. Thus we need the ability to place +// * statements to add and to delete in the same serialized document sent +// * across the wire. This separator key, when included in a comment, will +// * mark the separation point between statements to drop (above the +// * separator) and statements to add (below the separator). +// */ +// URI ATOMIC_UPDATE_SEPARATOR_KEY = new URIImpl(NAMESPACE + "atomicUpdateSeparatorKey"); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-12-21 15:17:58
|
Revision: 4030 http://bigdata.svn.sourceforge.net/bigdata/?rev=4030&view=rev Author: mrpersonick Date: 2010-12-21 15:17:52 +0000 (Tue, 21 Dec 2010) Log Message: ----------- comments Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java 2010-12-21 15:13:44 UTC (rev 4029) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java 2010-12-21 15:17:52 UTC (rev 4030) @@ -748,15 +748,30 @@ // generate tails Collection<IPredicate> tails = new LinkedList<IPredicate>(); + // keep a list of free text searches for later to solve a named graphs // problem final Map<IPredicate, StatementPattern> searches = new HashMap<IPredicate, StatementPattern>(); + /* + * deal with free text search tails first. need to match up search + * metadata tails with the searches themselves. ie: + * + * select * + * where { + * ?s bd:search "foo" . + * ?s bd:relevance ?score . 
+ * } + */ + // the statement patterns for the searches themselves final Set<StatementPattern> searchMetadata1 = new LinkedHashSet<StatementPattern>(); + // the statement patterns for metadata about the searches final Map<Var, Set<StatementPattern>> searchMetadata2 = new LinkedHashMap<Var, Set<StatementPattern>>(); + // do a first pass to gather up the actual searches and take them out + // of the master list of statement patterns Iterator<Map.Entry<StatementPattern, Boolean>> it = stmtPatterns.entrySet().iterator(); while (it.hasNext()) { @@ -773,6 +788,7 @@ } } } + // do a second pass to get the search metadata it = stmtPatterns.entrySet().iterator(); while (it.hasNext()) { final StatementPattern sp = it.next().getKey(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-12-21 15:13:50
|
Revision: 4029 http://bigdata.svn.sourceforge.net/bigdata/?rev=4029&view=rev Author: mrpersonick Date: 2010-12-21 15:13:44 +0000 (Tue, 21 Dec 2010) Log Message: ----------- javadoc Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2010-12-21 15:09:07 UTC (rev 4028) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2010-12-21 15:13:44 UTC (rev 4029) @@ -65,6 +65,14 @@ */ final String NAMESPACE = "http://www.bigdata.com/rdf#"; + /** + * The namespace used for magic search predicates. + * <p> + * @see #SEARCH + * @see #RELEVANCE + * @see #RANK + * @see #NUM_MATCHED_TOKENS + */ final String SEARCH_NAMESPACE = "http://www.bigdata.com/rdf/search#"; /** @@ -154,10 +162,52 @@ */ final URI SEARCH = new URIImpl(SEARCH_NAMESPACE+"search"); + /** + * Magic predicate used to query for free text search metadata. Use + * in conjunction with {@link #SEARCH} as follows: + * <p> + * <pre> + * + * select ?s ?relevance + * where { + * ?s bd:search "scale-out RDF triplestore" . + * ?s bd:relevance ?relevance . + * } + * + * </pre> + */ final URI RELEVANCE = new URIImpl(SEARCH_NAMESPACE+"relevance"); + /** + * Magic predicate used to query for free text search metadata. Use + * in conjunction with {@link #SEARCH} as follows: + * <p> + * <pre> + * + * select ?s ?rank + * where { + * ?s bd:search "scale-out RDF triplestore" . + * ?s bd:rank ?rank . + * } + * + * </pre> + */ final URI RANK = new URIImpl(SEARCH_NAMESPACE+"rank"); + /** + * Magic predicate used to query for free text search metadata. Use + * in conjunction with {@link #SEARCH} as follows: + * <p> + * <pre> + * + * select ?s ?matched + * where { + * ?s bd:search "scale-out RDF triplestore" . 
+ * ?s bd:numMatchedTokens ?matched . + * } + * + * </pre> + */ final URI NUM_MATCHED_TOKENS = new URIImpl(SEARCH_NAMESPACE+"numMatchedTokens"); /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <mrp...@us...> - 2010-12-21 15:09:14
|
Revision: 4028 http://bigdata.svn.sourceforge.net/bigdata/?rev=4028&view=rev Author: mrpersonick Date: 2010-12-21 15:09:07 +0000 (Tue, 21 Dec 2010) Log Message: ----------- expanding free text search functionality to include search metadata support via SPARQL Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2010-12-21 15:05:59 UTC (rev 4027) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/BD.java 2010-12-21 15:09:07 UTC (rev 4028) @@ -63,8 +63,10 @@ /** * The namespace used for bigdata specific extensions. */ - String NAMESPACE = "http://www.bigdata.com/rdf#"; + final String NAMESPACE = "http://www.bigdata.com/rdf#"; + final String SEARCH_NAMESPACE = "http://www.bigdata.com/rdf/search#"; + /** * The namespace prefix used in SPARQL queries to signify query hints. You * can embed query hints into a SPARQL query as follows: @@ -150,7 +152,13 @@ * Note: The context position should be unbound when using statement * identifiers. */ - URI SEARCH = new URIImpl(NAMESPACE+"search"); + final URI SEARCH = new URIImpl(SEARCH_NAMESPACE+"search"); + + final URI RELEVANCE = new URIImpl(SEARCH_NAMESPACE+"relevance"); + + final URI RANK = new URIImpl(SEARCH_NAMESPACE+"rank"); + + final URI NUM_MATCHED_TOKENS = new URIImpl(SEARCH_NAMESPACE+"numMatchedTokens"); /** * Sesame has the notion of a "null" graph. 
Any time you insert a statement Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java 2010-12-21 15:05:59 UTC (rev 4027) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataEvaluationStrategyImpl2.java 2010-12-21 15:09:07 UTC (rev 4028) @@ -8,6 +8,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -751,11 +752,50 @@ // problem final Map<IPredicate, StatementPattern> searches = new HashMap<IPredicate, StatementPattern>(); + + final Set<StatementPattern> searchMetadata1 = + new LinkedHashSet<StatementPattern>(); + final Map<Var, Set<StatementPattern>> searchMetadata2 = + new LinkedHashMap<Var, Set<StatementPattern>>(); + Iterator<Map.Entry<StatementPattern, Boolean>> it = + stmtPatterns.entrySet().iterator(); + while (it.hasNext()) { + final StatementPattern sp = it.next().getKey(); + final Value s = sp.getSubjectVar().getValue(); + final Value p = sp.getPredicateVar().getValue(); + final Value o = sp.getObjectVar().getValue(); + if (s == null && p != null && o != null) { + if (BD.SEARCH.equals(p)) { + searchMetadata1.add(sp); + searchMetadata2.put(sp.getSubjectVar(), + new LinkedHashSet<StatementPattern>()); + it.remove(); + } + } + } + it = stmtPatterns.entrySet().iterator(); + while (it.hasNext()) { + final StatementPattern sp = it.next().getKey(); + final Value s = sp.getSubjectVar().getValue(); + final Value p = sp.getPredicateVar().getValue(); + final Value o = sp.getObjectVar().getValue(); + if (s == null && p != null && o == null) { + if (BD.RELEVANCE.equals(p)) { + final Var sVar = sp.getSubjectVar(); + Set<StatementPattern> metadata = 
searchMetadata2.get(sVar); + if (metadata != null) { + metadata.add(sp); + } + it.remove(); + } + } + } + for (Map.Entry<StatementPattern, Boolean> entry : stmtPatterns .entrySet()) { - StatementPattern sp = entry.getKey(); - boolean optional = entry.getValue(); - IPredicate tail = generateTail(sp, optional); + final StatementPattern sp = entry.getKey(); + final boolean optional = entry.getValue(); + final IPredicate tail = generateTail(sp, optional); // encountered a value not in the database lexicon if (tail == null) { if (log.isDebugEnabled()) { @@ -769,12 +809,17 @@ return null; } } - if (tail.getSolutionExpander() instanceof FreeTextSearchExpander) { - searches.put(tail, sp); - } tails.add(tail); } + for (StatementPattern sp : searchMetadata1) { + final Set<StatementPattern> metadata = + searchMetadata2.get(sp.getSubjectVar()); + final IPredicate tail = generateSearchTail(sp, metadata); + searches.put(tail, sp); + tails.add(tail); + } + /* * When in quads mode, we need to go through the free text searches and * make sure that they are properly filtered for the dataset where @@ -826,7 +871,7 @@ boolean needsFilter = true; // check the other tails one by one for (IPredicate<ISPO> tail : tails) { - ISolutionExpander<ISPO> expander = + final ISolutionExpander<ISPO> expander = tail.getSolutionExpander(); // only concerned with non-optional tails that are not // themselves magic searches @@ -837,7 +882,7 @@ // see if the search variable appears in this tail boolean appears = false; for (int i = 0; i < tail.arity(); i++) { - IVariableOrConstant term = tail.get(i); + final IVariableOrConstant term = tail.get(i); if (log.isDebugEnabled()) { log.debug(term); } @@ -857,8 +902,8 @@ if (log.isDebugEnabled()) { log.debug("needs filter: " + searchVar); } - FreeTextSearchExpander expander = (FreeTextSearchExpander) - search.getSolutionExpander(); + final FreeTextSearchExpander expander = + (FreeTextSearchExpander) search.getSolutionExpander(); 
expander.addNamedGraphsFilter(graphs); } } @@ -907,9 +952,9 @@ IAccessPath<ISPO> accessPath = database.getSPORelation() .getAccessPath(tail); accessPath = expander.getAccessPath(accessPath); - IChunkedOrderedIterator<ISPO> it = accessPath.iterator(); - while (it.hasNext()) { - log.debug(it.next().toString(database)); + IChunkedOrderedIterator<ISPO> it1 = accessPath.iterator(); + while (it1.hasNext()) { + log.debug(it1.next().toString(database)); } } } @@ -1257,23 +1302,6 @@ private IPredicate generateTail(final StatementPattern stmtPattern, final boolean optional) throws QueryEvaluationException { - // create a solution expander for free text search if necessary - ISolutionExpander<ISPO> expander = null; - final Value predValue = stmtPattern.getPredicateVar().getValue(); - if (log.isDebugEnabled()) { - log.debug(predValue); - } - if (predValue != null && BD.SEARCH.equals(predValue)) { - final Value objValue = stmtPattern.getObjectVar().getValue(); - if (log.isDebugEnabled()) { - log.debug(objValue); - } - if (objValue != null && objValue instanceof Literal) { - expander = new FreeTextSearchExpander(database, - (Literal) objValue); - } - } - // @todo why is [s] handled differently? 
// because [s] is the variable in free text searches, no need to test // to see if the free text search expander is in place @@ -1283,26 +1311,20 @@ return null; } - final IVariableOrConstant<IV> p; - if (expander == null) { - p = generateVariableOrConstant(stmtPattern.getPredicateVar()); - } else { - p = new Constant(DummyIV.INSTANCE); - } + final IVariableOrConstant<IV> p = generateVariableOrConstant( + stmtPattern.getPredicateVar()); if (p == null) { return null; } - final IVariableOrConstant<IV> o; - if (expander == null) { - o = generateVariableOrConstant(stmtPattern.getObjectVar()); - } else { - o = new Constant(DummyIV.INSTANCE); - } + final IVariableOrConstant<IV> o = generateVariableOrConstant( + stmtPattern.getObjectVar()); if (o == null) { return null; } - + + // for default and named graph expansion + ISolutionExpander<ISPO> expander = null; final IVariableOrConstant<IV> c; if (!database.isQuads()) { /* @@ -1361,79 +1383,68 @@ } System.err.println(stmtPattern.toString()); } - if (expander != null) { - /* - * @todo can this happen? If it does then we need to look at how - * to layer the expanders. - */ - // throw new AssertionError("expander already set"); - // we are doing a free text search, no need to do any named or - // default graph expansion work - c = null; - } else { - final Var cvar = stmtPattern.getContextVar(); - if (dataset == null) { - if (cvar == null) { - /* - * There is no dataset and there is no graph variable, - * so the default graph will be the RDF Merge of ALL - * graphs in the quad store. - * - * This code path uses an "expander" which strips off - * the context information and filters for the distinct - * (s,p,o) triples to realize the RDF Merge of the - * source graphs for the default graph. 
- */ + final Var cvar = stmtPattern.getContextVar(); + if (dataset == null) { + if (cvar == null) { + /* + * There is no dataset and there is no graph variable, + * so the default graph will be the RDF Merge of ALL + * graphs in the quad store. + * + * This code path uses an "expander" which strips off + * the context information and filters for the distinct + * (s,p,o) triples to realize the RDF Merge of the + * source graphs for the default graph. + */ + c = null; + expander = new DefaultGraphSolutionExpander(null/* ALL */); + } else { + /* + * There is no data set and there is a graph variable, + * so the query will run against all named graphs and + * [cvar] will be to the context of each (s,p,o,c) in + * turn. This handles constructions such as: + * + * "SELECT * WHERE {graph ?g {?g :p :o } }" + */ + expander = new NamedGraphSolutionExpander(null/* ALL */); + c = generateVariableOrConstant(cvar); + } + } else { // dataset != null + switch (stmtPattern.getScope()) { + case DEFAULT_CONTEXTS: { + /* + * Query against the RDF merge of zero or more source + * graphs. + */ + expander = new DefaultGraphSolutionExpander(dataset + .getDefaultGraphs()); + /* + * Note: cvar can not become bound since context is + * stripped for the default graph. + */ + if (cvar == null) c = null; - expander = new DefaultGraphSolutionExpander(null/* ALL */); + else + c = generateVariableOrConstant(cvar); + break; + } + case NAMED_CONTEXTS: { + /* + * Query against zero or more named graphs. + */ + expander = new NamedGraphSolutionExpander(dataset + .getNamedGraphs()); + if (cvar == null) {// || !cvar.hasValue()) { + c = null; } else { - /* - * There is no data set and there is a graph variable, - * so the query will run against all named graphs and - * [cvar] will be to the context of each (s,p,o,c) in - * turn. 
This handles constructions such as: - * - * "SELECT * WHERE {graph ?g {?g :p :o } }" - */ - expander = new NamedGraphSolutionExpander(null/* ALL */); c = generateVariableOrConstant(cvar); } - } else { // dataset != null - switch (stmtPattern.getScope()) { - case DEFAULT_CONTEXTS: { - /* - * Query against the RDF merge of zero or more source - * graphs. - */ - expander = new DefaultGraphSolutionExpander(dataset - .getDefaultGraphs()); - /* - * Note: cvar can not become bound since context is - * stripped for the default graph. - */ - if (cvar == null) - c = null; - else - c = generateVariableOrConstant(cvar); - break; - } - case NAMED_CONTEXTS: { - /* - * Query against zero or more named graphs. - */ - expander = new NamedGraphSolutionExpander(dataset - .getNamedGraphs()); - if (cvar == null) {// || !cvar.hasValue()) { - c = null; - } else { - c = generateVariableOrConstant(cvar); - } - break; - } - default: - throw new AssertionError(); - } + break; } + default: + throw new AssertionError(); + } } } @@ -1456,6 +1467,63 @@ s, p, o, c, optional, // optional filter, // filter on elements visited by the access path. 
+ expander // named graphs expander + ); + + } + + private IPredicate generateSearchTail(final StatementPattern sp, + final Set<StatementPattern> metadata) + throws QueryEvaluationException { + + final Value predValue = sp.getPredicateVar().getValue(); + if (log.isDebugEnabled()) { + log.debug(predValue); + } + if (predValue == null || !BD.SEARCH.equals(predValue)) { + throw new IllegalArgumentException("not a valid magic search: " + sp); + } + final Value objValue = sp.getObjectVar().getValue(); + if (log.isDebugEnabled()) { + log.debug(objValue); + } + if (objValue == null || !(objValue instanceof Literal)) { + throw new IllegalArgumentException("not a valid magic search: " + sp); + } + + final ISolutionExpander expander = + new FreeTextSearchExpander(database, (Literal) objValue); + + final Var subjVar = sp.getSubjectVar(); + + final IVariableOrConstant<IV> search = + com.bigdata.relation.rule.Var.var(subjVar.getName()); + + IVariableOrConstant<IV> relevance = new Constant(DummyIV.INSTANCE); + + for (StatementPattern meta : metadata) { + if (!meta.getSubjectVar().equals(subjVar)) { + throw new IllegalArgumentException("illegal metadata: " + meta); + } + final Value pVal = meta.getPredicateVar().getValue(); + final Var oVar = meta.getObjectVar(); + if (pVal == null || oVar.hasValue()) { + throw new IllegalArgumentException("illegal metadata: " + meta); + } + if (BD.RELEVANCE.equals(pVal)) { + relevance = com.bigdata.relation.rule.Var.var(oVar.getName()); + } + } + + return new SPOPredicate( + new String[] { database.getSPORelation().getNamespace() }, + -1, // partitionId + search, // s = searchVar + relevance, // p = relevanceVar + new Constant(DummyIV.INSTANCE), // o = reserved + new Constant(DummyIV.INSTANCE), // c = reserved + false, // optional + null, // filter on elements visited by the access path. 
expander // free text search expander or named graphs expander ); @@ -1707,6 +1775,12 @@ /** * Override evaluation of StatementPatterns to recognize magic search * predicate. + * + * select * + * where { + * ?s bd:search "foo" . + * ?s bd:score ?score . + * } */ @Override public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java 2010-12-21 15:05:59 UTC (rev 4027) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/FreeTextSearchExpander.java 2010-12-21 15:09:07 UTC (rev 4028) @@ -13,6 +13,7 @@ import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.TermId; import com.bigdata.rdf.internal.VTE; +import com.bigdata.rdf.internal.XSDDoubleIV; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; @@ -305,9 +306,12 @@ } ISPO[] spos = new ISPO[hits.length]; for (int i = 0; i < hits.length; i++) { - IV s = new TermId(VTE.LITERAL, hits[i].getDocId()); - if (INFO) log.info("hit: " + s); - spos[i] = new SPO(s, null, null); + final IV s = new TermId(VTE.LITERAL, hits[i].getDocId()); + final IV p = new XSDDoubleIV(hits[i].getCosine()); + final IV o = null; // reserved + final IV c = null; // reserved + spos[i] = new SPO(s, p, o, c); + if (INFO) log.info("hit: " + spos[i]); } // Arrays.sort(spos, SPOKeyOrder.SPO.getComparator()); return spos; @@ -316,9 +320,12 @@ private ISPO[] convertWhenBound(IHit[] hits) { ISPO[] result = new ISPO[0]; for (IHit hit : hits) { - IV s = new TermId(VTE.LITERAL, hit.getDocId()); + final IV s = new TermId(VTE.LITERAL, hit.getDocId()); if (s == boundVal) { - result = new ISPO[] { new SPO(s, null, null) }; + final IV p = new XSDDoubleIV(hit.getCosine()); + final IV o = 
null; // reserved + final IV c = null; // reserved + result = new ISPO[] { new SPO(s, p, o, c) }; break; } } Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2010-12-21 15:05:59 UTC (rev 4027) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/test/com/bigdata/rdf/sail/TestSearchQuery.java 2010-12-21 15:09:07 UTC (rev 4028) @@ -40,10 +40,10 @@ import org.openrdf.model.BNode; import org.openrdf.model.Graph; import org.openrdf.model.Literal; -import org.openrdf.model.Resource; import org.openrdf.model.Statement; import org.openrdf.model.URI; import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; import org.openrdf.model.impl.BNodeImpl; import org.openrdf.model.impl.GraphImpl; import org.openrdf.model.impl.LiteralImpl; @@ -665,4 +665,89 @@ } + public void testWithRelevance() throws Exception { + + final BigdataSail sail = getSail(); + try { + + sail.initialize(); + final BigdataSailRepository repo = new BigdataSailRepository(sail); + final BigdataSailRepositoryConnection cxn = + (BigdataSailRepositoryConnection) repo.getConnection(); + cxn.setAutoCommit(false); + + try { + + final ValueFactory vf = sail.getValueFactory(); + + final URI s1 = vf.createURI(BD.NAMESPACE+"s1"); + final URI s2 = vf.createURI(BD.NAMESPACE+"s2"); + final URI s3 = vf.createURI(BD.NAMESPACE+"s3"); + final URI s4 = vf.createURI(BD.NAMESPACE+"s4"); + final URI s5 = vf.createURI(BD.NAMESPACE+"s5"); + final URI s6 = vf.createURI(BD.NAMESPACE+"s6"); + final URI s7 = vf.createURI(BD.NAMESPACE+"s7"); + final Literal l1 = vf.createLiteral("how"); + final Literal l2 = vf.createLiteral("now"); + final Literal l3 = vf.createLiteral("brown"); + final Literal l4 = vf.createLiteral("cow"); + final Literal l5 = vf.createLiteral("how now"); + final Literal l6 = 
vf.createLiteral("brown cow"); + final Literal l7 = vf.createLiteral("how now brown cow"); + + cxn.add(s1, RDFS.LABEL, l1); + cxn.add(s2, RDFS.LABEL, l2); + cxn.add(s3, RDFS.LABEL, l3); + cxn.add(s4, RDFS.LABEL, l4); + cxn.add(s5, RDFS.LABEL, l5); + cxn.add(s6, RDFS.LABEL, l6); + cxn.add(s7, RDFS.LABEL, l7); + + /* + * Note: The either flush() or commit() is required to flush the + * statement buffers to the database before executing any operations + * that go around the sail. + */ + cxn.commit(); + +/**/ + if (log.isInfoEnabled()) { + log.info("\n" + sail.getDatabase().dumpStore()); + } + + { // run the query with no graphs specified + final String query = + "select ?s ?o ?score " + + "where " + + "{ " + + " ?s <"+RDFS.LABEL+"> ?o . " + + " ?o <"+BD.SEARCH+"> \"how now brown cow\" . " + + " ?o <"+BD.RELEVANCE+"> ?score . " + + "}"; + + final TupleQuery tupleQuery = + cxn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.setIncludeInferred(true /* includeInferred */); + TupleQueryResult result = tupleQuery.evaluate(); + + while (result.hasNext()) { + System.err.println(result.next()); + } + + result = tupleQuery.evaluate(); +// Collection<BindingSet> answer = new LinkedList<BindingSet>(); +// answer.add(createBindingSet(new BindingImpl("s", alice))); +// +// compare(result, answer); + } + + } finally { + cxn.close(); + } + } finally { + sail.__tearDownUnitTest(); + } + + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-21 15:06:06
|
Revision: 4027 http://bigdata.svn.sourceforge.net/bigdata/?rev=4027&view=rev Author: thompsonbry Date: 2010-12-21 15:05:59 +0000 (Tue, 21 Dec 2010) Log Message: ----------- Bug fix to AbstractJournal where it was using a local FileMetadata object rather than the instance field and thus was not reporting the FileMetadata reference to DumpJournal. Added a NAMESPACE option to the BigdataSail. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2010-12-21 13:55:51 UTC (rev 4026) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/journal/AbstractJournal.java 2010-12-21 15:05:59 UTC (rev 4027) @@ -880,7 +880,7 @@ } else { - final FileMetadata fileMetadata = FileMetadata.createInstance( + /*final FileMetadata*/ fileMetadata = FileMetadata.createInstance( properties, !(this instanceof Journal), quorumToken); final BufferMode bufferMode = fileMetadata.bufferMode; Modified: branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2010-12-21 13:55:51 UTC (rev 4026) +++ branches/JOURNAL_HA_BRANCH/bigdata-sails/src/java/com/bigdata/rdf/sail/BigdataSail.java 2010-12-21 15:05:59 UTC (rev 4027) @@ -340,7 +340,6 @@ public static final String DEFAULT_ALLOW_AUTO_COMMIT = "false"; - /** * Options (default <code>false</code>) creates the SPO relation with * isolatable indices to allow read/write transactions. 
@@ -360,6 +359,15 @@ public static final String DEFAULT_STAR_JOINS = "false"; + /** + * Option specifies the namespace of the designed KB instance (default + * {@value #DEFAULT_NAMESPACE}). + */ + public static final String NAMESPACE = BigdataSail.class.getPackage() + .getName()+ ".namespace"; + + public static final String DEFAULT_NAMESPACE = "kb"; + } /** @@ -621,8 +629,10 @@ final ITransactionService txService = journal.getTransactionManager().getTransactionService(); - final String namespace = "kb"; - + final String namespace = properties.getProperty( + BigdataSail.Options.NAMESPACE, + BigdataSail.Options.DEFAULT_NAMESPACE); + // throws an exception if there are inconsistent properties checkProperties(properties); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-21 13:55:57
|
Revision: 4026 http://bigdata.svn.sourceforge.net/bigdata/?rev=4026&view=rev Author: thompsonbry Date: 2010-12-21 13:55:51 +0000 (Tue, 21 Dec 2010) Log Message: ----------- Bug fix to predicateUsage(). It was using the wrong iterator (itr vs itr2) in the while(itr.hasNext()) construction. Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-12-20 23:39:20 UTC (rev 4025) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-12-21 13:55:51 UTC (rev 4026) @@ -2747,7 +2747,7 @@ final StringBuilder sb = new StringBuilder(); - while (itr.hasNext()) { + while (itr2.hasNext()) { final BigdataValue term = itr2.next(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tho...@us...> - 2010-12-20 23:39:27
|
Revision: 4025 http://bigdata.svn.sourceforge.net/bigdata/?rev=4025&view=rev Author: thompsonbry Date: 2010-12-20 23:39:20 +0000 (Mon, 20 Dec 2010) Log Message: ----------- Modifications to the FullTextIndex to provide support for exact matches (finally) in addition to prefix matches and to disable the fieldId field for new RDF database instances (since that field was unused and just wasting space in the indices). Modified Paths: -------------- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/IKeyBuilder.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hit.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hiterator.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/ReadIndexTask.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermFrequencyData.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermMetadata.java branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TokenBuffer.java branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestKeyBuilder.java branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataRDFFullTextIndex.java branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/IKeyBuilder.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/IKeyBuilder.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/IKeyBuilder.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -212,7 +212,7 @@ * <p> * Note: While the ASCII 
encoding happens to use one byte for each character * that is NOT true of the Unicode encoding. The space requirements for the - * Unicode encoding depend on the text, the Local, the collator strength, + * Unicode encoding depend on the text, the Locale, the collator strength, * and the collator decomposition mode. * <p> * Note: The <i>successor</i> option is designed to encapsulate some Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/btree/keys/KeyBuilder.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -627,7 +627,7 @@ } - final public KeyBuilder append(double d) { + final public KeyBuilder append(final double d) { // performance tweak. if (len + 8 > buf.length) ensureCapacity(len+8); @@ -648,7 +648,7 @@ } - static public double decodeDouble(byte[] key,int off) { + static public double decodeDouble(final byte[] key,final int off) { long v = decodeLong(key, off); @@ -663,7 +663,7 @@ } - final public KeyBuilder append(float f) { + final public KeyBuilder append(final float f) { // performance tweak. 
if (len + 4 > buf.length) ensureCapacity(len+4); Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/FullTextIndex.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -76,7 +76,6 @@ import com.bigdata.journal.IIndexManager; import com.bigdata.journal.IResourceLock; import com.bigdata.journal.ITx; -import com.bigdata.journal.TemporaryStore; import com.bigdata.journal.TimestampUtility; import com.bigdata.relation.AbstractRelation; import com.bigdata.relation.accesspath.IAccessPath; @@ -344,6 +343,17 @@ String INDEXER_TIMEOUT = "indexer.timeout"; String DEFAULT_INDEXER_TIMEOUT = "1000"; + + /** + * When <code>true</code>, the <code>fieldId</code> is stored as part of + * the key. When <code>false</code>, each key will be four bytes + * shorter. Applications which do not use <code>fieldId</code> are + * encouraged to disable it when creating the {@link FullTextIndex}. + */ + String FIELDS_ENABLED = FullTextIndex.class.getName() + + ".fieldsEnabled"; + + String DEFAULT_FIELDS_ENABLED = "true"; } @@ -367,6 +377,21 @@ private final long timeout; /** + * @see Options#FIELDS_ENABLED + */ + private final boolean fieldsEnabled; + + /** + * Return the value configured by the {@link Options#FIELDS_ENABLED} + * property. + */ + public boolean isFieldsEnabled() { + + return fieldsEnabled; + + } + + /** * The basename of the search index. */ public static final transient String NAME_SEARCH = "search"; @@ -434,6 +459,16 @@ } + { + + fieldsEnabled = Boolean.parseBoolean(properties.getProperty( + Options.FIELDS_ENABLED, Options.DEFAULT_FIELDS_ENABLED)); + + if (log.isInfoEnabled()) + log.info(Options.FIELDS_ENABLED + "=" + fieldsEnabled); + + } + /* * Note: defer resolution of the index. 
*/ @@ -950,7 +985,7 @@ return tokenStream; } - + /** * Create a key for a term. * @@ -959,21 +994,26 @@ * @param token * The token whose key will be formed. * @param successor - * When <code>true</code> the successor of the token's text - * will be encoded into the key. This is useful when forming the + * When <code>true</code> the successor of the token's text will + * be encoded into the key. This is useful when forming the * <i>toKey</i> in a search. + * @param fieldsEnabled + * When <code>true</code> the <code>fieldId</code> will be + * included as a component in the generated key. When + * <code>false</code> it will not be present in the generated + * key. * @param docId - * The document identifier - use {@link Long#MIN_VALUE} when forming a - * search key. + * The document identifier - use {@link Long#MIN_VALUE} when + * forming a search key. * @param fieldId - * The field identifier - use {@link Integer#MIN_VALUE} when forming a - * search key. + * The field identifier - use {@link Integer#MIN_VALUE} when + * forming a search key. * * @return The key. */ static protected byte[] getTokenKey(final IKeyBuilder keyBuilder, - final String termText, final boolean successor, final long docId, - final int fieldId) { + final String termText, final boolean successor, + final boolean fieldsEnabled, final long docId, final int fieldId) { keyBuilder.reset(); @@ -982,14 +1022,16 @@ keyBuilder.append(docId); - keyBuilder.append(fieldId); + if (fieldsEnabled) + keyBuilder.append(fieldId); final byte[] key = keyBuilder.getKey(); if (log.isDebugEnabled()) { - log.debug("{" + termText + "," + docId + "," + fieldId - + "}, successor=" + (successor?"true ":"false") + ", key=" + log.debug("{" + termText + "," + docId + + (fieldsEnabled ? "," + fieldId : "") + "}, successor=" + + (successor ? "true " : "false") + ", key=" + BytesUtil.toString(key)); } @@ -1146,15 +1188,15 @@ * The collection of hits is scored and hits that fail a threshold are * discarded. 
The remaining hits are placed into a total order and the * caller is returned an iterator which can read from that order. If the - * operation is interrupted, then only those {@link IHit}s that have - * already been computed will be returned. + * operation is interrupted, then only those {@link IHit}s that have already + * been computed will be returned. * * @param query * The query (it will be parsed into tokens). * @param languageCode * The language code that should be used when tokenizing the - * query -or- <code>null</code> to use the default - * {@link Locale}). + * query -or- <code>null</code> to use the default {@link Locale} + * ). * @param minCosine * The minimum cosine that will be returned. * @param maxRank @@ -1169,22 +1211,17 @@ * * @return The hit list. * - * @todo note that we can not incrementally materialize the search results - * since they are being delivered in rank order and the search - * algorithm achieves rank order by a post-search sort. mg4j supports + * @todo Note: we can not incrementally materialize the search results since + * they are being delivered in rank order and the search algorithm + * achieves rank order by a post-search sort. mg4j supports * incremental evaluation and would be a full-featured replacement for * this package. * - * @todo manage the life of the result sets and perhaps serialize them onto - * an index backed by a {@link TemporaryStore}. The fromIndex/toIndex - * might be with respect to that short-term result set. Reclaim result - * sets after N seconds. - * - * @todo consider other kinds of queries that we might write here. For + * @todo Consider other kinds of queries that we might write here. For * example, full text search should support AND OR NOT operators for * tokens. * - * @todo allow search within field(s). This will be a filter on the range + * @todo Allow search within field(s). 
This will be a filter on the range * iterator that is sent to the data service such that the search * terms are visited only when they occur in the matching field(s). */ @@ -1331,7 +1368,7 @@ log.info("Done: " + nhits + " hits in " + elapsed + "ms"); /* - * Note: The caller will only see those documents which satisify both + * Note: The caller will only see those documents which satisfy both * constraints (minCosine and maxRank). Results below a threshold will * be pruned. Any relevant results exceeding the maxRank will be pruned. */ Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hit.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hit.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hit.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -14,17 +14,17 @@ */ public class Hit implements IHit, Comparable<Hit>{ - final protected static Logger log = Logger.getLogger(Hit.class); + final private static transient Logger log = Logger.getLogger(Hit.class); - /** - * True iff the {@link #log} level is INFO or less. - */ - final protected static boolean INFO = log.isInfoEnabled(); - - /** - * True iff the {@link #log} level is DEBUG or less. - */ - final protected static boolean DEBUG = log.isDebugEnabled(); +// /** +// * True iff the {@link #log} level is INFO or less. +// */ +// final protected static boolean INFO = log.isInfoEnabled(); +// +// /** +// * True iff the {@link #log} level is DEBUG or less. +// */ +// final protected static boolean DEBUG = log.isDebugEnabled(); /** note: defaults to an illegal value. */ private long docId = -1; @@ -47,7 +47,7 @@ } - void setDocId(long docId) { + void setDocId(final long docId) { this.docId = docId; @@ -77,13 +77,13 @@ /** * Adds another component to the cosine. 
*/ - public void add(String term, double weight) { + public void add(final String term, final double weight) { cosine += weight; nterms ++; - if(DEBUG) { + if(log.isDebugEnabled()) { log.debug("docId=" + docId + ", term: " + term + ", nterms=" + nterms + ", weight=" + weight + ", cosine=" + cosine); @@ -102,7 +102,7 @@ * Sorts {@link Hit}s into decreasing cosine order with ties broken by the * the <code>docId</code>. */ - public int compareTo(Hit o) { + public int compareTo(final Hit o) { if (cosine < o.cosine) return 1; Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hiterator.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hiterator.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/Hiterator.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -100,7 +100,7 @@ /** * The #of hits (approximate). * - * @todo differentiate between the #of hits and the #of hits that satisify + * @todo differentiate between the #of hits and the #of hits that satisfy * the minCosine and maxRank criteria * * @todo this and other search engine metadata (elapsed time) might go on a @@ -144,7 +144,7 @@ if(!hasNext()) throw new NoSuchElementException(); - A tmp = nextHit; + final A tmp = nextHit; nextHit = null; @@ -171,9 +171,6 @@ /** * @throws UnsupportedOperationException - * - * @todo should this even be supported? it makes no sense unless you can - * restart the iterator. 
*/ public void remove() { Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/ReadIndexTask.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/ReadIndexTask.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/ReadIndexTask.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -35,7 +35,7 @@ */ public class ReadIndexTask implements Callable<Object> { - final protected static Logger log = Logger.getLogger(ReadIndexTask.class); + final private static Logger log = Logger.getLogger(ReadIndexTask.class); // /** // * True iff the {@link #log} level is INFO or less. @@ -48,8 +48,10 @@ // final protected static boolean DEBUG = log.isDebugEnabled(); private final String queryTerm; - private final boolean prefixMatch; +// private final boolean prefixMatch; +// private final int exactMatchLength; private final double queryTermWeight; + private final boolean fieldsEnabled; // private final FullTextIndex searchEngine; private final ConcurrentHashMap<Long, Hit> hits; private final ITupleIterator itr; @@ -95,36 +97,69 @@ this.queryTerm = termText; - this.prefixMatch = prefixMatch; +// this.prefixMatch = prefixMatch; this.queryTermWeight = queryTermWeight; + this.fieldsEnabled = searchEngine.isFieldsEnabled(); + // this.searchEngine = searchEngine; this.hits = hits; final IKeyBuilder keyBuilder = searchEngine.getKeyBuilder(); + +// if (!prefixMatch) { +// /* +// * Figure out how many bytes are in the Unicode sort key for the +// * termText. In order to be an exact match, the visited tuples may +// * not have more than this many bytes before the start of the docId +// * field. (It is not possible for them to have fewer bytes since the +// * Unicode sort key prefix length will be the same for both the +// * fromKey and the toKey. The Unicode sort key for the toKey is +// * formed by adding one to the LSB position). 
+// */ +// +// keyBuilder +// .appendText(termText, true/* unicode */, false/* successor */); +// +// exactMatchLength = keyBuilder.getLength(); +// +// } else { +// +// // ignored. +// exactMatchLength = -1; +// +// } + /* + * FIXME This would appear to start in the middle of the docId and + * fieldId value space since I would assume that Long.MIN_VALUE is the + * first docId. + */ final byte[] fromKey = FullTextIndex.getTokenKey(keyBuilder, termText, - false/* successor */, 0L/* docId */, 0/* fieldId */); + false/* successor */, fieldsEnabled, Long.MIN_VALUE/* docId */, + Integer.MIN_VALUE/* fieldId */); final byte[] toKey; // FIXME prefixMatch can not be turned off right now. -// if (prefixMatch) { + if (prefixMatch) { /* * Accepts anything starting with the search term. E.g., given * "bro", it will match "broom" and "brown" but not "break". */ + toKey = FullTextIndex.getTokenKey(keyBuilder, termText, + true/* successor */, fieldsEnabled, Long.MIN_VALUE/* docId */, + Integer.MIN_VALUE/* fieldId */); + } else { + /* + * Accepts only those entries that exactly match the search term. + */ toKey = FullTextIndex.getTokenKey(keyBuilder, termText, - true/* successor */, Long.MIN_VALUE/* docId */, Integer.MIN_VALUE/* fieldId */); -// } else { -// /* -// * Accepts only those entries that exactly match the search term. -// */ -// toKey = FullTextIndex.getTokenKey(keyBuilder, termText+"\0", -// false/* successor */, 0L/* docId */, 0/* fieldId */); -// } + false/* successor */, fieldsEnabled, + Long.MAX_VALUE/* docId */, Integer.MAX_VALUE/* fieldId */); + } if (log.isDebugEnabled()) log.debug // System.err.println @@ -161,7 +196,8 @@ while (itr.hasNext()) { - if (t.isInterrupted()) { + // don't test for interrupted on each result -- too much work. 
+ if (nhits % 100 == 0 && t.isInterrupted()) { if (log.isInfoEnabled()) log.info("Interrupted: queryTerm=" + queryTerm + ", nhits=" @@ -182,10 +218,29 @@ // - Bytes.SIZEOF_LONG /*docId*/ - Bytes.SIZEOF_INT/*fieldId*/); final ByteArrayBuffer kbuf = tuple.getKeyBuffer(); + + /* + * The byte offset of the docId in the key. + * + * Note: This is also the byte length of the match on the unicode + * sort key, which appears at the head of the key. + */ + final int docIdOffset = kbuf.limit() - Bytes.SIZEOF_LONG /* docId */ + - (fieldsEnabled ? Bytes.SIZEOF_INT/* fieldId */: 0); + +// if (!prefixMatch && docIdOffset != exactMatchLength) { +// +// /* +// * The Unicode sort key associated with this tuple is longer +// * than the given token - hence it can not be an exact match. +// */ +// +// continue; +// +// } // decode the document identifier. - final long docId = KeyBuilder.decodeLong(kbuf.array(), kbuf.limit() - - Bytes.SIZEOF_LONG /*docId*/ - Bytes.SIZEOF_INT/*fieldId*/); + final long docId = KeyBuilder.decodeLong(kbuf.array(), docIdOffset); /* * Extract the term frequency and normalized term-frequency (term Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermFrequencyData.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermFrequencyData.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermFrequencyData.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -27,8 +27,9 @@ */ final public HashMap<String,TermMetadata> terms = new HashMap<String,TermMetadata>(); - public TermFrequencyData(long docId, int fieldId, String token) { - + public TermFrequencyData(final long docId, final int fieldId, + final String token) { + this.docId = docId; this.fieldId = fieldId; @@ -43,9 +44,10 @@ * @param token * The token. * - * @return true iff the termText did not previously exist for this {@link TermFrequencyData}. 
+ * @return true iff the termText did not previously exist for this + * {@link TermFrequencyData}. */ - public boolean add(String token) { + public boolean add(final String token) { final String termText = token; @@ -127,7 +129,7 @@ for(TermMetadata md : terms.values()) { - int termFreq = md.termFreq(); + final int termFreq = md.termFreq(); md.localTermWeight = (double)termFreq / magnitude; Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermMetadata.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermMetadata.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TermMetadata.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -2,8 +2,6 @@ import java.util.ArrayList; -import org.apache.lucene.analysis.Token; - /** * Mutable metadata for the occurrences of a term within a field of some * document. @@ -33,8 +31,7 @@ */ public double localTermWeight; - // @todo make private? - ArrayList<String> occurrences = new ArrayList<String>(); + private final ArrayList<String> occurrences = new ArrayList<String>(); /** * Add an occurrence. 
Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TokenBuffer.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TokenBuffer.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/java/com/bigdata/search/TokenBuffer.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -3,10 +3,7 @@ import java.util.Arrays; import java.util.Iterator; -import org.apache.log4j.Level; import org.apache.log4j.Logger; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenStream; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KV; @@ -319,12 +316,12 @@ final String termText = termMetadata.termText(); final byte[] key = FullTextIndex.getTokenKey(keyBuilder, termText, - false/* successor */, docId, fieldId); + false/* successor */, textIndexer.isFieldsEnabled(), docId, fieldId); if(log.isDebugEnabled()) { log.debug("{" + termText + "," + docId + "," + fieldId + "}: #occurences=" - + termMetadata.occurrences.size()); + + termMetadata.termFreq()); } final byte[] val = textIndexer.getTokenValue(buf, termMetadata); Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestKeyBuilder.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestKeyBuilder.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestKeyBuilder.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -33,7 +33,6 @@ import junit.framework.TestCase2; import com.bigdata.btree.BytesUtil; -import com.bigdata.btree.ITupleSerializer; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.search.FullTextIndex.Options; @@ -43,8 +42,6 @@ * * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ - * - * @todo write tests in which the docId is a 
negative long integer. */ public class TestKeyBuilder extends TestCase2 { @@ -94,36 +91,35 @@ private IKeyBuilder keyBuilder; /** - * @todo this test needs to populate an index with terms that would match on - * a prefix match and then verify that they do match and that terms - * that are not prefix matches do not match. - */ - public void test_prefixMatch_unicode() { - - } - - /** * Unit test verifies the relative sort order of a term and its successor, * of a prefix of that term and its successor, and that the prefix and the * successor of the prefix are ordered before and after the term and its * successor respectively. */ public void test_keyOrder() { - + + doKeyOrderTest(-1L/*docId*/, 0/*fieldId*/, true/*fieldsEnabled*/); + doKeyOrderTest(0L/*docId*/, 0/*fieldId*/, true/*fieldsEnabled*/); + doKeyOrderTest(1L/*docId*/, 12/*fieldId*/, true/*fieldsEnabled*/); + + doKeyOrderTest(-1L/*docId*/, 0/*fieldId*/, false/*fieldsEnabled*/); + doKeyOrderTest(0L/*docId*/, 0/*fieldId*/, false/*fieldsEnabled*/); + doKeyOrderTest(1L/*docId*/, 0/*fieldId*/, false/*fieldsEnabled*/); + + } + + protected void doKeyOrderTest(final long docId, final int fieldId, + final boolean fieldsEnabled) { + final IKeyBuilder keyBuilder = getKeyBuilder(); - final long docId = 0L; - - final int fieldId = 0; - - // the full term. final byte[] k0 = FullTextIndex.getTokenKey(keyBuilder, "brown", - false/* successor */, docId, fieldId); + false/* successor */, fieldsEnabled, docId, fieldId); // the successor of the full term. final byte[] k0s = FullTextIndex.getTokenKey(keyBuilder, "brown", - true/* successor */, docId, fieldId); + true/* successor */, fieldsEnabled, docId, fieldId); // verify sort key order for the full term and its successor. assertTrue(BytesUtil.compareBytes(k0, k0s) < 0); @@ -131,11 +127,11 @@ // a prefix of that term. 
final byte[] k1 = FullTextIndex.getTokenKey(keyBuilder, "bro", - false/* successor */, docId, fieldId); + false/* successor */, fieldsEnabled, docId, fieldId); // the successor of that prefix. final byte[] k1s = FullTextIndex.getTokenKey(keyBuilder, "bro", - true/* successor */, docId, fieldId); + true/* successor */, fieldsEnabled, docId, fieldId); // verify sort key order for prefix and its successor. assertTrue(BytesUtil.compareBytes(k0, k0s) < 0); @@ -184,76 +180,4 @@ } -/* - * @todo Finish the exact match test. - */ -// /** -// * @todo this test needs to populate an index with terms that would match if -// * we were allowing a prefix match and then verify that the terms are -// * NOT matched. it should also verify that terms that are exact -// * matches are matched. -// * -// * @todo also test ability to extract the docId and fieldId from the key. -// * -// * @todo refactor into an {@link ITupleSerializer}. -// * -// * @todo make the fieldId optional in the key. this needs to be part of the -// * state of the {@link ITupleSerializer}. -// */ -// public void test_exactMatch_unicode() { -// -// final IKeyBuilder keyBuilder = getKeyBuilder(); -// -// final long docId = 0L; -// -// final int fieldId = 0; -// -// -// // the full term. -// final byte[] termSortKey = FullTextIndex.getTokenKey(keyBuilder, "brown", -// false/* successor */, docId, fieldId); -// -// // the successor of the full term allowing prefix matches. -// final byte[] termPrefixMatchSuccessor = FullTextIndex.getTokenKey(keyBuilder, "brown", -// true/* successor */, docId, fieldId); -// -//// // the successor of the full term for an exact match. -//// final byte[] termExactMatchSuccessor = FullTextIndex.getTokenKey( -//// keyBuilder, "brown \0", true/* successor */, docId, fieldId); -//// -//// /* -//// * verify sort key order for the full term and its prefix match -//// * successor. 
-//// */ -//// LT(termSortKey, termPrefixMatchSuccessor); -// -//// /* -//// * verify sort key for the full term orders before its exact match -//// * successor. -//// */ -//// LT(termSortKey, termExactMatchSuccessor); -// -// // term that is longer than the full term. -// final byte[] longerTermSortKey = FullTextIndex.getTokenKey(keyBuilder, -// "browns", false/* successor */, docId, fieldId); -// -// // verify sort order for the full term and the longer term. -// LT(termSortKey, longerTermSortKey); -// -// /* -// * verify longer term is less than the prefix match successor of the -// * full term. -// */ -// LT(longerTermSortKey, termPrefixMatchSuccessor); -// -//// /* -//// * verify longer term is greater than the exact match successor of the -//// * full term. -//// */ -//// GT(longerTermSortKey, termExactMatchSuccessor); -// -// fail("finish test"); -// -// } - } Modified: branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata/src/test/com/bigdata/search/TestPrefixSearch.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -36,8 +36,10 @@ import com.bigdata.journal.ProxyTestCase; /** - * Unit test for prefix search. Prefix search allows a query "bro" to match - * "brown" rather than requiring an exact match on the search term(s). + * Unit test for prefix and exact match searches. Prefix search allows a query + * "bro" to match "brown" rather than requiring an exact match on the search + * term(s). Exact match searches should only visit tuples which match the full + * length of the token (once encoded as a Unicode sort key). 
* * @author <a href="mailto:tho...@us...">Bryan Thompson</a> * @version $Id$ @@ -148,12 +150,28 @@ } /* + * Search (one term, prefix match on that term in both documents + * (the prefix match is an exact match in this case)). + */ + { + + final Hiterator itr = ndx + .search("brown", languageCode, false/* prefixMatch */); + + if (INFO) + log.info("hits:" + itr); + + assertEquals(2, itr.size()); + + } + + /* * Search (one term, exact match on that term in both documents). */ { final Hiterator itr = ndx - .search("brown", languageCode, false/*prefixMatch*/); + .search("brown", languageCode, true/* prefixMatch */); if(INFO) log.info("hits:" + itr); @@ -176,18 +194,65 @@ } /* + * Search (one term, no exact match on that term). + */ + { + + final Hiterator itr = ndx + .search("bro", languageCode, false/* prefixMatch */); + + if (INFO) + log.info("hits:" + itr); + + assertEquals(0, itr.size()); + + } + + /* * Search (one term, prefix match on that term in one document). */ { - final Hiterator itr = ndx.search("qui", languageCode); + final Hiterator itr = ndx + .search("qui", languageCode, true/* prefixMatch */); - if(INFO) log.info("hits:" + itr); + if (INFO) + log.info("hits:" + itr); assertEquals(1, itr.size()); } + /* + * Search (one term, no exact match on that term). + */ + { + + final Hiterator itr = ndx + .search("qui", languageCode, false/* prefixMatch */); + + if (INFO) + log.info("hits:" + itr); + + assertEquals(0, itr.size()); + + } + + /* + * Search (one term, exact match on that term in one document). 
+ */ + { + + final Hiterator itr = ndx + .search("quick", languageCode, false/* prefixMatch */); + + if (INFO) + log.info("hits:" + itr); + + assertEquals(1, itr.size()); + + } + } finally { indexManager.destroy(); Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataRDFFullTextIndex.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataRDFFullTextIndex.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/lexicon/BigdataRDFFullTextIndex.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -37,7 +37,6 @@ import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.store.AbstractTripleStore; -import com.bigdata.rdf.store.IRawTripleStore; import com.bigdata.search.FullTextIndex; import com.bigdata.search.TokenBuffer; @@ -94,7 +93,9 @@ } - public void index(int capacity, Iterator<BigdataValue> valuesIterator) { + public void index(final int capacity, + final Iterator<BigdataValue> valuesIterator) { + final TokenBuffer buffer = new TokenBuffer(capacity, this); int n = 0; @@ -141,8 +142,7 @@ final IV termId = val.getIV(); - assert termId != null; // the termId must have been - // assigned. + assert termId != null; // the termId must have been assigned. 
// don't bother text indexing inline values for now if (termId.isInline()) { Modified: branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java =================================================================== --- branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-12-20 12:06:07 UTC (rev 4024) +++ branches/JOURNAL_HA_BRANCH/bigdata-rdf/src/java/com/bigdata/rdf/store/AbstractTripleStore.java 2010-12-20 23:39:20 UTC (rev 4025) @@ -880,7 +880,7 @@ String DEFAULT_INLINE_BNODES = "false"; - /** + /** * Set up database to inline date/times directly into the statement * indices rather than using the lexicon to map them to term identifiers * and back. Date times will be converted to UTC, then stored as @@ -894,14 +894,14 @@ String DEFAULT_INLINE_DATE_TIMES = "false"; /** - * The name of the {@link IExtensionFactory} class. The implementation - * MUST declare a constructor that accepts an - * {@link IDatatypeURIResolver} as its only argument. The - * {@link IExtension}s constructed by the factory need a resolver to - * resolve datatype URIs to term identifiers in the database. - * - * @see #DEFAULT_EXTENSION_FACTORY_CLASS - */ + * The name of the {@link IExtensionFactory} class. The implementation + * MUST declare a constructor that accepts an + * {@link IDatatypeURIResolver} as its only argument. The + * {@link IExtension}s constructed by the factory need a resolver to + * resolve datatype URIs to term identifiers in the database. + * + * @see #DEFAULT_EXTENSION_FACTORY_CLASS + */ String EXTENSION_FACTORY_CLASS = AbstractTripleStore.class.getName() + ".extensionFactoryClass"; @@ -1255,6 +1255,20 @@ // set property that will let the contained relations locate their container. 
tmp.setProperty(RelationSchema.CONTAINER, getNamespace()); + if (Boolean.valueOf(tmp.getProperty(Options.TEXT_INDEX, + Options.DEFAULT_TEXT_INDEX))) { + + /* + * If the text index is enabled for a new kb instance, then disable + * the fieldId component of the full text index key since it is not + * used by the RDF database and will just waste space in the index. + * + * Note: Also see below where this is set on the global row store. + */ + tmp.setProperty(FullTextIndex.Options.FIELDS_ENABLED, "false"); + + } + final IResourceLock resourceLock = acquireExclusiveLock(); try { @@ -1336,7 +1350,7 @@ ((BaseAxioms)axioms).init(); } - + /* * Update the global row store to set the axioms and the * vocabulary objects. @@ -1354,6 +1368,14 @@ // vocabulary. map.put(TripleStoreSchema.VOCABULARY, vocab); + if (lexiconRelation.isTextIndex()) { + /* + * Per the logic and commentary at the top of create(), + * disable this option on the global row store. + */ + map.put(FullTextIndex.Options.FIELDS_ENABLED, "false"); + } + // Write the map on the row store. getIndexManager().getGlobalRowStore().write( RelationSchema.INSTANCE, map); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |