From: <amy...@us...> - 2012-05-10 11:51:17
|
Revision: 2054 http://ogsa-dai.svn.sourceforge.net/ogsa-dai/?rev=2054&view=rev Author: amykrause Date: 2012-05-10 11:51:09 +0000 (Thu, 10 May 2012) Log Message: ----------- Builder for equi-join batch implementation of join. Added more operators and expressions to cardinality statistics optimiser. Fixed bug with COUNT(*) in scalar group by operator. Modified Paths: -------------- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/common/simple/SimpleTableSchema.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/BatchTableScanBuilder.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/PipelineWorkflowBuilder.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/AttributeStatistics.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityEstimator.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityInnerThetaJoinVisitor.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalitySelectVisitor.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalitySemiJoinVisitor.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityUtils.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/HistogramBasedAttributeStatistics.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/HistogramBinAligner.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/HistogramCardinalityUtils.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/ScalarAttributeStatistics.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/ScalarCardinalityUtils.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/SimpleCardinalityStatistics.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/operators/GroupByOperator.java 
sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/operators/ScalarGroupByOperator.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/optimiser/implosion/ExtendedTableScanImplosionOptimiser.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/optimiser/join/EquiJoinBatch.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/optimiser/join/EquiJoinBatchPlan.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/optimiser/join/InnerThetaJoinPossibleJoin.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/optimiser/join/JoinImplementationOptimiser.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/expression/arithmetic/visitors/AttrExtrArithmeticExprVisitor.java Added Paths: ----------- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/BatchJoinBuilder.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/OLD_BatchTableScanBuilder.java Removed Paths: ------------- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/OLD_EstimatingOperatorVisitor.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/OLD_HistogramBasedAttributeStatistics.java sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/OLD_HistogramUtils.java Modified: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/common/simple/SimpleTableSchema.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/common/simple/SimpleTableSchema.java 2012-04-26 04:20:18 UTC (rev 2053) +++ sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/common/simple/SimpleTableSchema.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -97,10 +97,30 @@ String localName, DataNode dataNode) { + this(schema, localName, dataNode, null); + } + + /** + * Constructs a new table schema. 
+ * + * @param schema + * table schema + * @param localName + * table name + * @param dataNode + * data node that accesses the table + */ + public SimpleTableSchema( + TableMetaData schema, + String localName, + DataNode dataNode, + PhysicalSchema physicalSchema) + { mDataNodeTables = new ArrayList<DataNodeTable>(); mDataNodeTables.add(new SimpleDataNodeTable(dataNode, localName)); mName = schema.getName(); mSchema = schema; + mPhysicalSchema = physicalSchema; } public TableMetaData getSchema() Added: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/BatchJoinBuilder.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/BatchJoinBuilder.java (rev 0) +++ sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/BatchJoinBuilder.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -0,0 +1,404 @@ +// Copyright (c) The University of Edinburgh, 2012. +// +// LICENCE-START +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// LICENCE-END + +package uk.org.ogsadai.dqp.execute.workflow; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import uk.org.ogsadai.client.toolkit.SingleActivityOutput; +import uk.org.ogsadai.client.toolkit.activities.generic.GenericActivity; +import uk.org.ogsadai.common.msgs.DAILogger; +import uk.org.ogsadai.data.DataValue; +import uk.org.ogsadai.data.IntegerData; +import uk.org.ogsadai.data.StringData; +import uk.org.ogsadai.dqp.execute.ActivityConstructionException; +import uk.org.ogsadai.dqp.lqp.Attribute; +import uk.org.ogsadai.dqp.lqp.Operator; +import uk.org.ogsadai.dqp.lqp.operators.InnerThetaJoinOperator; +import uk.org.ogsadai.tuple.ColumnMetadata; +import uk.org.ogsadai.tuple.TupleMetadata; + +/** + * Builds activities for the QUERY APPLY operator. The apply controls + * a child table scan so that it is executed multiple times with subsets of + * the data that comes from the other input. This builder works in conjunction + * with the BatchTableScanBuilder for building the FILTERED TABLE SCAN operator. + * + * @author The OGSA-DAI Project Team. + */ +public class BatchJoinBuilder implements ActivityPipelineBuilder +{ + public static final DAILogger LOG = + DAILogger.getLogger(BatchJoinBuilder.class); + + /** Copyright notice. 
*/ + private static final String COPYRIGHT_NOTICE = + "Copyright (c) The University of Edinburgh, 2012"; + + // default batch size + private DataValue mBatchSize = new IntegerData(100); + + public void setBatchSize(int batchSize) + { + mBatchSize = new IntegerData(batchSize); + } + + @Override + public SingleActivityOutput build( + Operator op, + List<SingleActivityOutput> outputs, PipelineWorkflowBuilder builder) + throws ActivityConstructionException + { + InnerThetaJoinOperator operator = (InnerThetaJoinOperator)op; + + int batchDataIndex = (Integer)operator.getAnnotation("branch"); + int dataIndex = 1 - batchDataIndex; + TupleMetadata batchMetadata = + operator.getChild(batchDataIndex).getHeading().getTupleMetadata(); + TupleMetadata readFirstMetadata = + operator.getChild(dataIndex).getHeading().getTupleMetadata(); + + + // Construct the ListSplit activity + GenericActivity listSplit = + new GenericActivity("uk.org.ogsadai.ListSplit"); + listSplit.createInput("input"); + listSplit.createInput("size"); + listSplit.createOutput("output"); + listSplit.connectInput("input", outputs.get(dataIndex)); + listSplit.addInput("size", mBatchSize); + + GenericActivity tee = + new GenericActivity("uk.org.ogsadai.Tee"); + tee.createInput("input"); + tee.createOutput("output", 2); + tee.connectInput("input", listSplit.getOutput("output")); + + // Build the join operator + List<SingleActivityOutput> joinInputs = + new ArrayList<SingleActivityOutput>(2); + + // output from ListSplit must be read first by the join activity + // otherwise the workflow may deadlock + joinInputs.add(tee.getOutput("output",0)); + joinInputs.add(outputs.get(batchDataIndex)); + + SingleActivityOutput joinOutput = buildJoinForRepeatedUse( + operator, "data1", 0, + readFirstMetadata, batchMetadata, + joinInputs, builder); + + // Construct the ToSingleList activity + GenericActivity toSingleList = + new GenericActivity("uk.org.ogsadai.ToSingleList"); + toSingleList.createInput("input"); + 
toSingleList.createOutput("output"); + toSingleList.connectInput("input", joinOutput); + + // Construct GroupBy activity + GenericActivity groupBy = + new GenericActivity("uk.org.ogsadai.GroupBy"); + groupBy.createInput("data"); + groupBy.createInput("columnIds"); + groupBy.createInput("aggregates"); + groupBy.createInput("resultColumnNames"); + groupBy.createOutput("result"); + + // Connect columnIds input + GenericActivity columnIdsControlledRepeat = + BuilderUtils.createControlledRepeat(); + columnIdsControlledRepeat.connectInput( + "input",tee.getOutput("output",1)); + columnIdsControlledRepeat.addInputList( + "repeatedInput", new String[] {} ); + groupBy.connectInput( + "columnIds", + columnIdsControlledRepeat.getOutput("repeatedOutput")); + + @SuppressWarnings("unchecked") + Set<Attribute> attrToBind = + (Set<Attribute>) operator.getAnnotation("attributesToBind"); +// intersect( +// operator.getChild(dataIndex).getHeading().getAttributes(), +// operator.getPredicate().getAttributes(), +// AttributeMatchMode.NAME_AND_NULL_SOURCE); + + String[] aggregateExpressions = + new String[] { + "STRING_AGGREGATE(" + attrToBind.iterator().next() + ")"}; + String[] aggregateNames = new String[] { "dqpBatchData_267276" }; + + // Connect aggregates input + GenericActivity aggregatesControlledRepeat = + BuilderUtils.createControlledRepeat(); + aggregatesControlledRepeat.connectInput( + "input",columnIdsControlledRepeat.getOutput("output")); + aggregatesControlledRepeat.addInputList( + "repeatedInput", aggregateExpressions); + groupBy.connectInput( + "aggregates", + aggregatesControlledRepeat.getOutput("repeatedOutput")); + + // Connect resultColumnNames input + GenericActivity resultColumnNamesControlledRepeat = + BuilderUtils.createControlledRepeat(); + resultColumnNamesControlledRepeat.connectInput( + "input",aggregatesControlledRepeat.getOutput("output")); + resultColumnNamesControlledRepeat.addInputList( + "repeatedInput", aggregateNames); + groupBy.connectInput( + 
"resultColumnNames", + resultColumnNamesControlledRepeat.getOutput("repeatedOutput")); + + // Connect the data input + groupBy.connectInput( + "data", + resultColumnNamesControlledRepeat.getOutput("output")); + + // Connect the GroupBy activity to the external input of the filtered + // table scan operator + Operator filteredTableScanOp = getFilteredTableScanOperator(operator); + builder.addOutput( + operator, filteredTableScanOp, groupBy.getOutput("result")); + + // Add all activities to the builder + builder.add(listSplit); + builder.add(tee); + builder.add(toSingleList); + builder.add(groupBy); + builder.add(columnIdsControlledRepeat); + builder.add(aggregatesControlledRepeat); + builder.add(resultColumnNamesControlledRepeat); + + return toSingleList.getOutput("output"); + } + + + private static Operator getFilteredTableScanOperator(Operator op) + throws ActivityConstructionException + { + Operator tableScan = (Operator)op.getAnnotation("tableScan"); + if (tableScan == null) + { + throw new ActivityConstructionException( + new Exception("No filtered table scan operator found.")); + } + return tableScan; + } + + /** + * Builds the theta join for repeated use. This is required when the theta + * join will be invoked multiple times as part of a batch join execution. + * When the theta join is invoked multiple times we require more controlled + * repeaters to coordinate the inputs. 
+ * + * @param operator + * LQP operator for which to build activities + * @param readFirst + * the side which is read first by the join activity + * @param controllingOutput + * the output which controls the repeated expression + * @param outputs + * outputs from the children of the operator which have already + * been built + * @param builder + * pipeline workflow builder + * + * @return the unconnected activity output + * + * @throws ActivityConstructionException + */ + private SingleActivityOutput buildJoinForRepeatedUse( + InnerThetaJoinOperator operator, + String readFirst, + int controllingOutput, + TupleMetadata metadataLeft, TupleMetadata metadataRight, + List<SingleActivityOutput> outputs, + PipelineWorkflowBuilder builder) + throws ActivityConstructionException + { + String sql = operator.getPredicate().toString(); + + GenericActivity conditionRepeater = + BuilderUtils.createControlledRepeat(); + builder.add(conditionRepeater); + GenericActivity readFirstRepeater = + BuilderUtils.createControlledRepeat(); + builder.add(readFirstRepeater); + + SingleActivityOutput dataOutputs[] = new SingleActivityOutput[]{ + outputs.get(0), outputs.get(1) + }; + + conditionRepeater.connectInput("input", outputs.get(controllingOutput)); + conditionRepeater.addInput("repeatedInput", sql ); + + readFirstRepeater.connectInput( + "input", conditionRepeater.getOutput("output")); + readFirstRepeater.addInput("repeatedInput", readFirst); + + GenericActivity join = + new GenericActivity("uk.org.ogsadai.TupleThetaJoin"); + join.createInput("data1"); + join.createInput("data2"); + join.createInput("condition"); + join.createOutput("result", GenericActivity.LIMITED_VALIDATION); + + dataOutputs[controllingOutput] = + addMetadataForRepeatedUse( + metadataLeft, metadataRight, + join, + builder, + readFirstRepeater.getOutput("output")); + + join.connectInput("data1", dataOutputs[0]); + join.connectInput("data2", dataOutputs[1]); + + join.connectInput( + "condition", 
conditionRepeater.getOutput("repeatedOutput")); + + join.createInput("readFirst"); + join.connectInput( + "readFirst", readFirstRepeater.getOutput("repeatedOutput")); + builder.add(join); + + return join.getOutput("result"); + } + + /** + * Adds the tuple metadata as specified in the metadata annotation. + * + * @param operator inner theta join operator + * + * @param join join activity + * + * @param builder + * pipeline workflow builder + * + * @param controllerOutput + * output of the controller used to control the metadata + * + * @return + * output that now returns the data that was obtained from the + * controllerOutput + */ + private SingleActivityOutput addMetadataForRepeatedUse( + TupleMetadata left, TupleMetadata right, + GenericActivity join, + PipelineWorkflowBuilder builder, + SingleActivityOutput controllerOutput) + { + SingleActivityOutput resultOutput = controllerOutput; + + String metadata = convertMetadata(left); + join.createInput("data1Metadata"); + GenericActivity repeater = BuilderUtils.createControlledRepeat(); + builder.add(repeater); + repeater.connectInput("input", resultOutput); + repeater.addInput("repeatedInput", new StringData(metadata)); + resultOutput = repeater.getOutput("output"); + join.connectInput( + "data1Metadata", repeater.getOutput("repeatedOutput")); + + metadata = convertMetadata(right); + join.createInput("data2Metadata"); + repeater = BuilderUtils.createControlledRepeat(); + builder.add(repeater); + repeater.connectInput("input", resultOutput); + repeater.addInput("repeatedInput", new StringData(metadata)); + resultOutput = repeater.getOutput("output"); + join.connectInput( + "data2Metadata", repeater.getOutput("repeatedOutput")); + + return resultOutput; + } + + /** + * Converts a <code>TupleMetadata</code> object into a string representation + * that can be passed to a join activity. 
+ * + * @param metadata metadata + * + * @return metadata rendered as a string suitable for the join activity + */ + private String convertMetadata(TupleMetadata metadata) + { + StringBuilder result = new StringBuilder(); + for (int i=0; i<metadata.getColumnCount(); i++) + { + ColumnMetadata column = metadata.getColumnMetadata(i); + result.append(column.getName()); + result.append("\n"); + result.append(column.getTableName()); + result.append("\n"); + result.append(column.getResourceID()); + result.append("\n"); + result.append(column.getDRES()); + result.append("\n"); + result.append(column.getType()); + result.append("\n"); + result.append(column.isNullable()); + result.append("\n"); + result.append(column.getPrecision()); + result.append("\n"); + result.append(column.getColumnDisplaySize()); + result.append("\n"); + } + return result.toString(); + } + + +// /** +// * Intersects a list and a collection. Returns a set of those entries in the +// * list that are contained on the collection. 
+// * +// * @param list +// * list +// * @param collection +// * collection +// * @param attributeMatchMode +// * attribute match mode +// * +// * @return intersection of the list and the collection +// */ +// private Set<Attribute> intersect( +// List<Attribute> list, +// Collection<Attribute> collection, +// AttributeMatchMode attributeMatchMode) +// { +// Set<Attribute> intersection = new HashSet<Attribute>(); +// for (Attribute a1 : list) +// { +// boolean found = false; +// for (Attribute a2 : collection) +// { +// if (a1.equals(a2, attributeMatchMode)) +// { +// found = true; +// break; +// } +// } +// if (found) +// { +// intersection.add(a1); +// } +// } +// return intersection; +// } + +} Modified: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/BatchTableScanBuilder.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/BatchTableScanBuilder.java 2012-04-26 04:20:18 UTC (rev 2053) +++ sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/BatchTableScanBuilder.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -1,4 +1,4 @@ -// Copyright (c) The University of Edinburgh, 2011. +// Copyright (c) The University of Edinburgh, 2011-2012. 
// // LICENCE-START // Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,14 +17,18 @@ package uk.org.ogsadai.dqp.execute.workflow; import java.util.List; +import java.util.Set; import uk.org.ogsadai.client.toolkit.SingleActivityOutput; import uk.org.ogsadai.client.toolkit.activities.generic.GenericActivity; import uk.org.ogsadai.dqp.execute.ActivityConstructionException; import uk.org.ogsadai.dqp.lqp.Attribute; +import uk.org.ogsadai.dqp.lqp.CommonPredicate; import uk.org.ogsadai.dqp.lqp.Operator; -import uk.org.ogsadai.dqp.lqp.OperatorID; -import uk.org.ogsadai.dqp.lqp.operators.FilteredTableScanOperator; +import uk.org.ogsadai.dqp.lqp.Predicate; +import uk.org.ogsadai.dqp.lqp.operators.TableScanOperator; +import uk.org.ogsadai.dqp.lqp.optimiser.join.DQPReplace; +import uk.org.ogsadai.dqp.lqp.udf.repository.SimpleFunctionRepository; /** * Builds activities for FILTER TABLE SCAN operator such that the table @@ -50,7 +54,7 @@ { /** Copyright notice. */ private static final String COPYRIGHT_NOTICE = - "Copyright (c) The University of Edinburgh, 2011"; + "Copyright (c) The University of Edinburgh, 2011-2012"; @Override public SingleActivityOutput build( @@ -59,12 +63,12 @@ PipelineWorkflowBuilder builder) throws ActivityConstructionException { - FilteredTableScanOperator operator = (FilteredTableScanOperator)op; + TableScanOperator operator = (TableScanOperator)op; // The data that controls the parameterization and repeated execution - // of the SQL query comes from the QueryApply operation above this + // of the SQL query comes from the Join operation above this // operator in the query plan. 
- Operator queryApply = findQueryApplyOperator(operator); + Operator batchJoin = findBatchJoinOperator(operator); // Construct CreateEmptyTupleList activity GenericActivity createEmptyTupleList = @@ -95,8 +99,8 @@ ifEmptyList.createOutput("outputNonEmpty"); // Tell the workflow builder about this input, it will be connected to - // and output constructed in the builder for query apply - builder.addInput(queryApply, operator, ifEmptyList.getInput("data")); + // and output constructed in the builder for batch join + builder.addInput(batchJoin, operator, ifEmptyList.getInput("data")); ifEmptyList.connectInput( "content", createEmptyTupleList.getOutput("result")); @@ -107,6 +111,20 @@ stringReplace.createInput("data"); stringReplace.createOutput("result"); + // Put an IN expression into the filtered table scan + @SuppressWarnings("unchecked") + Set<Attribute> attrForInClause = + (Set<Attribute>) operator.getAnnotation("attributesForInClause"); + StringBuilder sb = new StringBuilder(); + sb.append(attrForInClause.iterator().next()); + sb.append(" IN ( DQP_REPLACE(").append("dqpBatchData_267276").append(") )"); + + SimpleFunctionRepository funcRepo = new SimpleFunctionRepository(); + funcRepo.register(DQPReplace.class); + + Predicate pred = new CommonPredicate(sb.toString(), funcRepo); + operator.getQuery().addPredicate(pred); + // Need to repeat the template input for each instance of the // data input so we add a controlled repeater GenericActivity controlledRepeater = @@ -179,24 +197,18 @@ // Final output is the output of the endIf activity return endIf.getOutput("output"); } - - /** - * Finds the first query apply operator above this operator in the query - * plan. - * - * @param op this operator - * - * @return query apply operator above this operator in the query plan, or - * <code>null</code> is not is found (but this should never be the case). 
- */ - private static Operator findQueryApplyOperator(Operator op) + + private static Operator findBatchJoinOperator(Operator op) + throws ActivityConstructionException { - Operator current = op; - while (current.getID() != OperatorID.QUERY_APPLY) + Operator join = (Operator)op.getAnnotation("join"); + if (join == null) { - current = current.getParent(); - if (current == null) return null; + throw new ActivityConstructionException( + new Exception("No batch join operator found.")); } - return current; + return join; + } + } Added: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/OLD_BatchTableScanBuilder.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/OLD_BatchTableScanBuilder.java (rev 0) +++ sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/OLD_BatchTableScanBuilder.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -0,0 +1,202 @@ +// Copyright (c) The University of Edinburgh, 2011. +// +// LICENCE-START +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// LICENCE-END + +package uk.org.ogsadai.dqp.execute.workflow; + +import java.util.List; + +import uk.org.ogsadai.client.toolkit.SingleActivityOutput; +import uk.org.ogsadai.client.toolkit.activities.generic.GenericActivity; +import uk.org.ogsadai.dqp.execute.ActivityConstructionException; +import uk.org.ogsadai.dqp.lqp.Attribute; +import uk.org.ogsadai.dqp.lqp.Operator; +import uk.org.ogsadai.dqp.lqp.OperatorID; +import uk.org.ogsadai.dqp.lqp.operators.FilteredTableScanOperator; + +/** + * Builds activities for FILTER TABLE SCAN operator such that the table + * scan is implemented as multiple queries to the data source each time + * specifying the exact values that should be obtained. This builder works + * in conjunction with the BatchQueryApplyBuilder for building the QUERY APPLY + * operator. + * <p> + * Three activities (ignoring controllers) are built and chained together in the + * following order: + * <ul> + * <li>uk.org.ogsadai.StringReplace</li> + * <li>uk.org.ogsadai.SQLQuery</li> + * <li>uk.org.ogsadai.Rename</li> + * </ul> + * This activity chain will be given multiple inputs - one for each execution + * of an SQL query. The StringReplace activity will be used to replace a + * marker in a template SQL query to produce the actual SQL query. + * + * @author The OGSA-DAI Project Team. + */ +public class OLD_BatchTableScanBuilder implements ActivityPipelineBuilder +{ + /** Copyright notice. */ + private static final String COPYRIGHT_NOTICE = + "Copyright (c) The University of Edinburgh, 2011"; + + @Override + public SingleActivityOutput build( + Operator op, + List<SingleActivityOutput> outputs, + PipelineWorkflowBuilder builder) + throws ActivityConstructionException + { + FilteredTableScanOperator operator = (FilteredTableScanOperator)op; + + // The data that controls the parameterization and repeated execution + // of the SQL query comes from the QueryApply operation above this + // operator in the query plan. 
+ Operator queryApply = findQueryApplyOperator(operator); + + // Construct CreateEmptyTupleList activity + GenericActivity createEmptyTupleList = + new GenericActivity("uk.org.ogsadai.CreateEmptyTupleList"); + createEmptyTupleList.createInput("resultColumnNames"); + createEmptyTupleList.createInput("resultColumnTypes"); + createEmptyTupleList.createOutput("result"); + + List<Attribute> attributes = operator.getHeading().getAttributes(); + String[] resultColumnNames = new String[attributes.size()]; + int[] resultColumnTypes = new int[attributes.size()]; + for (int i=0; i<resultColumnNames.length; i++) + { + resultColumnNames[i] = attributes.get(i).toString(); + resultColumnTypes[i] = attributes.get(i).getType(); + } + createEmptyTupleList.addInputList( + "resultColumnNames", resultColumnNames); + createEmptyTupleList.addInputList( + "resultColumnTypes", resultColumnTypes); + + // Construct IfEmptyList activity + GenericActivity ifEmptyList = + new GenericActivity("uk.org.ogsadai.astro.IfEmptyList"); + ifEmptyList.createInput("data"); + ifEmptyList.createInput("content"); + ifEmptyList.createOutput("outputEmpty"); + ifEmptyList.createOutput("outputNonEmpty"); + + // Tell the workflow builder about this input, it will be connected to + // and output constructed in the builder for query apply + builder.addInput(queryApply, operator, ifEmptyList.getInput("data")); + ifEmptyList.connectInput( + "content", createEmptyTupleList.getOutput("result")); + + // Construct the StringReplace activity + GenericActivity stringReplace = + new GenericActivity("uk.org.ogsadai.StringReplace"); + stringReplace.createInput("template"); + stringReplace.createInput("data"); + stringReplace.createOutput("result"); + + // Need to repeat the template input for each instance of the + // data input so we add a controlled repeater + GenericActivity controlledRepeater = + BuilderUtils.createControlledRepeat(); + controlledRepeater.addInput( + "repeatedInput", + 
operator.getPhysicalDatabaseQuery().replace( + "DQP_REPLACE", "$REPLACE")); + controlledRepeater.connectInput( + "input", ifEmptyList.getOutput("outputNonEmpty")); + + stringReplace.connectInput( + "template", controlledRepeater.getOutput("repeatedOutput")); + stringReplace.connectInput( + "data", controlledRepeater.getOutput("output")); + + // Construct the SQLQuery activity + GenericActivity query = new GenericActivity("uk.org.ogsadai.SQLQuery"); + query.createInput("expression"); + query.createOutput("data"); + query.setResourceID(operator.getDataNode().getResourceID()); + query.connectInput("expression", stringReplace.getOutput("result")); + + // Controlled Repeat for MetadataRename resultColumnNames + GenericActivity controlledRepeatResultColumnNames = + BuilderUtils.createControlledRepeat(); + + // Construct the Rename activity + GenericActivity rename = + new GenericActivity("uk.org.ogsadai.MetadataRename"); + rename.createInput("data"); + rename.createInput("resultColumnNames"); + rename.createOutput("result", GenericActivity.LIMITED_VALIDATION); + + String[] newNames = new String[attributes.size()]; + for (int i=0; i<newNames.length; i++) + { + newNames[i] = attributes.get(i).toString(); + } + + controlledRepeatResultColumnNames.connectInput( + "input", query.getOutput("data")); + controlledRepeatResultColumnNames.addInputList( + "repeatedInput", newNames); + + rename.connectInput( + "data", controlledRepeatResultColumnNames.getOutput("output")); + rename.connectInput( + "resultColumnNames", + controlledRepeatResultColumnNames.getOutput("repeatedOutput")); + + // Create the EndIf activity + GenericActivity endIf = new GenericActivity("uk.org.ogsadai.EndIf"); + endIf.createInput("trueInput"); + endIf.createInput("falseInput"); + endIf.createOutput("output"); + endIf.connectInput("trueInput", rename.getOutput("result")); + endIf.connectInput("falseInput", ifEmptyList.getOutput("outputEmpty")); + + // Add all activities to the builder + 
builder.add(createEmptyTupleList); + builder.add(ifEmptyList); + builder.add(controlledRepeater); + builder.add(stringReplace); + builder.add(query); + builder.add(rename); + builder.add(controlledRepeatResultColumnNames); + builder.add(endIf); + + // Final output is the output of the endIf activity + return endIf.getOutput("output"); + } + + /** + * Finds the first query apply operator above this operator in the query + * plan. + * + * @param op this operator + * + * @return query apply operator above this operator in the query plan, or + * <code>null</code> is not is found (but this should never be the case). + */ + private static Operator findQueryApplyOperator(Operator op) + { + Operator current = op; + while (current.getID() != OperatorID.QUERY_APPLY) + { + current = current.getParent(); + if (current == null) return null; + } + return current; + } +} Modified: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/PipelineWorkflowBuilder.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/PipelineWorkflowBuilder.java 2012-04-26 04:20:18 UTC (rev 2053) +++ sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/execute/workflow/PipelineWorkflowBuilder.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -210,7 +210,9 @@ throw new ActivityConstructionException( new Exception("Configuration exception: " + "No builder defined for operator " + - operator.getClass().getName())); + operator.getClass().getName() + + " and implementation annotation '" + + implementation + "'.")); } mCurrentOperator = operator; Modified: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/AttributeStatistics.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/AttributeStatistics.java 2012-04-26 04:20:18 UTC (rev 2053) +++ 
sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/AttributeStatistics.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -6,6 +6,8 @@ double getNumRows(); double getNumValues(); + + double getNumNulls(); AttributeStatistics processEqualConstant(Object constant); Modified: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityEstimator.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityEstimator.java 2012-04-26 04:20:18 UTC (rev 2053) +++ sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityEstimator.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -1,3 +1,19 @@ +// Copyright (c) The University of Edinburgh, 2012. +// +// LICENCE-START +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// LICENCE-END + package uk.org.ogsadai.dqp.lqp.cardinality; import java.util.List; @@ -39,8 +55,18 @@ import uk.org.ogsadai.dqp.lqp.operators.UnionOperator; import uk.org.ogsadai.expression.arithmetic.ArithmeticExpression; +/** + * An operator visitor that estimates cardinalities after the visited operator + * has been applied. + * + * @author The OGSA-DAI Project Team. 
+ */ public class CardinalityEstimator implements OperatorVisitor { + /** Copyright notice */ + private static final String COPYRIGHT_NOTICE = + "Copyright (c) The University of Edinburgh, 2012."; + private DataDictionary mDataDictionary; private ApplyOperator mWrapperApplyOperator; @@ -48,11 +74,26 @@ { } + /** + * Set the wrapper operator for the visited operator. The wrapper (if not + * <code>null</code>) must be used to access child operators. + * + * @param applyOp + * wrapper operator + */ public void setApplyWrapper(ApplyOperator applyOp) { mWrapperApplyOperator = applyOp; } + /** + * Set the data dictionary. The data dictionary is only used when visiting + * table scan operators. It must contain physical schemas for the visited + * table scans. + * + * @param dataDictionary + * data dictionary including physical schemas + */ public void setDataDictionary(DataDictionary dataDictionary) { mDataDictionary = dataDictionary; @@ -88,7 +129,6 @@ stats, expression); result.addAttributeStatistics(attr, attrStats); } - addStatistics(operator, result); } @@ -96,7 +136,6 @@ public void visit(RenameOperator operator) { CardinalityStatistics stats = getChildStatistics(operator, 0); - RenameMap renameMap = operator.getRenameMap(); SimpleCardinalityStatistics result = new SimpleCardinalityStatistics(); @@ -163,8 +202,6 @@ // output relation is the number of values in the grouping column. // The grouping attribute is the only attribute that retains its // distribution - no information about other attributes. - // TODO : If the grouping attribute is a key, the output stats are - // the same as the input stats. 
for (Attribute attribute : operator.getHeading().getAttributes()) { if (groupingAttr != attribute) @@ -189,8 +226,7 @@ AttributeStatistics attrStats = stats.getStatistics(attr); max *= attrStats.getNumValues(); } - // if there is more than one grouping attribute the product might be - // too large + // product might be too large double numRows = Math.min(max, stats.getCardinality()); for (Attribute attribute : operator.getHeading().getAttributes()) @@ -215,40 +251,48 @@ @Override public void visit(TableScanOperator operator) { + // if there already is an annotation then we don't touch it + // it means that the table scan has been imploded + if (Annotation.getCardinalityStatisticsAnnotation(operator) != null) + { + return; + } + try { - // Go to physical data dictionary and get table scan count + // Go to physical data dictionary and get table scan statistics PhysicalSchema physicalSchema = - mDataDictionary.getTableSchema(operator.getTableName()). - getPhysicalSchema(); + mDataDictionary.getTableSchema( + operator.getTableName()).getPhysicalSchema(); - if (physicalSchema != null && - physicalSchema instanceof StatisticsPhysicalSchema) + if (physicalSchema == null) { - StatisticsPhysicalSchema statsPhysicalSchema = - (StatisticsPhysicalSchema) physicalSchema; - - CardinalityStatistics cardStats = - statsPhysicalSchema.getCardinalityStatistics(); - - cardStats = CardinalityUtils.makeRenamedCopy( - cardStats, - operator.getTableName()); - - addStatistics(operator, cardStats); - } - else if (physicalSchema == null) - { throw new RuntimeException( "No physical schema data for table : " + operator.getTableName()); } - else + else if (!(physicalSchema instanceof StatisticsPhysicalSchema)) { throw new RuntimeException( "Physical schema not StatisticsPhysicalSchema, table : " + operator.getTableName()); } + + StatisticsPhysicalSchema statsPhysicalSchema = + (StatisticsPhysicalSchema) physicalSchema; + + CardinalityStatistics cardStats = + 
statsPhysicalSchema.getCardinalityStatistics(); + + SimpleCardinalityStatistics renamedStatistics = + new SimpleCardinalityStatistics(); + for (Attribute attr : operator.getHeading().getAttributes()) + { + Attribute physAttr = operator.getPhysicalAttribute(attr); + AttributeStatistics attrStats = cardStats.getStatistics(physAttr); + renamedStatistics.addAttributeStatistics(attr, attrStats); + } + addStatistics(operator, renamedStatistics); } catch(TableNotFoundException ex) { @@ -307,28 +351,47 @@ @Override public void visit(FullOuterJoinOperator operator) { - // TODO Auto-generated method stub - CardinalityStatistics lhsStats = getChildStatistics(operator, 0); CardinalityStatistics rhsStats = getChildStatistics(operator, 1); + CardinalityInnerThetaJoinVisitor visitor = + new CardinalityInnerThetaJoinVisitor(lhsStats, rhsStats); + operator.getPredicate().getExpression().accept(visitor); + CardinalityStatistics result = + CardinalityUtils.sum(visitor.getResult(), lhsStats); + result = CardinalityUtils.sum(result, rhsStats); + addStatistics(operator, result); } @Override public void visit(LeftOuterJoinOperator operator) { - // TODO Auto-generated method stub + // cardinality estimate is the size of LEFT INNER_JOIN RIGHT + // plus size of LEFT CardinalityStatistics lhsStats = getChildStatistics(operator, 0); CardinalityStatistics rhsStats = getChildStatistics(operator, 1); + CardinalityInnerThetaJoinVisitor visitor = + new CardinalityInnerThetaJoinVisitor(lhsStats, rhsStats); + operator.getPredicate().getExpression().accept(visitor); + CardinalityStatistics result = + CardinalityUtils.sum(visitor.getResult(), lhsStats); + addStatistics(operator, result); } @Override public void visit(RightOuterJoinOperator operator) { - // TODO Auto-generated method stub + // cardinality estimate is the size of LEFT INNER_JOIN RIGHT + // plus size of RIGHT CardinalityStatistics lhsStats = getChildStatistics(operator, 0); CardinalityStatistics rhsStats = getChildStatistics(operator, 
1); + CardinalityInnerThetaJoinVisitor visitor = + new CardinalityInnerThetaJoinVisitor(lhsStats, rhsStats); + operator.getPredicate().getExpression().accept(visitor); + CardinalityStatistics result = + CardinalityUtils.sum(visitor.getResult(), rhsStats); + addStatistics(operator, result); } @Override @@ -361,12 +424,9 @@ @Override public void visit(UnionOperator operator) { - // TODO Auto-generated method stub - CardinalityStatistics lhsStats = getChildStatistics(operator, 0); CardinalityStatistics rhsStats = getChildStatistics(operator, 1); - throw new UnsupportedOperationException( - "Operator type " + operator + " is not supported."); + addStatistics(operator, CardinalityUtils.sum(lhsStats, rhsStats)); } @Override @@ -418,28 +478,60 @@ "Operator type " + operator + " is not supported."); } + /** + * Adds cardinality statistics and cardinality annotations to the given + * operator. + * + * @param operator + * operator to be annotated + * @param stats + * cardinality statistics + */ private void addStatistics( - Operator op, CardinalityStatistics stats) + Operator operator, CardinalityStatistics stats) { - Annotation.addCardinalityStatisticsAnnotation(op, stats); - Annotation.addCardinalityAnnotation(op, stats.getCardinality()); + Annotation.addCardinalityStatisticsAnnotation(operator, stats); + Annotation.addCardinalityAnnotation(operator, stats.getCardinality()); } - - private CardinalityStatistics getChildStatistics(Operator op, int index) + + /** + * Gets the statistics annotation of the child at the given index. 
+ * + * @param operator + * operator + * @param index + * child index + * @return cardinality statistics of the child + * @throws RuntimeException + * if the child operator has no statistics annotation + */ + private CardinalityStatistics getChildStatistics( + Operator operator, int index) { - Operator child = getChild(op, index); + Operator child = getChild(operator, index); return getStatistics(child); } - - private CardinalityStatistics getStatistics(Operator op) + + /** + * Returns the cardinality statistics annotation of the given operator or + * raises an exception if the annotation is not set. + * + * @param operator + * operator + * @return cardinality statistics for the operator + * @throws RuntimeException + * if the operator has no statistics annotation + */ + private CardinalityStatistics getStatistics(Operator operator) { CardinalityStatistics cardStats = - Annotation.getCardinalityStatisticsAnnotation(op); + Annotation.getCardinalityStatisticsAnnotation(operator); if (cardStats == null) { throw new RuntimeException( - "Operator " + op + " has no CardinalityStatistics annotation"); + "Operator " + operator + + " has no CardinalityStatistics annotation"); } else @@ -447,14 +539,25 @@ return cardStats; } } - - private Operator getChild(Operator op, int index) + + /** + * Retrieves the child of the given operator within the LQP. If the operator + * is wrapped by an apply operator then the apply operator's child is + * returned. 
+ * + * @param operator + * parent operator + * @param index + * child index + * @return child of the operator in the query plan + */ + private Operator getChild(Operator operator, int index) { if (mWrapperApplyOperator!= null) { return mWrapperApplyOperator.getChild(index); } - return op.getChild(index); + return operator.getChild(index); } Modified: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityInnerThetaJoinVisitor.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityInnerThetaJoinVisitor.java 2012-04-26 04:20:18 UTC (rev 2053) +++ sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityInnerThetaJoinVisitor.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -7,6 +7,7 @@ import uk.org.ogsadai.expression.AndExpression; import uk.org.ogsadai.expression.ComparisonExpression; import uk.org.ogsadai.expression.EqualExpression; +import uk.org.ogsadai.expression.Expression; import uk.org.ogsadai.expression.ExpressionVisitor; import uk.org.ogsadai.expression.GreaterThanExpression; import uk.org.ogsadai.expression.GreaterThanOrEqualExpression; @@ -49,47 +50,47 @@ } @Override - public void visitAndExpression(AndExpression expression) { - // TODO Auto-generated method stub - + public void visitAndExpression(AndExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); } @Override - public void visitOrExpression(OrExpression expression) { - // TODO Auto-generated method stub - + public void visitOrExpression(OrExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); } @Override - public void visitNotExpression(NotExpression expression) { - // TODO Auto-generated method stub - + public void visitNotExpression(NotExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); } @Override - public void 
visitLessThanExpression(LessThanExpression expression) { - // TODO Auto-generated method stub - + public void visitLessThanExpression(LessThanExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); } @Override public void visitLessThanOrEqualExpression( - LessThanOrEqualExpression expression) { - // TODO Auto-generated method stub - + LessThanOrEqualExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); } @Override - public void visitGreaterThanExpression(GreaterThanExpression expression) { - // TODO Auto-generated method stub - + public void visitGreaterThanExpression(GreaterThanExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); } @Override public void visitGreaterThanOrEqualExpression( - GreaterThanOrEqualExpression expression) { - // TODO Auto-generated method stub - + GreaterThanOrEqualExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); } @Override @@ -99,31 +100,36 @@ mLeftChild, mRightChild, expression, ArithmeticOperator.EQUAL); + + if (mResult == null) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); + } } @Override - public void visitNotEqualExpression(NotEqualExpression expression) { - // TODO Auto-generated method stub - + public void visitNotEqualExpression(NotEqualExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); } @Override - public void visitIsNullExpression(IsNullExpression expression) { - // TODO Auto-generated method stub - + public void visitIsNullExpression(IsNullExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); } @Override - public void visitLikeExpression(LikeExpression expression) { - // TODO Auto-generated method stub - + public void visitLikeExpression(LikeExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, 
expression); } @Override - public void visitInExpression(InExpression expression) { - // TODO Auto-generated method stub - + public void visitInExpression(InExpression expression) + { + mResult = processProductSelect(mLeftChild, mRightChild, expression); } private CardinalityStatistics processArithmeticOperator( @@ -217,4 +223,21 @@ } } } + + private static CardinalityStatistics processProductSelect( + CardinalityStatistics leftChild, + CardinalityStatistics rightChild, + Expression expression) + { + // first estimate the statistics for a product + CardinalityStatistics cardStats = + CardinalityUtils.product(leftChild, rightChild); + + // then apply a select with the given expression + CardinalitySelectVisitor visitor = + new CardinalitySelectVisitor(null, cardStats); + expression.accept(visitor); + return visitor.getResult(); + + } } Modified: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalitySelectVisitor.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalitySelectVisitor.java 2012-04-26 04:20:18 UTC (rev 2053) +++ sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalitySelectVisitor.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -173,6 +173,14 @@ (TableColumn)expressions[1]); } + if (operandTypes[0] == OperandType.CONST && + operandTypes[1] == OperandType.CONST) + { + return processConstOperatorConst( + inputStatistics, + expression); + } + // Leaves the cardinality estimates as they are return inputStatistics; @@ -231,6 +239,28 @@ // Leaves the cardinality estimates as they are } + private static CardinalityStatistics processConstOperatorConst( + CardinalityStatistics inputStatistics, + ComparisonExpression expression) + { + try + { + if (expression.evaluate(null)) + { + return inputStatistics; + } + else + { + // if the expression is false then the result set is empty + return 
CardinalityUtils.rescaleToZero(inputStatistics); + } + } + catch (ExpressionEvaluationException e) + { + throw new RuntimeException(e); + } + } + private static CardinalityStatistics processAttrOperatorConstant( CardinalityStatistics inputStatistics, TableColumn tableColumn, Modified: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalitySemiJoinVisitor.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalitySemiJoinVisitor.java 2012-04-26 04:20:18 UTC (rev 2053) +++ sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalitySemiJoinVisitor.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -163,7 +163,6 @@ // Find statistics AttributeStatistics lhsAttrStats; AttributeStatistics rhsAttrStats; - if (lhsStats.contains(attr1)) { lhsAttrStats = lhsStats.getStatistics(attr1); Modified: sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityUtils.java =================================================================== --- sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityUtils.java 2012-04-26 04:20:18 UTC (rev 2053) +++ sandbox/dqp/server/src/main/java/uk/org/ogsadai/dqp/lqp/cardinality/CardinalityUtils.java 2012-05-10 11:51:09 UTC (rev 2054) @@ -8,6 +8,9 @@ import uk.org.ogsadai.dqp.lqp.AttributeImpl; import uk.org.ogsadai.dqp.lqp.Operator; import uk.org.ogsadai.dqp.lqp.OperatorID; +import uk.org.ogsadai.dqp.lqp.RenameMap; +import uk.org.ogsadai.dqp.lqp.exceptions.AmbiguousAttributeException; +import uk.org.ogsadai.dqp.lqp.exceptions.AmbiguousMappingException; import uk.org.ogsadai.dqp.lqp.operators.ApplyOperator; public class CardinalityUtils @@ -29,6 +32,35 @@ } + /** + * Maps renamed attributes in the input cardinality statistics to the + * original names and creates a copy of the statistics with the original + * attribute names. 
+ * + * @param stats + * input statistics + * @param renameMap + * rename map + * @return a copy of the input statistics with the original attribute names + * @throws AmbiguousMappingException + * @throws AmbiguousAttributeException + */ + public static CardinalityStatistics mapRenamedToOriginal( + CardinalityStatistics stats, RenameMap renameMap) + throws AmbiguousMappingException, AmbiguousAttributeException + { + SimpleCardinalityStatistics result = + new SimpleCardinalityStatistics(); + for (Entry<Attribute, AttributeStatistics> entry : + stats.getStatistics().entrySet()) + { + result.addAttributeStatistics( + renameMap.getOriginalAttribute( + entry.getKey()), entry.getValue()); + } + return result; + } + public static CardinalityStatistics difference( CardinalityStatistics stats0, CardinalityStatistics stats1) @@ -46,9 +78,8 @@ result.addAttributeStatistics(entry.getKey(), resultAttrStats); } return result; - } + } - private static AttributeStatistics difference( AttributeStatistics attrStats0, AttributeStatistics attrStats1) @@ -64,6 +95,52 @@ return ScalarCardinalityUtils.difference(attrStats0, attrStats1); } } + + public static CardinalityStatistics sum( + CardinalityStatistics stats0, + CardinalityStatistics stats1) + { + SimpleCardinalityStatistics result = new SimpleCardinalityStatistics(); + + for (Entry<Attribute, AttributeStatistics> entry : + stats0.getStatistics().entrySet()) + { + if (stats1.contains(entry.getKey())) + { + AttributeStatistics attrStats = + stats1.getStatistics(entry.getKey()); + + AttributeStatistics resultAttrStats = + sum(entry.getValue(), attrStats); + result.addAttributeStatistics(entry.getKey(), resultAttrStats); + } + else + { + // add NULL for each other attribute + result.addAttributeStatistics( + entry.getKey(), + new ScalarAttributeStatistics(entry.getValue().getNumRows(), 1)); + ... [truncated message content] |