| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.asterix.optimizer.rules.cbo; |
| |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.asterix.common.annotations.IndexedNLJoinExpressionAnnotation; |
| import org.apache.asterix.common.annotations.SkipSecondaryIndexSearchExpressionAnnotation; |
| import org.apache.asterix.metadata.entities.Index; |
| import org.apache.commons.lang3.mutable.Mutable; |
| import org.apache.commons.lang3.mutable.MutableInt; |
| import org.apache.commons.lang3.mutable.MutableObject; |
| import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; |
| import org.apache.hyracks.algebricks.common.utils.Pair; |
| import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; |
| import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator; |
| import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext; |
| import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag; |
| import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag; |
| import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable; |
| import org.apache.hyracks.algebricks.core.algebra.base.OperatorAnnotations; |
| import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression; |
| import org.apache.hyracks.algebricks.core.algebra.expressions.BroadcastExpressionAnnotation; |
| import org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression; |
| import org.apache.hyracks.algebricks.core.algebra.expressions.HashJoinExpressionAnnotation; |
| import org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionAnnotation; |
| import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression; |
| import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions; |
| import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; |
| import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator; |
| import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator; |
| import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator; |
| import org.apache.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator; |
| import org.apache.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator; |
| import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator; |
| import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities; |
| import org.apache.hyracks.algebricks.core.algebra.prettyprint.IPlanPrettyPrinter; |
| import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule; |
| import org.apache.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig; |
| import org.apache.logging.log4j.LogManager; |
| import org.apache.logging.log4j.Logger; |
| |
| public class EnumerateJoinsRule implements IAlgebraicRewriteRule { |
| |
| private static final Logger LOGGER = LogManager.getLogger(); |
| |
| protected final JoinEnum joinEnum; |
| |
| public EnumerateJoinsRule(JoinEnum joinEnum) { |
| this.joinEnum = joinEnum; |
| } |
| |
| @Override |
| public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) { |
| return false; |
| } |
| |
| /** |
| * If this method returns false, that means CBO code will not be used to optimize the part of the join graph that |
| * was passed in. Currently, we do not optimize query graphs with outer joins in them. If the CBO code is activated |
| * a new join graph (with inputs possibly switched) will be created and the return value will be true. |
| */ |
| @Override |
| public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) |
| throws AlgebricksException { |
| boolean cboMode = this.getCBOMode(context); |
| boolean cboTestMode = this.getCBOTestMode(context); |
| if (!(cboMode || cboTestMode)) { |
| return false; |
| } |
| // If we reach here, then either cboMode or cboTestMode is true. |
| // If cboTestMode is true, then we use predefined cardinalities for datasets for asterixdb regression tests. |
| // If cboMode is true, then all datasets need to have samples, otherwise the check in doAllDataSourcesHaveSamples() |
| // further below will return false. |
| ILogicalOperator op = opRef.getValue(); |
| if (!((op.getOperatorTag() == LogicalOperatorTag.INNERJOIN) |
| || ((op.getOperatorTag() == LogicalOperatorTag.DISTRIBUTE_RESULT)))) { |
| return false; |
| } |
| |
| // if this join has already been seen before, no need to apply the rule again |
| if (context.checkIfInDontApplySet(this, op)) { |
| return false; |
| } |
| |
| List<ILogicalOperator> joinOps = new ArrayList<>(); |
| List<ILogicalOperator> internalEdges = new ArrayList<>(); |
| HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap = new HashMap<>(); |
| // The data scan operators. Will be in the order of the from clause. |
| // Important for position ordering when assigning bits to join expressions. |
| List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps = new ArrayList<>(); |
| HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap = new HashMap<>(); |
| |
| IPlanPrettyPrinter pp = context.getPrettyPrinter(); |
| printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan1"); |
| boolean canTransform = getJoinOpsAndLeafInputs(op, emptyTupleAndDataSourceOps, joinLeafInputsHashMap, |
| dataSourceEmptyTupleHashMap, internalEdges, joinOps); |
| |
| if (!canTransform) { |
| return false; |
| } |
| |
| // if this happens, something in the input plan is not acceptable to the new code. |
| if (emptyTupleAndDataSourceOps.size() != joinLeafInputsHashMap.size()) { |
| throw new IllegalStateException( |
| "ETS " + emptyTupleAndDataSourceOps.size() + " != LI " + joinLeafInputsHashMap.size()); |
| } |
| |
| printPlan(pp, (AbstractLogicalOperator) op, "Original Whole plan2"); |
| |
| int numberOfFromTerms = emptyTupleAndDataSourceOps.size(); |
| |
| joinEnum.initEnum((AbstractLogicalOperator) op, cboMode, cboTestMode, numberOfFromTerms, |
| emptyTupleAndDataSourceOps, joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps, |
| context); |
| |
| if (cboMode) { |
| if (!doAllDataSourcesHaveSamples(emptyTupleAndDataSourceOps, context)) { |
| return false; |
| } |
| } |
| |
| if (internalEdges.size() > 0) { |
| pushAssignsIntoLeafInputs(joinLeafInputsHashMap, internalEdges); |
| } |
| |
| int cheapestPlan = joinEnum.enumerateJoins(); // MAIN CALL INTO CBO |
| if (cheapestPlan == PlanNode.NO_PLAN) { |
| return false; |
| } |
| |
| PlanNode cheapestPlanNode = joinEnum.allPlans.get(cheapestPlan); |
| |
| if (numberOfFromTerms > 1) { |
| buildNewTree(cheapestPlanNode, joinLeafInputsHashMap, joinOps, new MutableInt(0)); |
| printPlan(pp, (AbstractLogicalOperator) joinOps.get(0), "New Whole Plan after buildNewTree 1"); |
| ILogicalOperator root = addConstantInternalEdgesAtTheTop(joinOps.get(0), internalEdges); |
| printPlan(pp, (AbstractLogicalOperator) joinOps.get(0), "New Whole Plan after buildNewTree 2"); |
| printPlan(pp, (AbstractLogicalOperator) root, "New Whole Plan after buildNewTree"); |
| // this will be the new root |
| opRef.setValue(root); |
| if (LOGGER.isTraceEnabled()) { |
| LOGGER.trace("---------------------------- Printing Leaf Inputs"); |
| printLeafPlans(pp, joinLeafInputsHashMap); |
| // print joins starting from the bottom |
| for (int i = joinOps.size() - 1; i >= 0; i--) { |
| printPlan(pp, (AbstractLogicalOperator) joinOps.get(i), "join " + i); |
| } |
| printPlan(pp, (AbstractLogicalOperator) joinOps.get(0), "New Whole Plan"); |
| printPlan(pp, (AbstractLogicalOperator) root, "New Whole Plan"); |
| } |
| |
| // turn of this rule for all joins in this set (subtree) |
| for (ILogicalOperator joinOp : joinOps) { |
| context.addToDontApplySet(this, joinOp); |
| } |
| |
| } else { |
| buildNewTree(cheapestPlanNode, joinLeafInputsHashMap); |
| } |
| |
| return true; |
| } |
| |
| private boolean getCBOMode(IOptimizationContext context) { |
| PhysicalOptimizationConfig physOptConfig = context.getPhysicalOptimizationConfig(); |
| return physOptConfig.getCBOMode(); |
| } |
| |
| private boolean getCBOTestMode(IOptimizationContext context) { |
| PhysicalOptimizationConfig physOptConfig = context.getPhysicalOptimizationConfig(); |
| return physOptConfig.getCBOTestMode(); |
| } |
| |
| /** |
| * Should not see any kind of joins here. store the emptyTupeSourceOp and DataSource operators. |
| * Each leaf input will normally have both, but sometimes only emptyTupeSourceOp will be present (in lists) |
| */ |
| private Pair<EmptyTupleSourceOperator, DataSourceScanOperator> containsLeafInputOnly(ILogicalOperator op) { |
| DataSourceScanOperator dataSourceOp = null; |
| ILogicalOperator currentOp = op; |
| while (currentOp.getInputs().size() == 1) { |
| if (currentOp.getOperatorTag() == LogicalOperatorTag.DATASOURCESCAN) { |
| // we should not see two data scans in the same path |
| if (dataSourceOp != null) { |
| return null; |
| } |
| dataSourceOp = (DataSourceScanOperator) currentOp; |
| } |
| currentOp = currentOp.getInputs().get(0).getValue(); |
| } |
| if (currentOp.getOperatorTag() == LogicalOperatorTag.EMPTYTUPLESOURCE) { |
| return new Pair<>((EmptyTupleSourceOperator) currentOp, dataSourceOp); |
| } |
| return null; |
| } |
| |
| /** |
| * Check to see if there is only one assign here and nothing below that other than a join. |
| * have not seen cases where there is more than one assign in a leafinput. |
| */ |
| private boolean onlyOneAssign(ILogicalOperator nextOp) { |
| if (nextOp.getOperatorTag() != LogicalOperatorTag.ASSIGN) { |
| return false; |
| } |
| List<Mutable<ILogicalOperator>> nextOpInputs = nextOp.getInputs(); |
| return nextOpInputs.get(0).getValue().getOperatorTag() == LogicalOperatorTag.INNERJOIN; |
| } |
| |
| private ILogicalOperator findSelectOrDataScan(ILogicalOperator op) { |
| LogicalOperatorTag tag; |
| while (true) { |
| if (op.getInputs().size() > 1) { |
| return null; // Assuming only a linear plan for single table queries (as leafInputs are linear). |
| } |
| tag = op.getOperatorTag(); |
| if (tag == LogicalOperatorTag.EMPTYTUPLESOURCE) { |
| return null; // if this happens, there is nothing we can do in CBO code since there is no datasourcescan |
| } |
| if ((tag == LogicalOperatorTag.SELECT) || (tag == LogicalOperatorTag.DATASOURCESCAN)) { |
| return op; |
| } |
| |
| op = op.getInputs().get(0).getValue(); |
| } |
| } |
| |
| /** |
| * This is the main routine that stores all the join operators and the leafInputs. We will later reuse the same |
| * join operators but switch the leafInputs (see buildNewTree). The whole scheme is based on the assumption that the |
| * leafInputs can be switched. The various data structures make the leafInputs accessible efficiently. |
| */ |
| private boolean getJoinOpsAndLeafInputs(ILogicalOperator op, |
| List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps, |
| HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, |
| HashMap<DataSourceScanOperator, EmptyTupleSourceOperator> dataSourceEmptyTupleHashMap, |
| List<ILogicalOperator> internalEdges, List<ILogicalOperator> joinOps) { |
| |
| if (op.getOperatorTag() == LogicalOperatorTag.LEFTOUTERJOIN) { |
| return false; |
| } |
| |
| if (op.getOperatorTag() == LogicalOperatorTag.INNERJOIN) { |
| joinOps.add(op); |
| for (int i = 0; i < 2; i++) { |
| ILogicalOperator nextOp = op.getInputs().get(i).getValue(); |
| boolean canTransform = getJoinOpsAndLeafInputs(nextOp, emptyTupleAndDataSourceOps, |
| joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps); |
| if (!canTransform) { |
| return false; |
| } |
| } |
| } else { |
| Pair<EmptyTupleSourceOperator, DataSourceScanOperator> etsDataSource = containsLeafInputOnly(op); |
| if (etsDataSource != null) { // a leaf input |
| EmptyTupleSourceOperator etsOp = etsDataSource.first; |
| DataSourceScanOperator dataSourceOp = etsDataSource.second; |
| emptyTupleAndDataSourceOps.add(new Pair<>(etsOp, dataSourceOp)); |
| if (op.getOperatorTag().equals(LogicalOperatorTag.DISTRIBUTE_RESULT)) {// single table query |
| ILogicalOperator selectOp = findSelectOrDataScan(op); |
| if (selectOp == null) { |
| return false; |
| } else { |
| joinLeafInputsHashMap.put(etsOp, selectOp); |
| } |
| } else { |
| joinLeafInputsHashMap.put(etsOp, op); |
| } |
| dataSourceEmptyTupleHashMap.put(dataSourceOp, etsOp); |
| } else { // This must be an internal edge |
| if (onlyOneAssign(op)) { |
| // currently, will handle only assign statement and nothing else in an internal Edge. |
| // we can lift this restriction later if the need arises. This just makes some code easier. |
| internalEdges.add(op); |
| boolean canTransform = |
| getJoinOpsAndLeafInputs(op.getInputs().get(0).getValue(), emptyTupleAndDataSourceOps, |
| joinLeafInputsHashMap, dataSourceEmptyTupleHashMap, internalEdges, joinOps); |
| if (!canTransform) { |
| return false; |
| } |
| |
| //internalEdges.add(op); // better to store the parent; do this soon. |
| } else { |
| return false; |
| } |
| } |
| } |
| |
| return true; |
| } |
| |
| private void addCardCostAnnotations(ILogicalOperator op, PlanNode plan) { |
| op.getAnnotations().put(OperatorAnnotations.OP_OUTPUT_CARDINALITY, |
| (double) Math.round(plan.getJoinNode().getCardinality() * 100) / 100); |
| op.getAnnotations().put(OperatorAnnotations.OP_COST_TOTAL, |
| (double) Math.round(plan.computeTotalCost() * 100) / 100); |
| if (plan.IsScanNode()) { |
| op.getAnnotations().put(OperatorAnnotations.OP_INPUT_CARDINALITY, |
| (double) Math.round(plan.getJoinNode().getOrigCardinality() * 100) / 100); |
| op.getAnnotations().put(OperatorAnnotations.OP_COST_LOCAL, |
| (double) Math.round(plan.computeOpCost() * 100) / 100); |
| } else { |
| op.getAnnotations().put(OperatorAnnotations.OP_LEFT_EXCHANGE_COST, |
| (double) Math.round(plan.getLeftExchangeCost().computeTotalCost() * 100) / 100); |
| op.getAnnotations().put(OperatorAnnotations.OP_RIGHT_EXCHANGE_COST, |
| (double) Math.round(plan.getRightExchangeCost().computeTotalCost() * 100) / 100); |
| op.getAnnotations().put(OperatorAnnotations.OP_COST_LOCAL, |
| (double) Math.round((plan.computeOpCost()) * 100) / 100); |
| } |
| |
| if (op.getOperatorTag().equals(LogicalOperatorTag.SELECT)) { |
| op.getAnnotations().put(OperatorAnnotations.OP_COST_LOCAL, 0.0); |
| } |
| } |
| |
| /** |
| * Finds the DataSourceScanOperator given a leafInput |
| */ |
| private ILogicalOperator findDataSourceScanOperator(ILogicalOperator op) { |
| ILogicalOperator origOp = op; |
| while (op != null && op.getOperatorTag() != LogicalOperatorTag.EMPTYTUPLESOURCE) { |
| if (op.getOperatorTag().equals(LogicalOperatorTag.DATASOURCESCAN)) { |
| return op; |
| } |
| op = op.getInputs().get(0).getValue(); |
| } |
| return origOp; |
| } |
| |
| private void removeJoinAnnotations(AbstractFunctionCallExpression afcExpr) { |
| afcExpr.removeAnnotation(BroadcastExpressionAnnotation.class); |
| afcExpr.removeAnnotation(IndexedNLJoinExpressionAnnotation.class); |
| afcExpr.removeAnnotation(HashJoinExpressionAnnotation.class); |
| } |
| |
| private void setAnnotation(AbstractFunctionCallExpression afcExpr, IExpressionAnnotation anno) { |
| FunctionIdentifier fi = afcExpr.getFunctionIdentifier(); |
| List<Mutable<ILogicalExpression>> arguments = afcExpr.getArguments(); |
| |
| if (fi.equals(AlgebricksBuiltinFunctions.AND)) { |
| for (Mutable<ILogicalExpression> iLogicalExpressionMutable : arguments) { |
| ILogicalExpression argument = iLogicalExpressionMutable.getValue(); |
| AbstractFunctionCallExpression expr = (AbstractFunctionCallExpression) argument; |
| expr.putAnnotation(anno); |
| } |
| } else { |
| afcExpr.putAnnotation(anno); |
| } |
| } |
| |
| //Internal edges are assign statements. The RHS has a variable in it. |
| // We need to find the internal edge that has a variable coming from this leaf leafInput. |
| private int findInternalEdge(ILogicalOperator leafInput, List<ILogicalOperator> internalEdges) |
| throws AlgebricksException { |
| int i = -1; |
| |
| for (ILogicalOperator ie : internalEdges) { |
| i++; |
| // this will be an Assign, so no need to check |
| AssignOperator aOp = (AssignOperator) ie; |
| List<LogicalVariable> vars = new ArrayList<>(); |
| aOp.getExpressions().get(0).getValue().getUsedVariables(vars); |
| HashSet<LogicalVariable> vars2 = new HashSet<>(); |
| VariableUtilities.getLiveVariables(leafInput, vars2); |
| if (vars2.containsAll(vars)) { // note that this will fail if there variables from different leafInputs |
| return i; |
| } |
| } |
| |
| return -1; |
| } |
| |
| private ILogicalOperator addAssignToLeafInput(ILogicalOperator leafInput, ILogicalOperator internalEdge) { |
| ILogicalOperator root = leafInput; |
| // this will be an Assign, so no need to check |
| AssignOperator aOp = (AssignOperator) internalEdge; |
| aOp.getInputs().get(0).setValue(root); |
| return aOp; |
| } |
| |
| private void skipAllIndexes(PlanNode plan, ILogicalOperator leafInput) { |
| if (plan.scanOp == PlanNode.ScanMethod.TABLE_SCAN && leafInput.getOperatorTag() == LogicalOperatorTag.SELECT) { |
| SelectOperator selOper = (SelectOperator) leafInput; |
| ILogicalExpression expr = selOper.getCondition().getValue(); |
| |
| List<Mutable<ILogicalExpression>> conjs = new ArrayList<>(); |
| |
| conjs.clear(); |
| if (expr.splitIntoConjuncts(conjs)) { |
| conjs.remove(new MutableObject<ILogicalExpression>(ConstantExpression.TRUE)); |
| for (Mutable<ILogicalExpression> conj : conjs) { |
| if (conj.getValue().getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL)) { |
| AbstractFunctionCallExpression afce = (AbstractFunctionCallExpression) conj.getValue(); |
| // remove any annotations that may have been here from other parts of the code. We know we want a datascan. |
| afce.removeAnnotation(SkipSecondaryIndexSearchExpressionAnnotation.class); |
| afce.putAnnotation(SkipSecondaryIndexSearchExpressionAnnotation.INSTANCE_ANY_INDEX); |
| } |
| } |
| } else { |
| if ((expr.getExpressionTag().equals(LogicalExpressionTag.FUNCTION_CALL))) { |
| AbstractFunctionCallExpression afce = (AbstractFunctionCallExpression) expr; |
| // remove any annotations that may have been here from other parts of the code. We know we want a datascan. |
| afce.removeAnnotation(SkipSecondaryIndexSearchExpressionAnnotation.class); |
| afce.putAnnotation(SkipSecondaryIndexSearchExpressionAnnotation.INSTANCE_ANY_INDEX); |
| } |
| } |
| } |
| } |
| |
| // This is for single table queries |
| private void buildNewTree(PlanNode plan, |
| HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap) { |
| ILogicalOperator leftInput = joinLeafInputsHashMap.get(plan.getEmptyTupleSourceOp()); |
| skipAllIndexes(plan, leftInput); |
| ILogicalOperator selOp = findSelectOrDataScan(leftInput); |
| if (selOp != null) { |
| addCardCostAnnotations(selOp, plan); |
| } |
| addCardCostAnnotations(findDataSourceScanOperator(leftInput), plan); |
| } |
| |
| // This one is for join queries |
| private void buildNewTree(PlanNode plan, HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, |
| List<ILogicalOperator> joinOps, MutableInt totalNumberOfJoins) { |
| // we have to move the inputs in op around so that they match the tree structure in pn |
| // we use the existing joinOps and switch the leafInputs appropriately. |
| List<PlanNode> allPlans = joinEnum.getAllPlans(); |
| int leftIndex = plan.getLeftPlanIndex(); |
| int rightIndex = plan.getRightPlanIndex(); |
| PlanNode leftPlan = allPlans.get(leftIndex); |
| PlanNode rightPlan = allPlans.get(rightIndex); |
| ILogicalOperator joinOp = joinOps.get(totalNumberOfJoins.intValue()); |
| |
| if (plan.IsJoinNode()) { |
| AbstractBinaryJoinOperator abJoinOp = (AbstractBinaryJoinOperator) joinOp; |
| ILogicalExpression expr = plan.getJoinExpr(); |
| abJoinOp.getCondition().setValue(expr); |
| // add the annotations |
| if (plan.getJoinOp() == PlanNode.JoinMethod.INDEX_NESTED_LOOP_JOIN) { |
| // this annotation is needed for the physical optimizer to replace this with the unnest operator later |
| AbstractFunctionCallExpression afcExpr = (AbstractFunctionCallExpression) expr; |
| removeJoinAnnotations(afcExpr); |
| setAnnotation(afcExpr, IndexedNLJoinExpressionAnnotation.INSTANCE_ANY_INDEX); |
| } else if (plan.getJoinOp() == PlanNode.JoinMethod.HYBRID_HASH_JOIN |
| || plan.getJoinOp() == PlanNode.JoinMethod.BROADCAST_HASH_JOIN |
| || plan.getJoinOp() == PlanNode.JoinMethod.CARTESIAN_PRODUCT_JOIN) { |
| if (plan.getJoinOp() == PlanNode.JoinMethod.BROADCAST_HASH_JOIN) { |
| // Broadcast the right branch. |
| BroadcastExpressionAnnotation bcast = |
| new BroadcastExpressionAnnotation(plan.side == HashJoinExpressionAnnotation.BuildSide.RIGHT |
| ? BroadcastExpressionAnnotation.BroadcastSide.RIGHT |
| : BroadcastExpressionAnnotation.BroadcastSide.LEFT); |
| AbstractFunctionCallExpression afcExpr = (AbstractFunctionCallExpression) expr; |
| removeJoinAnnotations(afcExpr); |
| setAnnotation(afcExpr, bcast); |
| } else if (plan.getJoinOp() == PlanNode.JoinMethod.HYBRID_HASH_JOIN) { |
| HashJoinExpressionAnnotation hjAnnotation = new HashJoinExpressionAnnotation(plan.side); |
| AbstractFunctionCallExpression afcExpr = (AbstractFunctionCallExpression) expr; |
| removeJoinAnnotations(afcExpr); |
| setAnnotation(afcExpr, hjAnnotation); |
| } else { |
| if (expr != ConstantExpression.TRUE) { |
| AbstractFunctionCallExpression afcExpr = (AbstractFunctionCallExpression) expr; |
| removeJoinAnnotations(afcExpr); |
| } |
| } |
| } |
| addCardCostAnnotations(joinOp, plan); |
| } |
| |
| if (leftPlan.IsScanNode()) { |
| // leaf |
| ILogicalOperator leftInput = joinLeafInputsHashMap.get(leftPlan.getEmptyTupleSourceOp()); |
| skipAllIndexes(leftPlan, leftInput); |
| ILogicalOperator selOp = findSelectOrDataScan(leftInput); |
| if (selOp != null) { |
| addCardCostAnnotations(selOp, leftPlan); |
| } |
| joinOp.getInputs().get(0).setValue(leftInput); |
| addCardCostAnnotations(findDataSourceScanOperator(leftInput), leftPlan); |
| } else { |
| // join |
| totalNumberOfJoins.increment(); |
| ILogicalOperator leftInput = joinOps.get(totalNumberOfJoins.intValue()); |
| joinOp.getInputs().get(0).setValue(leftInput); |
| buildNewTree(allPlans.get(leftIndex), joinLeafInputsHashMap, joinOps, totalNumberOfJoins); |
| } |
| |
| if (rightPlan.IsScanNode()) { |
| // leaf |
| ILogicalOperator rightInput = joinLeafInputsHashMap.get(rightPlan.getEmptyTupleSourceOp()); |
| skipAllIndexes(rightPlan, rightInput); |
| ILogicalOperator selOp = findSelectOrDataScan(rightInput); |
| if (selOp != null) { |
| addCardCostAnnotations(selOp, rightPlan); |
| } |
| joinOp.getInputs().get(1).setValue(rightInput); |
| addCardCostAnnotations(findDataSourceScanOperator(rightInput), rightPlan); |
| } else { |
| // join |
| totalNumberOfJoins.increment(); |
| ILogicalOperator rightInput = joinOps.get(totalNumberOfJoins.intValue()); |
| joinOp.getInputs().get(1).setValue(rightInput); |
| buildNewTree(allPlans.get(rightIndex), joinLeafInputsHashMap, joinOps, totalNumberOfJoins); |
| } |
| } |
| |
| // in some very rare cases, there is an internal edge that has an assign statement such as $$var = 20 but this variable |
| // is not used anywhere in the current join graph but is used outside the current join graph. So we add this assign to the top of |
| // our plan, so the rest of the code will be happy. Strange that this assign appears in the join graph. |
| private ILogicalOperator addConstantInternalEdgesAtTheTop(ILogicalOperator op, |
| List<ILogicalOperator> internalEdges) { |
| if (internalEdges.size() == 0) { |
| return op; |
| } |
| ILogicalOperator root = op; |
| for (ILogicalOperator ie : internalEdges) { |
| // this will be an Assign, so no need to check |
| AssignOperator aOp = (AssignOperator) ie; |
| aOp.getInputs().get(0).setValue(root); |
| root = aOp; |
| } |
| return root; |
| } |
| |
| public static void printPlan(IPlanPrettyPrinter pp, AbstractLogicalOperator op, String text) |
| throws AlgebricksException { |
| if (LOGGER.isTraceEnabled()) { |
| pp.reset(); |
| pp.printOperator(op, true, false); |
| LOGGER.trace("---------------------------- {}\n{}\n----------------------------", text, pp); |
| } |
| } |
| |
| private void printLeafPlans(IPlanPrettyPrinter pp, |
| HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap) throws AlgebricksException { |
| Iterator<Map.Entry<EmptyTupleSourceOperator, ILogicalOperator>> li = |
| joinLeafInputsHashMap.entrySet().iterator(); |
| int i = 0; |
| while (li.hasNext()) { |
| Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> pair = li.next(); |
| ILogicalOperator element = pair.getValue(); |
| printPlan(pp, (AbstractLogicalOperator) element, "Printing Leaf Input" + i); |
| i++; |
| } |
| } |
| |
| // for every internal edge assign (again assuming only 1 for now), find the corresponding leafInput and place the assign |
| // on top of that LeafInput. Modify the joinLeafInputsHashMap as well. |
| private void pushAssignsIntoLeafInputs(HashMap<EmptyTupleSourceOperator, ILogicalOperator> joinLeafInputsHashMap, |
| List<ILogicalOperator> internalEdges) throws AlgebricksException { |
| |
| for (Map.Entry<EmptyTupleSourceOperator, ILogicalOperator> mapElement : joinLeafInputsHashMap.entrySet()) { |
| ILogicalOperator joinLeafInput = mapElement.getValue(); |
| EmptyTupleSourceOperator ets = mapElement.getKey(); |
| int internalEdgeNumber = findInternalEdge(joinLeafInput, internalEdges); |
| if (internalEdgeNumber != -1) { |
| joinLeafInput = addAssignToLeafInput(joinLeafInput, internalEdges.get(internalEdgeNumber)); |
| joinLeafInputsHashMap.put(ets, joinLeafInput); |
| internalEdges.remove(internalEdgeNumber); // no longer needed |
| } |
| } |
| |
| } |
| |
| private boolean substituteVarOnce(ILogicalExpression exp, LogicalVariable oldVar, LogicalVariable newVar) { |
| switch (exp.getExpressionTag()) { |
| case FUNCTION_CALL: |
| AbstractFunctionCallExpression fun = (AbstractFunctionCallExpression) exp; |
| for (int i = 0; i < fun.getArguments().size(); i++) { |
| ILogicalExpression arg = fun.getArguments().get(i).getValue(); |
| if (substituteVarOnce(arg, oldVar, newVar)) { |
| return true; |
| } |
| } |
| return false; |
| case VARIABLE: |
| VariableReferenceExpression varExpr = (VariableReferenceExpression) exp; |
| if (varExpr.getVariableReference().equals(oldVar)) { |
| varExpr.setVariable(newVar); |
| return true; |
| } |
| return false; |
| default: |
| return false; |
| } |
| } |
| |
| // check to see if every dataset has a sample. If not, CBO code cannot run. A warning message must be issued as well. |
| private boolean doAllDataSourcesHaveSamples( |
| List<Pair<EmptyTupleSourceOperator, DataSourceScanOperator>> emptyTupleAndDataSourceOps, |
| IOptimizationContext context) throws AlgebricksException { |
| for (Pair<EmptyTupleSourceOperator, DataSourceScanOperator> emptyTupleAndDataSourceOp : emptyTupleAndDataSourceOps) { |
| if (emptyTupleAndDataSourceOp.getSecond() != null) { |
| DataSourceScanOperator scanOp = emptyTupleAndDataSourceOp.getSecond(); |
| Index index = joinEnum.getStatsHandle().findSampleIndex(scanOp, context); |
| if (index == null) { |
| return false; |
| } |
| } |
| } |
| return true; |
| } |
| } |