blob: fa5000ede9ef5db833bfae6b308fa29881efa95f [file] [log] [blame]
/*
* Copyright 2009-2010 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.uci.ics.hyracks.algebricks.rewriter.rules;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
/**
* Complex rewrite rule for producing joins from unnests.
* This rule is limited to creating left-deep trees.
*/
public class ComplexUnnestToProductRule implements IAlgebraicRewriteRule {
@Override
public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
return false;
}
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
throws AlgebricksException {
AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
if (op.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN
&& op.getOperatorTag() != LogicalOperatorTag.UNNEST) {
return false;
}
// We may pull selects above the join we create in order to eliminate possible dependencies between
// the outer and inner input plans of the join.
List<ILogicalOperator> topSelects = new ArrayList<ILogicalOperator>();
// Keep track of the operators and used variables participating in the inner input plan.
HashSet<LogicalVariable> innerUsedVars = new HashSet<LogicalVariable>();
List<ILogicalOperator> innerOps = new ArrayList<ILogicalOperator>();
HashSet<LogicalVariable> outerUsedVars = new HashSet<LogicalVariable>();
List<ILogicalOperator> outerOps = new ArrayList<ILogicalOperator>();
innerOps.add(op);
VariableUtilities.getUsedVariables(op, innerUsedVars);
Mutable<ILogicalOperator> opRef2 = op.getInputs().get(0);
AbstractLogicalOperator op2 = (AbstractLogicalOperator) opRef2.getValue();
// Find an unnest or join and partition the plan between the first unnest and that operator into independent parts.
if (!findPlanPartition(op2, innerUsedVars, outerUsedVars, innerOps, outerOps, topSelects, false)) {
// We could not find an unnest or join.
return false;
}
// The last operator must be an unnest or join.
AbstractLogicalOperator unnestOrJoin = (AbstractLogicalOperator) outerOps.get(outerOps.size() - 1);
ILogicalOperator outerRoot = null;
ILogicalOperator innerRoot = null;
EmptyTupleSourceOperator ets = new EmptyTupleSourceOperator();
// If we found a join, simply use it as the outer root.
if (unnestOrJoin.getOperatorTag() != LogicalOperatorTag.INNERJOIN
&& unnestOrJoin.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
// We've found a second unnest. First, sanity check that the unnest does not produce any vars that are used by the plan above (until the first unnest).
List<LogicalVariable> producedVars = new ArrayList<LogicalVariable>();
VariableUtilities.getProducedVariables(unnestOrJoin, producedVars);
for (LogicalVariable producedVar : producedVars) {
if (innerUsedVars.contains(producedVar)) {
return false;
}
}
// Continue finding a partitioning of the plan such that the inner and outer partitions are independent, in order to feed a join.
// Now, we look below the second unnest or join.
VariableUtilities.getUsedVariables(unnestOrJoin, outerUsedVars);
AbstractLogicalOperator unnestChild = (AbstractLogicalOperator) unnestOrJoin.getInputs().get(0).getValue();
if (!findPlanPartition(unnestChild, innerUsedVars, outerUsedVars, innerOps, outerOps, topSelects, true)) {
// We could not find a suitable partitioning.
return false;
}
}
innerRoot = buildOperatorChain(innerOps, ets, context);
context.computeAndSetTypeEnvironmentForOperator(innerRoot);
outerRoot = buildOperatorChain(outerOps, null, context);
context.computeAndSetTypeEnvironmentForOperator(outerRoot);
InnerJoinOperator product = new InnerJoinOperator(
new MutableObject<ILogicalExpression>(ConstantExpression.TRUE));
// Outer branch.
product.getInputs().add(new MutableObject<ILogicalOperator>(outerRoot));
// Inner branch.
product.getInputs().add(new MutableObject<ILogicalOperator>(innerRoot));
context.computeAndSetTypeEnvironmentForOperator(product);
// Put the selects on top of the join.
ILogicalOperator topOp = product;
if (!topSelects.isEmpty()) {
topOp = buildOperatorChain(topSelects, product, context);
}
// Plug the selects + product in the plan.
opRef.setValue(topOp);
context.computeAndSetTypeEnvironmentForOperator(topOp);
return true;
}
private ILogicalOperator buildOperatorChain(List<ILogicalOperator> ops, ILogicalOperator bottomOp,
IOptimizationContext context) throws AlgebricksException {
ILogicalOperator root = ops.get(0);
ILogicalOperator prevOp = root;
for (int i = 1; i < ops.size(); i++) {
ILogicalOperator inputOp = ops.get(i);
prevOp.getInputs().clear();
prevOp.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));
prevOp = inputOp;
}
if (bottomOp != null) {
context.computeAndSetTypeEnvironmentForOperator(bottomOp);
prevOp.getInputs().clear();
prevOp.getInputs().add(new MutableObject<ILogicalOperator>(bottomOp));
}
return root;
}
private boolean findPlanPartition(AbstractLogicalOperator op, HashSet<LogicalVariable> innerUsedVars,
HashSet<LogicalVariable> outerUsedVars, List<ILogicalOperator> innerOps, List<ILogicalOperator> outerOps,
List<ILogicalOperator> topSelects, boolean belowSecondUnnest) throws AlgebricksException {
if (belowSecondUnnest && innerUsedVars.isEmpty()) {
// Trivially joinable.
return true;
}
if (!belowSecondUnnest) {
// Bail on the following operators.
switch (op.getOperatorTag()) {
case AGGREGATE:
case SUBPLAN:
case GROUP:
case UNNEST_MAP:
return false;
}
}
switch (op.getOperatorTag()) {
case UNNEST:
case DATASOURCESCAN: {
// We may have reached this state by descending through a subplan.
outerOps.add(op);
return true;
}
case INNERJOIN:
case LEFTOUTERJOIN: {
// Make sure that no variables that are live under this join are needed by the inner.
List<LogicalVariable> liveVars = new ArrayList<LogicalVariable>();
VariableUtilities.getLiveVariables(op, liveVars);
for (LogicalVariable liveVar : liveVars) {
if (innerUsedVars.contains(liveVar)) {
return false;
}
}
outerOps.add(op);
return true;
}
case SELECT: {
// Remember this select to pulling it above the join.
if (innerUsedVars.isEmpty()) {
outerOps.add(op);
} else {
topSelects.add(op);
}
break;
}
case PROJECT: {
// Throw away projects from the plan since we are pulling selects up.
break;
}
case EMPTYTUPLESOURCE:
case NESTEDTUPLESOURCE: {
if (belowSecondUnnest) {
// We have successfully partitioned the plan into independent parts to be plugged into the join.
return true;
} else {
// We could not find a second unnest or a join.
return false;
}
}
default: {
// The inner is trivially independent.
if (!belowSecondUnnest && innerUsedVars.isEmpty()) {
outerOps.add(op);
break;
}
// Examine produced vars to determine which partition uses them.
List<LogicalVariable> producedVars = new ArrayList<LogicalVariable>();
VariableUtilities.getProducedVariables(op, producedVars);
int outerMatches = 0;
int innerMatches = 0;
for (LogicalVariable producedVar : producedVars) {
if (outerUsedVars.contains(producedVar)) {
outerMatches++;
}
if (innerUsedVars.contains(producedVar)) {
innerMatches++;
}
}
HashSet<LogicalVariable> targetUsedVars = null;
if (outerMatches == producedVars.size() && !producedVars.isEmpty()) {
// All produced vars used by outer partition.
outerOps.add(op);
targetUsedVars = outerUsedVars;
}
if (innerMatches == producedVars.size() && !producedVars.isEmpty()) {
// All produced vars used by inner partition.
innerOps.add(op);
targetUsedVars = innerUsedVars;
}
if (innerMatches == 0 && outerMatches == 0) {
// Op produces variables that are not used in the part of the plan we've seen (or it doesn't produce any vars).
// Try to figure out where it belongs by analyzing the used variables.
List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
VariableUtilities.getUsedVariables(op, usedVars);
for (LogicalVariable usedVar : usedVars) {
boolean canBreak = false;
if (outerUsedVars.contains(usedVar)) {
outerOps.add(op);
targetUsedVars = outerUsedVars;
canBreak = true;
}
if (innerUsedVars.contains(usedVar)) {
innerOps.add(op);
targetUsedVars = innerUsedVars;
canBreak = true;
}
if (canBreak) {
break;
}
}
// TODO: For now we bail here, but we could remember such ops and determine their target partition at a later point.
if (targetUsedVars == null) {
return false;
}
} else {
// The current operator produces variables that are used by both partitions, so the inner and outer are not independent and, therefore, we cannot create a join.
// TODO: We may still be able to split the operator to create a viable partitioning.
return false;
}
// Update used variables of partition that op belongs to.
if (op.hasNestedPlans() && op.getOperatorTag() != LogicalOperatorTag.SUBPLAN) {
AbstractOperatorWithNestedPlans opWithNestedPlans = (AbstractOperatorWithNestedPlans) op;
opWithNestedPlans.getUsedVariablesExceptNestedPlans(targetUsedVars);
} else {
VariableUtilities.getUsedVariables(op, targetUsedVars);
}
break;
}
}
if (!op.hasInputs()) {
if (!belowSecondUnnest) {
// We could not find a second unnest or a join.
return false;
} else {
// We have successfully partitioned the plan into independent parts to be plugged into the join.
return true;
}
}
return findPlanPartition((AbstractLogicalOperator) op.getInputs().get(0).getValue(), innerUsedVars,
outerUsedVars, innerOps, outerOps, topSelects, belowSecondUnnest);
}
}