blob: 089bbc526925d09c8061ee4f35fdda415215a743 [file] [log] [blame]
/*
* Copyright 2009-2010 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.uci.ics.hyracks.algebricks.rewriter.rules;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractLogicalExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
import edu.uci.ics.hyracks.algebricks.core.algebra.visitors.ILogicalExpressionReferenceTransform;
import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
/**
* Factors out common sub-expressions by assigning them to a variables, and replacing the common sub-expressions with references to those variables.
*
* Preconditions/Assumptions:
* Assumes no projects are in the plan. This rule ignores variable reference expressions and constants (other rules deal with those separately).
*
* Postconditions/Examples:
* Plan with extracted sub-expressions. Generates one assign operator per extracted expression.
*
* Example 1 - Simple Arithmetic Example (simplified)
*
* Before plan:
* assign [$$1] <- [5 + 6 - 10]
* assign [$$0] <- [5 + 6 + 30]
*
* After plan:
* assign [$$1] <- [$$5 - 10]
* assign [$$0] <- [$$5 + 30]
* assign [$$5] <- [5 + 6]
*
* Example 2 - Cleaning up 'Distinct By' (simplified)
*
* Before plan: (notice how $$0 is not live after the distinct)
* assign [$$3] <- [field-access($$0, 1)]
* distinct ([%0->$$5])
* assign [$$5] <- [field-access($$0, 1)]
* unnest $$0 <- [scan-dataset]
*
* After plan: (notice how the issue of $$0 is fixed)
* assign [$$3] <- [$$5]
* distinct ([$$5])
* assign [$$5] <- [field-access($$0, 1)]
* unnest $$0 <- [scan-dataset]
*
* Example 3 - Pulling Common Expressions Above Joins (simplified)
*
* Before plan:
* assign [$$9] <- funcZ(funcY($$8))
* join (funcX(funcY($$8)))
*
* After plan:
* assign [$$9] <- funcZ($$10))
* select (funcX($$10))
* assign [$$10] <- [funcY($$8)]
* join (TRUE)
*/
public class ExtractCommonExpressionsRule implements IAlgebraicRewriteRule {
private final List<ILogicalExpression> originalAssignExprs = new ArrayList<ILogicalExpression>();
private final CommonExpressionSubstitutionVisitor substVisitor = new CommonExpressionSubstitutionVisitor();
private final Map<ILogicalExpression, ExprEquivalenceClass> exprEqClassMap = new HashMap<ILogicalExpression, ExprEquivalenceClass>();
// Set of operators for which common subexpression elimination should not be performed.
private static final Set<LogicalOperatorTag> ignoreOps = new HashSet<LogicalOperatorTag>();
static {
ignoreOps.add(LogicalOperatorTag.UNNEST);
ignoreOps.add(LogicalOperatorTag.UNNEST_MAP);
ignoreOps.add(LogicalOperatorTag.ORDER);
ignoreOps.add(LogicalOperatorTag.PROJECT);
ignoreOps.add(LogicalOperatorTag.AGGREGATE);
ignoreOps.add(LogicalOperatorTag.RUNNINGAGGREGATE);
}
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
return false;
}
@Override
public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
exprEqClassMap.clear();
substVisitor.setContext(context);
boolean modified = removeCommonExpressions(opRef, context);
if (modified) {
context.computeAndSetTypeEnvironmentForOperator(opRef.getValue());
}
return modified;
}
private void updateEquivalenceClassMap(LogicalVariable lhs, Mutable<ILogicalExpression> rhsExprRef, ILogicalExpression rhsExpr, ILogicalOperator op) {
ExprEquivalenceClass exprEqClass = exprEqClassMap.get(rhsExpr);
if (exprEqClass == null) {
exprEqClass = new ExprEquivalenceClass(op, rhsExprRef);
exprEqClassMap.put(rhsExpr, exprEqClass);
}
exprEqClass.setVariable(lhs);
}
private boolean removeCommonExpressions(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
throws AlgebricksException {
AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
if (context.checkIfInDontApplySet(this, opRef.getValue())) {
return false;
}
boolean modified = false;
// Recurse into children.
for (Mutable<ILogicalOperator> inputOpRef : op.getInputs()) {
if (removeCommonExpressions(inputOpRef, context)) {
modified = true;
}
}
// TODO: Deal with replicate properly. Currently, we just clear the expr equivalence map, since we want to avoid incorrect expression replacement
// (the resulting new variables should be assigned live below a replicate).
if (op.getOperatorTag() == LogicalOperatorTag.REPLICATE) {
exprEqClassMap.clear();
return modified;
}
// Exclude these operators.
if (ignoreOps.contains(op.getOperatorTag())) {
return modified;
}
// Remember a copy of the original assign expressions, so we can add them to the equivalence class map
// after replacing expressions within the assign operator itself.
if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
AssignOperator assignOp = (AssignOperator) op;
originalAssignExprs.clear();
int numVars = assignOp.getVariables().size();
for (int i = 0; i < numVars; i++) {
Mutable<ILogicalExpression> exprRef = assignOp.getExpressions().get(i);
ILogicalExpression expr = exprRef.getValue();
if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE
|| expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
continue;
}
originalAssignExprs.add(expr.cloneExpression());
}
}
// Perform common subexpression elimination.
substVisitor.setOperator(op);
if (op.acceptExpressionTransform(substVisitor)) {
modified = true;
}
// Update equivalence class map.
if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
AssignOperator assignOp = (AssignOperator) op;
int numVars = assignOp.getVariables().size();
for (int i = 0; i < numVars; i++) {
Mutable<ILogicalExpression> exprRef = assignOp.getExpressions().get(i);
ILogicalExpression expr = exprRef.getValue();
if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE
|| expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
continue;
}
// Update equivalence class map.
LogicalVariable lhs = assignOp.getVariables().get(i);
updateEquivalenceClassMap(lhs, exprRef, exprRef.getValue(), op);
// Update equivalence class map with original assign expression.
updateEquivalenceClassMap(lhs, exprRef, originalAssignExprs.get(i), op);
}
}
// TODO: For now do not perform replacement in nested plans
// due to the complication of figuring out whether the firstOp in an equivalence class is within a subplan,
// and the resulting variable will not be visible to the outside.
// Since subplans should be eliminated in most cases, this behavior is acceptable for now.
/*
if (op.hasNestedPlans()) {
AbstractOperatorWithNestedPlans opWithNestedPlan = (AbstractOperatorWithNestedPlans) op;
for (ILogicalPlan nestedPlan : opWithNestedPlan.getNestedPlans()) {
for (Mutable<ILogicalOperator> rootRef : nestedPlan.getRoots()) {
if (removeCommonExpressions(rootRef, context)) {
modified = true;
}
}
}
}
*/
if (modified) {
context.computeAndSetTypeEnvironmentForOperator(op);
context.addToDontApplySet(this, op);
}
return modified;
}
private class CommonExpressionSubstitutionVisitor implements ILogicalExpressionReferenceTransform {
private final Set<LogicalVariable> liveVars = new HashSet<LogicalVariable>();
private final List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
private IOptimizationContext context;
private ILogicalOperator op;
public void setContext(IOptimizationContext context) {
this.context = context;
}
public void setOperator(ILogicalOperator op) throws AlgebricksException {
this.op = op;
liveVars.clear();
usedVars.clear();
}
@Override
public boolean transform(Mutable<ILogicalExpression> exprRef) throws AlgebricksException {
if (liveVars.isEmpty() && usedVars.isEmpty()) {
VariableUtilities.getLiveVariables(op, liveVars);
VariableUtilities.getUsedVariables(op, usedVars);
}
AbstractLogicalExpression expr = (AbstractLogicalExpression) exprRef.getValue();
boolean modified = false;
ExprEquivalenceClass exprEqClass = exprEqClassMap.get(expr);
if (exprEqClass != null) {
// Replace common subexpression with existing variable.
if (exprEqClass.variableIsSet()) {
// Check if the replacing variable is live at this op.
// However, if the op is already using variables that are not live, then a replacement may enable fixing the plan.
// This behavior is necessary to, e.g., properly deal with distinct by.
// Also just replace the expr if we are replacing common exprs from within the same operator.
if (liveVars.contains(exprEqClass.getVariable()) || !liveVars.containsAll(usedVars)
|| op == exprEqClass.getFirstOperator()) {
exprRef.setValue(new VariableReferenceExpression(exprEqClass.getVariable()));
// Do not descend into children since this expr has been completely replaced.
return true;
}
} else {
if (assignCommonExpression(exprEqClass, expr)) {
exprRef.setValue(new VariableReferenceExpression(exprEqClass.getVariable()));
// Do not descend into children since this expr has been completely replaced.
return true;
}
}
} else {
if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE
&& expr.getExpressionTag() != LogicalExpressionTag.CONSTANT) {
exprEqClass = new ExprEquivalenceClass(op, exprRef);
exprEqClassMap.put(expr, exprEqClass);
}
}
// Descend into function arguments.
if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
for (Mutable<ILogicalExpression> arg : funcExpr.getArguments()) {
if (transform(arg)) {
modified = true;
}
}
}
return modified;
}
private boolean assignCommonExpression(ExprEquivalenceClass exprEqClass, ILogicalExpression expr) throws AlgebricksException {
AbstractLogicalOperator firstOp = (AbstractLogicalOperator) exprEqClass.getFirstOperator();
Mutable<ILogicalExpression> firstExprRef = exprEqClass.getFirstExpression();
if (firstOp.getOperatorTag() == LogicalOperatorTag.INNERJOIN || firstOp.getOperatorTag() == LogicalOperatorTag.LEFTOUTERJOIN) {
// Do not extract common expressions from within the same join operator.
if (firstOp == op) {
return false;
}
AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) firstOp;
Mutable<ILogicalExpression> joinCond = joinOp.getCondition();
ILogicalExpression enclosingExpr = getEnclosingExpression(joinCond, firstExprRef.getValue());
if (enclosingExpr == null) {
// No viable enclosing expression that we can pull out from the join.
return false;
}
// Place a Select operator beneath op that contains the enclosing expression.
SelectOperator selectOp = new SelectOperator(new MutableObject<ILogicalExpression>(enclosingExpr));
selectOp.getInputs().add(new MutableObject<ILogicalOperator>(op.getInputs().get(0).getValue()));
op.getInputs().get(0).setValue(selectOp);
// Set firstOp to be the select below op, since we want to assign the common subexpr there.
firstOp = (AbstractLogicalOperator) selectOp;
} else if (firstOp.getInputs().size() > 1) {
// Bail for any non-join operator with multiple inputs.
return false;
}
LogicalVariable newVar = context.newVar();
AssignOperator newAssign = new AssignOperator(newVar, new MutableObject<ILogicalExpression>(firstExprRef.getValue().cloneExpression()));
// Place assign below firstOp.
newAssign.getInputs().add(new MutableObject<ILogicalOperator>(firstOp.getInputs().get(0).getValue()));
newAssign.setExecutionMode(firstOp.getExecutionMode());
firstOp.getInputs().get(0).setValue(newAssign);
// Replace original expr with variable reference, and set var in expression equivalence class.
firstExprRef.setValue(new VariableReferenceExpression(newVar));
exprEqClass.setVariable(newVar);
context.computeAndSetTypeEnvironmentForOperator(newAssign);
context.computeAndSetTypeEnvironmentForOperator(firstOp);
return true;
}
private ILogicalExpression getEnclosingExpression(Mutable<ILogicalExpression> conditionExprRef, ILogicalExpression commonSubExpr) {
ILogicalExpression conditionExpr = conditionExprRef.getValue();
if (conditionExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
return null;
}
if (isEqJoinCondition(commonSubExpr)) {
// Do not eliminate the common expression if we could use it for an equi-join.
return null;
}
AbstractFunctionCallExpression conditionFuncExpr = (AbstractFunctionCallExpression) conditionExpr;
// Boolean expression that encloses the common subexpression.
ILogicalExpression enclosingBoolExpr = null;
// We are not dealing with arbitrarily nested and/or expressions here.
FunctionIdentifier funcIdent = conditionFuncExpr.getFunctionIdentifier();
if (funcIdent.equals(AlgebricksBuiltinFunctions.AND) || funcIdent.equals(AlgebricksBuiltinFunctions.OR)) {
Iterator<Mutable<ILogicalExpression>> argIter = conditionFuncExpr.getArguments().iterator();
while (argIter.hasNext()) {
Mutable<ILogicalExpression> argRef = argIter.next();
if (containsExpr(argRef.getValue(), commonSubExpr)) {
enclosingBoolExpr = argRef.getValue();
// Remove the enclosing expression from the argument list.
// We are going to pull it out into a new select operator.
argIter.remove();
break;
}
}
// If and/or only has a single argument left, pull it out and remove the and/or function.
if (conditionFuncExpr.getArguments().size() == 1) {
conditionExprRef.setValue(conditionFuncExpr.getArguments().get(0).getValue());
}
} else {
if (!containsExpr(conditionExprRef.getValue(), commonSubExpr)) {
return null;
}
enclosingBoolExpr = conditionFuncExpr;
// Replace the enclosing expression with TRUE.
conditionExprRef.setValue(ConstantExpression.TRUE);
}
return enclosingBoolExpr;
}
}
private boolean containsExpr(ILogicalExpression expr, ILogicalExpression searchExpr) {
if (expr == searchExpr) {
return true;
}
if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
return false;
}
AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
for (Mutable<ILogicalExpression> argRef : funcExpr.getArguments()) {
if (containsExpr(argRef.getValue(), searchExpr)) {
return true;
}
}
return false;
}
private boolean isEqJoinCondition(ILogicalExpression expr) {
AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
if (funcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.EQ)) {
ILogicalExpression arg1 = funcExpr.getArguments().get(0).getValue();
ILogicalExpression arg2 = funcExpr.getArguments().get(1).getValue();
if (arg1.getExpressionTag() == LogicalExpressionTag.VARIABLE
&& arg2.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
return true;
}
}
return false;
}
private final class ExprEquivalenceClass {
// First operator in which expression is used.
private final ILogicalOperator firstOp;
// Reference to expression in first op.
private final Mutable<ILogicalExpression> firstExprRef;
// Variable that this expression has been assigned to.
private LogicalVariable var;
public ExprEquivalenceClass(ILogicalOperator firstOp, Mutable<ILogicalExpression> firstExprRef) {
this.firstOp = firstOp;
this.firstExprRef = firstExprRef;
}
public ILogicalOperator getFirstOperator() {
return firstOp;
}
public Mutable<ILogicalExpression> getFirstExpression() {
return firstExprRef;
}
public void setVariable(LogicalVariable var) {
this.var = var;
}
public LogicalVariable getVariable() {
return var;
}
public boolean variableIsSet() {
return var != null;
}
}
}