blob: c90b3f1cfe4eb7c36a14b88fd4f566b00069a6c2 [file] [log] [blame]
package edu.uci.ics.asterix.optimizer.rules;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import edu.uci.ics.asterix.aql.util.FunctionUtils;
import edu.uci.ics.asterix.om.base.ADouble;
import edu.uci.ics.asterix.om.base.AFloat;
import edu.uci.ics.asterix.om.base.AInt32;
import edu.uci.ics.asterix.om.base.IAObject;
import edu.uci.ics.asterix.om.constants.AsterixConstantValue;
import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
/**
* Looks for a select operator, containing a condition:
*
* similarity-function GE/GT/LE/LE constant/variable
*
* Rewrites the select condition (and possibly the assign expr) with the equivalent similarity-check function.
*
*/
public class SimilarityCheckRule implements IAlgebraicRewriteRule {
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
// Look for select.
if (op.getOperatorTag() != LogicalOperatorTag.SELECT) {
return false;
}
SelectOperator select = (SelectOperator) op;
Mutable<ILogicalExpression> condExpr = select.getCondition();
// Gather assigns below this select.
List<AssignOperator> assigns = new ArrayList<AssignOperator>();
AbstractLogicalOperator childOp = (AbstractLogicalOperator) select.getInputs().get(0).getValue();
// Skip selects.
while (childOp.getOperatorTag() == LogicalOperatorTag.SELECT) {
childOp = (AbstractLogicalOperator) childOp.getInputs().get(0).getValue();
}
while (childOp.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
assigns.add((AssignOperator) childOp);
childOp = (AbstractLogicalOperator) childOp.getInputs().get(0).getValue();
}
return replaceSelectConditionExprs(condExpr, assigns, context);
}
private boolean replaceSelectConditionExprs(Mutable<ILogicalExpression> expRef, List<AssignOperator> assigns, IOptimizationContext context) throws AlgebricksException {
ILogicalExpression expr = expRef.getValue();
if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
return false;
}
AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();
// Recursively traverse conjuncts.
// TODO: Ignore disjuncts for now, because some replacements may be invalid.
// For example, if the result of the similarity function is used somewhere upstream,
// then we may still need the true similarity value even if the GE/GT/LE/LE comparison returns false.
if (funcIdent == AlgebricksBuiltinFunctions.AND) {
boolean found = true;
for (int i = 0; i < funcExpr.getArguments().size(); ++i) {
found = found && replaceSelectConditionExprs(funcExpr.getArguments().get(i), assigns, context);
}
return found;
}
// Look for GE/GT/LE/LT.
if (funcIdent != AlgebricksBuiltinFunctions.GE && funcIdent != AlgebricksBuiltinFunctions.GT &&
funcIdent != AlgebricksBuiltinFunctions.LE && funcIdent != AlgebricksBuiltinFunctions.LT) {
return false;
}
// One arg should be a function call or a variable, the other a constant.
AsterixConstantValue constVal = null;
ILogicalExpression nonConstExpr = null;
ILogicalExpression arg1 = funcExpr.getArguments().get(0).getValue();
ILogicalExpression arg2 = funcExpr.getArguments().get(1).getValue();
// Normalized GE/GT/LE/LT as if constant was on the right hand side.
FunctionIdentifier normFuncIdent = null;
// One of the args must be a constant.
if (arg1.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
ConstantExpression constExpr = (ConstantExpression) arg1;
constVal = (AsterixConstantValue) constExpr.getValue();
nonConstExpr = arg2;
// Get func ident as if swapping lhs and rhs.
normFuncIdent = getLhsAndRhsSwappedFuncIdent(funcIdent);
} else if (arg2.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
ConstantExpression constExpr = (ConstantExpression) arg2;
constVal = (AsterixConstantValue) constExpr.getValue();
nonConstExpr = arg1;
// Constant is already on rhs, so nothing to be done for normalizedFuncIdent.
normFuncIdent = funcIdent;
} else {
return false;
}
// The other arg is a function call. We can directly replace the select condition with an equivalent similarity check expression.
if (nonConstExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
return replaceWithFunctionCallArg(expRef, normFuncIdent, constVal, (AbstractFunctionCallExpression) nonConstExpr);
}
// The other arg ist a variable. We may have to introduce an assign operator that assigns the result of a similarity-check function to a variable.
if (nonConstExpr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
return replaceWithVariableArg(expRef, normFuncIdent, constVal, (VariableReferenceExpression) nonConstExpr, assigns, context);
}
return false;
}
private boolean replaceWithVariableArg(Mutable<ILogicalExpression> expRef, FunctionIdentifier normFuncIdent,
AsterixConstantValue constVal, VariableReferenceExpression varRefExpr, List<AssignOperator> assigns, IOptimizationContext context) throws AlgebricksException {
// Find variable in assigns to determine its originating function.
LogicalVariable var = varRefExpr.getVariableReference();
Mutable<ILogicalExpression> simFuncExprRef = null;
ScalarFunctionCallExpression simCheckFuncExpr = null;
AssignOperator matchingAssign = null;
for (int i = 0; i < assigns.size(); i++) {
AssignOperator assign = assigns.get(i);
for (int j = 0; j < assign.getVariables().size(); j++) {
// Check if variables match.
if (var != assign.getVariables().get(j)) {
continue;
}
// Check if corresponding expr is a function call.
if (assign.getExpressions().get(j).getValue().getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
continue;
}
simFuncExprRef = assign.getExpressions().get(j);
// Analyze function expression and get equivalent similarity check function.
simCheckFuncExpr = getSimilarityCheckExpr(normFuncIdent, constVal, (AbstractFunctionCallExpression) simFuncExprRef.getValue());
matchingAssign = assign;
break;
}
if (simCheckFuncExpr != null) {
break;
}
}
// Only non-null if we found that varRefExpr refers to an optimizable similarity function call.
if (simCheckFuncExpr != null) {
// Create a new assign under matchingAssign which assigns the result of our similarity-check function to a variable.
LogicalVariable newVar = context.newVar();
AssignOperator newAssign = new AssignOperator(newVar, new MutableObject<ILogicalExpression>(simCheckFuncExpr));
// Hook up inputs.
newAssign.getInputs().add(new MutableObject<ILogicalOperator>(matchingAssign.getInputs().get(0).getValue()));
matchingAssign.getInputs().get(0).setValue(newAssign);
// Replace select condition with a get-item on newVar.
List<Mutable<ILogicalExpression>> selectGetItemArgs = new ArrayList<Mutable<ILogicalExpression>>();
// First arg is a variable reference expr on newVar.
selectGetItemArgs.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(newVar)));
// Second arg is the item index to be accessed, here 0.
selectGetItemArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(0)))));
ILogicalExpression selectGetItemExpr = new ScalarFunctionCallExpression(FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.GET_ITEM), selectGetItemArgs);
// Replace the old similarity function call with the new getItemExpr.
expRef.setValue(selectGetItemExpr);
// Replace expr corresponding to original variable in the original assign with a get-item on newVar.
List<Mutable<ILogicalExpression>> assignGetItemArgs = new ArrayList<Mutable<ILogicalExpression>>();
// First arg is a variable reference expr on newVar.
assignGetItemArgs.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(newVar)));
// Second arg is the item index to be accessed, here 1.
assignGetItemArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(1)))));
ILogicalExpression assignGetItemExpr = new ScalarFunctionCallExpression(FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.GET_ITEM), assignGetItemArgs);
// Replace the original assign expr with the get-item expr.
simFuncExprRef.setValue(assignGetItemExpr);
context.computeAndSetTypeEnvironmentForOperator(newAssign);
context.computeAndSetTypeEnvironmentForOperator(matchingAssign);
return true;
}
return false;
}
private boolean replaceWithFunctionCallArg(Mutable<ILogicalExpression> expRef, FunctionIdentifier normFuncIdent,
AsterixConstantValue constVal, AbstractFunctionCallExpression funcExpr) {
// Analyze func expr to see if it is an optimizable similarity function.
ScalarFunctionCallExpression simCheckFuncExpr = getSimilarityCheckExpr(normFuncIdent, constVal, funcExpr);
// Replace the expr in the select condition.
if (simCheckFuncExpr != null) {
// Get item 0 from var.
List<Mutable<ILogicalExpression>> getItemArgs = new ArrayList<Mutable<ILogicalExpression>>();
// First arg is the similarity-check function call.
getItemArgs.add(new MutableObject<ILogicalExpression>(simCheckFuncExpr));
// Second arg is the item index to be accessed.
getItemArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(new AInt32(0)))));
ILogicalExpression getItemExpr = new ScalarFunctionCallExpression(FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.GET_ITEM), getItemArgs);
// Replace the old similarity function call with the new getItemExpr.
expRef.setValue(getItemExpr);
return true;
}
return false;
}
private ScalarFunctionCallExpression getSimilarityCheckExpr(FunctionIdentifier normFuncIdent,
AsterixConstantValue constVal, AbstractFunctionCallExpression funcExpr) {
// Remember args from original similarity function to add them to the similarity-check function later.
ArrayList<Mutable<ILogicalExpression>> similarityArgs = null;
ScalarFunctionCallExpression simCheckFuncExpr = null;
// Look for jaccard function call, and GE or GT.
if (funcExpr.getFunctionIdentifier() == AsterixBuiltinFunctions.SIMILARITY_JACCARD) {
IAObject jaccThresh;
if (normFuncIdent == AlgebricksBuiltinFunctions.GE) {
if (constVal.getObject() instanceof AFloat) {
jaccThresh = constVal.getObject();
} else {
jaccThresh = new AFloat((float)((ADouble) constVal.getObject()).getDoubleValue());
}
} else if (normFuncIdent == AlgebricksBuiltinFunctions.GT) {
float threshVal = 0.0f;
if (constVal.getObject() instanceof AFloat) {
threshVal = ((AFloat) constVal.getObject()).getFloatValue();
} else {
threshVal = (float)((ADouble) constVal.getObject()).getDoubleValue();
}
float f = threshVal + Float.MIN_VALUE;
if (f > 1.0f) f = 1.0f;
jaccThresh = new AFloat(f);
} else {
return null;
}
similarityArgs = new ArrayList<Mutable<ILogicalExpression>>();
similarityArgs.addAll(funcExpr.getArguments());
similarityArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(jaccThresh))));
simCheckFuncExpr = new ScalarFunctionCallExpression(
FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.SIMILARITY_JACCARD_CHECK), similarityArgs);
}
// Look for edit-distance function call, and LE or LT.
if(funcExpr.getFunctionIdentifier() == AsterixBuiltinFunctions.EDIT_DISTANCE) {
AInt32 aInt = (AInt32) constVal.getObject();
AInt32 edThresh;
if (normFuncIdent == AlgebricksBuiltinFunctions.LE) {
edThresh = aInt;
} else if (normFuncIdent == AlgebricksBuiltinFunctions.LT) {
int ed = aInt.getIntegerValue() - 1;
if (ed < 0) ed = 0;
edThresh = new AInt32(ed);
} else {
return null;
}
similarityArgs = new ArrayList<Mutable<ILogicalExpression>>();
similarityArgs.addAll(funcExpr.getArguments());
similarityArgs.add(new MutableObject<ILogicalExpression>(new ConstantExpression(new AsterixConstantValue(edThresh))));
simCheckFuncExpr = new ScalarFunctionCallExpression(
FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.EDIT_DISTANCE_CHECK), similarityArgs);
}
// Preserve all annotations.
if (simCheckFuncExpr != null) {
simCheckFuncExpr.getAnnotations().putAll(funcExpr.getAnnotations());
}
return simCheckFuncExpr;
}
private FunctionIdentifier getLhsAndRhsSwappedFuncIdent(FunctionIdentifier oldFuncIdent) {
if (oldFuncIdent == AlgebricksBuiltinFunctions.GE) {
return AlgebricksBuiltinFunctions.LE;
}
if (oldFuncIdent == AlgebricksBuiltinFunctions.GT) {
return AlgebricksBuiltinFunctions.LT;
}
if (oldFuncIdent == AlgebricksBuiltinFunctions.LE) {
return AlgebricksBuiltinFunctions.GE;
}
if (oldFuncIdent == AlgebricksBuiltinFunctions.LT) {
return AlgebricksBuiltinFunctions.GT;
}
throw new IllegalStateException();
}
@Override
public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
return false;
}
}