| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| package org.apache.impala.analysis; |
| |
| import java.util.List; |
| |
| import org.apache.impala.catalog.Db; |
| import org.apache.impala.catalog.Function.CompareMode; |
| import org.apache.impala.catalog.PrimitiveType; |
| import org.apache.impala.catalog.ScalarFunction; |
| import org.apache.impala.catalog.Type; |
| import org.apache.impala.common.AnalysisException; |
| import org.apache.impala.common.Reference; |
| import org.apache.impala.thrift.TExprNode; |
| import org.apache.impala.thrift.TExprNodeType; |
| |
| import com.google.common.base.Preconditions; |
| import com.google.common.collect.Lists; |
| |
| /** |
| * Class representing a [NOT] IN predicate. It determines if a specified value |
| * (first child) matches any value in a subquery (second child) or a list |
| * of values (remaining children). |
| */ |
| public class InPredicate extends Predicate { |
| private static final String IN_SET_LOOKUP = "in_set_lookup"; |
| private static final String NOT_IN_SET_LOOKUP = "not_in_set_lookup"; |
| private static final String IN_ITERATE = "in_iterate"; |
| private static final String NOT_IN_ITERATE = "not_in_iterate"; |
| private final boolean isNotIn_; |
| |
| public boolean isNotIn() { return isNotIn_; } |
| |
| public static void initBuiltins(Db db) { |
| for (Type t: Type.getSupportedTypes()) { |
| if (t.isNull()) continue; |
| // TODO we do not support codegen for CHAR and the In predicate must be codegened |
| // because it has variable number of arguments. This will force CHARs to be |
| // cast up to strings; meaning that "in" comparisons will not have CHAR comparison |
| // semantics. |
| if (t.getPrimitiveType() == PrimitiveType.CHAR) continue; |
| |
| String typeString = t.getPrimitiveType().toString().toLowerCase(); |
| if (t.isScalarType(PrimitiveType.VARCHAR)) typeString = "string"; |
| |
| db.addBuiltin(ScalarFunction.createBuiltin(IN_ITERATE, |
| Lists.newArrayList(t, t), true, Type.BOOLEAN, |
| "impala::InPredicate::InIterate", null, null, false)); |
| db.addBuiltin(ScalarFunction.createBuiltin(NOT_IN_ITERATE, |
| Lists.newArrayList(t, t), true, Type.BOOLEAN, |
| "impala::InPredicate::NotInIterate", null, null, false)); |
| |
| String prepareFn = "impala::InPredicate::SetLookupPrepare_" + typeString; |
| String closeFn = "impala::InPredicate::SetLookupClose_" + typeString; |
| |
| db.addBuiltin(ScalarFunction.createBuiltin(IN_SET_LOOKUP, |
| Lists.newArrayList(t, t), true, Type.BOOLEAN, |
| "impala::InPredicate::InSetLookup", prepareFn, closeFn, false)); |
| db.addBuiltin(ScalarFunction.createBuiltin(NOT_IN_SET_LOOKUP, |
| Lists.newArrayList(t, t), true, Type.BOOLEAN, |
| "impala::InPredicate::NotInSetLookup", prepareFn, closeFn, false)); |
| |
| } |
| } |
| |
| // First child is the comparison expr for which we |
| // should check membership in the inList (the remaining children). |
| public InPredicate(Expr compareExpr, List<Expr> inList, boolean isNotIn) { |
| children_.add(compareExpr); |
| children_.addAll(inList); |
| isNotIn_ = isNotIn; |
| } |
| |
| // C'tor for initializing an [NOT] IN predicate with a subquery child. |
| public InPredicate(Expr compareExpr, Expr subquery, boolean isNotIn) { |
| Preconditions.checkNotNull(compareExpr); |
| Preconditions.checkNotNull(subquery); |
| children_.add(compareExpr); |
| children_.add(subquery); |
| isNotIn_ = isNotIn; |
| } |
| |
| /** |
| * Copy c'tor used in clone(). |
| */ |
| protected InPredicate(InPredicate other) { |
| super(other); |
| isNotIn_ = other.isNotIn_; |
| } |
| |
| @Override |
| protected void analyzeImpl(Analyzer analyzer) throws AnalysisException { |
| super.analyzeImpl(analyzer); |
| if (contains(Subquery.class)) { |
| // An [NOT] IN predicate with a subquery must contain two children, the second of |
| // which is a Subquery. |
| if (children_.size() != 2 || !(getChild(1) instanceof Subquery)) { |
| throw new AnalysisException("Unsupported IN predicate with a subquery: " + |
| toSqlImpl()); |
| } |
| Subquery subquery = (Subquery)getChild(1); |
| subquery.getStatement().setIsRuntimeScalar(false); |
| if (!subquery.returnsScalarColumn()) { |
| throw new AnalysisException("Subquery must return a single column: " + |
| subquery.toSql()); |
| } |
| |
| // Ensure that the column in the lhs of the IN predicate and the result of |
| // the subquery are type compatible. No need to perform any |
| // casting at this point. Any casting needed will be performed when the |
| // subquery is unnested. |
| List<Expr> subqueryExprs = subquery.getStatement().getResultExprs(); |
| Expr compareExpr = children_.get(0); |
| Expr subqueryExpr = subqueryExprs.get(0); |
| analyzer.getCompatibleType(compareExpr.getType(), compareExpr, subqueryExpr); |
| } else { |
| Preconditions.checkState(getChildren().size() >= 2); |
| analyzer.castAllToCompatibleType(children_); |
| Type childType = children_.get(0).getType(); |
| |
| if (childType.isNull()) { |
| // Make sure the BE never sees TYPE_NULL by picking an arbitrary type |
| for (int i = 0; i < children_.size(); ++i) { |
| uncheckedCastChild(Type.BOOLEAN, i); |
| } |
| } |
| |
| // Choose SetLookup or Iterate strategy. SetLookup can be used if all the exprs in |
| // the IN list are constant, and is faster than iterating if the IN list is big |
| // enough. |
| boolean allConstant = true; |
| for (int i = 1; i < children_.size(); ++i) { |
| if (!children_.get(i).isConstant()) { |
| allConstant = false; |
| break; |
| } |
| } |
| boolean useSetLookup = allConstant; |
| // Threshold based on InPredicateBenchmark results |
| int setLookupThreshold = children_.get(0).getType().isStringType() ? 2 : 6; |
| if (children_.size() - 1 < setLookupThreshold) useSetLookup = false; |
| |
| // Only lookup fn_ if all subqueries have been rewritten. If the second child is a |
| // subquery, it will have type ArrayType, which cannot be resolved to a builtin |
| // function and will fail analysis. |
| Type[] argTypes = {getChild(0).type_, getChild(1).type_}; |
| if (useSetLookup) { |
| fn_ = getBuiltinFunction(analyzer, isNotIn_ ? NOT_IN_SET_LOOKUP : IN_SET_LOOKUP, |
| argTypes, CompareMode.IS_NONSTRICT_SUPERTYPE_OF); |
| } else { |
| fn_ = getBuiltinFunction(analyzer, isNotIn_ ? NOT_IN_ITERATE : IN_ITERATE, |
| argTypes, CompareMode.IS_NONSTRICT_SUPERTYPE_OF); |
| } |
| Preconditions.checkNotNull(fn_); |
| Preconditions.checkState(fn_.getReturnType().isBoolean()); |
| castForFunctionCall(false, analyzer.isDecimalV2()); |
| } |
| |
| // TODO: Fix selectivity_ for nested predicate |
| Reference<SlotRef> slotRefRef = new Reference<SlotRef>(); |
| Reference<Integer> idxRef = new Reference<Integer>(); |
| if (isSingleColumnPredicate(slotRefRef, idxRef) && idxRef.getRef() == 0 |
| && slotRefRef.getRef().getNumDistinctValues() > 0) { |
| if (isNotIn()) { |
| selectivity_ = 1.0 - ((double) (getChildren().size() - 1) |
| / (double) slotRefRef.getRef().getNumDistinctValues()); |
| } else { |
| selectivity_ = (double) (getChildren().size() - 1) |
| / (double) slotRefRef.getRef().getNumDistinctValues(); |
| } |
| selectivity_ = Math.max(0.0, Math.min(1.0, selectivity_)); |
| } |
| } |
| |
| @Override |
| protected float computeEvalCost() { |
| if (!hasChildCosts()) return UNKNOWN_COST; |
| // BINARY_PREDICATE_COST accounts for the cost of performing the comparison. |
| return getChildCosts() + BINARY_PREDICATE_COST * (children_.size() - 1); |
| } |
| |
| @Override |
| protected void toThrift(TExprNode msg) { |
| // Can't serialize a predicate with a subquery |
| Preconditions.checkState(!contains(Subquery.class)); |
| msg.node_type = TExprNodeType.FUNCTION_CALL; |
| } |
| |
| @Override |
| public String toSqlImpl(ToSqlOptions options) { |
| StringBuilder strBuilder = new StringBuilder(); |
| String notStr = (isNotIn_) ? "NOT " : ""; |
| strBuilder.append(getChild(0).toSql(options) + " " + notStr + "IN "); |
| boolean hasSubquery = contains(Subquery.class); |
| if (!hasSubquery) strBuilder.append("("); |
| for (int i = 1; i < children_.size(); ++i) { |
| strBuilder.append(getChild(i).toSql(options)); |
| strBuilder.append((i+1 != children_.size()) ? ", " : ""); |
| } |
| if (!hasSubquery) strBuilder.append(")"); |
| return strBuilder.toString(); |
| } |
| |
| /** |
| * If predicate is of the form "<SlotRef> [NOT] IN", returns the |
| * SlotRef. |
| */ |
| @Override |
| public SlotRef getBoundSlot() { |
| return getChild(0).unwrapSlotRef(true); |
| } |
| |
| /** |
| * Negates an InPredicate. |
| */ |
| @Override |
| public Expr negate() { |
| return new InPredicate(getChild(0), children_.subList(1, children_.size()), |
| !isNotIn_); |
| } |
| |
| @Override |
| public Expr clone() { return new InPredicate(this); } |
| } |