blob: c3d256ad9351ecd93b6c0eb715712980f0b0882e [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.analysis;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.impala.catalog.Db;
import org.apache.impala.catalog.Function.CompareMode;
import org.apache.impala.catalog.ScalarFunction;
import org.apache.impala.catalog.Type;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.thrift.TExprNode;
import org.apache.impala.thrift.TExprNodeType;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
public class LikePredicate extends Predicate {
enum Operator {
LIKE("LIKE"),
ILIKE("ILIKE"),
RLIKE("RLIKE"),
REGEXP("REGEXP"),
IREGEXP("IREGEXP");
private final String description_;
private Operator(String description) {
this.description_ = description;
}
@Override
public String toString() {
return description_;
}
}
public static void initBuiltins(Db db) {
db.addBuiltin(ScalarFunction.createBuiltin(
Operator.LIKE.name(), Lists.<Type>newArrayList(Type.STRING, Type.STRING),
false, Type.BOOLEAN, "_ZN6impala13LikePredicate4LikeEPN10impala_udf15FunctionContextERKNS1_9StringValES6_",
"_ZN6impala13LikePredicate11LikePrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE",
"_ZN6impala13LikePredicate9LikeCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE", true));
db.addBuiltin(ScalarFunction.createBuiltin(
Operator.ILIKE.name(), Lists.<Type>newArrayList(Type.STRING, Type.STRING),
false, Type.BOOLEAN, "_ZN6impala13LikePredicate4LikeEPN10impala_udf15FunctionContextERKNS1_9StringValES6_",
"_ZN6impala13LikePredicate12ILikePrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE",
"_ZN6impala13LikePredicate9LikeCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE", true));
db.addBuiltin(ScalarFunction.createBuiltin(
Operator.RLIKE.name(), Lists.<Type>newArrayList(Type.STRING, Type.STRING),
false, Type.BOOLEAN, "_ZN6impala13LikePredicate5RegexEPN10impala_udf15FunctionContextERKNS1_9StringValES6_",
"_ZN6impala13LikePredicate12RegexPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE",
"_ZN6impala13LikePredicate10RegexCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE", true));
db.addBuiltin(ScalarFunction.createBuiltin(
Operator.REGEXP.name(), Lists.<Type>newArrayList(Type.STRING, Type.STRING),
false, Type.BOOLEAN, "_ZN6impala13LikePredicate5RegexEPN10impala_udf15FunctionContextERKNS1_9StringValES6_",
"_ZN6impala13LikePredicate12RegexPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE",
"_ZN6impala13LikePredicate10RegexCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE", true));
db.addBuiltin(ScalarFunction.createBuiltin(
Operator.IREGEXP.name(), Lists.<Type>newArrayList(Type.STRING, Type.STRING),
false, Type.BOOLEAN, "_ZN6impala13LikePredicate5RegexEPN10impala_udf15FunctionContextERKNS1_9StringValES6_",
"_ZN6impala13LikePredicate13IRegexPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE",
"_ZN6impala13LikePredicate10RegexCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE", true));
}
private final Operator op_;
public LikePredicate(Operator op, Expr e1, Expr e2) {
super();
this.op_ = op;
Preconditions.checkNotNull(e1);
children_.add(e1);
Preconditions.checkNotNull(e2);
children_.add(e2);
}
/**
* Copy c'tor used in clone().
*/
public LikePredicate(LikePredicate other) {
super(other);
op_ = other.op_;
}
@Override
public boolean localEquals(Expr that) {
return super.localEquals(that) && ((LikePredicate) that).op_ == op_;
}
@Override
public String toSqlImpl(ToSqlOptions options) {
return getChild(0).toSql(options) + " " + op_.toString() + " "
+ getChild(1).toSql(options);
}
@Override
protected void toThrift(TExprNode msg) {
msg.node_type = TExprNodeType.FUNCTION_CALL;
}
@Override
protected void analyzeImpl(Analyzer analyzer) throws AnalysisException {
super.analyzeImpl(analyzer);
if (!getChild(0).getType().isStringType() && !getChild(0).getType().isNull()) {
throw new AnalysisException(
"left operand of " + op_.toString() + " must be of type STRING: " + toSql());
}
if (!getChild(1).getType().isStringType() && !getChild(1).getType().isNull()) {
throw new AnalysisException(
"right operand of " + op_.toString() + " must be of type STRING: " + toSql());
}
fn_ = getBuiltinFunction(analyzer, op_.toString(), collectChildReturnTypes(),
CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
Preconditions.checkState(fn_ != null);
Preconditions.checkState(fn_.getReturnType().isBoolean());
if (Expr.IS_NON_NULL_LITERAL.apply(getChild(1))
&& (op_ == Operator.RLIKE || op_ == Operator.REGEXP || op_ == Operator.IREGEXP)) {
// let's make sure the pattern works
// TODO: this checks that it's a Java-supported regex, but the syntax supported
// by the backend is Posix; add a call to the backend to check the re syntax
try {
Pattern.compile(((StringLiteral) getChild(1)).getValueWithOriginalEscapes());
} catch (PatternSyntaxException e) {
throw new AnalysisException(
"invalid regular expression in '" + this.toSql() + "'");
}
}
castForFunctionCall(false, analyzer.isDecimalV2());
}
@Override
protected float computeEvalCost() {
if (!hasChildCosts()) return UNKNOWN_COST;
if (Expr.IS_NON_NULL_LITERAL.apply(getChild(1)) &&
Pattern.matches("[%_]*[^%_]*[%_]*",
((StringLiteral) getChild(1)).getValueWithOriginalEscapes())) {
// This pattern only has wildcards as leading or trailing character,
// so it is linear.
return getChildCosts() +
(float) (getAvgStringLength(getChild(0)) + getAvgStringLength(getChild(1)) *
BINARY_PREDICATE_COST) + LIKE_COST;
} else {
// This pattern is more expensive, so calculate its cost as quadratic.
return getChildCosts() +
(float) (getAvgStringLength(getChild(0)) * getAvgStringLength(getChild(1)) *
BINARY_PREDICATE_COST) + LIKE_COST;
}
}
@Override
public Expr clone() { return new LikePredicate(this); }
public Operator getOp() { return op_; }
}