blob: c99acae86c554903f5ade2170aaed14c93b3e770 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_EXPRS_IN_PREDICATE_H_
#define IMPALA_EXPRS_IN_PREDICATE_H_
#include <string>
#include <boost/container/flat_set.hpp>
#include <boost/unordered_set.hpp>
#include "exprs/predicate.h"
#include "exprs/anyval-util.h"
#include "runtime/decimal-value.inline.h"
#include "runtime/string-value.inline.h"
#include "udf/udf.h"
namespace impala {
/// Utilities for evaluating expressions of the form "val [NOT] IN (x1, x2, x3...)".
/// In predicates are implemented using ScalarFnCall and the UDF interface.
///
/// There are two strategies for evaluating the IN predicate:
//
/// 1) SET_LOOKUP: This strategy is for when all the values in the IN list are constant.
/// In the prepare function, we create a set of the constant values from the IN list,
/// and use this set to lookup a given 'val'.
//
/// 2) ITERATE: This is the fallback strategy for when there are non-constant IN list
/// values, or very few values in the IN list. We simply iterate through every
/// expression and compare it to val. This strategy has no prepare function.
//
/// The FE chooses which strategy we should use by choosing the appropriate function
/// (e.g., InIterate() or InSetLookup()). If it chooses SET_LOOKUP, it also sets the
/// appropriate SetLookupPrepare and SetLookupClose functions.
class InPredicate {
public:
/// Functions for every type
static impala_udf::BooleanVal InIterate(impala_udf::FunctionContext* context,
const impala_udf::BooleanVal& val, int num_args,
const impala_udf::BooleanVal* args);
static impala_udf::BooleanVal NotInIterate(impala_udf::FunctionContext* context,
const impala_udf::BooleanVal& val, int num_args,
const impala_udf::BooleanVal* args);
static void SetLookupPrepare_boolean(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_boolean(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::BooleanVal& val,
int num_args, const impala_udf::BooleanVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::BooleanVal& val,
int num_args, const impala_udf::BooleanVal* args);
static impala_udf::BooleanVal InIterate(
impala_udf::FunctionContext* context, const impala_udf::TinyIntVal& val,
int num_args, const impala_udf::TinyIntVal* args);
static impala_udf::BooleanVal NotInIterate(
impala_udf::FunctionContext* context, const impala_udf::TinyIntVal& val,
int num_args, const impala_udf::TinyIntVal* args);
static void SetLookupPrepare_tinyint(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_tinyint(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::TinyIntVal& val,
int num_args, const impala_udf::TinyIntVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::TinyIntVal& val,
int num_args, const impala_udf::TinyIntVal* args);
static impala_udf::BooleanVal InIterate(
impala_udf::FunctionContext* context, const impala_udf::SmallIntVal& val,
int num_args, const impala_udf::SmallIntVal* args);
static impala_udf::BooleanVal NotInIterate(
impala_udf::FunctionContext* context, const impala_udf::SmallIntVal& val,
int num_args, const impala_udf::SmallIntVal* args);
static void SetLookupPrepare_smallint(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_smallint(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::SmallIntVal& val,
int num_args, const impala_udf::SmallIntVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::SmallIntVal& val,
int num_args, const impala_udf::SmallIntVal* args);
static impala_udf::BooleanVal InIterate(
impala_udf::FunctionContext* context, const impala_udf::IntVal& val,
int num_args, const impala_udf::IntVal* args);
static impala_udf::BooleanVal NotInIterate(
impala_udf::FunctionContext* context, const impala_udf::IntVal& val,
int num_args, const impala_udf::IntVal* args);
static void SetLookupPrepare_int(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_int(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::IntVal& val,
int num_args, const impala_udf::IntVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::IntVal& val,
int num_args, const impala_udf::IntVal* args);
static impala_udf::BooleanVal InIterate(
impala_udf::FunctionContext* context, const impala_udf::DateVal& val,
int num_args, const impala_udf::DateVal* args);
static impala_udf::BooleanVal NotInIterate(
impala_udf::FunctionContext* context, const impala_udf::DateVal& val,
int num_args, const impala_udf::DateVal* args);
static void SetLookupPrepare_date(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_date(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::DateVal& val,
int num_args, const impala_udf::DateVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::DateVal& val,
int num_args, const impala_udf::DateVal* args);
static impala_udf::BooleanVal InIterate(
impala_udf::FunctionContext* context, const impala_udf::BigIntVal& val,
int num_args, const impala_udf::BigIntVal* args);
static impala_udf::BooleanVal NotInIterate(
impala_udf::FunctionContext* context, const impala_udf::BigIntVal& val,
int num_args, const impala_udf::BigIntVal* args);
static void SetLookupPrepare_bigint(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_bigint(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::BigIntVal& val,
int num_args, const impala_udf::BigIntVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::BigIntVal& val,
int num_args, const impala_udf::BigIntVal* args);
static impala_udf::BooleanVal InIterate(
impala_udf::FunctionContext* context, const impala_udf::FloatVal& val,
int num_args, const impala_udf::FloatVal* args);
static impala_udf::BooleanVal NotInIterate(
impala_udf::FunctionContext* context, const impala_udf::FloatVal& val,
int num_args, const impala_udf::FloatVal* args);
static void SetLookupPrepare_float(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_float(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::FloatVal& val,
int num_args, const impala_udf::FloatVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::FloatVal& val,
int num_args, const impala_udf::FloatVal* args);
static impala_udf::BooleanVal InIterate(
impala_udf::FunctionContext* context, const impala_udf::DoubleVal& val,
int num_args, const impala_udf::DoubleVal* args);
static impala_udf::BooleanVal NotInIterate(
impala_udf::FunctionContext* context, const impala_udf::DoubleVal& val,
int num_args, const impala_udf::DoubleVal* args);
static void SetLookupPrepare_double(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_double(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::DoubleVal& val,
int num_args, const impala_udf::DoubleVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::DoubleVal& val,
int num_args, const impala_udf::DoubleVal* args);
static impala_udf::BooleanVal InIterate(
impala_udf::FunctionContext* context, const impala_udf::StringVal& val,
int num_args, const impala_udf::StringVal* args);
static impala_udf::BooleanVal NotInIterate(
impala_udf::FunctionContext* context, const impala_udf::StringVal& val,
int num_args, const impala_udf::StringVal* args);
static void SetLookupPrepare_string(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_string(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::StringVal& val,
int num_args, const impala_udf::StringVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::StringVal& val,
int num_args, const impala_udf::StringVal* args);
static impala_udf::BooleanVal InIterate(
impala_udf::FunctionContext* context, const impala_udf::TimestampVal& val,
int num_args, const impala_udf::TimestampVal* args);
static impala_udf::BooleanVal NotInIterate(
impala_udf::FunctionContext* context, const impala_udf::TimestampVal& val,
int num_args, const impala_udf::TimestampVal* args);
static void SetLookupPrepare_timestamp(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_timestamp(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::TimestampVal& val,
int num_args, const impala_udf::TimestampVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::TimestampVal& val,
int num_args, const impala_udf::TimestampVal* args);
static impala_udf::BooleanVal InIterate(
impala_udf::FunctionContext* context, const impala_udf::DecimalVal& val,
int num_args, const impala_udf::DecimalVal* args);
static impala_udf::BooleanVal NotInIterate(
impala_udf::FunctionContext* context, const impala_udf::DecimalVal& val,
int num_args, const impala_udf::DecimalVal* args);
static void SetLookupPrepare_decimal(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static void SetLookupClose_decimal(impala_udf::FunctionContext* ctx,
impala_udf::FunctionContext::FunctionStateScope scope);
static impala_udf::BooleanVal InSetLookup(
impala_udf::FunctionContext* context, const impala_udf::DecimalVal& val,
int num_args, const impala_udf::DecimalVal* args);
static impala_udf::BooleanVal NotInSetLookup(
impala_udf::FunctionContext* context, const impala_udf::DecimalVal& val,
int num_args, const impala_udf::DecimalVal* args);
private:
friend class InPredicateBenchmark;
enum Strategy {
/// Indicates we should use SetLookUp().
SET_LOOKUP,
/// Indicates we should use Iterate().
ITERATE
};
template<typename SetType>
struct SetLookupState {
/// If true, there is at least one NULL constant in the IN list.
bool contains_null;
/// The set of all non-NULL constant values in the IN list.
boost::unordered_set<SetType> val_set;
/// The type of the arguments
const FunctionContext::TypeDesc* type;
};
/// The templated function that provides the implementation for all the In() and NotIn()
/// functions.
template<typename T, typename SetType, bool not_in, Strategy strategy>
static inline impala_udf::BooleanVal TemplatedIn(
impala_udf::FunctionContext* ctx, const T& val, int num_args, const T* args);
/// Initializes an SetLookupState in ctx.
template<typename T, typename SetType>
static void SetLookupPrepare(
FunctionContext* ctx, FunctionContext::FunctionStateScope scope);
template<typename SetType>
static void SetLookupClose(
FunctionContext* ctx, FunctionContext::FunctionStateScope scope);
/// Looks up v in state->val_set.
template<typename T, typename SetType>
static BooleanVal SetLookup(SetLookupState<SetType>* state, const T& v);
/// Iterates through each vararg looking for val. 'type' is the type of 'val' and
/// 'args'.
template <typename T>
static BooleanVal Iterate(
const FunctionContext::TypeDesc* type, const T& val, int num_args, const T* args);
// Templated getter functions for extracting 'SetType' values from AnyVals
template <typename T, typename SetType>
static inline SetType GetVal(const FunctionContext::TypeDesc* type, const T& x);
};
template <typename T, typename SetType, bool not_in, InPredicate::Strategy strategy>
inline impala_udf::BooleanVal InPredicate::TemplatedIn(
impala_udf::FunctionContext* ctx, const T& val, int num_args, const T* args) {
if (val.is_null) return BooleanVal::null();
BooleanVal found;
if (strategy == SET_LOOKUP) {
SetLookupState<SetType>* state = reinterpret_cast<SetLookupState<SetType>*>(
ctx->GetFunctionState(FunctionContext::FRAGMENT_LOCAL));
DCHECK(state != NULL);
found = SetLookup(state, val);
} else {
DCHECK_EQ(strategy, ITERATE);
found = Iterate(ctx->GetArgType(0), val, num_args, args);
}
if (found.is_null) return BooleanVal::null();
return BooleanVal(found.val ^ not_in);
}
template <typename T, typename SetType>
void InPredicate::SetLookupPrepare(
FunctionContext* ctx, FunctionContext::FunctionStateScope scope) {
if (scope != FunctionContext::FRAGMENT_LOCAL) return;
SetLookupState<SetType>* state = new SetLookupState<SetType>;
state->type = ctx->GetArgType(0);
state->contains_null = false;
// Collect all values in a vector to use the bulk insert API to avoid N^2 behavior
// with flat_set.
std::vector<SetType> element_list;
element_list.reserve(ctx->GetNumArgs() - 1);
for (int i = 1; i < ctx->GetNumArgs(); ++i) {
DCHECK(ctx->IsArgConstant(i));
T* arg = reinterpret_cast<T*>(ctx->GetConstantArg(i));
if (arg->is_null) {
state->contains_null = true;
} else {
element_list.push_back(GetVal<T, SetType>(state->type, *arg));
}
}
state->val_set.insert(element_list.begin(), element_list.end());
ctx->SetFunctionState(scope, state);
}
template <typename SetType>
void InPredicate::SetLookupClose(
FunctionContext* ctx, FunctionContext::FunctionStateScope scope) {
if (scope != FunctionContext::FRAGMENT_LOCAL) return;
SetLookupState<SetType>* state =
reinterpret_cast<SetLookupState<SetType>*>(ctx->GetFunctionState(scope));
delete state;
ctx->SetFunctionState(scope, nullptr);
}
template <typename T, typename SetType>
BooleanVal InPredicate::SetLookup(SetLookupState<SetType>* state, const T& v) {
DCHECK(state != NULL);
SetType val = GetVal<T, SetType>(state->type, v);
bool found = state->val_set.find(val) != state->val_set.end();
if (found) return BooleanVal(true);
if (state->contains_null) return BooleanVal::null();
return BooleanVal(false);
}
template <typename T>
BooleanVal InPredicate::Iterate(
const FunctionContext::TypeDesc* type, const T& val, int num_args, const T* args) {
bool found_null = false;
for (int i = 0; i < num_args; ++i) {
if (args[i].is_null) {
found_null = true;
} else if (AnyValUtil::Equals(*type, val, args[i])) {
return BooleanVal(true);
}
}
if (found_null) return BooleanVal::null();
return BooleanVal(false);
}
template <typename T, typename SetType>
inline SetType InPredicate::GetVal(const FunctionContext::TypeDesc* type, const T& x) {
DCHECK(!x.is_null);
return x.val;
}
template <>
inline StringValue InPredicate::GetVal(
const FunctionContext::TypeDesc* type, const StringVal& x) {
DCHECK(!x.is_null);
return StringValue::FromStringVal(x);
}
template <>
inline TimestampValue InPredicate::GetVal(
const FunctionContext::TypeDesc* type, const TimestampVal& x) {
return TimestampValue::FromTimestampVal(x);
}
template <>
inline Decimal16Value InPredicate::GetVal(
const FunctionContext::TypeDesc* type, const DecimalVal& x) {
if (type->precision <= ColumnType::MAX_DECIMAL4_PRECISION) {
return Decimal16Value(x.val4);
} else if (type->precision <= ColumnType::MAX_DECIMAL8_PRECISION) {
return Decimal16Value(x.val8);
} else {
return Decimal16Value(x.val16);
}
}
// IMPALA-6621: Due to an implementation detail in boost, these 3 types are slower when
// using unordered_map as compared to flat_set.
template <>
struct InPredicate::SetLookupState<int32_t> {
bool contains_null;
boost::container::flat_set<int32_t> val_set;
const FunctionContext::TypeDesc* type;
};
template <>
struct InPredicate::SetLookupState<int64_t> {
bool contains_null;
boost::container::flat_set<int64_t> val_set;
const FunctionContext::TypeDesc* type;
};
template <>
struct InPredicate::SetLookupState<float> {
bool contains_null;
boost::container::flat_set<float> val_set;
const FunctionContext::TypeDesc* type;
};
}
#endif