// blob: cee3ad9f1fa6688ec81a348593e057ce239d93a6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_EXPRS_SCALAR_EXPR_EVALUATOR_H
#define IMPALA_EXPRS_SCALAR_EXPR_EVALUATOR_H
#include <boost/scoped_ptr.hpp>
#include "common/object-pool.h"
#include "common/status.h"
#include "exprs/expr-value.h"
#include "udf/udf-internal.h" // for CollectionVal
#include "udf/udf.h"
namespace impala {
// Make the UDF-facing value types and FunctionContext visible inside namespace
// impala so the declarations below can name them without the impala_udf:: prefix.
// Listed alphabetically.
using impala_udf::AnyVal;
using impala_udf::BigIntVal;
using impala_udf::BooleanVal;
using impala_udf::CollectionVal;
using impala_udf::DateVal;
using impala_udf::DecimalVal;
using impala_udf::DoubleVal;
using impala_udf::FloatVal;
using impala_udf::FunctionContext;
using impala_udf::IntVal;
using impala_udf::SmallIntVal;
using impala_udf::StringVal;
using impala_udf::TimestampVal;
using impala_udf::TinyIntVal;

// Forward declarations of project types referenced by the declarations below.
class MemPool;
class RuntimeState;
class ScalarExpr;
class Status;
class TupleRow;
/// ScalarExprEvaluator is the interface for evaluating a scalar expression. It holds a
/// reference to the root of a ScalarExpr tree, runtime state (e.g. FunctionContexts)
/// needed during evaluation and also a buffer for the expression evaluation result.
/// A single evaluator is not thread-safe. It implements Get*Val() interfaces for every
/// possible return type and drives the execution by calling the Get*Val() function of
/// the root ScalarExpr with the input tuple row.
///
/// A ScalarExprEvaluator is created using the Create() interface. It must be initialized
/// by calling Open() before use and Close() must also be called to free up resources
/// owned by the evaluator.
///
/// FunctionContext is the interface for Impala to communicate with built-in functions,
/// UDF and UDAF. It is passed to UDF/UDAF to store its thread-private states, propagate
/// errors and allocate memory. An evaluator contains a vector of FunctionContext for
/// the ScalarExpr nodes in the Expr tree. The index of each node's entry is defined in
/// its 'fn_ctx_idx_' field. The range in the vector for the sub-expression tree
/// rooted at a node is defined by [fn_ctx_idx_start_, fn_ctx_idx_end_).
///
class ScalarExprEvaluator {
public:
/// Destructor. Note that resources owned by the evaluator are freed by Close(), which
/// callers must invoke explicitly on every evaluator.
~ScalarExprEvaluator();
/// Creates an evaluator for the scalar expression tree rooted at 'expr' and all
/// FunctionContexts needed during evaluation.
///
/// Permanent allocations (i.e. those that must live until the evaluator is closed) come
/// from 'expr_perm_pool'. Allocations that may contain expr results (i.e. the
/// results of GetValue(), GetStringVal(), etc) come from 'expr_results_pool'. Lifetime
/// of memory in 'expr_results_pool' is managed by the owner of the pool and may be freed
/// by the owner at any time except when the evaluator is in the middle of evaluating
/// the expression. These pools can be shared between evaluators (so long as the
/// required memory lifetimes are compatible) but cannot be shared between threads
/// since MemPools are not thread-safe.
///
/// Note that the caller is responsible to call Close() on all evaluators even if this
/// function returns error status on initialization failure.
static Status Create(const ScalarExpr& expr, RuntimeState* state, ObjectPool* pool,
MemPool* expr_perm_pool, MemPool* expr_results_pool,
ScalarExprEvaluator** eval) WARN_UNUSED_RESULT;
/// Convenience function for creating multiple ScalarExprEvaluators. The evaluators
/// are returned in 'evals'.
static Status Create(const std::vector<ScalarExpr*>& exprs, RuntimeState* state,
ObjectPool* pool, MemPool* expr_perm_pool, MemPool* expr_results_pool,
std::vector<ScalarExprEvaluator*>* evals) WARN_UNUSED_RESULT;
/// Initializes the ScalarExprEvaluator on all nodes in the ScalarExpr tree. This is
/// also the location in which constant arguments to functions are computed. Does not
/// need to be called on clones. Idempotent (this allows exprs to be opened multiple
/// times in subplans without reinitializing function states).
Status Open(RuntimeState* state) WARN_UNUSED_RESULT;
/// Convenience function for opening multiple ScalarExprEvaluators.
static Status Open(const std::vector<ScalarExprEvaluator*>& evals,
RuntimeState* state) WARN_UNUSED_RESULT;
/// Free resources held by this evaluator. Must be called on every ScalarExprEvaluator,
/// including clones. Has no effect if already closed.
void Close(RuntimeState* state);
/// Convenience function for closing multiple ScalarExprEvaluators.
static void Close(const std::vector<ScalarExprEvaluator*>& evals,
RuntimeState* state);
/// Creates a copy of this ScalarExprEvaluator. Open() must be called first. The copy
/// contains clones of each FunctionContext, which share the fragment-local state of the
/// original one but have their own memory and thread-local state. This should be used
/// to create a ScalarExprEvaluator for each execution thread that needs to evaluate
/// 'root_'. 'expr_perm_pool' and 'expr_results_pool' are used for allocations so callers
/// must use different MemPools for evaluators in different threads. Note that clones
/// are considered opened. The cloned ScalarExprEvaluator cannot be used after the
/// original ScalarExprEvaluator is destroyed because it may reference fragment-local
/// state from the original.
/// TODO: IMPALA-4743: Evaluate input arguments in ScalarExpr::Init() and store them
/// in ScalarExpr.
Status Clone(ObjectPool* pool, RuntimeState* state, MemPool* expr_perm_pool,
MemPool* expr_results_pool, ScalarExprEvaluator** new_eval) const WARN_UNUSED_RESULT;
/// Convenience function for cloning multiple ScalarExprEvaluators. The newly
/// created evaluators are appended to 'new_evals'.
static Status Clone(ObjectPool* pool, RuntimeState* state, MemPool* expr_perm_pool,
MemPool* expr_results_pool, const std::vector<ScalarExprEvaluator*>& evals,
std::vector<ScalarExprEvaluator*>* new_evals) WARN_UNUSED_RESULT;
/// If 'expr' is constant, evaluates it with no input row argument and returns the
/// result in 'const_val'. Sets 'const_val' to NULL if the argument is not constant.
/// The returned AnyVal and associated varlen data is owned by this evaluator. This
/// should only be called after Open() has been called on this expr. Returns an error
/// if there was an error evaluating the expression or if memory could not be allocated
/// for the expression result.
Status GetConstValue(
RuntimeState* state, const ScalarExpr& expr, AnyVal** const_val) WARN_UNUSED_RESULT;
/// Calls the appropriate Get*Val() function on 'e' and stores the result in 'result_'.
/// This is used by ScalarExpr to call GetValue() on a sub-expression, rather than
/// 'root_'.
void* GetValue(const ScalarExpr& e, const TupleRow* row);
/// Calls the appropriate Get*Val() function on this evaluator's root_ expr tree, stores
/// the result in 'result_' and returns a pointer to it.
void* GetValue(const TupleRow* row);
/// Evaluates the expression of this evaluator on tuple row 'row' and returns
/// the results. One function for each data type implemented.
BooleanVal GetBooleanVal(const TupleRow* row);
TinyIntVal GetTinyIntVal(const TupleRow* row);
SmallIntVal GetSmallIntVal(const TupleRow* row);
IntVal GetIntVal(const TupleRow* row);
BigIntVal GetBigIntVal(const TupleRow* row);
FloatVal GetFloatVal(const TupleRow* row);
DoubleVal GetDoubleVal(const TupleRow* row);
StringVal GetStringVal(const TupleRow* row);
CollectionVal GetCollectionVal(const TupleRow* row);
TimestampVal GetTimestampVal(const TupleRow* row);
DecimalVal GetDecimalVal(const TupleRow* row);
DateVal GetDateVal(const TupleRow* row);
/// Returns an error status if there was any error in evaluating the expression
/// or its sub-expressions. 'start_idx' and 'end_idx' correspond to the range
/// within the vector of FunctionContext for the sub-expressions of interest.
/// The default parameters correspond to the entire expr 'root_'.
Status GetError(int start_idx = 0, int end_idx = -1) const WARN_UNUSED_RESULT;
/// Convenience functions: print value into 'str' or 'stream'. NULL turns into "NULL".
/// The first two variants will evaluate the tuple row against 'root_'.
void PrintValue(const TupleRow* row, std::string* str);
void PrintValue(const TupleRow* row, std::stringstream* stream);
void PrintValue(void* value, std::string* str);
void PrintValue(void* value, std::stringstream* stream);
/// Get the number of digits after the decimal that should be displayed for this value.
/// Returns -1 if no scale has been specified (currently the scale is only set for
/// doubles set by RoundUpTo). GetValue() must have already been called.
/// TODO: remove this (IMPALA-4720).
int output_scale() const { return output_scale_; }
/// Simple accessors for the root expr, the lifecycle flags and the permanent MemPool.
const ScalarExpr& root() const { return root_; }
bool opened() const { return opened_; }
bool closed() const { return closed_; }
bool is_clone() const { return is_clone_; }
MemPool* expr_perm_pool() const { return expr_perm_pool_; }
/// The builtin functions are not called from anywhere in the code and the
/// symbols are therefore not included in the binary. We call these functions
/// by using dlsym. The compiler must think this function is callable to
/// not strip these symbols.
static void InitBuiltinsDummy();
/// NOTE(review): presumably the name of this class as seen by LLVM codegen; the value
/// is defined outside this header -- confirm against the definition.
static const char* LLVM_CLASS_NAME;
protected:
/// Users of fn_context().
friend class CaseExpr;
friend class CastFormatExpr;
friend class HiveUdfCall;
friend class ScalarFnCall;
/// Retrieves a registered FunctionContext. 'i' is the 'fn_ctx_idx_' of the
/// corresponding sub-expression in the Expr tree. 'i' must be a valid index into
/// 'fn_ctxs_', i.e. in the range [0, fn_ctxs_.size()); this is checked with DCHECKs.
FunctionContext* fn_context(int i) {
DCHECK_GE(i, 0);
DCHECK_LT(i, fn_ctxs_.size());
return fn_ctxs_[i];
}
private:
/// ScalarExpr is granted access to the private members below.
friend class ScalarExpr;
/// FunctionContexts for nodes in this Expr tree. Created by this ScalarExprEvaluator
/// and live in the same object pool as this evaluator (i.e. same life span as the
/// evaluator).
std::vector<FunctionContext*> fn_ctxs_;
/// Array access to fn_ctxs_. Used by ScalarFnCall's codegen'd compute function
/// to access the correct FunctionContext.
FunctionContext** fn_ctxs_ptr_ = nullptr;
/// Pointer to the MemPool which all permanent allocations (including those from
/// 'fn_ctxs_') come from. Owned by the exec node or data sink which owns this
/// evaluator.
MemPool* const expr_perm_pool_;
/// The expr tree which this evaluator is for.
const ScalarExpr& root_;
/// Stores the result of evaluation for this expr tree (or any sub-expression).
/// This is used in interpreted path when we need to return a void*.
ExprValue result_;
/// True if this evaluator came from a Clone() call. Used to manage FunctionStateScope.
bool is_clone_ = false;
/// Variables keeping track of current state. 'opened_' and 'closed_' are exposed
/// through opened() and closed().
bool initialized_ = false;
bool opened_ = false;
bool closed_ = false;
/// The number of digits after the decimal that should be displayed for this value.
/// -1 if no scale has been specified (currently the scale is only set for doubles
/// set by RoundUpTo). This value relies on FunctionContext to be allocated first
/// before it's derived so it lives in the evaluator instead of Expr.
/// TODO: move this to Expr initialization after IMPALA-4743 is fixed.
int output_scale_ = -1;
/// Private constructor: evaluators are obtained through Create() or Clone() rather
/// than being constructed directly by users of this class.
ScalarExprEvaluator(const ScalarExpr& root, MemPool* expr_perm_pool,
MemPool* expr_results_pool);
/// Walks the expression tree 'expr' and fills in 'fn_ctxs_' for all Expr nodes
/// which need FunctionContext.
void CreateFnCtxs(RuntimeState* state, const ScalarExpr& expr, MemPool* expr_perm_pool,
MemPool* expr_results_pool);
};
}
#endif