// blob: e1954b70e8e2b083977ce17c32f14243f145c80e [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_EXPRS_SCALAR_EXPR_H
#define IMPALA_EXPRS_SCALAR_EXPR_H
#include <memory>
#include <string>
#include <vector>
#include <boost/scoped_ptr.hpp>
#include "common/global-types.h"
#include "common/status.h"
#include "exprs/expr.h"
#include "impala-ir/impala-ir-functions.h"
#include "runtime/types.h"
#include "udf/udf-internal.h" // for CollectionVal
#include "udf/udf.h"
/// Forward declarations of the LLVM types that this header only names through
/// pointers, so that the LLVM headers do not have to be included here.
namespace llvm {
  class BasicBlock;
  class Function;
  class Type;
  class Value;
}
namespace impala {
/// Bring the UDF interface types into the impala namespace so that the declarations
/// below (and any code including this header) can name them without the impala_udf::
/// qualifier.
using impala_udf::FunctionContext;
using impala_udf::AnyVal;
using impala_udf::BooleanVal;
using impala_udf::TinyIntVal;
using impala_udf::SmallIntVal;
using impala_udf::IntVal;
using impala_udf::BigIntVal;
using impala_udf::FloatVal;
using impala_udf::DoubleVal;
using impala_udf::TimestampVal;
using impala_udf::StringVal;
using impala_udf::DecimalVal;
using impala_udf::CollectionVal;
/// Forward declarations of project types. Where this header names them at all, it is
/// only through pointers, references or friend declarations, so their full definitions
/// are not needed here.
class LibCacheEntry;
class LlvmCodeGen;
class MemTracker;
class ObjectPool;
class RowDescriptor;
class RuntimeState;
class ScalarExprEvaluator;
class SlotDescriptor;
class TColumnValue;
class TExpr;
class TExprNode;
class Tuple;
class TupleRow;
/// --- ScalarExpr overview
///
/// ScalarExpr is an expression which returns a value for each input tuple row.
/// Examples include built-in functions such as abs(), UDFs, case-exprs and literals
/// such as the string "foobar". It's a subclass of Expr which represents an expression
/// as a tree.
///
/// --- Implementation:
///
/// ScalarExpr implements a compute function which, given a row, performs the computation
/// of an expr and produces a scalar result. This function evaluates the necessary child
/// arguments by calling their compute functions, then performs whatever computation is
/// necessary on the arguments to generate the result. These compute functions have
/// signature Get*Val(ScalarExprEvaluator*, const TupleRow*). One is implemented for each
/// possible return type it supports (e.g. GetBooleanVal(), GetStringVal(), etc). The
/// return type is a subclass of AnyVal (e.g. StringVal). One or more of these compute
/// functions must be overridden by subclasses of ScalarExpr.
///
/// ScalarExpr contains query compile-time information about an expression (e.g.
/// sub-expressions implicitly encoded in the tree structure) and the LLVM IR compute
/// functions. ScalarExprEvaluator is the interface for evaluating a scalar expression
/// against an input TupleRow.
///
/// ScalarExpr's compute functions are codegend to replace calls to the generic compute
/// function of child expressions with the exact compute functions based on the return
/// types of the child expressions known at runtime. Subclasses should override
/// GetCodegendComputeFn() to either generate custom IR compute functions using IRBuilder,
/// which inline calls to child expressions' compute functions, or simply call
/// GetCodegendComputeFnWrapper() to generate a wrapper function to call the interpreted
/// compute function. Note that we do not need a separate GetCodegendComputeFn() for each
/// type.
///
/// TODO: Fix subclasses which call GetCodegendComputeFnWrapper() to not call interpreted
/// functions.
///
class ScalarExpr : public Expr {
 public:
  /// Create a new ScalarExpr based on thrift Expr 'texpr'. The newly created ScalarExpr
  /// is stored in ObjectPool 'pool' and returned in 'expr' on success. 'row_desc' is the
  /// tuple row descriptor of the input tuple row. On failure, 'expr' is set to NULL and
  /// the expr tree (if created) will be closed. Error status will be returned too.
  static Status Create(const TExpr& texpr, const RowDescriptor& row_desc,
      RuntimeState* state, ObjectPool* pool, ScalarExpr** expr) WARN_UNUSED_RESULT;

  /// Create a new ScalarExpr based on thrift Expr 'texpr'. The newly created ScalarExpr
  /// is stored in ObjectPool 'state->obj_pool()' and returned in 'expr'. 'row_desc' is
  /// the tuple row descriptor of the input tuple row. Returns error status on failure.
  static Status Create(const TExpr& texpr, const RowDescriptor& row_desc,
      RuntimeState* state, ScalarExpr** expr) WARN_UNUSED_RESULT;

  /// Convenience function creating multiple ScalarExprs from 'texprs'. The created
  /// exprs are returned in 'exprs'; 'pool' is the ObjectPool passed through for them.
  static Status Create(const std::vector<TExpr>& texprs, const RowDescriptor& row_desc,
      RuntimeState* state, ObjectPool* pool, std::vector<ScalarExpr*>* exprs)
      WARN_UNUSED_RESULT;

  /// Convenience function creating multiple ScalarExprs from 'texprs' without an
  /// explicit ObjectPool argument. The created exprs are returned in 'exprs'.
  static Status Create(const std::vector<TExpr>& texprs, const RowDescriptor& row_desc,
      RuntimeState* state, std::vector<ScalarExpr*>* exprs) WARN_UNUSED_RESULT;

  /// Returns true if this expression is a SlotRef. Overridden by SlotRef.
  virtual bool IsSlotRef() const { return false; }

  /// Returns true if this is a literal expression. Overridden by Literal.
  virtual bool IsLiteral() const { return false; }

  /// Returns true if this expr uses a FunctionContext to track its runtime state.
  /// Overridden by exprs which use FunctionContext.
  virtual bool HasFnCtx() const { return false; }

  /// Returns true if this expr should be treated as a constant expression.
  bool is_constant() const { return is_constant_; }

  /// Returns the number of SlotRef nodes in the expr tree. If 'slot_ids' is non-null,
  /// add the slot ids to it. Overridden by SlotRef.
  virtual int GetSlotIds(std::vector<SlotId>* slot_ids = nullptr) const;

  /// Returns an llvm::Function* with signature:
  /// <subclass of AnyVal> ComputeFn(ScalarExprEvaluator*, const TupleRow*)
  ///
  /// The function should evaluate this expr over 'row' and return the result as the
  /// appropriate type of AnyVal. Returns error status on failure.
  virtual Status GetCodegendComputeFn(
      LlvmCodeGen* codegen, llvm::Function** fn) WARN_UNUSED_RESULT = 0;

  /// Simple debug string that provides no expr subclass-specific information
  virtual std::string DebugString() const;

  /// Debug string for a list of exprs.
  static std::string DebugString(const std::vector<ScalarExpr*>& exprs);

  /// Debug string variant taking the name of the expr, 'expr_name', to use in the
  /// output (presumably for subclasses to label themselves; see the .cc file).
  std::string DebugString(const std::string& expr_name) const;

  /// Computes a memory efficient layout for storing the results of evaluating 'exprs'.
  /// The results are assumed to be void* slot types (vs AnyVal types). Varlen data is
  /// not included (e.g. there will be space for a StringValue, but not the data
  /// referenced by it).
  ///
  /// Returns the number of bytes necessary to store all the results and offsets
  /// where the result for each expr should be stored.
  ///
  /// Variable length types are guaranteed to be at the end and 'var_result_begin'
  /// will be set to the beginning byte offset where variable length results begin.
  /// 'var_result_begin' will be set to -1 if there are no variable len types.
  static int ComputeResultsLayout(const std::vector<ScalarExpr*>& exprs,
      std::vector<int>* offsets, int* var_result_begin);

  /// Releases cache entries to libCache for all nodes in the ScalarExpr tree.
  virtual void Close();

  /// Convenience function for closing a list of ScalarExpr.
  static void Close(const std::vector<ScalarExpr*>& exprs);

  /// Class name string used on the LLVM side; the value is defined outside this
  /// header. NOTE(review): presumably the IR type name of this class -- confirm in
  /// the .cc file.
  static const char* LLVM_CLASS_NAME;

 protected:
  friend class Expr;
  friend class AggFn;
  friend class AggFnEvaluator;
  friend class AndPredicate;
  friend class CaseExpr;
  friend class CoalesceExpr;
  friend class ConditionalFunctions;
  friend class CompoundPredicate;
  friend class DecimalFunctions;
  friend class DecimalOperators;
  friend class HiveUdfCall;
  friend class IfExpr;
  friend class InPredicate;
  friend class IsNotEmptyPredicate;
  friend class IsNullExpr;
  friend class KuduPartitionExpr;
  friend class Literal;
  friend class NullLiteral;
  friend class OrPredicate;
  friend class Predicate;
  friend class ScalarExprEvaluator;
  friend class ScalarFnCall;

  /// For BE tests
  friend class ExprTest;
  friend class ExprCodegenTest;
  friend class HashTableTest;
  friend class OldHashTableTest;

  /// Cached LLVM IR for the compute function. Set this in GetCodegendComputeFn().
  llvm::Function* ir_compute_fn_ = nullptr;

  /// Assigns indices into the FunctionContext vector 'fn_ctxs_' in an evaluator to
  /// nodes which need FunctionContext in the tree. 'next_fn_ctx_idx' is the index
  /// of the next available entry in the vector. It's updated as this function is
  /// called recursively down the tree.
  void AssignFnCtxIdx(int* next_fn_ctx_idx);

  /// Returns 'fn_ctx_idx_', which is -1 if this node doesn't need a FunctionContext.
  int fn_ctx_idx() const { return fn_ctx_idx_; }

  /// Creates a single ScalarExpr node based on 'texpr_node' and returns it
  /// in 'expr'. Return error status on failure.
  static Status CreateNode(const TExprNode& texpr_node, ObjectPool* pool,
      ScalarExpr** expr) WARN_UNUSED_RESULT;

  /// Constructs a node from an explicit return type 'type' and constant-ness
  /// 'is_constant'.
  ScalarExpr(const ColumnType& type, bool is_constant);

  /// Constructs a node from the thrift representation 'node'.
  ScalarExpr(const TExprNode& node);

  /// Virtual compute functions for each return type. Each subclass should override
  /// the functions for the return type(s) it supports. For example, a boolean function
  /// will only override GetBooleanVal(). Some Exprs, like Literal, have many possible
  /// return types and will override multiple Get*Val() functions. These functions should
  /// be called by other ScalarExpr and ScalarExprEvaluator only.
  virtual BooleanVal GetBooleanVal(ScalarExprEvaluator*, const TupleRow*) const;
  virtual TinyIntVal GetTinyIntVal(ScalarExprEvaluator*, const TupleRow*) const;
  virtual SmallIntVal GetSmallIntVal(ScalarExprEvaluator*, const TupleRow*) const;
  virtual IntVal GetIntVal(ScalarExprEvaluator*, const TupleRow*) const;
  virtual BigIntVal GetBigIntVal(ScalarExprEvaluator*, const TupleRow*) const;
  virtual FloatVal GetFloatVal(ScalarExprEvaluator*, const TupleRow*) const;
  virtual DoubleVal GetDoubleVal(ScalarExprEvaluator*, const TupleRow*) const;
  virtual StringVal GetStringVal(ScalarExprEvaluator*, const TupleRow*) const;
  virtual CollectionVal GetCollectionVal(ScalarExprEvaluator*, const TupleRow*) const;
  virtual TimestampVal GetTimestampVal(ScalarExprEvaluator*, const TupleRow*) const;
  virtual DecimalVal GetDecimalVal(ScalarExprEvaluator*, const TupleRow*) const;

  /// Initializes all nodes in the expr tree. Subclasses overriding this function should
  /// call ScalarExpr::Init() to recursively call Init() on the expr tree.
  virtual Status Init(const RowDescriptor& row_desc, RuntimeState* state)
      WARN_UNUSED_RESULT;

  /// Initializes 'eval' for execution. If scope is FRAGMENT_LOCAL, both
  /// fragment-local and thread-local states should be initialized. If scope is
  /// THREAD_LOCAL, only thread-local states should be initialized. THREAD_LOCAL
  /// scope is used for cloned evaluator.
  ///
  /// Subclasses overriding this function should call ScalarExpr::OpenEvaluator() to
  /// recursively call OpenEvaluator() on all nodes in the ScalarExpr tree.
  virtual Status OpenEvaluator(FunctionContext::FunctionStateScope scope,
      RuntimeState* state, ScalarExprEvaluator* eval) const
      WARN_UNUSED_RESULT;

  /// Free resources held by the 'eval' allocated during OpenEvaluator().
  /// If scope is FRAGMENT_LOCAL, both fragment-local and thread-local states should be
  /// torn down. If scope is THREAD_LOCAL, only thread-local state should be torn down.
  ///
  /// Subclasses overriding this function should call ScalarExpr::CloseEvaluator() to
  /// recursively call CloseEvaluator() on all nodes in the ScalarExpr tree.
  virtual void CloseEvaluator(FunctionContext::FunctionStateScope scope,
      RuntimeState* state, ScalarExprEvaluator* eval) const;

  /// Computes the size of the varargs buffer in bytes (0 bytes if no varargs).
  /// Overridden by ScalarFnCall.
  virtual int ComputeVarArgsBufferSize() const { return 0; }

  /// Helper function to create an empty llvm::Function* with the signature:
  /// *Val name(ScalarExprEvaluator*, TupleRow*);
  ///
  /// 'name' is the name of the returned llvm::Function*. The arguments to the IR function
  /// are returned in 'args'. The return type is determined by the return type of the expr
  /// tree.
  llvm::Function* CreateIrFunctionPrototype(const std::string& name, LlvmCodeGen* codegen,
      llvm::Value* (*args)[2]);

  /// Generates an IR compute function that calls the interpreted compute function.
  /// It doesn't provide any performance benefit over the interpreted path. This is
  /// useful for builtins (e.g. && and || operators) and UDF which don't generate
  /// custom IR code but are part of a larger expr tree. The IR compute function of
  /// the larger expr tree may still benefit from custom IR and inlining of other
  /// sub-expressions.
  ///
  /// TODO: this should be removed in the long run and replaced with cross-compilation
  /// together with constant propagation and loop unrolling.
  Status GetCodegendComputeFnWrapper(LlvmCodeGen* codegen, llvm::Function** fn)
      WARN_UNUSED_RESULT;

  /// Helper function for GetCodegendComputeFnWrapper(). Returns the cross-compiled IR
  /// function of the static Get*Val wrapper function for return type 'type'.
  llvm::Function* GetStaticGetValWrapper(ColumnType type, LlvmCodeGen* codegen);

 private:
  /// 'fn_ctx_idx_' is the index into the FunctionContext vector in ScalarExprEvaluator
  /// for storing FunctionContext needed to evaluate this ScalarExpr node. It's -1 if this
  /// ScalarExpr doesn't need a FunctionContext. The FunctionContext is managed by the
  /// evaluator and initialized by calling ScalarExpr::OpenEvaluator().
  int fn_ctx_idx_ = -1;

  /// [fn_ctx_idx_start_, fn_ctx_idx_end_) defines the range in FunctionContext vector
  /// in ScalarExprEvaluator for the expression subtree rooted at this ScalarExpr node.
  int fn_ctx_idx_start_ = 0;
  int fn_ctx_idx_end_ = 0;

  /// True if this expr should be treated as a constant expression. True if either:
  /// * This expr was sent from the frontend and Expr.isConstant() was true.
  /// * This expr is a constant literal created in the backend.
  const bool is_constant_;

  /// Static wrappers which call the compute function of the given ScalarExpr, passing
  /// it the ScalarExprEvaluator and TupleRow. These are cross-compiled and called by
  /// the IR wrapper functions generated by GetCodegendComputeFnWrapper().
  /// Note that no static wrapper is declared here for GetCollectionVal().
  static BooleanVal GetBooleanVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
  static TinyIntVal GetTinyIntVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
  static SmallIntVal GetSmallIntVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
  static IntVal GetIntVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
  static BigIntVal GetBigIntVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
  static FloatVal GetFloatVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
  static DoubleVal GetDoubleVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
  static StringVal GetStringVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
  static TimestampVal GetTimestampVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
  static DecimalVal GetDecimalVal(ScalarExpr*, ScalarExprEvaluator*, const TupleRow*);
};
}
#endif