// blob: 41f856216f69011903a3511cfaf82706f67a6baa [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_UDF_UDF_INTERNAL_H
#define IMPALA_UDF_UDF_INTERNAL_H
#include <string.h>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include <boost/cstdint.hpp>
/// Be very careful when adding Impala includes in this file. We don't want to pull
/// in unnecessary dependencies for the development libs.
#include "udf/udf.h"
namespace impala {
/// Returns from the enclosing function when 'ptr' compares equal to NULL. The bare
/// 'return;' means this can only be used inside functions returning void.
///
/// When 'ptr' is NULL, a DCHECK first verifies that the query status reachable via
/// ctx->impl()->state()->GetQueryStatus() is already non-OK, i.e. that whoever produced
/// the NULL pointer also recorded an error. 'ctx' is only referenced inside that DCHECK.
///
/// Both macro arguments are parenthesized in the expansion so that arbitrary
/// expressions (e.g. '&ctx_obj' or 'cond ? a : b') can be passed without being
/// re-associated by operator precedence.
#define RETURN_IF_NULL(ctx, ptr) \
  do { \
    if (UNLIKELY((ptr) == NULL)) { \
      DCHECK(!(ctx)->impl()->state()->GetQueryStatus().ok()); \
      return; \
    } \
  } while (false)
class FreePool;
class MemPool;
class RuntimeState;
class ScalarExpr;
/// This class actually implements the interface of FunctionContext. This is split to
/// hide the details from the external header.
/// Note: The actual user code does not include this file.
///
/// Exprs (e.g. UDFs and UDAs) require a FunctionContext to store state related to
/// evaluation of the expression. Each FunctionContext is associated with a backend Expr
/// or AggFnEvaluator, which is derived from a TExprNode generated by the Impala frontend.
/// FunctionContexts are allocated and managed by ScalarExprEvaluator. Exprs shouldn't try
/// to create FunctionContext themselves.
class FunctionContextImpl {
 public:
  /// Create a FunctionContext for a UDF. Caller is responsible for deleting it.
  /// UDF-managed allocations (i.e. Allocate()) are backed by 'perm_pool' and
  /// allocations that may hold expr results (i.e. AllocateForResults()) are backed
  /// by 'results_pool'.
  static impala_udf::FunctionContext* CreateContext(RuntimeState* state,
      MemPool* perm_pool, MemPool* results_pool,
      const impala_udf::FunctionContext::TypeDesc& return_type,
      const std::vector<impala_udf::FunctionContext::TypeDesc>& arg_types,
      int varargs_buffer_size = 0, bool debug = false);

  /// Create a FunctionContext for a UDA. Identical to the UDF version except for the
  /// intermediate type. Caller is responsible for deleting it.
  static impala_udf::FunctionContext* CreateContext(RuntimeState* state,
      MemPool* perm_pool, MemPool* results_pool,
      const impala_udf::FunctionContext::TypeDesc& intermediate_type,
      const impala_udf::FunctionContext::TypeDesc& return_type,
      const std::vector<impala_udf::FunctionContext::TypeDesc>& arg_types,
      int varargs_buffer_size = 0, bool debug = false);

  /// 'parent' is the FunctionContext that this object implements; it is not owned
  /// (see 'context_'). The constructor and destructor are defined out of line.
  FunctionContextImpl(impala_udf::FunctionContext* parent);
  ~FunctionContextImpl();

  /// Checks for any outstanding memory allocations. If there is (non-result) memory that
  /// was allocated by the UDF via this FunctionContext but not freed, adds a warning
  /// and frees the allocations.
  void Close();

  /// Returns a new FunctionContext with the same constant args, fragment-local state, and
  /// debug flag as this FunctionContext. The caller is responsible for calling delete on
  /// it. The cloned FunctionContext cannot be used after the original FunctionContext is
  /// destroyed because it may reference fragment-local state from the original.
  impala_udf::FunctionContext* Clone(MemPool* perm_pool, MemPool* results_pool);

  /// Allocates a buffer of 'byte_size' to hold expr results. If the new allocation
  /// causes the memory limit to be exceeded, the error will be set in this object
  /// causing the query to fail.
  ///
  /// These allocations live in the 'results_pool' passed into the constructor.
  /// 'results_pool' is managed by the Impala runtime and can be safely cleared
  /// whenever memory returned by the expression is no longer referenced.
  uint8_t* AllocateForResults(int64_t byte_size) noexcept;

  /// Replaces the current 'results_pool_' for 'new_results_pool' to be used for
  /// AllocateForResults(). Returns a pointer to the pool that was replaced.
  MemPool* SwapResultsPool(MemPool* new_results_pool) {
    MemPool* old_results_pool = results_pool_;
    results_pool_ = new_results_pool;
    return old_results_pool;
  }

  /// Sets the constant arg list. The vector should contain one entry per argument,
  /// with a non-NULL entry if the argument is constant. The AnyVal* values are
  /// owned by the caller and must be allocated from the ScalarExprEvaluator's MemPool.
  void SetConstantArgs(std::vector<impala_udf::AnyVal*>&& constant_args);

  /// One (child expression, destination value) pair per non-constant argument.
  typedef std::vector<std::pair<ScalarExpr*, impala_udf::AnyVal*>> NonConstantArgsVector;

  /// Sets the non-constant args. Contains one entry per non-constant argument. All
  /// pointers should be non-NULL. The Expr* and AnyVal* values are owned by the caller.
  /// The AnyVal* values must be allocated from the ScalarExprEvaluator's MemPool.
  void SetNonConstantArgs(NonConstantArgsVector&& non_constant_args);

  /// Accessors for the argument lists set via SetConstantArgs()/SetNonConstantArgs().
  const std::vector<impala_udf::AnyVal*>& constant_args() const { return constant_args_; }
  const NonConstantArgsVector& non_constant_args() const { return non_constant_args_; }

  /// Mutable access to the varargs buffer and to the staging vector for input values.
  uint8_t* varargs_buffer() { return varargs_buffer_; }
  std::vector<impala_udf::AnyVal*>* staging_input_vals() { return &staging_input_vals_; }

  /// Read-only state flags.
  bool debug() const { return debug_; }
  bool closed() const { return closed_; }

  /// Counters for the number of calls to Update()/Remove().
  int64_t num_updates() const { return num_updates_; }
  int64_t num_removes() const { return num_removes_; }
  void set_num_updates(int64_t n) { num_updates_ = n; }
  void set_num_removes(int64_t n) { num_removes_ = n; }
  void IncrementNumUpdates(int64_t n = 1) { num_updates_ += n; }
  void IncrementNumRemoves(int64_t n = 1) { num_removes_ += n; }

  /// Returns the type descriptors of the function's arguments. This is returned by
  /// const reference, like constant_args() and non_constant_args(), so that calling it
  /// does not copy the vector. The reference refers to a member of this object and must
  /// not be used after this object is destroyed.
  const std::vector<impala_udf::FunctionContext::TypeDesc>& arg_types() const {
    return arg_types_;
  }

  /// The query's runtime state; see 'state_' (NULL for test contexts).
  RuntimeState* state() { return state_; }

  /// Various static attributes of the UDF/UDA that can be injected as constants
  /// by codegen. Note that the argument types refer to those in the UDF/UDA signature,
  /// not the arguments of the C++ functions implementing the UDF/UDA. Any change to
  /// this enum must be reflected in FunctionContextImpl::GetConstFnAttr().
  enum ConstFnAttr {
    /// RETURN_TYPE_*: properties of FunctionContext::GetReturnType()
    RETURN_TYPE_SIZE, // int
    RETURN_TYPE_PRECISION, // int
    RETURN_TYPE_SCALE, // int
    /// ARG_TYPE_* with parameter i: properties of FunctionContext::GetArgType(i)
    ARG_TYPE_SIZE, // int[]
    ARG_TYPE_PRECISION, // int[]
    ARG_TYPE_SCALE, // int[]
    /// True if decimal_v2 query option is set.
    DECIMAL_V2,
  };

  /// This function returns the various static attributes of the UDF/UDA. Calls to this
  /// function are replaced by constants injected by codegen. If codegen is disabled,
  /// this function is interpreted as-is.
  ///
  /// 't' is the static function attribute defined in the ConstFnAttr enum above.
  /// For function attributes of arguments, 'i' holds the argument number (0 indexed).
  /// Please note that argument refers to the arguments in the signature of the UDF or UDA.
  /// 'i' must always be an immediate integer value in order to utilize the constant
  /// replacement when codegen is enabled. e.g., it cannot be a variable or an expression
  /// like "1 + 1".
  ///
  int GetConstFnAttr(ConstFnAttr t, int i = -1);

  /// Return the function attribute 't' defined in ConstFnAttr above.
  static int GetConstFnAttr(const RuntimeState* state,
      const impala_udf::FunctionContext::TypeDesc& return_type,
      const std::vector<impala_udf::FunctionContext::TypeDesc>& arg_types,
      ConstFnAttr t, int i = -1);

  /// UDFs may manipulate DecimalVal arguments via SIMD instructions such as 'movaps'
  /// that require 16-byte memory alignment.
  static const int VARARGS_BUFFER_ALIGNMENT = 16;

  /// The LLVM class name for FunctionContext. Used for handcrafted IR.
  static const char* LLVM_FUNCTIONCONTEXT_NAME;

  /// FunctionContextImpl::GetConstFnAttr() symbol. Used for call sites replacement.
  static const char* GET_CONST_FN_ATTR_SYMBOL;

 private:
  friend class impala_udf::FunctionContext;
  friend class ScalarExprEvaluator;

  /// A utility function which checks for memory limits and null pointers returned by
  /// Allocate(), Reallocate() and AllocateForResults() and sets the appropriate error status
  /// if necessary.
  ///
  /// Return false if 'buf' is null; returns true otherwise.
  bool CheckAllocResult(const char* fn_name, uint8_t* buf, int64_t byte_size);

  /// A utility function which checks for memory limits that may have been exceeded by
  /// Allocate(), Reallocate(), AllocateForResults() or TrackAllocation(). Sets the
  /// appropriate error status if necessary.
  void CheckMemLimit(const char* fn_name, int64_t byte_size);

  /// Preallocated buffer for storing varargs (if the function has any). Allocated and
  /// owned by this object, but populated by an Expr function. The buffer is interpreted
  /// as an array of the appropriate AnyVal subclass.
  uint8_t* varargs_buffer_;

  /// Size of 'varargs_buffer_'.
  /// NOTE(review): presumably in bytes and taken from the 'varargs_buffer_size'
  /// argument of CreateContext() - confirm against the implementation.
  int varargs_buffer_size_;

  /// Parent context object. Not owned
  impala_udf::FunctionContext* context_;

  /// Pool used for allocations made via Allocate(). Allocations are explicitly freed and
  /// returned to this pool with Free(). The memory allocated in this pool is effectively
  /// owned by the UDF.
  /// Owned and freed in destructor. Uses raw pointer to avoid pulling headers into SDK.
  FreePool* udf_pool_;

  /// Pool used for allocations made via AllocateForResults(). Not owned by this
  /// FunctionContext. Allocations made from the pool are used temporarily during
  /// expression evaluation. Var-len values returned from an expression may reference
  /// memory in this pool - the caller is responsible for ensuring that the pool is
  /// not cleared while that memory is still referenced.
  MemPool* results_pool_;

  /// We use the query's runtime state to report errors and warnings. NULL for test
  /// contexts.
  RuntimeState* state_;

  /// If true, indicates this is a debug context which will do additional validation.
  bool debug_;

  /// NOTE(review): presumably the Impala version exposed to the UDF through the
  /// FunctionContext API - confirm against udf.h.
  impala_udf::FunctionContext::ImpalaVersion version_;

  /// Empty if there's no error
  std::string error_msg_;

  /// The number of warnings reported.
  int64_t num_warnings_;

  /// The number of calls to Update()/Remove().
  int64_t num_updates_;
  int64_t num_removes_;

  /// Allocations made and still owned by the user function. Only used if debug_ is true
  /// because it is very expensive to maintain.
  std::map<uint8_t*, int> allocations_;

  /// The function state accessed via FunctionContext::Get/SetFunctionState()
  void* thread_local_fn_state_;
  void* fragment_local_fn_state_;

  /// The number of bytes allocated externally by the user function. In some cases,
  /// it is too inconvenient to use the Allocate()/Free() APIs in the FunctionContext,
  /// particularly for existing codebases (e.g. they use std::vector). Instead, they'll
  /// have to track those allocations manually.
  int64_t external_bytes_tracked_;

  /// Type descriptor for the intermediate type of a UDA. Set to INVALID_TYPE for UDFs.
  impala_udf::FunctionContext::TypeDesc intermediate_type_;

  /// Type descriptor for the return type of the function.
  impala_udf::FunctionContext::TypeDesc return_type_;

  /// Type descriptors for each argument of the function.
  std::vector<impala_udf::FunctionContext::TypeDesc> arg_types_;

  /// Contains an AnyVal* for each argument of the function. If the AnyVal* is NULL,
  /// indicates that the corresponding argument is non-constant. Otherwise contains the
  /// value of the argument. The AnyVal* objects and associated data are owned by the
  /// ScalarExprEvaluator provided when opening the FRAGMENT_LOCAL expression contexts.
  std::vector<impala_udf::AnyVal*> constant_args_;

  /// Vector of all non-constant children expressions that need to be evaluated for
  /// each input row. The first element of each pair is the child expression and the
  /// second element is the value it must be evaluated into.
  NonConstantArgsVector non_constant_args_;

  /// Used by ScalarFnCall to temporarily store arguments for a UDF when running without
  /// codegen. Allows us to pass AnyVal* arguments to the scalar function directly,
  /// rather than codegening a call that passes the correct AnyVal subclass pointer type.
  /// Note that this is only used for non-variadic arguments; varargs are always stored
  /// in varargs_buffer_.
  std::vector<impala_udf::AnyVal*> staging_input_vals_;

  /// Indicates whether this context has been closed. Used for verification/debugging.
  bool closed_;
};
}
namespace impala_udf {
/// Temporary CollectionVal definition, used to represent arrays and maps. This is not
/// ready for public consumption because users must have access to our internal tuple
/// layout.
struct CollectionVal : public AnyVal {
  // Member order matters: 'num_tuples' is declared ahead of 'ptr' so that
  // 'AnyVal::is_null', 'num_tuples' and 'ptr' pack into 16 bytes. That is the same
  // memory layout as StringVal, which lets CodegenAnyval share its support code.
  int num_tuples;
  uint8_t* ptr;

  /// Builds a CollectionVal referring to 'num_tuples' tuples starting at 'ptr'. The
  /// buffer is not copied, so it has to stay alive for as long as this CollectionVal
  /// is in use.
  CollectionVal(uint8_t* ptr = nullptr, int num_tuples = 0)
    : num_tuples(num_tuples), ptr(ptr) {}

  /// Returns a default-constructed CollectionVal with 'is_null' set to true.
  static CollectionVal null() {
    CollectionVal null_val;
    null_val.is_null = true;
    return null_val;
  }
};
/// Compile-time checks that CollectionVal has the same size and the same member offsets
/// as StringVal ('num_tuples' lines up with 'len', 'ptr' with 'ptr'). A mismatch
/// fails the build here.
///
/// The pragma silences GCC's -Winvalid-offsetof diagnostic for the offsetof() uses
/// below (presumably needed because these AnyVal-derived structs are not
/// standard-layout types).
/// NOTE(review): the pragma is not wrapped in 'diagnostic push'/'diagnostic pop', so
/// the suppression stays in effect for whatever follows this header in the including
/// translation unit.
#pragma GCC diagnostic ignored "-Winvalid-offsetof"
static_assert(sizeof(CollectionVal) == sizeof(StringVal), "Wrong size.");
static_assert(
offsetof(CollectionVal, num_tuples) == offsetof(StringVal, len), "Wrong offset.");
static_assert(offsetof(CollectionVal, ptr) == offsetof(StringVal, ptr), "Wrong offset.");
} // namespace impala_udf
#endif