| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #ifndef IMPALA_UDF_UDF_INTERNAL_H |
| #define IMPALA_UDF_UDF_INTERNAL_H |
| |
| #include <string.h> |
| #include <map> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| #include <boost/cstdint.hpp> |
| |
| /// Be very careful when adding Impala includes in this file. We don't want to pull |
| /// in unnecessary dependencies for the development libs. |
| #include "udf/udf.h" |
| |
| namespace impala { |
| |
/// Returns from the enclosing void function if 'ptr' compares equal to NULL. Before
/// returning, asserts via DCHECK that the query status reachable through
/// 'ctx->impl()->state()' is not ok, i.e. that an error has already been recorded.
///
/// Both macro arguments are parenthesized so that arbitrary expressions (for example
/// a conditional expression 'c ? a : b') can be passed without operator-precedence
/// surprises. 'ptr' is evaluated exactly once.
#define RETURN_IF_NULL(ctx, ptr) \
  do { \
    if (UNLIKELY((ptr) == NULL)) { \
      DCHECK(!(ctx)->impl()->state()->GetQueryStatus().ok()); \
      return; \
    } \
  } while (false)
| |
| class FreePool; |
| class MemPool; |
| class RuntimeState; |
| class ScalarExpr; |
| |
| /// This class actually implements the interface of FunctionContext. This is split to |
| /// hide the details from the external header. |
| /// Note: The actual user code does not include this file. |
| /// |
| /// Exprs (e.g. UDFs and UDAs) require a FunctionContext to store state related to |
| /// evaluation of the expression. Each FunctionContext is associated with a backend Expr |
| /// or AggFnEvaluator, which is derived from a TExprNode generated by the Impala frontend. |
| /// FunctionContexts are allocated and managed by ScalarExprEvaluator. Exprs shouldn't try |
| /// to create FunctionContext themselves. |
| class FunctionContextImpl { |
| public: |
| /// Create a FunctionContext for a UDF. Caller is responsible for deleting it. |
| /// UDF-managed allocations (i.e. Allocate()) are backed by 'perm_pool' and |
| /// allocations that may hold expr results (i.e. AllocateForResults()) are backed |
| /// by 'results_pool'. |
| static impala_udf::FunctionContext* CreateContext(RuntimeState* state, |
| MemPool* perm_pool, MemPool* results_pool, |
| const impala_udf::FunctionContext::TypeDesc& return_type, |
| const std::vector<impala_udf::FunctionContext::TypeDesc>& arg_types, |
| int varargs_buffer_size = 0, bool debug = false); |
| |
| /// Create a FunctionContext for a UDA. Identical to the UDF version except for the |
| /// intermediate type. Caller is responsible for deleting it. |
| static impala_udf::FunctionContext* CreateContext(RuntimeState* state, |
| MemPool* perm_pool, MemPool* results_pool, |
| const impala_udf::FunctionContext::TypeDesc& intermediate_type, |
| const impala_udf::FunctionContext::TypeDesc& return_type, |
| const std::vector<impala_udf::FunctionContext::TypeDesc>& arg_types, |
| int varargs_buffer_size = 0, bool debug = false); |
| |
| FunctionContextImpl(impala_udf::FunctionContext* parent); |
| ~FunctionContextImpl(); |
| |
| /// Checks for any outstanding memory allocations. If there is (non-result) memory that |
| /// was allocated by the UDF via this FunctionContext but not freed, adds a warning |
| /// and frees the allocations. |
| void Close(); |
| |
| /// Returns a new FunctionContext with the same constant args, fragment-local state, and |
| /// debug flag as this FunctionContext. The caller is responsible for calling delete on |
| /// it. The cloned FunctionContext cannot be used after the original FunctionContext is |
| /// destroyed because it may reference fragment-local state from the original. |
| impala_udf::FunctionContext* Clone(MemPool* perm_pool, MemPool* results_pool); |
| |
| /// Allocates a buffer of 'byte_size' to hold expr results. If the new allocation |
| /// causes the memory limit to be exceeded, the error will be set in this object |
| /// causing the query to fail. |
| /// |
| /// These allocations live in the 'results_pool' passed into the constructor. |
| /// 'results_pool' is managed by the Impala runtime and can be safely cleared |
| /// whenever memory returned by the expression is no longer referenced. |
| uint8_t* AllocateForResults(int64_t byte_size) noexcept; |
| |
| /// Replaces the current 'results_pool_' for 'new_results_pool' to be used for |
| /// AllocateForResults(). Returns a pointer to the pool that was replaced. |
| MemPool* SwapResultsPool(MemPool* new_results_pool) { |
| MemPool* old_results_pool = results_pool_; |
| results_pool_ = new_results_pool; |
| return old_results_pool; |
| } |
| |
| /// Sets the constant arg list. The vector should contain one entry per argument, |
| /// with a non-NULL entry if the argument is constant. The AnyVal* values are |
| /// owned by the caller and must be allocated from the ScalarExprEvaluator's MemPool. |
| void SetConstantArgs(std::vector<impala_udf::AnyVal*>&& constant_args); |
| |
| typedef std::vector<std::pair<ScalarExpr*, impala_udf::AnyVal*>> NonConstantArgsVector; |
| |
| /// Sets the non-constant args. Contains one entry per non-constant argument. All |
| /// pointers should be non-NULL. The Expr* and AnyVal* values are owned by the caller. |
| /// The AnyVal* values must be allocated from the ScalarExprEvaluator's MemPool. |
| void SetNonConstantArgs(NonConstantArgsVector&& non_constant_args); |
| |
| const std::vector<impala_udf::AnyVal*>& constant_args() const { return constant_args_; } |
| const NonConstantArgsVector& non_constant_args() const { return non_constant_args_; } |
| |
| uint8_t* varargs_buffer() { return varargs_buffer_; } |
| |
| std::vector<impala_udf::AnyVal*>* staging_input_vals() { return &staging_input_vals_; } |
| |
| bool debug() { return debug_; } |
| bool closed() { return closed_; } |
| |
| int64_t num_updates() const { return num_updates_; } |
| int64_t num_removes() const { return num_removes_; } |
| void set_num_updates(int64_t n) { num_updates_ = n; } |
| void set_num_removes(int64_t n) { num_removes_ = n; } |
| void IncrementNumUpdates(int64_t n = 1) { num_updates_ += n; } |
| void IncrementNumRemoves(int64_t n = 1) { num_removes_ += n; } |
| |
| const std::vector<impala_udf::FunctionContext::TypeDesc> arg_types() { |
| return arg_types_; |
| } |
| |
| RuntimeState* state() { return state_; } |
| |
| /// Various static attributes of the UDF/UDA that can be injected as constants |
| /// by codegen. Note that the argument types refer to those in the UDF/UDA signature, |
| /// not the arguments of the C++ functions implementing the UDF/UDA. Any change to |
| /// this enum must be reflected in FunctionContextImpl::GetConstFnAttr(). |
| enum ConstFnAttr { |
| /// RETURN_TYPE_*: properties of FunctionContext::GetReturnType() |
| RETURN_TYPE_SIZE, // int |
| RETURN_TYPE_PRECISION, // int |
| RETURN_TYPE_SCALE, // int |
| /// ARG_TYPE_* with parameter i: properties of FunctionContext::GetArgType(i) |
| ARG_TYPE_SIZE, // int[] |
| ARG_TYPE_PRECISION, // int[] |
| ARG_TYPE_SCALE, // int[] |
| /// True if decimal_v2 query option is set. |
| DECIMAL_V2, |
| }; |
| |
| /// This function returns the various static attributes of the UDF/UDA. Calls to this |
| /// function are replaced by constants injected by codegen. If codegen is disabled, |
| /// this function is interpreted as-is. |
| /// |
| /// 't' is the static function attribute defined in the ConstFnAttr enum above. |
| /// For function attributes of arguments, 'i' holds the argument number (0 indexed). |
| /// Please note that argument refers to the arguments in the signature of the UDF or UDA. |
| /// 'i' must always be an immediate integer value in order to utilize the constant |
| /// replacement when codegen is enabled. e.g., it cannot be a variable or an expression |
| /// like "1 + 1". |
| /// |
| int GetConstFnAttr(ConstFnAttr t, int i = -1); |
| |
| /// Return the function attribute 't' defined in ConstFnAttr above. |
| static int GetConstFnAttr(const RuntimeState* state, |
| const impala_udf::FunctionContext::TypeDesc& return_type, |
| const std::vector<impala_udf::FunctionContext::TypeDesc>& arg_types, |
| ConstFnAttr t, int i = -1); |
| |
| /// UDFs may manipulate DecimalVal arguments via SIMD instructions such as 'movaps' |
| /// that require 16-byte memory alignment. |
| static const int VARARGS_BUFFER_ALIGNMENT = 16; |
| |
| /// The LLVM class name for FunctionContext. Used for handcrafted IR. |
| static const char* LLVM_FUNCTIONCONTEXT_NAME; |
| |
| /// FunctionContextImpl::GetConstFnAttr() symbol. Used for call sites replacement. |
| static const char* GET_CONST_FN_ATTR_SYMBOL; |
| |
| private: |
| friend class impala_udf::FunctionContext; |
| friend class ScalarExprEvaluator; |
| |
| /// A utility function which checks for memory limits and null pointers returned by |
| /// Allocate(), Reallocate() and AllocateForResults() and sets the appropriate error status |
| /// if necessary. |
| /// |
| /// Return false if 'buf' is null; returns true otherwise. |
| bool CheckAllocResult(const char* fn_name, uint8_t* buf, int64_t byte_size); |
| |
| /// A utility function which checks for memory limits that may have been exceeded by |
| /// Allocate(), Reallocate(), AllocateForResults() or TrackAllocation(). Sets the |
| /// appropriate error status if necessary. |
| void CheckMemLimit(const char* fn_name, int64_t byte_size); |
| |
| /// Preallocated buffer for storing varargs (if the function has any). Allocated and |
| /// owned by this object, but populated by an Expr function. The buffer is interpreted |
| /// as an array of the appropriate AnyVal subclass. |
| uint8_t* varargs_buffer_; |
| int varargs_buffer_size_; |
| |
| /// Parent context object. Not owned |
| impala_udf::FunctionContext* context_; |
| |
| /// Pool used for allocations made via Allocate(). Allocations are explicitly freed and |
| /// returned to this pool with Free(). The memory allocated in this pool is effectively |
| /// owned by the UDF. |
| /// Owned and freed in destructor. Uses raw pointer to avoid pulling headers into SDK. |
| FreePool* udf_pool_; |
| |
| /// Pool used for allocations made via AllocateForResults(). Not owned by this |
| /// FunctionContext. Allocations made from the pool are used temporarily during |
| /// expression evaluation. Var-len values returned from an expression may reference |
| /// memory in this pool - the caller is responsible for ensuring that the pool is |
| /// not cleared while that memory is still referenced. |
| MemPool* results_pool_; |
| |
| /// We use the query's runtime state to report errors and warnings. NULL for test |
| /// contexts. |
| RuntimeState* state_; |
| |
| /// If true, indicates this is a debug context which will do additional validation. |
| bool debug_; |
| |
| impala_udf::FunctionContext::ImpalaVersion version_; |
| |
| /// Empty if there's no error |
| std::string error_msg_; |
| |
| /// The number of warnings reported. |
| int64_t num_warnings_; |
| |
| /// The number of calls to Update()/Remove(). |
| int64_t num_updates_; |
| int64_t num_removes_; |
| |
| /// Allocations made and still owned by the user function. Only used if debug_ is true |
| /// because it is very expensive to maintain. |
| std::map<uint8_t*, int> allocations_; |
| |
| /// The function state accessed via FunctionContext::Get/SetFunctionState() |
| void* thread_local_fn_state_; |
| void* fragment_local_fn_state_; |
| |
| /// The number of bytes allocated externally by the user function. In some cases, |
| /// it is too inconvenient to use the Allocate()/Free() APIs in the FunctionContext, |
| /// particularly for existing codebases (e.g. they use std::vector). Instead, they'll |
| /// have to track those allocations manually. |
| int64_t external_bytes_tracked_; |
| |
| /// Type descriptor for the intermediate type of a UDA. Set to INVALID_TYPE for UDFs. |
| impala_udf::FunctionContext::TypeDesc intermediate_type_; |
| |
| /// Type descriptor for the return type of the function. |
| impala_udf::FunctionContext::TypeDesc return_type_; |
| |
| /// Type descriptors for each argument of the function. |
| std::vector<impala_udf::FunctionContext::TypeDesc> arg_types_; |
| |
| /// Contains an AnyVal* for each argument of the function. If the AnyVal* is NULL, |
| /// indicates that the corresponding argument is non-constant. Otherwise contains the |
| /// value of the argument. The AnyVal* objects and associated data are owned by the |
| /// ScalarExprEvaluator provided when opening the FRAGMENT_LOCAL expression contexts. |
| std::vector<impala_udf::AnyVal*> constant_args_; |
| |
| /// Vector of all non-constant children expressions that need to be evaluated for |
| /// each input row. The first element of each pair is the child expression and the |
| /// second element is the value it must be evaluated into. |
| NonConstantArgsVector non_constant_args_; |
| |
| /// Used by ScalarFnCall to temporarily store arguments for a UDF when running without |
| /// codegen. Allows us to pass AnyVal* arguments to the scalar function directly, |
| /// rather than codegening a call that passes the correct AnyVal subclass pointer type. |
| /// Note that this is only used for non-variadic arguments; varargs are always stored |
| /// in varargs_buffer_. |
| std::vector<impala_udf::AnyVal*> staging_input_vals_; |
| |
| /// Indicates whether this context has been closed. Used for verification/debugging. |
| bool closed_; |
| }; |
| |
| } |
| |
| namespace impala_udf { |
| |
| /// Temporary CollectionVal definition, used to represent arrays and maps. This is not |
| /// ready for public consumption because users must have access to our internal tuple |
| /// layout. |
| struct CollectionVal : public AnyVal { |
| // Put num_tuples before ptr so that 'AnyVal::is_null', 'num_tuples' and 'ptr' can be |
| // packed into 16 bytes. This matches the memory layout of StringVal, which allows |
| // sharing of support in CodegenAnyval. |
| int num_tuples; |
| uint8_t* ptr; |
| |
| /// Construct an CollectionVal from ptr/num_tuples. Note: this does not make a copy of |
| /// ptr so the buffer must exist as long as this CollectionVal does. |
| CollectionVal(uint8_t* ptr = NULL, int num_tuples = 0) |
| : num_tuples(num_tuples), ptr(ptr) {} |
| |
| static CollectionVal null() { |
| CollectionVal cv; |
| cv.is_null = true; |
| return cv; |
| } |
| }; |
| |
| #pragma GCC diagnostic ignored "-Winvalid-offsetof" |
| static_assert(sizeof(CollectionVal) == sizeof(StringVal), "Wrong size."); |
| static_assert( |
| offsetof(CollectionVal, num_tuples) == offsetof(StringVal, len), "Wrong offset."); |
| static_assert(offsetof(CollectionVal, ptr) == offsetof(StringVal, ptr), "Wrong offset."); |
| } // namespace impala_udf |
| |
| #endif |