blob: 0fecea080d87d0d1d6a89b15133b56189c2e2071 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle
#pragma once
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/compute/exec.h"
#include "arrow/datum.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace compute {
struct FunctionOptions;
/// \brief Base class for opaque kernel-specific state. For example, if there
/// is some kind of initialization required.
struct ARROW_EXPORT KernelState {
virtual ~KernelState() = default;
};
/// \brief Context/state for the execution of a particular kernel.
class ARROW_EXPORT KernelContext {
public:
explicit KernelContext(ExecContext* exec_ctx) : exec_ctx_(exec_ctx) {}
/// \brief Allocate buffer from the context's memory pool. The contents are
/// not initialized.
Result<std::shared_ptr<ResizableBuffer>> Allocate(int64_t nbytes);
/// \brief Allocate buffer for bitmap from the context's memory pool. Like
/// Allocate, the contents of the buffer are not initialized but the last
/// byte is preemptively zeroed to help avoid ASAN or valgrind issues.
Result<std::shared_ptr<ResizableBuffer>> AllocateBitmap(int64_t num_bits);
/// \brief Assign the active KernelState to be utilized for each stage of
/// kernel execution. Ownership and memory lifetime of the KernelState must
/// be minded separately.
void SetState(KernelState* state) { state_ = state; }
KernelState* state() { return state_; }
/// \brief Configuration related to function execution that is to be shared
/// across multiple kernels.
ExecContext* exec_context() { return exec_ctx_; }
/// \brief The memory pool to use for allocations. For now, it uses the
/// MemoryPool contained in the ExecContext used to create the KernelContext.
MemoryPool* memory_pool() { return exec_ctx_->memory_pool(); }
private:
ExecContext* exec_ctx_;
KernelState* state_;
};
/// \brief The standard kernel execution API that must be implemented for
/// SCALAR and VECTOR kernel types. This includes both stateless and stateful
/// kernels. Kernels depending on some execution state access that state via
/// subclasses of KernelState set on the KernelContext object. May be used for
/// SCALAR and VECTOR kernel kinds. Implementations should endeavor to write
/// into pre-allocated memory if they are able, though for some kernels
/// (e.g. in cases when a builder like StringBuilder) must be employed this may
/// not be possible.
using ArrayKernelExec = std::function<Status(KernelContext*, const ExecBatch&, Datum*)>;
/// \brief An type-checking interface to permit customizable validation rules
/// for use with InputType and KernelSignature. This is for scenarios where the
/// acceptance is not an exact type instance, such as a TIMESTAMP type for a
/// specific TimeUnit, but permitting any time zone.
struct ARROW_EXPORT TypeMatcher {
virtual ~TypeMatcher() = default;
/// \brief Return true if this matcher accepts the data type.
virtual bool Matches(const DataType& type) const = 0;
/// \brief A human-interpretable string representation of what the type
/// matcher checks for, usable when printing KernelSignature or formatting
/// error messages.
virtual std::string ToString() const = 0;
/// \brief Return true if this TypeMatcher contains the same matching rule as
/// the other. Currently depends on RTTI.
virtual bool Equals(const TypeMatcher& other) const = 0;
};
namespace match {
/// \brief Match any DataType instance having the same DataType::id.
ARROW_EXPORT std::shared_ptr<TypeMatcher> SameTypeId(Type::type type_id);
/// \brief Match any TimestampType instance having the same unit, but the time
/// zones can be different.
ARROW_EXPORT std::shared_ptr<TypeMatcher> TimestampTypeUnit(TimeUnit::type unit);
ARROW_EXPORT std::shared_ptr<TypeMatcher> Time32TypeUnit(TimeUnit::type unit);
ARROW_EXPORT std::shared_ptr<TypeMatcher> Time64TypeUnit(TimeUnit::type unit);
ARROW_EXPORT std::shared_ptr<TypeMatcher> DurationTypeUnit(TimeUnit::type unit);
// \brief Match any integer type
ARROW_EXPORT std::shared_ptr<TypeMatcher> Integer();
// Match types using 32-bit varbinary representation
ARROW_EXPORT std::shared_ptr<TypeMatcher> BinaryLike();
// Match types using 64-bit varbinary representation
ARROW_EXPORT std::shared_ptr<TypeMatcher> LargeBinaryLike();
// \brief Match any primitive type (boolean or any type representable as a C
// Type)
ARROW_EXPORT std::shared_ptr<TypeMatcher> Primitive();
} // namespace match
/// \brief An object used for type- and shape-checking arguments to be passed
/// to a kernel and stored in a KernelSignature. Distinguishes between ARRAY
/// and SCALAR arguments using ValueDescr::Shape. The type-checking rule can be
/// supplied either with an exact DataType instance or a custom TypeMatcher.
class ARROW_EXPORT InputType {
public:
/// \brief The kind of type-checking rule that the InputType contains.
enum Kind {
/// \brief Accept any value type.
ANY_TYPE,
/// \brief A fixed arrow::DataType and will only exact match having this
/// exact type (e.g. same TimestampType unit, same decimal scale and
/// precision, or same nested child types).
EXACT_TYPE,
/// \brief Uses a TypeMatcher implementation to check the type.
USE_TYPE_MATCHER
};
/// \brief Accept any value type but with a specific shape (e.g. any Array or
/// any Scalar).
InputType(ValueDescr::Shape shape = ValueDescr::ANY) // NOLINT implicit construction
: kind_(ANY_TYPE), shape_(shape) {}
/// \brief Accept an exact value type.
InputType(std::shared_ptr<DataType> type, // NOLINT implicit construction
ValueDescr::Shape shape = ValueDescr::ANY)
: kind_(EXACT_TYPE), shape_(shape), type_(std::move(type)) {}
/// \brief Accept an exact value type and shape provided by a ValueDescr.
InputType(const ValueDescr& descr) // NOLINT implicit construction
: InputType(descr.type, descr.shape) {}
/// \brief Use the passed TypeMatcher to type check.
InputType(std::shared_ptr<TypeMatcher> type_matcher, // NOLINT implicit construction
ValueDescr::Shape shape = ValueDescr::ANY)
: kind_(USE_TYPE_MATCHER), shape_(shape), type_matcher_(std::move(type_matcher)) {}
/// \brief Match any type with the given Type::type. Uses a TypeMatcher for
/// its implementation.
explicit InputType(Type::type type_id, ValueDescr::Shape shape = ValueDescr::ANY)
: InputType(match::SameTypeId(type_id), shape) {}
InputType(const InputType& other) { CopyInto(other); }
void operator=(const InputType& other) { CopyInto(other); }
InputType(InputType&& other) { MoveInto(std::forward<InputType>(other)); }
void operator=(InputType&& other) { MoveInto(std::forward<InputType>(other)); }
// \brief Match an array with the given exact type. Convenience constructor.
static InputType Array(std::shared_ptr<DataType> type) {
return InputType(std::move(type), ValueDescr::ARRAY);
}
// \brief Match a scalar with the given exact type. Convenience constructor.
static InputType Scalar(std::shared_ptr<DataType> type) {
return InputType(std::move(type), ValueDescr::SCALAR);
}
// \brief Match an array with the given Type::type id. Convenience
// constructor.
static InputType Array(Type::type id) { return InputType(id, ValueDescr::ARRAY); }
// \brief Match a scalar with the given Type::type id. Convenience
// constructor.
static InputType Scalar(Type::type id) { return InputType(id, ValueDescr::SCALAR); }
/// \brief Return true if this input type matches the same type cases as the
/// other.
bool Equals(const InputType& other) const;
bool operator==(const InputType& other) const { return this->Equals(other); }
bool operator!=(const InputType& other) const { return !(*this == other); }
/// \brief Return hash code.
size_t Hash() const;
/// \brief Render a human-readable string representation.
std::string ToString() const;
/// \brief Return true if the value matches this argument kind in type
/// and shape.
bool Matches(const Datum& value) const;
/// \brief Return true if the value descriptor matches this argument kind in
/// type and shape.
bool Matches(const ValueDescr& value) const;
/// \brief The type matching rule that this InputType uses.
Kind kind() const { return kind_; }
/// \brief Indicates whether this InputType matches Array (ValueDescr::ARRAY),
/// Scalar (ValueDescr::SCALAR) values, or both (ValueDescr::ANY).
ValueDescr::Shape shape() const { return shape_; }
/// \brief For InputType::EXACT_TYPE kind, the exact type that this InputType
/// must match. Otherwise this function should not be used and will assert in
/// debug builds.
const std::shared_ptr<DataType>& type() const;
/// \brief For InputType::USE_TYPE_MATCHER, the TypeMatcher to be used for
/// checking the type of a value. Otherwise this function should not be used
/// and will assert in debug builds.
const TypeMatcher& type_matcher() const;
private:
void CopyInto(const InputType& other) {
this->kind_ = other.kind_;
this->shape_ = other.shape_;
this->type_ = other.type_;
this->type_matcher_ = other.type_matcher_;
}
void MoveInto(InputType&& other) {
this->kind_ = other.kind_;
this->shape_ = other.shape_;
this->type_ = std::move(other.type_);
this->type_matcher_ = std::move(other.type_matcher_);
}
Kind kind_;
ValueDescr::Shape shape_ = ValueDescr::ANY;
// For EXACT_TYPE Kind
std::shared_ptr<DataType> type_;
// For USE_TYPE_MATCHER Kind
std::shared_ptr<TypeMatcher> type_matcher_;
};
/// \brief Container to capture both exact and input-dependent output types.
///
/// The value shape returned by Resolve will be determined by broadcasting the
/// shapes of the input arguments, otherwise this is handled by the
/// user-defined resolver function:
///
/// * Any ARRAY shape -> output shape is ARRAY
/// * All SCALAR shapes -> output shape is SCALAR
class ARROW_EXPORT OutputType {
public:
/// \brief An enum indicating whether the value type is an invariant fixed
/// value or one that's computed by a kernel-defined resolver function.
enum ResolveKind { FIXED, COMPUTED };
/// Type resolution function. Given input types and shapes, return output
/// type and shape. This function SHOULD _not_ be used to check for arity,
/// that is to be performed one or more layers above. May make use of kernel
/// state to know what type to output in some cases.
using Resolver =
std::function<Result<ValueDescr>(KernelContext*, const std::vector<ValueDescr>&)>;
/// \brief Output an exact type, but with shape determined by promoting the
/// shapes of the inputs (any ARRAY argument yields ARRAY).
OutputType(std::shared_ptr<DataType> type) // NOLINT implicit construction
: kind_(FIXED), type_(std::move(type)) {}
/// \brief Output the exact type and shape provided by a ValueDescr
OutputType(ValueDescr descr); // NOLINT implicit construction
explicit OutputType(Resolver resolver)
: kind_(COMPUTED), resolver_(std::move(resolver)) {}
OutputType(const OutputType& other) {
this->kind_ = other.kind_;
this->shape_ = other.shape_;
this->type_ = other.type_;
this->resolver_ = other.resolver_;
}
OutputType(OutputType&& other) {
this->kind_ = other.kind_;
this->type_ = std::move(other.type_);
this->shape_ = other.shape_;
this->resolver_ = other.resolver_;
}
/// \brief Return the shape and type of the expected output value of the
/// kernel given the value descriptors (shapes and types) of the input
/// arguments. The resolver may make use of state information kept in the
/// KernelContext.
Result<ValueDescr> Resolve(KernelContext* ctx,
const std::vector<ValueDescr>& args) const;
/// \brief The exact output value type for the FIXED kind.
const std::shared_ptr<DataType>& type() const;
/// \brief For use with COMPUTED resolution strategy. It may be more
/// convenient to invoke this with OutputType::Resolve returned from this
/// method.
const Resolver& resolver() const;
/// \brief Render a human-readable string representation.
std::string ToString() const;
/// \brief Return the kind of type resolution of this output type, whether
/// fixed/invariant or computed by a resolver.
ResolveKind kind() const { return kind_; }
/// \brief If the shape is ANY, then Resolve will compute the shape based on
/// the input arguments.
ValueDescr::Shape shape() const { return shape_; }
private:
ResolveKind kind_;
// For FIXED resolution
std::shared_ptr<DataType> type_;
/// \brief The shape of the output type to return when using Resolve. If ANY
/// will promote the input shapes.
ValueDescr::Shape shape_ = ValueDescr::ANY;
// For COMPUTED resolution
Resolver resolver_;
};
/// \brief Holds the input types and output type of the kernel.
///
/// VarArgs functions should pass a single input type to be used to validate
/// the input types of a function invocation.
class ARROW_EXPORT KernelSignature {
public:
KernelSignature(std::vector<InputType> in_types, OutputType out_type,
bool is_varargs = false);
/// \brief Convenience ctor since make_shared can be awkward
static std::shared_ptr<KernelSignature> Make(std::vector<InputType> in_types,
OutputType out_type,
bool is_varargs = false);
/// \brief Return true if the signature if compatible with the list of input
/// value descriptors.
bool MatchesInputs(const std::vector<ValueDescr>& descriptors) const;
/// \brief Returns true if the input types of each signature are
/// equal. Well-formed functions should have a deterministic output type
/// given input types, but currently it is the responsibility of the
/// developer to ensure this.
bool Equals(const KernelSignature& other) const;
bool operator==(const KernelSignature& other) const { return this->Equals(other); }
bool operator!=(const KernelSignature& other) const { return !(*this == other); }
/// \brief Compute a hash code for the signature
size_t Hash() const;
/// \brief The input types for the kernel. For VarArgs functions, this should
/// generally contain a single validator to use for validating all of the
/// function arguments.
const std::vector<InputType>& in_types() const { return in_types_; }
/// \brief The output type for the kernel. Use Resolve to return the exact
/// output given input argument ValueDescrs, since many kernels' output types
/// depend on their input types (or their type metadata).
const OutputType& out_type() const { return out_type_; }
/// \brief Render a human-readable string representation
std::string ToString() const;
bool is_varargs() const { return is_varargs_; }
private:
std::vector<InputType> in_types_;
OutputType out_type_;
bool is_varargs_;
// For caching the hash code after it's computed the first time
mutable uint64_t hash_code_;
};
/// \brief A function may contain multiple variants of a kernel for a given
/// type combination for different SIMD levels. Based on the active system's
/// CPU info or the user's preferences, we can elect to use one over the other.
struct SimdLevel {
enum type { NONE = 0, SSE4_2, AVX, AVX2, AVX512, NEON, MAX };
};
/// \brief The strategy to use for propagating or otherwise populating the
/// validity bitmap of a kernel output.
struct NullHandling {
enum type {
/// Compute the output validity bitmap by intersecting the validity bitmaps
/// of the arguments using bitwise-and operations. This means that values
/// in the output are valid/non-null only if the corresponding values in
/// all input arguments were valid/non-null. Kernel generally need not
/// touch the bitmap thereafter, but a kernel's exec function is permitted
/// to alter the bitmap after the null intersection is computed if it needs
/// to.
INTERSECTION,
/// Kernel expects a pre-allocated buffer to write the result bitmap
/// into. The preallocated memory is not zeroed (except for the last byte),
/// so the kernel should ensure to completely populate the bitmap.
COMPUTED_PREALLOCATE,
/// Kernel allocates and sets the validity bitmap of the output.
COMPUTED_NO_PREALLOCATE,
/// Kernel output is never null and a validity bitmap does not need to be
/// allocated.
OUTPUT_NOT_NULL
};
};
/// \brief The preference for memory preallocation of fixed-width type outputs
/// in kernel execution.
struct MemAllocation {
enum type {
// For data types that support pre-allocation (i.e. fixed-width), the
// kernel expects to be provided a pre-allocated data buffer to write
// into. Non-fixed-width types must always allocate their own data
// buffers. The allocation made for the same length as the execution batch,
// so vector kernels yielding differently sized output should not use this.
//
// It is valid for the data to not be preallocated but the validity bitmap
// is (or is computed using the intersection/bitwise-and method).
//
// For variable-size output types like BinaryType or StringType, or for
// nested types, this option has no effect.
PREALLOCATE,
// The kernel is responsible for allocating its own data buffer for
// fixed-width type outputs.
NO_PREALLOCATE
};
};
struct Kernel;
/// \brief Arguments to pass to a KernelInit function. A struct is used to help
/// avoid API breakage should the arguments passed need to be expanded.
struct KernelInitArgs {
/// \brief A pointer to the kernel being initialized. The init function may
/// depend on the kernel's KernelSignature or other data contained there.
const Kernel* kernel;
/// \brief The types and shapes of the input arguments that the kernel is
/// about to be executed against.
///
/// TODO: should this be const std::vector<ValueDescr>*? const-ref is being
/// used to avoid the cost of copying the struct into the args struct.
const std::vector<ValueDescr>& inputs;
/// \brief Opaque options specific to this kernel. May be nullptr for functions
/// that do not require options.
const FunctionOptions* options;
};
/// \brief Common initializer function for all kernel types.
using KernelInit = std::function<Result<std::unique_ptr<KernelState>>(
KernelContext*, const KernelInitArgs&)>;
/// \brief Base type for kernels. Contains the function signature and
/// optionally the state initialization function, along with some common
/// attributes
struct Kernel {
Kernel() = default;
Kernel(std::shared_ptr<KernelSignature> sig, KernelInit init)
: signature(std::move(sig)), init(std::move(init)) {}
Kernel(std::vector<InputType> in_types, OutputType out_type, KernelInit init)
: Kernel(KernelSignature::Make(std::move(in_types), std::move(out_type)),
std::move(init)) {}
/// \brief The "signature" of the kernel containing the InputType input
/// argument validators and OutputType output type and shape resolver.
std::shared_ptr<KernelSignature> signature;
/// \brief Create a new KernelState for invocations of this kernel, e.g. to
/// set up any options or state relevant for execution.
KernelInit init;
/// \brief Indicates whether execution can benefit from parallelization
/// (splitting large chunks into smaller chunks and using multiple
/// threads). Some kernels may not support parallel execution at
/// all. Synchronization and concurrency-related issues are currently the
/// responsibility of the Kernel's implementation.
bool parallelizable = true;
/// \brief Indicates the level of SIMD instruction support in the host CPU is
/// required to use the function. The intention is for functions to be able to
/// contain multiple kernels with the same signature but different levels of SIMD,
/// so that the most optimized kernel supported on a host's processor can be chosen.
SimdLevel::type simd_level = SimdLevel::NONE;
};
/// \brief Common kernel base data structure for ScalarKernel and
/// VectorKernel. It is called "ArrayKernel" in that the functions generally
/// output array values (as opposed to scalar values in the case of aggregate
/// functions).
struct ArrayKernel : public Kernel {
ArrayKernel() = default;
ArrayKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec,
KernelInit init = NULLPTR)
: Kernel(std::move(sig), init), exec(std::move(exec)) {}
ArrayKernel(std::vector<InputType> in_types, OutputType out_type, ArrayKernelExec exec,
KernelInit init = NULLPTR)
: Kernel(std::move(in_types), std::move(out_type), std::move(init)),
exec(std::move(exec)) {}
/// \brief Perform a single invocation of this kernel. Depending on the
/// implementation, it may only write into preallocated memory, while in some
/// cases it will allocate its own memory. Any required state is managed
/// through the KernelContext.
ArrayKernelExec exec;
/// \brief Writing execution results into larger contiguous allocations
/// requires that the kernel be able to write into sliced output ArrayData*,
/// including sliced output validity bitmaps. Some kernel implementations may
/// not be able to do this, so setting this to false disables this
/// functionality.
bool can_write_into_slices = true;
};
/// \brief Kernel data structure for implementations of ScalarFunction. In
/// addition to the members found in ArrayKernel, contains the null handling
/// and memory pre-allocation preferences.
struct ScalarKernel : public ArrayKernel {
using ArrayKernel::ArrayKernel;
// For scalar functions preallocated data and intersecting arg validity
// bitmaps is a reasonable default
NullHandling::type null_handling = NullHandling::INTERSECTION;
MemAllocation::type mem_allocation = MemAllocation::PREALLOCATE;
};
// ----------------------------------------------------------------------
// VectorKernel (for VectorFunction)
/// \brief See VectorKernel::finalize member for usage
using VectorFinalize = std::function<Status(KernelContext*, std::vector<Datum>*)>;
/// \brief Kernel data structure for implementations of VectorFunction. In
/// addition to the members found in ArrayKernel, contains an optional
/// finalizer function, the null handling and memory pre-allocation preferences
/// (which have different defaults from ScalarKernel), and some other
/// execution-related options.
struct VectorKernel : public ArrayKernel {
VectorKernel() = default;
VectorKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec)
: ArrayKernel(std::move(sig), std::move(exec)) {}
VectorKernel(std::vector<InputType> in_types, OutputType out_type, ArrayKernelExec exec,
KernelInit init = NULLPTR, VectorFinalize finalize = NULLPTR)
: ArrayKernel(std::move(in_types), std::move(out_type), std::move(exec),
std::move(init)),
finalize(std::move(finalize)) {}
VectorKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec,
KernelInit init = NULLPTR, VectorFinalize finalize = NULLPTR)
: ArrayKernel(std::move(sig), std::move(exec), std::move(init)),
finalize(std::move(finalize)) {}
/// \brief For VectorKernel, convert intermediate results into finalized
/// results. Mutates input argument. Some kernels may accumulate state
/// (example: hashing-related functions) through processing chunked inputs, and
/// then need to attach some accumulated state to each of the outputs of
/// processing each chunk of data.
VectorFinalize finalize;
/// Since vector kernels generally are implemented rather differently from
/// scalar/elementwise kernels (and they may not even yield arrays of the same
/// size), so we make the developer opt-in to any memory preallocation rather
/// than having to turn it off.
NullHandling::type null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
MemAllocation::type mem_allocation = MemAllocation::NO_PREALLOCATE;
/// Some vector kernels can do chunkwise execution using ExecBatchIterator,
/// in some cases accumulating some state. Other kernels (like Take) need to
/// be passed whole arrays and don't work on ChunkedArray inputs
bool can_execute_chunkwise = true;
/// Some kernels (like unique and value_counts) yield non-chunked output from
/// chunked-array inputs. This option controls how the results are boxed when
/// returned from ExecVectorFunction
///
/// true -> ChunkedArray
/// false -> Array
bool output_chunked = true;
};
// ----------------------------------------------------------------------
// ScalarAggregateKernel (for ScalarAggregateFunction)
using ScalarAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
using ScalarAggregateMerge =
std::function<Status(KernelContext*, KernelState&&, KernelState*)>;
// Finalize returns Datum to permit multiple return values
using ScalarAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
/// \brief Kernel data structure for implementations of
/// ScalarAggregateFunction. The four necessary components of an aggregation
/// kernel are the init, consume, merge, and finalize functions.
///
/// * init: creates a new KernelState for a kernel.
/// * consume: processes an ExecBatch and updates the KernelState found in the
/// KernelContext.
/// * merge: combines one KernelState with another.
/// * finalize: produces the end result of the aggregation using the
/// KernelState in the KernelContext.
struct ScalarAggregateKernel : public Kernel {
ScalarAggregateKernel() = default;
ScalarAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
ScalarAggregateConsume consume, ScalarAggregateMerge merge,
ScalarAggregateFinalize finalize)
: Kernel(std::move(sig), std::move(init)),
consume(std::move(consume)),
merge(std::move(merge)),
finalize(std::move(finalize)) {}
ScalarAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
KernelInit init, ScalarAggregateConsume consume,
ScalarAggregateMerge merge, ScalarAggregateFinalize finalize)
: ScalarAggregateKernel(
KernelSignature::Make(std::move(in_types), std::move(out_type)),
std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {}
ScalarAggregateConsume consume;
ScalarAggregateMerge merge;
ScalarAggregateFinalize finalize;
};
// ----------------------------------------------------------------------
// HashAggregateKernel (for HashAggregateFunction)
using HashAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;
using HashAggregateMerge =
std::function<Status(KernelContext*, KernelState&&, KernelState*)>;
// Finalize returns Datum to permit multiple return values
using HashAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;
/// \brief Kernel data structure for implementations of
/// HashAggregateFunction. The four necessary components of an aggregation
/// kernel are the init, consume, merge, and finalize functions.
///
/// * init: creates a new KernelState for a kernel.
/// * consume: processes an ExecBatch (which includes the argument as well
/// as an array of group identifiers) and updates the KernelState found in the
/// KernelContext.
/// * merge: combines one KernelState with another.
/// * finalize: produces the end result of the aggregation using the
/// KernelState in the KernelContext.
struct HashAggregateKernel : public Kernel {
HashAggregateKernel() = default;
HashAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
HashAggregateConsume consume, HashAggregateMerge merge,
HashAggregateFinalize finalize)
: Kernel(std::move(sig), std::move(init)),
consume(std::move(consume)),
merge(std::move(merge)),
finalize(std::move(finalize)) {}
HashAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
KernelInit init, HashAggregateMerge merge,
HashAggregateConsume consume, HashAggregateFinalize finalize)
: HashAggregateKernel(
KernelSignature::Make(std::move(in_types), std::move(out_type)),
std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {}
HashAggregateConsume consume;
HashAggregateMerge merge;
HashAggregateFinalize finalize;
};
} // namespace compute
} // namespace arrow