cpp/src/arrow/compute/kernel.h - arrow - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 // NOTE: API is EXPERIMENTAL and will change without going through a
 // deprecation cycle

 #pragma once

 #include <cstddef>
 #include <cstdint>
 #include <functional>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>

 #include "arrow/buffer.h"
 #include "arrow/compute/exec.h"
 #include "arrow/datum.h"
 #include "arrow/memory_pool.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/visibility.h"

 namespace arrow {
 namespace compute {

 struct FunctionOptions;

 /// \brief Base class for opaque kernel-specific state. For example, if there
 /// is some kind of initialization required.
 ///
 /// Kernels that need state derive from this type. The virtual destructor
 /// allows a derived state object to be destroyed through a KernelState
 /// pointer.
 struct ARROW_EXPORT KernelState {
   virtual ~KernelState() = default;
 };

 /// \brief Context/state for the execution of a particular kernel.
 ///
 /// The context holds raw, non-owning pointers to an ExecContext and to the
 /// currently active KernelState; it never deletes either of them.
 class ARROW_EXPORT KernelContext {
  public:
   /// \brief Create a context bound to exec_ctx with no active KernelState.
   explicit KernelContext(ExecContext* exec_ctx) : exec_ctx_(exec_ctx) {}

   /// \brief Allocate buffer from the context's memory pool. The contents are
   /// not initialized.
   Result<std::shared_ptr<ResizableBuffer>> Allocate(int64_t nbytes);

   /// \brief Allocate buffer for bitmap from the context's memory pool. Like
   /// Allocate, the contents of the buffer are not initialized but the last
   /// byte is preemptively zeroed to help avoid ASAN or valgrind issues.
   Result<std::shared_ptr<ResizableBuffer>> AllocateBitmap(int64_t num_bits);

   /// \brief Assign the active KernelState to be utilized for each stage of
   /// kernel execution. Ownership and memory lifetime of the KernelState must
   /// be minded separately.
   void SetState(KernelState* state) { state_ = state; }

   /// \brief The active KernelState, or null if SetState has not been called.
   KernelState* state() { return state_; }

   /// \brief Configuration related to function execution that is to be shared
   /// across multiple kernels.
   ExecContext* exec_context() { return exec_ctx_; }

   /// \brief The memory pool to use for allocations. For now, it uses the
   /// MemoryPool contained in the ExecContext used to create the KernelContext.
   MemoryPool* memory_pool() { return exec_ctx_->memory_pool(); }

  private:
   ExecContext* exec_ctx_;
   // Null until SetState is called, so that state() never returns an
   // indeterminate pointer for stateless kernels.
   KernelState* state_ = nullptr;
 };

 /// \brief The standard kernel execution API that must be implemented for
 /// SCALAR and VECTOR kernel types. This includes both stateless and stateful
 /// kernels. Kernels depending on some execution state access that state via
 /// subclasses of KernelState set on the KernelContext object.
 ///
 /// Implementations should endeavor to write into pre-allocated memory if they
 /// are able, though for some kernels (e.g. when a builder like StringBuilder
 /// must be employed) this may not be possible.
 using ArrayKernelExec = std::function<Status(KernelContext*, const ExecBatch&, Datum*)>;

 /// \brief A type-checking interface to permit customizable validation rules
 /// for use with InputType and KernelSignature. This is for scenarios where the
 /// acceptance is not an exact type instance, such as a TIMESTAMP type for a
 /// specific TimeUnit, but permitting any time zone.
 struct ARROW_EXPORT TypeMatcher {
   virtual ~TypeMatcher() = default;

   /// \brief Return true if this matcher accepts the data type.
   virtual bool Matches(const DataType& type) const = 0;

   /// \brief A human-interpretable string representation of what the type
   /// matcher checks for, usable when printing KernelSignature or formatting
   /// error messages.
   virtual std::string ToString() const = 0;

   /// \brief Return true if this TypeMatcher contains the same matching rule as
   /// the other. Currently depends on RTTI.
   virtual bool Equals(const TypeMatcher& other) const = 0;
 };

 namespace match {

 /// \brief Match any DataType instance having the same DataType::id.
 ARROW_EXPORT std::shared_ptr<TypeMatcher> SameTypeId(Type::type type_id);

 /// \brief Match any TimestampType instance having the same unit, but the time
 /// zones can be different.
 ARROW_EXPORT std::shared_ptr<TypeMatcher> TimestampTypeUnit(TimeUnit::type unit);
 // NOTE(review): the three factories below are not documented in this header.
 // Judging by their names they presumably match Time32Type, Time64Type and
 // DurationType instances having the given unit -- confirm against the
 // implementation.
 ARROW_EXPORT std::shared_ptr<TypeMatcher> Time32TypeUnit(TimeUnit::type unit);
 ARROW_EXPORT std::shared_ptr<TypeMatcher> Time64TypeUnit(TimeUnit::type unit);
 ARROW_EXPORT std::shared_ptr<TypeMatcher> DurationTypeUnit(TimeUnit::type unit);

 /// \brief Match any integer type
 ARROW_EXPORT std::shared_ptr<TypeMatcher> Integer();

 /// \brief Match types using 32-bit varbinary representation
 ARROW_EXPORT std::shared_ptr<TypeMatcher> BinaryLike();

 /// \brief Match types using 64-bit varbinary representation
 ARROW_EXPORT std::shared_ptr<TypeMatcher> LargeBinaryLike();

 /// \brief Match any primitive type (boolean or any type representable as a C
 /// Type)
 ARROW_EXPORT std::shared_ptr<TypeMatcher> Primitive();

 }  // namespace match

 /// \brief An object used for type- and shape-checking arguments to be passed
 /// to a kernel and stored in a KernelSignature. Distinguishes between ARRAY
 /// and SCALAR arguments using ValueDescr::Shape. The type-checking rule can be
 /// supplied either with an exact DataType instance or a custom TypeMatcher.
 class ARROW_EXPORT InputType {
  public:
   /// \brief The kind of type-checking rule that the InputType contains.
   enum Kind {
     /// \brief Accept any value type.
     ANY_TYPE,

     /// \brief A fixed arrow::DataType and will only exact match having this
     /// exact type (e.g. same TimestampType unit, same decimal scale and
     /// precision, or same nested child types).
     EXACT_TYPE,

     /// \brief Uses a TypeMatcher implementation to check the type.
     USE_TYPE_MATCHER
   };

   /// \brief Accept any value type but with a specific shape (e.g. any Array or
   /// any Scalar).
   InputType(ValueDescr::Shape shape = ValueDescr::ANY)  // NOLINT implicit construction
       : kind_(ANY_TYPE), shape_(shape) {}

   /// \brief Accept an exact value type.
   InputType(std::shared_ptr<DataType> type,  // NOLINT implicit construction
             ValueDescr::Shape shape = ValueDescr::ANY)
       : kind_(EXACT_TYPE), shape_(shape), type_(std::move(type)) {}

   /// \brief Accept an exact value type and shape provided by a ValueDescr.
   InputType(const ValueDescr& descr)  // NOLINT implicit construction
       : InputType(descr.type, descr.shape) {}

   /// \brief Use the passed TypeMatcher to type check.
   InputType(std::shared_ptr<TypeMatcher> type_matcher,  // NOLINT implicit construction
             ValueDescr::Shape shape = ValueDescr::ANY)
       : kind_(USE_TYPE_MATCHER), shape_(shape), type_matcher_(std::move(type_matcher)) {}

   /// \brief Match any type with the given Type::type. Uses a TypeMatcher for
   /// its implementation.
   explicit InputType(Type::type type_id, ValueDescr::Shape shape = ValueDescr::ANY)
       : InputType(match::SameTypeId(type_id), shape) {}

   /// \brief Copy the matching rule and shape of another InputType.
   InputType(const InputType& other) { CopyInto(other); }

   /// \brief Copy-assign; returns *this so that assignments can be chained.
   InputType& operator=(const InputType& other) {
     CopyInto(other);
     return *this;
   }

   /// \brief Move-construct, taking over the DataType and TypeMatcher pointers
   /// held by other.
   InputType(InputType&& other) noexcept { MoveInto(std::move(other)); }

   /// \brief Move-assign; returns *this so that assignments can be chained.
   InputType& operator=(InputType&& other) noexcept {
     MoveInto(std::move(other));
     return *this;
   }

   /// \brief Match an array with the given exact type. Convenience constructor.
   static InputType Array(std::shared_ptr<DataType> type) {
     return InputType(std::move(type), ValueDescr::ARRAY);
   }

   /// \brief Match a scalar with the given exact type. Convenience constructor.
   static InputType Scalar(std::shared_ptr<DataType> type) {
     return InputType(std::move(type), ValueDescr::SCALAR);
   }

   /// \brief Match an array with the given Type::type id. Convenience
   /// constructor.
   static InputType Array(Type::type id) { return InputType(id, ValueDescr::ARRAY); }

   /// \brief Match a scalar with the given Type::type id. Convenience
   /// constructor.
   static InputType Scalar(Type::type id) { return InputType(id, ValueDescr::SCALAR); }

   /// \brief Return true if this input type matches the same type cases as the
   /// other.
   bool Equals(const InputType& other) const;

   bool operator==(const InputType& other) const { return this->Equals(other); }

   bool operator!=(const InputType& other) const { return !(*this == other); }

   /// \brief Return hash code.
   size_t Hash() const;

   /// \brief Render a human-readable string representation.
   std::string ToString() const;

   /// \brief Return true if the value matches this argument kind in type
   /// and shape.
   bool Matches(const Datum& value) const;

   /// \brief Return true if the value descriptor matches this argument kind in
   /// type and shape.
   bool Matches(const ValueDescr& value) const;

   /// \brief The type matching rule that this InputType uses.
   Kind kind() const { return kind_; }

   /// \brief Indicates whether this InputType matches Array (ValueDescr::ARRAY),
   /// Scalar (ValueDescr::SCALAR) values, or both (ValueDescr::ANY).
   ValueDescr::Shape shape() const { return shape_; }

   /// \brief For InputType::EXACT_TYPE kind, the exact type that this InputType
   /// must match. Otherwise this function should not be used and will assert in
   /// debug builds.
   const std::shared_ptr<DataType>& type() const;

   /// \brief For InputType::USE_TYPE_MATCHER, the TypeMatcher to be used for
   /// checking the type of a value. Otherwise this function should not be used
   /// and will assert in debug builds.
   const TypeMatcher& type_matcher() const;

  private:
   // Copy every field of other into this object.
   void CopyInto(const InputType& other) {
     this->kind_ = other.kind_;
     this->shape_ = other.shape_;
     this->type_ = other.type_;
     this->type_matcher_ = other.type_matcher_;
   }

   // Copy the plain fields and move the shared pointers out of other. Only
   // enum copies and shared_ptr move-assignments happen here, none of which
   // can throw.
   void MoveInto(InputType&& other) noexcept {
     this->kind_ = other.kind_;
     this->shape_ = other.shape_;
     this->type_ = std::move(other.type_);
     this->type_matcher_ = std::move(other.type_matcher_);
   }

   Kind kind_;

   ValueDescr::Shape shape_ = ValueDescr::ANY;

   // For EXACT_TYPE Kind
   std::shared_ptr<DataType> type_;

   // For USE_TYPE_MATCHER Kind
   std::shared_ptr<TypeMatcher> type_matcher_;
 };

 /// \brief Container to capture both exact and input-dependent output types.
 ///
 /// The value shape returned by Resolve will be determined by broadcasting the
 /// shapes of the input arguments, otherwise this is handled by the
 /// user-defined resolver function:
 ///
 /// * Any ARRAY shape -> output shape is ARRAY
 /// * All SCALAR shapes -> output shape is SCALAR
 class ARROW_EXPORT OutputType {
  public:
   /// \brief An enum indicating whether the value type is an invariant fixed
   /// value or one that's computed by a kernel-defined resolver function.
   enum ResolveKind { FIXED, COMPUTED };

   /// Type resolution function. Given input types and shapes, return output
   /// type and shape. This function SHOULD _not_ be used to check for arity,
   /// that is to be performed one or more layers above. May make use of kernel
   /// state to know what type to output in some cases.
   using Resolver =
       std::function<Result<ValueDescr>(KernelContext*, const std::vector<ValueDescr>&)>;

   /// \brief Output an exact type, but with shape determined by promoting the
   /// shapes of the inputs (any ARRAY argument yields ARRAY).
   OutputType(std::shared_ptr<DataType> type)  // NOLINT implicit construction
       : kind_(FIXED), type_(std::move(type)) {}

   /// \brief Output the exact type and shape provided by a ValueDescr
   OutputType(ValueDescr descr);  // NOLINT implicit construction

   /// \brief Output a type computed by the given resolver function.
   explicit OutputType(Resolver resolver)
       : kind_(COMPUTED), resolver_(std::move(resolver)) {}

   /// \brief Copy the kind, type, shape and resolver of another OutputType.
   OutputType(const OutputType& other)
       : kind_(other.kind_),
         type_(other.type_),
         shape_(other.shape_),
         resolver_(other.resolver_) {}

   /// \brief Move-construct, taking over the type pointer and the resolver of
   /// other instead of copying them.
   OutputType(OutputType&& other)
       : kind_(other.kind_),
         type_(std::move(other.type_)),
         shape_(other.shape_),
         resolver_(std::move(other.resolver_)) {}

   // Declared explicitly: with a user-declared move constructor the implicit
   // copy assignment would be deleted and no move assignment would be
   // generated, leaving the type non-assignable.
   OutputType& operator=(const OutputType&) = default;
   OutputType& operator=(OutputType&&) = default;

   /// \brief Return the shape and type of the expected output value of the
   /// kernel given the value descriptors (shapes and types) of the input
   /// arguments. The resolver may make use of state information kept in the
   /// KernelContext.
   Result<ValueDescr> Resolve(KernelContext* ctx,
                              const std::vector<ValueDescr>& args) const;

   /// \brief The exact output value type for the FIXED kind.
   const std::shared_ptr<DataType>& type() const;

   /// \brief For use with COMPUTED resolution strategy. It may be more
   /// convenient to invoke this with OutputType::Resolve returned from this
   /// method.
   const Resolver& resolver() const;

   /// \brief Render a human-readable string representation.
   std::string ToString() const;

   /// \brief Return the kind of type resolution of this output type, whether
   /// fixed/invariant or computed by a resolver.
   ResolveKind kind() const { return kind_; }

   /// \brief If the shape is ANY, then Resolve will compute the shape based on
   /// the input arguments.
   ValueDescr::Shape shape() const { return shape_; }

  private:
   ResolveKind kind_;

   // For FIXED resolution
   std::shared_ptr<DataType> type_;

   /// \brief The shape of the output type to return when using Resolve. If ANY
   /// will promote the input shapes.
   ValueDescr::Shape shape_ = ValueDescr::ANY;

   // For COMPUTED resolution
   Resolver resolver_;
 };

 /// \brief The input types and the output type of a kernel, bundled together.
 ///
 /// VarArgs functions should pass a single input type, which is then used to
 /// validate every argument of a function invocation.
 class ARROW_EXPORT KernelSignature {
  public:
   KernelSignature(std::vector<InputType> in_types, OutputType out_type,
                   bool is_varargs = false);

   /// \brief Convenience factory, since spelling out make_shared can be awkward
   static std::shared_ptr<KernelSignature> Make(std::vector<InputType> in_types,
                                                OutputType out_type,
                                                bool is_varargs = false);

   /// \brief Return true if the signature is compatible with the list of input
   /// value descriptors.
   bool MatchesInputs(const std::vector<ValueDescr>& descriptors) const;

   /// \brief Returns true if the input types of each signature are
   /// equal. Well-formed functions should have a deterministic output type
   /// given input types, but currently it is the responsibility of the
   /// developer to ensure this.
   bool Equals(const KernelSignature& other) const;

   /// \brief Equality operator, defined in terms of Equals.
   bool operator==(const KernelSignature& other) const { return Equals(other); }

   /// \brief Inequality operator, the negation of Equals.
   bool operator!=(const KernelSignature& other) const { return !Equals(other); }

   /// \brief Compute a hash code for the signature
   size_t Hash() const;

   /// \brief The input types for the kernel. For VarArgs functions, this should
   /// generally contain a single validator to use for validating all of the
   /// function arguments.
   const std::vector<InputType>& in_types() const { return in_types_; }

   /// \brief The output type for the kernel. Use Resolve to return the exact
   /// output given input argument ValueDescrs, since many kernels' output types
   /// depend on their input types (or their type metadata).
   const OutputType& out_type() const { return out_type_; }

   /// \brief Render a human-readable string representation
   std::string ToString() const;

   /// \brief Whether the signature was declared as accepting variable
   /// arguments.
   bool is_varargs() const { return is_varargs_; }

  private:
   std::vector<InputType> in_types_;
   OutputType out_type_;
   bool is_varargs_;

   // Cache for the hash code, filled in after it is computed the first time;
   // mutable so that const member functions can update it.
   mutable uint64_t hash_code_;
 };

 /// \brief A function may contain multiple variants of a kernel for a given
 /// type combination for different SIMD levels. Based on the active system's
 /// CPU info or the user's preferences, we can elect to use one over the other.
 ///
 /// NONE is explicitly 0; the remaining enumerators follow in declaration
 /// order. NOTE(review): MAX looks like an end-of-range sentinel rather than a
 /// real instruction set -- confirm with the code that consumes it.
 struct SimdLevel {
   enum type { NONE = 0, SSE4_2, AVX, AVX2, AVX512, NEON, MAX };
 };

 /// \brief The strategy to use for propagating or otherwise populating the
 /// validity bitmap of a kernel output.
 struct NullHandling {
   enum type {
     /// Compute the output validity bitmap by intersecting the validity bitmaps
     /// of the arguments using bitwise-and operations. This means that values
     /// in the output are valid/non-null only if the corresponding values in
     /// all input arguments were valid/non-null. Kernels generally need not
     /// touch the bitmap thereafter, but a kernel's exec function is permitted
     /// to alter the bitmap after the null intersection is computed if it needs
     /// to.
     INTERSECTION,

     /// Kernel expects a pre-allocated buffer to write the result bitmap
     /// into. The preallocated memory is not zeroed (except for the last byte),
     /// so the kernel should ensure to completely populate the bitmap.
     COMPUTED_PREALLOCATE,

     /// Kernel allocates and sets the validity bitmap of the output.
     COMPUTED_NO_PREALLOCATE,

     /// Kernel output is never null and a validity bitmap does not need to be
     /// allocated.
     OUTPUT_NOT_NULL
   };
 };

 /// \brief The preference for memory preallocation of fixed-width type outputs
 /// in kernel execution.
 struct MemAllocation {
   enum type {
     /// For data types that support pre-allocation (i.e. fixed-width), the
     /// kernel expects to be provided a pre-allocated data buffer to write
     /// into. Non-fixed-width types must always allocate their own data
     /// buffers. The allocation is made for the same length as the execution
     /// batch, so vector kernels yielding differently sized output should not
     /// use this.
     ///
     /// It is valid for the data to not be preallocated while the validity
     /// bitmap is (or is computed using the intersection/bitwise-and method).
     ///
     /// For variable-size output types like BinaryType or StringType, or for
     /// nested types, this option has no effect.
     PREALLOCATE,

     /// The kernel is responsible for allocating its own data buffer for
     /// fixed-width type outputs.
     NO_PREALLOCATE
   };
 };

 struct Kernel;

 /// \brief Arguments to pass to a KernelInit function. A struct is used to help
 /// avoid API breakage should the arguments passed need to be expanded.
 ///
 /// The struct stores a raw pointer to the kernel, a reference to the input
 /// descriptors and a raw pointer to the options, so the referenced objects
 /// must outlive it.
 struct KernelInitArgs {
   /// \brief A pointer to the kernel being initialized. The init function may
   /// depend on the kernel's KernelSignature or other data contained there.
   const Kernel* kernel;

   /// \brief The types and shapes of the input arguments that the kernel is
   /// about to be executed against.
   ///
   /// TODO: should this be const std::vector<ValueDescr>*? const-ref is being
   /// used to avoid the cost of copying the vector into the args struct.
   const std::vector<ValueDescr>& inputs;

   /// \brief Opaque options specific to this kernel. May be nullptr for functions
   /// that do not require options.
   const FunctionOptions* options;
 };

 /// \brief Common initializer function for all kernel types. Given the
 /// KernelContext and the KernelInitArgs, it returns a newly created
 /// KernelState owned by the caller (via std::unique_ptr), or an error.
 using KernelInit = std::function<Result<std::unique_ptr<KernelState>>(
     KernelContext*, const KernelInitArgs&)>;

 /// \brief Base type for kernels. Holds the function signature, the optional
 /// state initialization function and a few attributes shared by every kernel
 /// kind.
 struct Kernel {
   Kernel() = default;

   /// \brief Build a kernel from an existing signature and an init function.
   Kernel(std::shared_ptr<KernelSignature> sig, KernelInit init)
       : signature(std::move(sig)), init(std::move(init)) {}

   /// \brief Build a kernel from input/output types; the signature is created
   /// with KernelSignature::Make.
   Kernel(std::vector<InputType> in_types, OutputType out_type, KernelInit init)
       : signature(KernelSignature::Make(std::move(in_types), std::move(out_type))),
         init(std::move(init)) {}

   /// \brief The "signature" of the kernel containing the InputType input
   /// argument validators and OutputType output type and shape resolver.
   std::shared_ptr<KernelSignature> signature;

   /// \brief Create a new KernelState for invocations of this kernel, e.g. to
   /// set up any options or state relevant for execution.
   KernelInit init;

   /// \brief Indicates whether execution can benefit from parallelization
   /// (splitting large chunks into smaller chunks and using multiple
   /// threads). Some kernels may not support parallel execution at
   /// all. Synchronization and concurrency-related issues are currently the
   /// responsibility of the Kernel's implementation.
   bool parallelizable = true;

   /// \brief Indicates the level of SIMD instruction support in the host CPU is
   /// required to use the function. The intention is for functions to be able to
   /// contain multiple kernels with the same signature but different levels of SIMD,
   /// so that the most optimized kernel supported on a host's processor can be chosen.
   SimdLevel::type simd_level = SimdLevel::NONE;
 };

 /// \brief Common kernel base data structure for ScalarKernel and
 /// VectorKernel. It is called "ArrayKernel" in that the functions generally
 /// output array values (as opposed to scalar values in the case of aggregate
 /// functions).
 struct ArrayKernel : public Kernel {
   ArrayKernel() = default;

   /// \brief Build from an existing signature, the exec function and an
   /// optional init function.
   ArrayKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec,
               KernelInit init = NULLPTR)
       : Kernel(std::move(sig), std::move(init)), exec(std::move(exec)) {}

   /// \brief Build from input/output types, the exec function and an optional
   /// init function.
   ArrayKernel(std::vector<InputType> in_types, OutputType out_type, ArrayKernelExec exec,
               KernelInit init = NULLPTR)
       : Kernel(std::move(in_types), std::move(out_type), std::move(init)),
         exec(std::move(exec)) {}

   /// \brief Perform a single invocation of this kernel. Depending on the
   /// implementation, it may only write into preallocated memory, while in some
   /// cases it will allocate its own memory. Any required state is managed
   /// through the KernelContext.
   ArrayKernelExec exec;

   /// \brief Writing execution results into larger contiguous allocations
   /// requires that the kernel be able to write into sliced output ArrayData*,
   /// including sliced output validity bitmaps. Some kernel implementations may
   /// not be able to do this, so setting this to false disables this
   /// functionality.
   bool can_write_into_slices = true;
 };

 /// \brief Kernel data structure for implementations of ScalarFunction. In
 /// addition to the members found in ArrayKernel, contains the null handling
 /// and memory pre-allocation preferences.
 struct ScalarKernel : public ArrayKernel {
   // Inherit all ArrayKernel constructors unchanged.
   using ArrayKernel::ArrayKernel;

   // For scalar functions, preallocating data and intersecting the argument
   // validity bitmaps is a reasonable default
   NullHandling::type null_handling = NullHandling::INTERSECTION;
   MemAllocation::type mem_allocation = MemAllocation::PREALLOCATE;
 };

 // ----------------------------------------------------------------------
 // VectorKernel (for VectorFunction)

 /// \brief See VectorKernel::finalize member for usage. The function receives
 /// the KernelContext and a pointer to the vector of Datum values, which it
 /// may modify.
 using VectorFinalize = std::function<Status(KernelContext*, std::vector<Datum>*)>;

 /// \brief Kernel data structure for implementations of VectorFunction. In
 /// addition to the members found in ArrayKernel, contains an optional
 /// finalizer function, the null handling and memory pre-allocation preferences
 /// (which have different defaults from ScalarKernel), and some other
 /// execution-related options.
 struct VectorKernel : public ArrayKernel {
   VectorKernel() = default;

   /// \brief Build from input/output types and the exec function, with
   /// optional init and finalize functions.
   VectorKernel(std::vector<InputType> in_types, OutputType out_type, ArrayKernelExec exec,
                KernelInit init = NULLPTR, VectorFinalize finalize = NULLPTR)
       : ArrayKernel(std::move(in_types), std::move(out_type), std::move(exec),
                     std::move(init)),
         finalize(std::move(finalize)) {}

   /// \brief Build from an existing signature and the exec function, with
   /// optional init and finalize functions.
   ///
   /// This is the only signature-based constructor: a separate (sig, exec)
   /// overload would make every two-argument call ambiguous with this one,
   /// because default arguments do not take part in overload ranking. A
   /// two-argument call resolves here with empty init and finalize.
   VectorKernel(std::shared_ptr<KernelSignature> sig, ArrayKernelExec exec,
                KernelInit init = NULLPTR, VectorFinalize finalize = NULLPTR)
       : ArrayKernel(std::move(sig), std::move(exec), std::move(init)),
         finalize(std::move(finalize)) {}

   /// \brief For VectorKernel, convert intermediate results into finalized
   /// results. Mutates input argument. Some kernels may accumulate state
   /// (example: hashing-related functions) through processing chunked inputs, and
   /// then need to attach some accumulated state to each of the outputs of
   /// processing each chunk of data.
   VectorFinalize finalize;

   /// Vector kernels generally are implemented rather differently from
   /// scalar/elementwise kernels (and they may not even yield arrays of the same
   /// size), so we make the developer opt-in to any memory preallocation rather
   /// than having to turn it off.
   NullHandling::type null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
   MemAllocation::type mem_allocation = MemAllocation::NO_PREALLOCATE;

   /// Some vector kernels can do chunkwise execution using ExecBatchIterator,
   /// in some cases accumulating some state. Other kernels (like Take) need to
   /// be passed whole arrays and don't work on ChunkedArray inputs
   bool can_execute_chunkwise = true;

   /// Some kernels (like unique and value_counts) yield non-chunked output from
   /// chunked-array inputs. This option controls how the results are boxed when
   /// returned from ExecVectorFunction
   ///
   /// true -> ChunkedArray
   /// false -> Array
   bool output_chunked = true;
 };

 // ----------------------------------------------------------------------
 // ScalarAggregateKernel (for ScalarAggregateFunction)

 /// \brief Consume step of a scalar aggregation: receives the KernelContext
 /// and one ExecBatch.
 using ScalarAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;

 /// \brief Merge step of a scalar aggregation: receives the KernelContext, one
 /// KernelState as an rvalue and a pointer to another KernelState.
 /// NOTE(review): presumably the rvalue state is folded into the pointed-to
 /// state -- confirm against the kernel implementations.
 using ScalarAggregateMerge =
     std::function<Status(KernelContext*, KernelState&&, KernelState*)>;

 /// \brief Finalize step of a scalar aggregation. The result is written to a
 /// Datum to permit multiple return values.
 using ScalarAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;

 /// \brief Kernel data structure for implementations of
 /// ScalarAggregateFunction. An aggregation kernel is made of four functions:
 ///
 /// * init: creates a new KernelState for a kernel.
 /// * consume: processes an ExecBatch and updates the KernelState found in the
 ///   KernelContext.
 /// * merge: combines one KernelState with another.
 /// * finalize: produces the end result of the aggregation using the
 ///   KernelState in the KernelContext.
 struct ScalarAggregateKernel : public Kernel {
   ScalarAggregateKernel() = default;

   /// \brief Build from an existing signature and the four aggregation
   /// functions.
   ScalarAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
                         ScalarAggregateConsume consume, ScalarAggregateMerge merge,
                         ScalarAggregateFinalize finalize)
       : Kernel(std::move(sig), std::move(init)),
         consume(std::move(consume)),
         merge(std::move(merge)),
         finalize(std::move(finalize)) {}

   /// \brief Build from input/output types and the four aggregation functions;
   /// the base Kernel constructor creates the signature.
   ScalarAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
                         KernelInit init, ScalarAggregateConsume consume,
                         ScalarAggregateMerge merge, ScalarAggregateFinalize finalize)
       : Kernel(std::move(in_types), std::move(out_type), std::move(init)),
         consume(std::move(consume)),
         merge(std::move(merge)),
         finalize(std::move(finalize)) {}

   ScalarAggregateConsume consume;
   ScalarAggregateMerge merge;
   ScalarAggregateFinalize finalize;
 };

 // ----------------------------------------------------------------------
 // HashAggregateKernel (for HashAggregateFunction)

 /// \brief Consume step of a hash aggregation: receives the KernelContext and
 /// one ExecBatch.
 using HashAggregateConsume = std::function<Status(KernelContext*, const ExecBatch&)>;

 /// \brief Merge step of a hash aggregation: receives the KernelContext, one
 /// KernelState as an rvalue and a pointer to another KernelState.
 /// NOTE(review): presumably the rvalue state is folded into the pointed-to
 /// state -- confirm against the kernel implementations.
 using HashAggregateMerge =
     std::function<Status(KernelContext*, KernelState&&, KernelState*)>;

 /// \brief Finalize step of a hash aggregation. The result is written to a
 /// Datum to permit multiple return values.
 using HashAggregateFinalize = std::function<Status(KernelContext*, Datum*)>;

 /// \brief Kernel data structure for implementations of
 /// HashAggregateFunction. The four necessary components of an aggregation
 /// kernel are the init, consume, merge, and finalize functions.
 ///
 /// * init: creates a new KernelState for a kernel.
 /// * consume: processes an ExecBatch (which includes the argument as well
 ///   as an array of group identifiers) and updates the KernelState found in the
 ///   KernelContext.
 /// * merge: combines one KernelState with another.
 /// * finalize: produces the end result of the aggregation using the
 ///   KernelState in the KernelContext.
 struct HashAggregateKernel : public Kernel {
   HashAggregateKernel() = default;

   HashAggregateKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
                       HashAggregateConsume consume, HashAggregateMerge merge,
                       HashAggregateFinalize finalize)
       : Kernel(std::move(sig), std::move(init)),
         consume(std::move(consume)),
         merge(std::move(merge)),
         finalize(std::move(finalize)) {}

   HashAggregateKernel(std::vector<InputType> in_types, OutputType out_type,
                       KernelInit init, HashAggregateMerge merge,
                       HashAggregateConsume consume, HashAggregateFinalize finalize)
       : HashAggregateKernel(
             KernelSignature::Make(std::move(in_types), std::move(out_type)),
             std::move(init), std::move(consume), std::move(merge), std::move(finalize)) {}

   HashAggregateConsume consume;
   HashAggregateMerge merge;
   HashAggregateFinalize finalize;
 };

 }  // namespace compute
 }  // namespace arrow