// blob: 9a3e1c1852f46d8e4acdaa8bdd9e129d60799e80 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle.
#pragma once
#include <string>
#include <utility>
#include <vector>
#include "arrow/compute/kernel.h"
#include "arrow/compute/type_fwd.h"
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace compute {
/// \defgroup compute-functions Abstract compute function API
///
/// @{
/// \brief Polymorphic base for the option structs that configure how a
/// function behaves (its error handling, for example).
///
/// The destructor is virtual, so an options object of a derived type can be
/// destroyed through a pointer to FunctionOptions.
struct ARROW_EXPORT FunctionOptions {
  virtual ~FunctionOptions() = default;
};
/// \brief Describes how many arguments a function requires.
///
/// The factory names follow the conventions described at
/// https://en.wikipedia.org/wiki/Arity.
struct ARROW_EXPORT Arity {
  /// \brief Arity of a function that takes no arguments
  static Arity Nullary() { return Arity{0, false}; }

  /// \brief Arity of a function that takes exactly 1 argument
  static Arity Unary() { return Arity{1, false}; }

  /// \brief Arity of a function that takes exactly 2 arguments
  static Arity Binary() { return Arity{2, false}; }

  /// \brief Arity of a function that takes exactly 3 arguments
  static Arity Ternary() { return Arity{3, false}; }

  /// \brief Arity of a function that takes a variable number of arguments
  ///
  /// \param[in] min_args the minimum number of arguments that must be
  /// supplied when the function is invoked
  static Arity VarArgs(int min_args = 0) { return Arity{min_args, true}; }

  // NOTE: every parameter is defaulted because Cython requires the
  // 0-argument (default constructor) form.
  explicit Arity(int num_args = 0, bool is_varargs = false)
      : num_args{num_args}, is_varargs{is_varargs} {}

  /// Number of required arguments; for a varargs function this is the
  /// minimum number of arguments.
  int num_args;

  /// When true, num_args is only a lower bound on the argument count.
  bool is_varargs = false;
};
/// \brief Human-readable documentation for a compute function: a summary, a
/// longer description, the argument names and the options class name.
struct ARROW_EXPORT FunctionDoc {
  /// \brief One-line summary of the function, phrased with a verb.
  ///
  /// Example: "Add two numeric arrays or scalars".
  std::string summary;

  /// \brief Detailed description of the function, intended to be read after
  /// the summary.
  std::string description;

  /// \brief Symbolic names (identifiers) of the function arguments.
  ///
  /// Some bindings may use these to generate nicer function signatures.
  std::vector<std::string> arg_names;

  // TODO add argument descriptions?

  /// \brief Name of the options class, if there is one.
  std::string options_class;

  /// \brief Create a documentation object whose fields are all empty.
  FunctionDoc() = default;

  /// \brief Create a documentation object from its parts.
  ///
  /// Each argument is taken by value and moved into the field of the same
  /// name; `options_class` defaults to the empty string.
  FunctionDoc(std::string summary, std::string description,
              std::vector<std::string> arg_names, std::string options_class = "")
      : summary(std::move(summary)),
        description(std::move(description)),
        arg_names(std::move(arg_names)),
        options_class(std::move(options_class)) {}

  /// \brief Return a reference to a shared FunctionDoc instance.
  ///
  /// Defined outside this header; presumably every field of the returned
  /// object is empty (confirm against the implementation).
  static const FunctionDoc& Empty();
};
/// \brief Base class for compute functions. Function implementations contain a
/// collection of "kernels" which are implementations of the function for
/// specific argument types. Selecting a viable kernel for executing a function
/// is referred to as "dispatching".
class ARROW_EXPORT Function {
public:
/// \brief The kind of function, which indicates in what contexts it is
/// valid for use.
enum Kind {
/// A function that performs scalar data operations on whole arrays of
/// data. Can generally process Array or Scalar values. The size of the
/// output will be the same as the size (or broadcasted size, in the case
/// of mixing Array and Scalar inputs) of the input.
SCALAR,
/// A function with array input and output whose behavior depends on the
/// values of the entire arrays passed, rather than the value of each scalar
/// value.
VECTOR,
/// A function that computes scalar summary statistics from array input.
SCALAR_AGGREGATE,
/// A function that computes grouped summary statistics from array input
/// and an array of group identifiers.
HASH_AGGREGATE,
/// A function that dispatches to other functions and does not contain its
/// own kernels.
META
};
virtual ~Function() = default;
/// \brief The name of the kernel. The registry enforces uniqueness of names.
const std::string& name() const { return name_; }
/// \brief The kind of kernel, which indicates in what contexts it is valid
/// for use.
Function::Kind kind() const { return kind_; }
/// \brief Contains the number of arguments the function requires, or if the
/// function accepts variable numbers of arguments.
const Arity& arity() const { return arity_; }
/// \brief Return the function documentation
const FunctionDoc& doc() const { return *doc_; }
/// \brief Returns the number of registered kernels for this function.
virtual int num_kernels() const = 0;
/// \brief Return a kernel that can execute the function given the exact
/// argument types (without implicit type casts or scalar->array promotions).
///
/// NB: This function is overridden in CastFunction.
virtual Result<const Kernel*> DispatchExact(
const std::vector<ValueDescr>& values) const;
/// \brief Return a best-match kernel that can execute the function given the argument
/// types, after implicit casts are applied.
///
/// \param[in,out] values Argument types. An element may be modified to indicate that
/// the returned kernel only approximately matches the input value descriptors; callers
/// are responsible for casting inputs to the type and shape required by the kernel.
virtual Result<const Kernel*> DispatchBest(std::vector<ValueDescr>* values) const;
/// \brief Execute the function eagerly with the passed input arguments with
/// kernel dispatch, batch iteration, and memory allocation details taken
/// care of.
///
/// If the `options` pointer is null, then `default_options()` will be used.
///
/// This function can be overridden in subclasses.
virtual Result<Datum> Execute(const std::vector<Datum>& args,
const FunctionOptions* options, ExecContext* ctx) const;
/// \brief Returns a the default options for this function.
///
/// Whatever option semantics a Function has, implementations must guarantee
/// that default_options() is valid to pass to Execute as options.
const FunctionOptions* default_options() const { return default_options_; }
virtual Status Validate() const;
protected:
Function(std::string name, Function::Kind kind, const Arity& arity,
const FunctionDoc* doc, const FunctionOptions* default_options)
: name_(std::move(name)),
kind_(kind),
arity_(arity),
doc_(doc ? doc : &FunctionDoc::Empty()),
default_options_(default_options) {}
Status CheckArity(const std::vector<InputType>&) const;
Status CheckArity(const std::vector<ValueDescr>&) const;
std::string name_;
Function::Kind kind_;
Arity arity_;
const FunctionDoc* doc_;
const FunctionOptions* default_options_ = NULLPTR;
};
namespace detail {
/// \brief Function base class that stores its kernels, by value, in a vector
/// of `KernelType`.
///
/// \tparam KernelType the concrete kernel type held by the function
template <typename KernelType>
class FunctionImpl : public Function {
 public:
  /// \brief Return pointers to the currently available kernels, for
  /// inspection.
  ///
  /// The returned pointers address the elements of the internal kernel
  /// vector, so they may be invalidated if that vector is modified later.
  std::vector<const KernelType*> kernels() const {
    std::vector<const KernelType*> result;
    // The final size is known up front: allocate once instead of growing the
    // vector repeatedly while appending.
    result.reserve(kernels_.size());
    for (const auto& kernel : kernels_) {
      result.push_back(&kernel);
    }
    return result;
  }

  /// \brief Return the number of kernels currently stored, narrowed to int.
  int num_kernels() const override { return static_cast<int>(kernels_.size()); }

 protected:
  /// \brief Forward every argument unchanged to the Function constructor.
  FunctionImpl(std::string name, Function::Kind kind, const Arity& arity,
               const FunctionDoc* doc, const FunctionOptions* default_options)
      : Function(std::move(name), kind, arity, doc, default_options) {}

  /// Kernels belonging to this function.
  std::vector<KernelType> kernels_;
};
/// \brief Search `func` for a kernel matching the given value descriptors.
///
/// \return the kernel that was found, or nullptr when no Kernel is found
ARROW_EXPORT const Kernel* DispatchExactImpl(const Function* func,
                                             const std::vector<ValueDescr>& values);
/// \brief Produce the error to report when no Kernel is found in `func` for
/// the given value descriptors.
///
/// \return a Status carrying the error message
ARROW_EXPORT Status NoMatchingKernel(const Function* func,
                                     const std::vector<ValueDescr>& values);
} // namespace detail
/// \brief Function applying elementwise operations to arrays or scalars.
///
/// Because the work is done element by element, the results generally do not
/// depend on the order of the values in the arguments. Such a function
/// accepts and returns arrays that are all of the same size. These functions
/// roughly correspond to the functions used in SQL expressions.
class ARROW_EXPORT ScalarFunction : public detail::FunctionImpl<ScalarKernel> {
 public:
  using KernelType = ScalarKernel;

  /// \brief Construct a function of kind Function::SCALAR.
  ///
  /// The remaining arguments are passed on to the base-class constructor;
  /// `default_options` may be omitted, in which case it is null.
  ScalarFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
                 const FunctionOptions* default_options = NULLPTR)
      : detail::FunctionImpl<KernelType>(std::move(name), Function::SCALAR, arity, doc,
                                         default_options) {}

  /// \brief Add a kernel described by its input/output types and exec
  /// function. The kernel has no required state initialization, uses
  /// preallocation for fixed-width types, and applies the default null
  /// handling (intersecting the validity bitmaps of the inputs).
  Status AddKernel(std::vector<InputType> in_types, OutputType out_type,
                   ArrayKernelExec exec, KernelInit init = NULLPTR);

  /// \brief Add a kernel (function implementation). An error is returned if
  /// the kernel's signature does not match the function's arity.
  Status AddKernel(ScalarKernel kernel);
};
/// \brief Function performing general array operations.
///
/// Its outputs may differ in size from its inputs, and its results may depend
/// on the whole array contents. These functions roughly correspond to the
/// functions found in non-SQL array languages like APL and its derivatives.
class ARROW_EXPORT VectorFunction : public detail::FunctionImpl<VectorKernel> {
 public:
  using KernelType = VectorKernel;

  /// \brief Construct a function of kind Function::VECTOR.
  ///
  /// The remaining arguments are passed on to the base-class constructor;
  /// `default_options` may be omitted, in which case it is null.
  VectorFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
                 const FunctionOptions* default_options = NULLPTR)
      : detail::FunctionImpl<KernelType>(std::move(name), Function::VECTOR, arity, doc,
                                         default_options) {}

  /// \brief Add a simple kernel described by its input/output types and exec
  /// function. The kernel has no required state initialization, no data
  /// preallocation, and no preallocation of the validity bitmap.
  Status AddKernel(std::vector<InputType> in_types, OutputType out_type,
                   ArrayKernelExec exec, KernelInit init = NULLPTR);

  /// \brief Add a kernel (function implementation). An error is returned if
  /// the kernel's signature does not match the function's arity.
  Status AddKernel(VectorKernel kernel);
};
/// \brief Function of kind Function::SCALAR_AGGREGATE, i.e. one that computes
/// scalar summary statistics from array input. Its kernels are of type
/// ScalarAggregateKernel.
class ARROW_EXPORT ScalarAggregateFunction
    : public detail::FunctionImpl<ScalarAggregateKernel> {
 public:
  using KernelType = ScalarAggregateKernel;

  /// \brief Construct a function of kind Function::SCALAR_AGGREGATE.
  ///
  /// The remaining arguments are passed on to the base-class constructor;
  /// `default_options` may be omitted, in which case it is null.
  ScalarAggregateFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
                          const FunctionOptions* default_options = NULLPTR)
      : detail::FunctionImpl<KernelType>(std::move(name), Function::SCALAR_AGGREGATE,
                                         arity, doc, default_options) {}

  /// \brief Add a kernel (function implementation). An error is returned if
  /// the kernel's signature does not match the function's arity.
  Status AddKernel(ScalarAggregateKernel kernel);
};
/// \brief Function of kind Function::HASH_AGGREGATE, i.e. one that computes
/// grouped summary statistics from array input and an array of group
/// identifiers. Its kernels are of type HashAggregateKernel.
class ARROW_EXPORT HashAggregateFunction
    : public detail::FunctionImpl<HashAggregateKernel> {
 public:
  using KernelType = HashAggregateKernel;

  /// \brief Construct a function of kind Function::HASH_AGGREGATE.
  ///
  /// The remaining arguments are passed on to the base-class constructor;
  /// `default_options` may be omitted, in which case it is null.
  HashAggregateFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
                        const FunctionOptions* default_options = NULLPTR)
      : detail::FunctionImpl<KernelType>(std::move(name), Function::HASH_AGGREGATE,
                                         arity, doc, default_options) {}

  /// \brief Add a kernel (function implementation). An error is returned if
  /// the kernel's signature does not match the function's arity.
  Status AddKernel(HashAggregateKernel kernel);
};
/// \brief Function that dispatches to other functions instead of holding
/// kernels. Subclasses must implement MetaFunction::ExecuteImpl.
///
/// For the Array, ChunkedArray, and Scalar Datum kinds an implementation may
/// rely on the execution of concrete Function types, but it must handle the
/// other Datum kinds on its own.
class ARROW_EXPORT MetaFunction : public Function {
 public:
  /// \brief Always 0: a meta function reports no kernels.
  int num_kernels() const override { return 0; }

  /// \brief Execute the function on `args`.
  ///
  /// NOTE(review): defined outside this header; presumably it delegates to
  /// ExecuteImpl -- confirm against the implementation.
  Result<Datum> Execute(const std::vector<Datum>& args, const FunctionOptions* options,
                        ExecContext* ctx) const override;

 protected:
  /// \brief Execution hook that every concrete meta function must provide.
  virtual Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
                                    const FunctionOptions* options,
                                    ExecContext* ctx) const = 0;

  /// \brief Construct a function of kind Function::META.
  ///
  /// The remaining arguments are passed on to the Function constructor;
  /// `default_options` may be omitted, in which case it is null.
  MetaFunction(std::string name, const Arity& arity, const FunctionDoc* doc,
               const FunctionOptions* default_options = NULLPTR)
      : Function(std::move(name), Function::META, arity, doc, default_options) {}
};
/// @}
} // namespace compute
} // namespace arrow