blob: 9e195f2d52914b86bed87f2615f2912c2475fb9d [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
/// macro that exports a list of function names as:
/// 1. individual functions in an `expr_fn` module
/// 2. a single function that returns a list of all functions
///
/// Equivalent to
/// ```text
/// pub mod expr_fn {
/// use super::*;
/// /// Return encode(arg)
/// pub fn encode(args: Vec<Expr>) -> Expr {
/// super::encode().call(args)
/// }
/// ...
/// /// Return a list of all functions in this package
/// pub(crate) fn functions() -> Vec<Arc<ScalarUDF>> {
/// vec![
/// encode(),
/// decode()
/// ]
/// }
/// ```
///
/// Exported functions accept:
/// - `Vec<Expr>` argument (single argument followed by a comma)
/// - Variable number of `Expr` arguments (zero or more arguments, must be without commas)
/// - Functions that require config (marked with `@config` prefix)
#[macro_export]
macro_rules! export_functions {
($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),*) => {
$(
// switch to single-function cases below
$crate::export_functions!(single $FUNC, $DOC, $($arg)*);
)*
};
// function that requires config (marked with @config)
(single $FUNC:ident, $DOC:expr, @config) => {
#[doc = $DOC]
pub fn $FUNC() -> datafusion_expr::Expr {
use datafusion_common::config::ConfigOptions;
super::$FUNC(&ConfigOptions::default()).call(vec![])
}
};
// single vector argument (a single argument followed by a comma)
(single $FUNC:ident, $DOC:expr, $arg:ident,) => {
#[doc = $DOC]
pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
super::$FUNC().call($arg)
}
};
// variadic arguments (zero or more arguments, without commas)
(single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
#[doc = $DOC]
pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
super::$FUNC().call(vec![$($arg),*])
}
};
}
/// Creates a singleton `ScalarUDF` of the `$UDF` function and a function
/// named `$NAME` which returns that singleton.
///
/// This is used to ensure creating the list of `ScalarUDF` only happens once.
#[macro_export]
macro_rules! make_udf_function {
($UDF:ty, $NAME:ident) => {
#[allow(rustdoc::redundant_explicit_links)]
#[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
// Singleton instance of the function
static INSTANCE: std::sync::LazyLock<
std::sync::Arc<datafusion_expr::ScalarUDF>,
> = std::sync::LazyLock::new(|| {
std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
<$UDF>::new(),
))
});
std::sync::Arc::clone(&INSTANCE)
}
};
}
/// Creates a singleton `ScalarUDF` of the `$UDF` function and a function
/// named `$NAME` which returns that singleton. The function takes a
/// configuration argument of type `$CONFIG_TYPE` to create the UDF.
#[macro_export]
macro_rules! make_udf_function_with_config {
($UDF:ty, $NAME:ident) => {
#[allow(rustdoc::redundant_explicit_links)]
#[doc = concat!("Return a [`ScalarUDF`](datafusion_expr::ScalarUDF) implementation of ", stringify!($NAME))]
pub fn $NAME(config: &datafusion_common::config::ConfigOptions) -> std::sync::Arc<datafusion_expr::ScalarUDF> {
std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
<$UDF>::new_with_config(&config),
))
}
};
}
/// Macro creates a sub module if the feature is not enabled
///
/// The rationale for providing stub functions is to help users to configure datafusion
/// properly (so they get an error telling them why a function is not available)
/// instead of getting a cryptic "no function found" message at runtime.
macro_rules! make_stub_package {
($name:ident, $feature:literal) => {
#[cfg(not(feature = $feature))]
#[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
pub mod $name {
use datafusion_expr::ScalarUDF;
use log::debug;
use std::sync::Arc;
/// Returns an empty list of functions when the feature is not enabled
pub fn functions() -> Vec<Arc<ScalarUDF>> {
debug!("{} functions disabled", stringify!($name));
vec![]
}
}
};
}
/// Downcast a named argument to a specific array type, returning an internal error
/// if the cast fails
///
/// $ARG: ArrayRef
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_named_arg {
($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
$ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
datafusion_common::internal_datafusion_err!(
"could not cast {} to {}",
$NAME,
std::any::type_name::<$ARRAY_TYPE>()
)
})?
}};
}
/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// $ARG: ArrayRef
/// $ARRAY_TYPE: the type of array to cast the argument to
#[macro_export]
macro_rules! downcast_arg {
($ARG:expr, $ARRAY_TYPE:ident) => {{
$crate::downcast_named_arg!($ARG, "", $ARRAY_TYPE)
}};
}
/// Macro to create a unary math UDF.
///
/// A unary math function takes an argument of type Float32 or Float64,
/// applies a unary floating function to the argument, and returns a value of the same type.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $UNARY_FUNC: the unary function to apply to the argument
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_unary_udf {
($UDF:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr, $GET_DOC:expr) => {
$crate::make_udf_function!($NAME::$UDF, $NAME);
mod $NAME {
use std::any::Any;
use std::sync::Arc;
use arrow::array::{ArrayRef, AsArray};
use arrow::datatypes::{DataType, Float32Type, Float64Type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::interval_arithmetic::Interval;
use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
use datafusion_expr::{
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
Signature, Volatility,
};
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct $UDF {
signature: Signature,
}
impl $UDF {
pub fn new() -> Self {
use DataType::*;
Self {
signature: Signature::uniform(
1,
vec![Float64, Float32],
Volatility::Immutable,
),
}
}
}
impl ScalarUDFImpl for $UDF {
fn as_any(&self) -> &dyn Any {
self
}
fn name(&self) -> &str {
stringify!($NAME)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
let arg_type = &arg_types[0];
match arg_type {
DataType::Float32 => Ok(DataType::Float32),
// For other types (possible values float64/null/int), use Float64
_ => Ok(DataType::Float64),
}
}
fn output_ordering(
&self,
input: &[ExprProperties],
) -> Result<SortProperties> {
$OUTPUT_ORDERING(input)
}
fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
$EVALUATE_BOUNDS(inputs)
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(&args.args)?;
let arr: ArrayRef = match args[0].data_type() {
DataType::Float64 => Arc::new(
args[0]
.as_primitive::<Float64Type>()
.unary::<_, Float64Type>(|x: f64| f64::$UNARY_FUNC(x)),
) as ArrayRef,
DataType::Float32 => Arc::new(
args[0]
.as_primitive::<Float32Type>()
.unary::<_, Float32Type>(|x: f32| f32::$UNARY_FUNC(x)),
) as ArrayRef,
other => {
return exec_err!(
"Unsupported data type {other:?} for function {}",
self.name()
)
}
};
Ok(ColumnarValue::Array(arr))
}
fn documentation(&self) -> Option<&Documentation> {
Some($GET_DOC())
}
}
}
};
}
/// Macro to create a binary math UDF.
///
/// A binary math function takes two arguments of types Float32 or Float64,
/// applies a binary floating function to the argument, and returns a value of the same type.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $NAME: the name of the function
/// $BINARY_FUNC: the binary function to apply to the argument
/// $OUTPUT_ORDERING: the output ordering calculation method of the function
/// $GET_DOC: the function to get the documentation of the UDF
macro_rules! make_math_binary_udf {
($UDF:ident, $NAME:ident, $BINARY_FUNC:ident, $OUTPUT_ORDERING:expr, $GET_DOC:expr) => {
$crate::make_udf_function!($NAME::$UDF, $NAME);
mod $NAME {
use std::any::Any;
use std::sync::Arc;
use arrow::array::{ArrayRef, AsArray};
use arrow::datatypes::{DataType, Float32Type, Float64Type};
use datafusion_common::{exec_err, Result};
use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
use datafusion_expr::TypeSignature;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl,
Signature, Volatility,
};
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct $UDF {
signature: Signature,
}
impl $UDF {
pub fn new() -> Self {
use DataType::*;
Self {
signature: Signature::one_of(
vec![
TypeSignature::Exact(vec![Float32, Float32]),
TypeSignature::Exact(vec![Float64, Float64]),
],
Volatility::Immutable,
),
}
}
}
impl ScalarUDFImpl for $UDF {
fn as_any(&self) -> &dyn Any {
self
}
fn name(&self) -> &str {
stringify!($NAME)
}
fn signature(&self) -> &Signature {
&self.signature
}
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
let arg_type = &arg_types[0];
match arg_type {
DataType::Float32 => Ok(DataType::Float32),
// For other types (possible values float64/null/int), use Float64
_ => Ok(DataType::Float64),
}
}
fn output_ordering(
&self,
input: &[ExprProperties],
) -> Result<SortProperties> {
$OUTPUT_ORDERING(input)
}
fn invoke_with_args(
&self,
args: ScalarFunctionArgs,
) -> Result<ColumnarValue> {
let args = ColumnarValue::values_to_arrays(&args.args)?;
let arr: ArrayRef = match args[0].data_type() {
DataType::Float64 => {
let y = args[0].as_primitive::<Float64Type>();
let x = args[1].as_primitive::<Float64Type>();
let result = arrow::compute::binary::<_, _, _, Float64Type>(
y,
x,
|y, x| f64::$BINARY_FUNC(y, x),
)?;
Arc::new(result) as _
}
DataType::Float32 => {
let y = args[0].as_primitive::<Float32Type>();
let x = args[1].as_primitive::<Float32Type>();
let result = arrow::compute::binary::<_, _, _, Float32Type>(
y,
x,
|y, x| f32::$BINARY_FUNC(y, x),
)?;
Arc::new(result) as _
}
other => {
return exec_err!(
"Unsupported data type {other:?} for function {}",
self.name()
)
}
};
Ok(ColumnarValue::Array(arr))
}
fn documentation(&self) -> Option<&Documentation> {
Some($GET_DOC())
}
}
}
};
}