% blob: ea703c44348b577765a9b3a0ede992666a60bfc8 [file] [log] [blame]
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/udf.R
\name{register_scalar_function}
\alias{register_scalar_function}
\title{Register user-defined functions}
\usage{
register_scalar_function(name, fun, in_type, out_type, auto_convert = FALSE)
}
\arguments{
\item{name}{The function name to be used in the dplyr bindings}
\item{fun}{An R function or rlang-style lambda expression. The function
will be called with a first argument \code{context} which is a \code{list()}
with elements \code{batch_size} (the expected length of the output) and
\code{output_type} (the required \link{DataType} of the output) that may be used
to ensure that the output has the correct type and length. Subsequent
arguments are passed by position as specified by \code{in_type}. If
\code{auto_convert} is \code{TRUE}, subsequent arguments are converted to
R vectors before being passed to \code{fun} and the output is automatically
constructed with the expected output type via \code{\link[=as_arrow_array]{as_arrow_array()}}.}
\item{in_type}{A \link{DataType} of the input type or a \code{\link[=schema]{schema()}}
for functions with more than one argument. This signature will be used
to determine if this function is appropriate for a given set of arguments.
If this function is appropriate for more than one signature, pass a
\code{list()} of the above.}
\item{out_type}{A \link{DataType} of the output type or a function accepting
a single argument (\code{types}), which is a \code{list()} of \link{DataType}s. If a
function, it must return a \link{DataType}.}
\item{auto_convert}{Use \code{TRUE} to convert inputs before passing to \code{fun}
and construct an Array of the correct type from the output. Use this
option to write functions of R objects as opposed to functions of
Arrow R6 objects.}
}
\value{
\code{NULL}, invisibly
}
\description{
These functions support calling R code from query engine execution
(i.e., a \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[dplyr:filter]{dplyr::filter()}} on a \link{Table} or \link{Dataset}).
Use \code{\link[=register_scalar_function]{register_scalar_function()}} to attach Arrow input and output types to an
R function and make it available for use in the dplyr interface and/or
\code{\link[=call_function]{call_function()}}. Scalar functions are currently the only type of
user-defined function supported. In Arrow, scalar functions must be
stateless and return output with the same shape (i.e., the same number
of rows) as the input.
}
\examples{
\dontshow{if (arrow_with_dataset() && identical(Sys.getenv("NOT_CRAN"), "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
library(dplyr, warn.conflicts = FALSE)
some_model <- lm(mpg ~ disp + cyl, data = mtcars)
register_scalar_function(
"mtcars_predict_mpg",
function(context, disp, cyl) {
predict(some_model, newdata = data.frame(disp, cyl))
},
in_type = schema(disp = float64(), cyl = float64()),
out_type = float64(),
auto_convert = TRUE
)
as_arrow_table(mtcars) \%>\%
transmute(mpg, mpg_predicted = mtcars_predict_mpg(disp, cyl)) \%>\%
collect() \%>\%
head()
\dontshow{\}) # examplesIf}
}