r/R/dplyr-functions.R - arrow - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.


 #' @include expression.R
 NULL

 # This environment is an internal cache for things including data mask functions
 # We'll populate it at package load time.
 .cache <- NULL
 init_env <- function() {
   .cache <<- new.env(hash = TRUE)
 }
 init_env()

 # nse_funcs is a list of functions that operated on (and return) Expressions
 # These will be the basis for a data_mask inside dplyr methods
 # and will be added to .cache at package load time

 # Start with mappings from R function name spellings
 nse_funcs <- lapply(set_names(names(.array_function_map)), function(operator) {
   force(operator)
   function(...) build_expr(operator, ...)
 })

 # Now add functions to that list where the mapping from R to Arrow isn't 1:1
 # Each of these functions should have the same signature as the R function
 # they're replacing.
 #
 # When to use `build_expr()` vs. `Expression$create()`?
 #
 # Use `build_expr()` if you need to
 # (1) map R function names to Arrow C++ functions
 # (2) wrap R inputs (vectors) as Array/Scalar
 #
 # `Expression$create()` is lower level. Most of the functions below use it
 # because they manage the preparation of the user-provided inputs
 # and don't need to wrap scalars

 nse_funcs$cast <- function(x, target_type, safe = TRUE, ...) {
   opts <- cast_options(safe, ...)
   opts$to_type <- as_type(target_type)
   Expression$create("cast", x, options = opts)
 }

 nse_funcs$coalesce <- function(...) {
   args <- list2(...)
   if (length(args) < 1) {
     abort("At least one argument must be supplied to coalesce()")
   }

   # Treat NaN like NA for consistency with dplyr::coalesce(), but if *all*
   # the values are NaN, we should return NaN, not NA, so don't replace
   # NaN with NA in the final (or only) argument
   # TODO: if an option is added to the coalesce kernel to treat NaN as NA,
   # use that to simplify the code here (ARROW-13389)
   attr(args[[length(args)]], "last") <- TRUE
   args <- lapply(args, function(arg) {
     last_arg <- is.null(attr(arg, "last"))
     attr(arg, "last") <- NULL

     if (!inherits(arg, "Expression")) {
       arg <- Expression$scalar(arg)
     }

     # coalesce doesn't yet support factors/dictionaries
     # TODO: remove this after ARROW-13390 is merged
     if (nse_funcs$is.factor(arg)) {
       warning("Dictionaries (in R: factors) are currently converted to strings (characters) in coalesce", call. = FALSE)
     }

     if (last_arg && arg$type_id() %in% TYPES_WITH_NAN) {
       # store the NA_real_ in the same type as arg to avoid avoid casting
       # smaller float types to larger float types
       NA_expr <- Expression$scalar(Scalar$create(NA_real_, type = arg$type()))
       Expression$create("if_else", Expression$create("is_nan", arg), NA_expr, arg)
     } else {
       arg
     }
   })
   Expression$create("coalesce", args = args)
 }

 nse_funcs$is.na <- function(x) {
   # TODO: if an option is added to the is_null kernel to treat NaN as NA,
   # use that to simplify the code here (ARROW-13367)
   if (is.double(x) || (inherits(x, "Expression") &&
       x$type_id() %in% TYPES_WITH_NAN)) {
     build_expr("is_nan", x) | build_expr("is_null", x)
   } else {
     build_expr("is_null", x)
   }
 }

 nse_funcs$is.nan <- function(x) {
   if (is.double(x) || (inherits(x, "Expression") &&
       x$type_id() %in% TYPES_WITH_NAN)) {
     # TODO: if an option is added to the is_nan kernel to treat NA as NaN,
     # use that to simplify the code here (ARROW-13366)
     build_expr("is_nan", x) & build_expr("is_valid", x)
   } else {
     Expression$scalar(FALSE)
   }
 }

 nse_funcs$is <- function(object, class2) {
   if (is.string(class2)) {
     switch(class2,
       # for R data types, pass off to is.*() functions
       character = nse_funcs$is.character(object),
       numeric = nse_funcs$is.numeric(object),
       integer = nse_funcs$is.integer(object),
       integer64 = nse_funcs$is.integer64(object),
       logical = nse_funcs$is.logical(object),
       factor = nse_funcs$is.factor(object),
       list = nse_funcs$is.list(object),
       # for Arrow data types, compare class2 with object$type()$ToString(),
       # but first strip off any parameters to only compare the top-level data
       # type,  and canonicalize class2
       sub("^([^([<]+).*$", "\\1", object$type()$ToString()) ==
         canonical_type_str(class2)
     )
   } else if (inherits(class2, "DataType")) {
     object$type() == as_type(class2)
   } else {
     stop("Second argument to is() is not a string or DataType", call. = FALSE)
   }
 }

 nse_funcs$dictionary_encode <- function(x,
                                         null_encoding_behavior = c("mask", "encode")) {
   behavior <- toupper(match.arg(null_encoding_behavior))
   null_encoding_behavior <- NullEncodingBehavior[[behavior]]
   Expression$create(
     "dictionary_encode",
     x,
     options = list(null_encoding_behavior = null_encoding_behavior)
   )
 }

 nse_funcs$between <- function(x, left, right) {
   x >= left & x <= right
 }

 nse_funcs$is.finite <- function(x) {
   is_fin <- Expression$create("is_finite", x)
   # for compatibility with base::is.finite(), return FALSE for NA_real_
   is_fin & !nse_funcs$is.na(is_fin)
 }

 nse_funcs$is.infinite <- function(x) {
   is_inf <- Expression$create("is_inf", x)
   # for compatibility with base::is.infinite(), return FALSE for NA_real_
   is_inf & !nse_funcs$is.na(is_inf)
 }

 # as.* type casting functions
 # as.factor() is mapped in expression.R
 nse_funcs$as.character <- function(x) {
   Expression$create("cast", x, options = cast_options(to_type = string()))
 }
 nse_funcs$as.double <- function(x) {
   Expression$create("cast", x, options = cast_options(to_type = float64()))
 }
 nse_funcs$as.integer <- function(x) {
   Expression$create(
     "cast",
     x,
     options = cast_options(
       to_type = int32(),
       allow_float_truncate = TRUE,
       allow_decimal_truncate = TRUE
     )
   )
 }
 nse_funcs$as.integer64 <- function(x) {
   Expression$create(
     "cast",
     x,
     options = cast_options(
       to_type = int64(),
       allow_float_truncate = TRUE,
       allow_decimal_truncate = TRUE
     )
   )
 }
 nse_funcs$as.logical <- function(x) {
   Expression$create("cast", x, options = cast_options(to_type = boolean()))
 }
 nse_funcs$as.numeric <- function(x) {
   Expression$create("cast", x, options = cast_options(to_type = float64()))
 }

 # is.* type functions
 nse_funcs$is.character <- function(x) {
   is.character(x) || (inherits(x, "Expression") &&
     x$type_id() %in% Type[c("STRING", "LARGE_STRING")])
 }
 nse_funcs$is.numeric <- function(x) {
   is.numeric(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c(
     "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
     "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE",
     "DECIMAL", "DECIMAL256"
   )])
 }
 nse_funcs$is.double <- function(x) {
   is.double(x) || (inherits(x, "Expression") && x$type_id() == Type["DOUBLE"])
 }
 nse_funcs$is.integer <- function(x) {
   is.integer(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c(
     "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
     "UINT64", "INT64"
   )])
 }
 nse_funcs$is.integer64 <- function(x) {
   is.integer64(x) || (inherits(x, "Expression") && x$type_id() == Type["INT64"])
 }
 nse_funcs$is.logical <- function(x) {
   is.logical(x) || (inherits(x, "Expression") && x$type_id() == Type["BOOL"])
 }
 nse_funcs$is.factor <- function(x) {
   is.factor(x) || (inherits(x, "Expression") && x$type_id() == Type["DICTIONARY"])
 }
 nse_funcs$is.list <- function(x) {
   is.list(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c(
     "LIST", "FIXED_SIZE_LIST", "LARGE_LIST"
   )])
 }

 # rlang::is_* type functions
 nse_funcs$is_character <- function(x, n = NULL) {
   assert_that(is.null(n))
   nse_funcs$is.character(x)
 }
 nse_funcs$is_double <- function(x, n = NULL, finite = NULL) {
   assert_that(is.null(n) && is.null(finite))
   nse_funcs$is.double(x)
 }
 nse_funcs$is_integer <- function(x, n = NULL) {
   assert_that(is.null(n))
   nse_funcs$is.integer(x)
 }
 nse_funcs$is_list <- function(x, n = NULL) {
   assert_that(is.null(n))
   nse_funcs$is.list(x)
 }
 nse_funcs$is_logical <- function(x, n = NULL) {
   assert_that(is.null(n))
   nse_funcs$is.logical(x)
 }

 # String functions
 nse_funcs$nchar <- function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
   if (allowNA) {
     arrow_not_supported("allowNA = TRUE")
   }
   if (is.na(keepNA)) {
     keepNA <- !identical(type, "width")
   }
   if (!keepNA) {
     # TODO: I think there is a fill_null kernel we could use, set null to 2
     arrow_not_supported("keepNA = TRUE")
   }
   if (identical(type, "bytes")) {
     Expression$create("binary_length", x)
   } else {
     Expression$create("utf8_length", x)
   }
 }

 nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) {
   assert_that(
     is.null(collapse),
     msg = "paste() with the collapse argument is not yet supported in Arrow"
   )
   if (!inherits(sep, "Expression")) {
     assert_that(!is.na(sep), msg = "Invalid separator")
   }
   arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
 }

 nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
   assert_that(
     is.null(collapse),
     msg = "paste0() with the collapse argument is not yet supported in Arrow"
   )
   arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
 }

 nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
   assert_that(
     is.null(collapse),
     msg = "str_c() with the collapse argument is not yet supported in Arrow"
   )
   arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
 }

 arrow_string_join_function <- function(null_handling, null_replacement = NULL) {
   # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
   # as the last argument, so pass `sep` as the last dots arg to this function
   function(...) {
     args <- lapply(list(...), function(arg) {
       # handle scalar literal args, and cast all args to string for
       # consistency with base::paste(), base::paste0(), and stringr::str_c()
       if (!inherits(arg, "Expression")) {
         assert_that(
           length(arg) == 1,
           msg = "Literal vectors of length != 1 not supported in string concatenation"
         )
         Expression$scalar(as.character(arg))
       } else {
         nse_funcs$as.character(arg)
       }
     })
     Expression$create(
       "binary_join_element_wise",
       args = args,
       options = list(
         null_handling = null_handling,
         null_replacement = null_replacement
       )
     )
   }
 }

 nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) {
   side <- match.arg(side)
   trim_fun <- switch(side,
     left = "utf8_ltrim_whitespace",
     right = "utf8_rtrim_whitespace",
     both = "utf8_trim_whitespace"
   )
   Expression$create(trim_fun, string)
 }

 nse_funcs$substr <- function(x, start, stop) {
   assert_that(
     length(start) == 1,
     msg = "`start` must be length 1 - other lengths are not supported in Arrow"
   )
   assert_that(
     length(stop) == 1,
     msg = "`stop` must be length 1 - other lengths are not supported in Arrow"
   )

   # substr treats values as if they're on a continous number line, so values
   # 0 are effectively blank characters - set `start` to 1 here so Arrow mimics
   # this behavior
   if (start <= 0) {
     start <- 1
   }

   # if `stop` is lower than `start`, this is invalid, so set `stop` to
   # 0 so that an empty string will be returned (consistent with base::substr())
   if (stop < start) {
     stop <- 0
   }

   Expression$create(
     "utf8_slice_codeunits",
     x,
     # we don't need to subtract 1 from `stop` as C++ counts exclusively
     # which effectively cancels out the difference in indexing between R & C++
     options = list(start = start - 1L, stop = stop)
   )
 }

 nse_funcs$substring <- function(text, first, last){
   nse_funcs$substr(x = text, start = first, stop = last)
 }

 nse_funcs$str_sub <- function(string, start = 1L, end = -1L) {
   assert_that(
     length(start) == 1,
     msg = "`start` must be length 1 - other lengths are not supported in Arrow"
   )
   assert_that(
     length(end) == 1,
     msg = "`end` must be length 1 - other lengths are not supported in Arrow"
   )

   # In stringr::str_sub, an `end` value of -1 means the end of the string, so
   # set it to the maximum integer to match this behavior
   if (end == -1) {
     end <- .Machine$integer.max
   }

   # An end value lower than a start value returns an empty string in
   # stringr::str_sub so set end to 0 here to match this behavior
   if (end < start) {
     end <- 0
   }

   # subtract 1 from `start` because C++ is 0-based and R is 1-based
   # str_sub treats a `start` value of 0 or 1 as the same thing so don't subtract 1 when `start` == 0
   # when `start` < 0, both str_sub and utf8_slice_codeunits count backwards from the end
   if (start > 0) {
     start <- start - 1L
   }

   Expression$create(
     "utf8_slice_codeunits",
     string,
     options = list(start = start, stop = end)
   )
 }

 nse_funcs$grepl <- function(pattern, x, ignore.case = FALSE, fixed = FALSE) {
   arrow_fun <- ifelse(fixed, "match_substring", "match_substring_regex")
   Expression$create(
     arrow_fun,
     x,
     options = list(pattern = pattern, ignore_case = ignore.case)
   )
 }

 nse_funcs$str_detect <- function(string, pattern, negate = FALSE) {
   opts <- get_stringr_pattern_options(enexpr(pattern))
   out <- nse_funcs$grepl(
     pattern = opts$pattern,
     x = string,
     ignore.case = opts$ignore_case,
     fixed = opts$fixed
   )
   if (negate) {
     out <- !out
   }
   out
 }

 nse_funcs$str_like <- function(string, pattern, ignore_case = TRUE) {
   Expression$create(
     "match_like",
     string,
     options = list(pattern = pattern, ignore_case = ignore_case)
   )
 }

 # Encapsulate some common logic for sub/gsub/str_replace/str_replace_all
 arrow_r_string_replace_function <- function(max_replacements) {
   function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) {
     Expression$create(
       ifelse(fixed && !ignore.case, "replace_substring", "replace_substring_regex"),
       x,
       options = list(
         pattern = format_string_pattern(pattern, ignore.case, fixed),
         replacement = format_string_replacement(replacement, ignore.case, fixed),
         max_replacements = max_replacements
       )
     )
   }
 }

 arrow_stringr_string_replace_function <- function(max_replacements) {
   function(string, pattern, replacement) {
     opts <- get_stringr_pattern_options(enexpr(pattern))
     arrow_r_string_replace_function(max_replacements)(
       pattern = opts$pattern,
       replacement = replacement,
       x = string,
       ignore.case = opts$ignore_case,
       fixed = opts$fixed
     )
   }
 }

 nse_funcs$sub <- arrow_r_string_replace_function(1L)
 nse_funcs$gsub <- arrow_r_string_replace_function(-1L)
 nse_funcs$str_replace <- arrow_stringr_string_replace_function(1L)
 nse_funcs$str_replace_all <- arrow_stringr_string_replace_function(-1L)

 nse_funcs$strsplit <- function(x,
                                split,
                                fixed = FALSE,
                                perl = FALSE,
                                useBytes = FALSE) {
   assert_that(is.string(split))

   arrow_fun <- ifelse(fixed, "split_pattern", "split_pattern_regex")
   # warn when the user specifies both fixed = TRUE and perl = TRUE, for
   # consistency with the behavior of base::strsplit()
   if (fixed && perl) {
     warning("Argument 'perl = TRUE' will be ignored", call. = FALSE)
   }
   # since split is not a regex, proceed without any warnings or errors
   # regardless of the value of perl, for consistency with the behavior of
   # base::strsplit()
   Expression$create(
     arrow_fun,
     x,
     options = list(pattern = split, reverse = FALSE, max_splits = -1L)
   )
 }

 nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
   opts <- get_stringr_pattern_options(enexpr(pattern))
   arrow_fun <- ifelse(opts$fixed, "split_pattern", "split_pattern_regex")
   if (opts$ignore_case) {
     arrow_not_supported("Case-insensitive string splitting")
   }
   if (n == 0) {
     arrow_not_supported("Splitting strings into zero parts")
   }
   if (identical(n, Inf)) {
     n <- 0L
   }
   if (simplify) {
     warning("Argument 'simplify = TRUE' will be ignored", call. = FALSE)
   }
   # The max_splits option in the Arrow C++ library controls the maximum number
   # of places at which the string is split, whereas the argument n to
   # str_split() controls the maximum number of pieces to return. So we must
   # subtract 1 from n to get max_splits.
   Expression$create(
     arrow_fun,
     string,
     options = list(
       pattern = opts$pattern,
       reverse = FALSE,
       max_splits = n - 1L
     )
   )
 }

 nse_funcs$pmin <- function(..., na.rm = FALSE) {
   build_expr(
     "min_element_wise",
     ...,
     options = list(skip_nulls = na.rm)
   )
 }

 nse_funcs$pmax <- function(..., na.rm = FALSE) {
   build_expr(
     "max_element_wise",
     ...,
     options = list(skip_nulls = na.rm)
   )
 }

 nse_funcs$str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ") {
   assert_that(is_integerish(width))
   side <- match.arg(side)
   assert_that(is.string(pad))

   if (side == "left") {
     pad_func <- "utf8_lpad"
   } else if (side == "right") {
     pad_func <- "utf8_rpad"
   } else if (side == "both") {
     pad_func <- "utf8_center"
   }

   Expression$create(
     pad_func,
     string,
     options = list(width = width, padding = pad)
   )
 }

 # String function helpers

 # format `pattern` as needed for case insensitivity and literal matching by RE2
 format_string_pattern <- function(pattern, ignore.case, fixed) {
   # Arrow lacks native support for case-insensitive literal string matching and
   # replacement, so we use the regular expression engine (RE2) to do this.
   # https://github.com/google/re2/wiki/Syntax
   if (ignore.case) {
     if (fixed) {
       # Everything between "\Q" and "\E" is treated as literal text.
       # If the search text contains any literal "\E" strings, make them
       # lowercase so they won't signal the end of the literal text:
       pattern <- gsub("\\E", "\\e", pattern, fixed = TRUE)
       pattern <- paste0("\\Q", pattern, "\\E")
     }
     # Prepend "(?i)" for case-insensitive matching
     pattern <- paste0("(?i)", pattern)
   }
   pattern
 }

 # format `replacement` as needed for literal replacement by RE2
 format_string_replacement <- function(replacement, ignore.case, fixed) {
   # Arrow lacks native support for case-insensitive literal string
   # replacement, so we use the regular expression engine (RE2) to do this.
   # https://github.com/google/re2/wiki/Syntax
   if (ignore.case && fixed) {
     # Escape single backslashes in the regex replacement text so they are
     # interpreted as literal backslashes:
     replacement <- gsub("\\", "\\\\", replacement, fixed = TRUE)
   }
   replacement
 }

 #' Get `stringr` pattern options
 #'
 #' This function assigns definitions for the `stringr` pattern modifier
 #' functions (`fixed()`, `regex()`, etc.) inside itself, and uses them to
 #' evaluate the quoted expression `pattern`, returning a list that is used
 #' to control pattern matching behavior in internal `arrow` functions.
 #'
 #' @param pattern Unevaluated expression containing a call to a `stringr`
 #' pattern modifier function
 #'
 #' @return List containing elements `pattern`, `fixed`, and `ignore_case`
 #' @keywords internal
 get_stringr_pattern_options <- function(pattern) {
   fixed <- function(pattern, ignore_case = FALSE, ...) {
     check_dots(...)
     list(pattern = pattern, fixed = TRUE, ignore_case = ignore_case)
   }
   regex <- function(pattern, ignore_case = FALSE, ...) {
     check_dots(...)
     list(pattern = pattern, fixed = FALSE, ignore_case = ignore_case)
   }
   coll <- function(...) {
     arrow_not_supported("Pattern modifier `coll()`")
   }
   boundary <- function(...) {
     arrow_not_supported("Pattern modifier `boundary()`")
   }
   check_dots <- function(...) {
     dots <- list(...)
     if (length(dots)) {
       warning(
         "Ignoring pattern modifier ",
         ngettext(length(dots), "argument ", "arguments "),
         "not supported in Arrow: ",
         oxford_paste(names(dots)),
         call. = FALSE
       )
     }
   }
   ensure_opts <- function(opts) {
     if (is.character(opts)) {
       opts <- list(pattern = opts, fixed = FALSE, ignore_case = FALSE)
     }
     opts
   }
   ensure_opts(eval(pattern))
 }

 #' Does this string contain regex metacharacters?
 #'
 #' @param string String to be tested
 #' @keywords internal
 #' @return Logical: does `string` contain regex metacharacters?
 contains_regex <- function(string) {
   grepl("[.\\|()[{^$*+?]", string)
 }

 nse_funcs$strptime <- function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit = "ms") {
   # Arrow uses unit for time parsing, strptime() does not.
   # Arrow has no default option for strptime (format, unit),
   # we suggest following format = "%Y-%m-%d %H:%M:%S", unit = MILLI/1L/"ms",
   # (ARROW-12809)

   # ParseTimestampStrptime currently ignores the timezone information (ARROW-12820).
   # Stop if tz is provided.
   if (is.character(tz)) {
     arrow_not_supported("Time zone argument")
   }

   unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units))

   Expression$create("strptime", x, options = list(format = format, unit = unit))
 }

 nse_funcs$second <- function(x) {
   Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x))
 }

 nse_funcs$trunc <- function(x, ...) {
   # accepts and ignores ... for consistency with base::trunc()
   build_expr("trunc", x)
 }

 nse_funcs$wday <- function(x, label = FALSE, abbr = TRUE, week_start = getOption("lubridate.week.start", 7)) {

   # The "day_of_week" compute function returns numeric days of week and not locale-aware strftime
   # When the ticket below is resolved, we should be able to support the label argument
   # https://issues.apache.org/jira/browse/ARROW-13133
   if (label) {
     arrow_not_supported("Label argument")
   }

   Expression$create("day_of_week", x, options = list(one_based_numbering = TRUE, week_start = week_start))
 }

 nse_funcs$log <- nse_funcs$logb <- function(x, base = exp(1)) {

   if (base == exp(1)) {
     return(Expression$create("ln_checked", x))
   }

   if (base == 2) {
     return(Expression$create("log2_checked", x))
   }

   if (base == 10) {
     return(Expression$create("log10_checked", x))
   }
   # ARROW-13345
   stop("`base` values other than exp(1), 2 and 10 not supported in Arrow", call. = FALSE)
 }

 nse_funcs$if_else <- function(condition, true, false, missing = NULL){
   if (!is.null(missing)) {
     return(nse_funcs$if_else(
       nse_funcs$is.na(condition),
       missing,
       nse_funcs$if_else(condition, true, false)
     ))
   }

   # if_else doesn't yet support factors/dictionaries
   # TODO: remove this after ARROW-13358 is merged
   warn_types <- nse_funcs$is.factor(true) | nse_funcs$is.factor(false)
   if (warn_types) {
     warning("Dictionaries (in R: factors) are currently converted to strings (characters) in if_else and ifelse", call. = FALSE)
   }

   build_expr("if_else", condition, true, false)
 }

 # Although base R ifelse allows `yes` and `no` to be different classes
 #
 nse_funcs$ifelse <- function(test, yes, no) {
   nse_funcs$if_else(condition = test, true = yes, false = no)
 }

 nse_funcs$case_when <- function(...) {
   formulas <- list2(...)
   n <- length(formulas)
   if (n == 0) {
     abort("No cases provided in case_when()")
   }
   query <- vector("list", n)
   value <- vector("list", n)
   mask <- caller_env()
   for (i in seq_len(n)) {
     f <- formulas[[i]]
     if (!inherits(f, "formula")) {
       abort("Each argument to case_when() must be a two-sided formula")
     }
     query[[i]] <- arrow_eval(f[[2]], mask)
     value[[i]] <- arrow_eval(f[[3]], mask)
     if (!nse_funcs$is.logical(query[[i]])) {
       abort("Left side of each formula in case_when() must be a logical expression")
     }
   }
   build_expr(
     "case_when",
     args = c(
       build_expr(
         "make_struct",
         args = query,
         options = list(field_names = as.character(seq_along(query)))
       ),
       value
     )
   )
 }