| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # Split up into several register functions by category to reduce cyclomatic |
| # complexity (linter) |
| register_bindings_datetime <- function() { |
| register_bindings_datetime_utility() |
| register_bindings_datetime_components() |
| register_bindings_datetime_conversion() |
| register_bindings_datetime_timezone() |
| register_bindings_duration() |
| register_bindings_duration_constructor() |
| register_bindings_duration_helpers() |
| register_bindings_datetime_parsers() |
| register_bindings_datetime_rounding() |
| } |
| |
| register_bindings_datetime_utility <- function() { |
| register_binding( |
| "base::strptime", |
| function(x, |
| format = "%Y-%m-%d %H:%M:%S", |
| tz = "", |
| unit = "ms") { |
| # Arrow uses unit for time parsing, strptime() does not. |
| # Arrow has no default option for strptime (format, unit), |
| # we suggest following format = "%Y-%m-%d %H:%M:%S", unit = MILLI/1L/"ms", |
| # (ARROW-12809) |
| |
| unit <- make_valid_time_unit( |
| unit, |
| c(valid_time64_units, valid_time32_units) |
| ) |
| |
| output <- Expression$create( |
| "strptime", |
| x, |
| options = |
| list( |
| format = format, |
| unit = unit, |
| error_is_null = TRUE |
| ) |
| ) |
| |
| if (tz == "") { |
| tz <- Sys.timezone() |
| } |
| |
| # If a timestamp does not contain timezone information (i.e. it is |
| # "timezone-naive") we can attach timezone information (i.e. convert it into |
| # a "timezone-aware" timestamp) with `assume_timezone` |
| # if we want to cast to a different timezone, we can only do it for |
| # timezone-aware timestamps, not for timezone-naive ones. |
| # strptime in Acero will return a timezone-aware timestamp if %z is |
| # part of the format string. |
| if (!is.null(tz) && !grepl("%z", format, fixed = TRUE)) { |
| output <- Expression$create( |
| "assume_timezone", |
| output, |
| options = |
| list( |
| timezone = tz |
| ) |
| ) |
| } |
| output |
| }, |
| notes = c( |
| "accepts a `unit` argument not present in the `base` function.", |
| 'Valid values are "s", "ms" (default), "us", "ns".' |
| ) |
| ) |
| |
| register_binding("base::strftime", function(x, |
| format = "", |
| tz = "", |
| usetz = FALSE) { |
| if (usetz) { |
| format <- paste(format, "%Z") |
| } |
| if (tz == "") { |
| tz <- Sys.timezone() |
| } |
| # Arrow's strftime prints in timezone of the timestamp. To match R's strftime behavior we first |
| # cast the timestamp to desired timezone. This is a metadata only change. |
| if (call_binding("is.POSIXct", x)) { |
| ts <- Expression$create("cast", x, options = list(to_type = timestamp(x$type()$unit(), tz))) |
| } else { |
| ts <- x |
| } |
| Expression$create("strftime", ts, options = list(format = format, locale = check_time_locale())) |
| }) |
| |
| register_binding("lubridate::format_ISO8601", function(x, usetz = FALSE, precision = NULL, ...) { |
| ISO8601_precision_map <- |
| list( |
| y = "%Y", |
| ym = "%Y-%m", |
| ymd = "%Y-%m-%d", |
| ymdh = "%Y-%m-%dT%H", |
| ymdhm = "%Y-%m-%dT%H:%M", |
| ymdhms = "%Y-%m-%dT%H:%M:%S" |
| ) |
| |
| if (is.null(precision)) { |
| precision <- "ymdhms" |
| } |
| if (!precision %in% names(ISO8601_precision_map)) { |
| abort( |
| paste( |
| "`precision` must be one of the following values:", |
| paste(names(ISO8601_precision_map), collapse = ", "), |
| "\nValue supplied was: ", |
| precision |
| ) |
| ) |
| } |
| format <- ISO8601_precision_map[[precision]] |
| if (usetz) { |
| format <- paste0(format, "%z") |
| } |
| Expression$create("strftime", x, options = list(format = format, locale = "C")) |
| }) |
| |
| register_binding("lubridate::is.Date", function(x) { |
| inherits(x, "Date") || |
| (inherits(x, "Expression") && x$type_id() %in% Type[c("DATE32", "DATE64")]) |
| }) |
| |
| is_instant_binding <- function(x) { |
| inherits(x, c("POSIXt", "POSIXct", "POSIXlt", "Date")) || |
| (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP", "DATE32", "DATE64")]) |
| } |
| register_binding("lubridate::is.instant", is_instant_binding) |
| register_binding("lubridate::is.timepoint", is_instant_binding) |
| |
| register_binding("lubridate::is.POSIXct", function(x) { |
| inherits(x, "POSIXct") || |
| (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP")]) |
| }) |
| |
| register_binding("lubridate::date", function(x) { |
| cast(x, date32()) |
| }) |
| } |
| |
| register_bindings_datetime_components <- function() { |
| register_binding("lubridate::second", function(x) { |
| Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x)) |
| }) |
| |
| register_binding("lubridate::wday", function(x, |
| label = FALSE, |
| abbr = TRUE, |
| week_start = getOption("lubridate.week.start", 7), |
| locale = Sys.getlocale("LC_TIME")) { |
| if (label) { |
| if (abbr) { |
| format <- "%a" |
| } else { |
| format <- "%A" |
| } |
| return(Expression$create("strftime", x, options = list(format = format, locale = check_time_locale(locale)))) |
| } |
| |
| Expression$create("day_of_week", x, options = list(count_from_zero = FALSE, week_start = week_start)) |
| }) |
| |
| register_binding("lubridate::week", function(x) { |
| (call_binding("yday", x) - 1) %/% 7 + 1 |
| }) |
| |
| register_binding("lubridate::month", function(x, |
| label = FALSE, |
| abbr = TRUE, |
| locale = Sys.getlocale("LC_TIME")) { |
| if (call_binding("is.integer", x)) { |
| x <- call_binding( |
| "if_else", |
| call_binding("between", x, 1, 12), |
| x, |
| NA_integer_ |
| ) |
| if (!label) { |
| # if we don't need a label we can return the integer itself (already |
| # constrained to 1:12) |
| return(x) |
| } |
| # make the integer into a date32() - which interprets integers as |
| # days from epoch (we multiply by 28 to be able to later extract the |
| # month with label) - NB this builds a false date (to be used by strftime) |
| # since we only know and care about the month |
| x <- cast(x * 28L, date32()) |
| } |
| |
| if (label) { |
| if (abbr) { |
| format <- "%b" |
| } else { |
| format <- "%B" |
| } |
| return(Expression$create("strftime", x, options = list(format = format, locale = check_time_locale(locale)))) |
| } |
| |
| Expression$create("month", x) |
| }) |
| |
| register_binding("lubridate::qday", function(x) { |
| # We calculate day of quarter by flooring timestamp to beginning of quarter and |
| # calculating days between beginning of quarter and timestamp/date in question. |
| # Since we use one one-based numbering we add one. |
| floored_x <- Expression$create("floor_temporal", x, options = list(unit = 9L)) |
| Expression$create("days_between", floored_x, x) + Expression$scalar(1L) |
| }) |
| |
| register_binding("lubridate::am", function(x) { |
| hour <- Expression$create("hour", x) |
| hour < 12 |
| }) |
| register_binding("lubridate::pm", function(x) { |
| !call_binding("am", x) |
| }) |
| register_binding("lubridate::tz", function(x) { |
| if (!call_binding("is.POSIXct", x)) { |
| arrow_not_supported( |
| paste0( |
| "timezone extraction for objects of class `", |
| infer_type(x)$ToString(), |
| "`" |
| ) |
| ) |
| } |
| |
| x$type()$timezone() |
| }) |
| register_binding("lubridate::semester", function(x, with_year = FALSE) { |
| month <- call_binding("month", x) |
| semester <- Expression$create("if_else", month <= 6, 1L, 2L) |
| if (with_year) { |
| year <- call_binding("year", x) |
| return(year + semester / 10) |
| } else { |
| return(semester) |
| } |
| }) |
| } |
| |
| register_bindings_datetime_conversion <- function() { |
| register_binding( |
| "lubridate::make_datetime", |
| function(year = 1970L, |
| month = 1L, |
| day = 1L, |
| hour = 0L, |
| min = 0L, |
| sec = 0, |
| tz = "UTC") { |
| # ParseTimestampStrptime currently ignores the timezone information (ARROW-12820). |
| # Stop if tz other than 'UTC' is provided. |
| if (tz != "UTC") { |
| arrow_not_supported("Time zone other than 'UTC'") |
| } |
| |
| x <- call_binding("str_c", year, month, day, hour, min, sec, sep = "-") |
| Expression$create("strptime", x, options = list(format = "%Y-%m-%d-%H-%M-%S", unit = 0L)) |
| }, |
| notes = "only supports UTC (default) timezone" |
| ) |
| |
| register_binding("lubridate::make_date", function(year = 1970L, |
| month = 1L, |
| day = 1L) { |
| x <- call_binding("make_datetime", year, month, day) |
| cast(x, date32()) |
| }) |
| |
| register_binding("base::ISOdatetime", function(year, |
| month, |
| day, |
| hour, |
| min, |
| sec, |
| tz = "UTC") { |
| # NAs for seconds aren't propagated (but treated as 0) in the base version |
| sec <- call_binding( |
| "if_else", |
| call_binding("is.na", sec), |
| 0, |
| sec |
| ) |
| |
| call_binding("make_datetime", year, month, day, hour, min, sec, tz) |
| }) |
| |
| register_binding("base::ISOdate", function(year, |
| month, |
| day, |
| hour = 12, |
| min = 0, |
| sec = 0, |
| tz = "UTC") { |
| call_binding("make_datetime", year, month, day, hour, min, sec, tz) |
| }) |
| |
| register_binding( |
| "base::as.Date", |
| function(x, |
| format = NULL, |
| tryFormats = "%Y-%m-%d", |
| origin = "1970-01-01", |
| tz = "UTC") { |
| if (is.null(format) && length(tryFormats) > 1) { |
| abort( |
| paste( |
| "`as.Date()` with multiple `tryFormats` is not supported in Arrow.", |
| "Consider using the lubridate specialised parsing functions `ymd()`, `ymd()`, etc." |
| ) |
| ) |
| } |
| |
| # base::as.Date() and lubridate::as_date() differ in the way they use the |
| # `tz` argument. Both cast to the desired timezone, if present. The |
| # difference appears when the `tz` argument is not set: `as.Date()` uses the |
| # default value ("UTC"), while `as_date()` keeps the original attribute |
| # => we only cast when we want the behaviour of the base version or when |
| # `tz` is set (i.e. not NULL) |
| if (call_binding("is.POSIXct", x)) { |
| unit <- if (inherits(x, "Expression")) x$type()$unit() else "s" |
| x <- cast(x, timestamp(unit = unit, timezone = tz)) |
| } |
| |
| binding_as_date( |
| x = x, |
| format = format, |
| tryFormats = tryFormats, |
| origin = origin |
| ) |
| }, |
| notes = c( |
| "Multiple `tryFormats` not supported in Arrow.", |
| "Consider using the lubridate specialised parsing functions `ymd()`, `ymd()`, etc." |
| ) |
| ) |
| |
| register_binding("lubridate::as_date", function(x, |
| format = NULL, |
| origin = "1970-01-01", |
| tz = NULL) { |
| # base::as.Date() and lubridate::as_date() differ in the way they use the |
| # `tz` argument. Both cast to the desired timezone, if present. The |
| # difference appears when the `tz` argument is not set: `as.Date()` uses the |
| # default value ("UTC"), while `as_date()` keeps the original attribute |
| # => we only cast when we want the behaviour of the base version or when |
| # `tz` is set (i.e. not NULL) |
| if (call_binding("is.POSIXct", x) && !is.null(tz)) { |
| unit <- if (inherits(x, "Expression")) x$type()$unit() else "s" |
| x <- cast(x, timestamp(unit = unit, timezone = tz)) |
| } |
| binding_as_date( |
| x = x, |
| format = format, |
| origin = origin |
| ) |
| }) |
| |
| register_binding("lubridate::as_datetime", function(x, |
| origin = "1970-01-01", |
| tz = "UTC", |
| format = NULL, |
| unit = "ns") { |
| # Arrow uses unit for time parsing, as_datetime() does not. |
| unit <- make_valid_time_unit( |
| unit, |
| c(valid_time64_units, valid_time32_units) |
| ) |
| |
| if (call_binding("is.integer", x)) { |
| x <- cast(x, int64()) |
| } |
| |
| if (call_binding("is.numeric", x)) { |
| multiple <- Expression$create("power_checked", 1000L, unit) |
| delta <- call_binding("difftime", origin, "1970-01-01") |
| delta <- cast(delta, int64()) |
| delta <- Expression$create("multiply_checked", delta, multiple) |
| x <- Expression$create("multiply_checked", x, multiple) |
| x <- cast(x, int64()) |
| x <- Expression$create("add_checked", x, delta) |
| } |
| |
| if (call_binding("is.character", x) && !is.null(format)) { |
| x <- Expression$create( |
| "strptime", |
| x, |
| options = list(format = format, unit = unit, error_is_null = TRUE) |
| ) |
| } |
| output <- cast(x, timestamp(unit = unit)) |
| Expression$create("assume_timezone", output, options = list(timezone = tz)) |
| }) |
| |
| register_binding("lubridate::decimal_date", function(date) { |
| y <- Expression$create("year", date) |
| start <- call_binding("make_datetime", year = y, tz = "UTC") |
| sofar <- call_binding("difftime", date, start, units = "secs") |
| total <- Expression$create( |
| "if_else", |
| Expression$create("is_leap_year", date), |
| Expression$scalar(31622400L), # number of seconds in a leap year (366 days) |
| Expression$scalar(31536000L) # number of seconds in a regular year (365 days) |
| ) |
| y + cast(sofar, int64()) / total |
| }) |
| |
| register_binding("lubridate::date_decimal", function(decimal, tz = "UTC") { |
| y <- Expression$create("floor", decimal) |
| |
| start <- call_binding("make_datetime", year = y, tz = tz) |
| seconds <- Expression$create( |
| "if_else", |
| Expression$create("is_leap_year", start), |
| Expression$scalar(31622400L), # number of seconds in a leap year (366 days) |
| Expression$scalar(31536000L) # number of seconds in a regular year (365 days) |
| ) |
| |
| fraction <- decimal - y |
| delta <- Expression$create("floor", seconds * fraction) |
| delta <- make_duration(delta, "s") |
| start + delta |
| }) |
| } |
| |
| register_bindings_datetime_timezone <- function() { |
| register_binding( |
| "lubridate::force_tz", |
| function(time, tzone = "", roll_dst = c("error", "post")) { |
| if (length(roll_dst) == 1L) { |
| roll_dst <- c(roll_dst, roll_dst) |
| } else if (length(roll_dst) != 2L) { |
| arrow_not_supported("`roll_dst` must be 1 or 2 items long; other lengths") |
| } |
| |
| nonexistent <- switch( |
| roll_dst[1], |
| "error" = 0L, |
| "boundary" = 2L, |
| arrow_not_supported("`roll_dst` value must be 'error' or 'boundary' for non-existent times; other values") |
| ) |
| |
| ambiguous <- switch( |
| roll_dst[2], |
| "error" = 0L, |
| "pre" = 1L, |
| "post" = 2L, |
| arrow_not_supported("`roll_dst` value must be 'error', 'pre', or 'post' for non-existent times") |
| ) |
| |
| if (identical(tzone, "")) { |
| tzone <- Sys.timezone() |
| } |
| |
| if (!inherits(time, "Expression")) { |
| time <- Expression$scalar(time) |
| } |
| |
| # Non-UTC timezones don't work here and getting them to do so was too |
| # hard to do in the initial PR because there is no way in Arrow to |
| # "unapply" a UTC offset (i.e., the reverse of assume_timezone). |
| if (!time$type()$timezone() %in% c("", "UTC")) { |
| arrow_not_supported("`time` with a non-UTC timezone") |
| } |
| |
| # Remove timezone if needed |
| current_unit <- time$type()$unit() |
| time <- cast(time, timestamp(current_unit, "")) |
| |
| # Add timezone |
| Expression$create( |
| "assume_timezone", |
| time, |
| options = list( |
| timezone = tzone, |
| nonexistent = nonexistent, |
| ambiguous = ambiguous |
| ) |
| ) |
| }, |
| notes = c( |
| "Timezone conversion from non-UTC timezone not supported;", |
| "`roll_dst` values of 'error' and 'boundary' are supported for nonexistent times,", |
| "`roll_dst` values of 'error', 'pre', and 'post' are supported for ambiguous times." |
| ) |
| ) |
| |
| register_binding("lubridate::with_tz", function(time, tzone = "") { |
| if (tzone == "") { |
| tzone <- Sys.timezone() |
| } |
| cast(time, timestamp(unit = time$type()$unit(), timezone = tzone)) |
| }) |
| } |
| |
| register_bindings_duration <- function() { |
| register_binding( |
| "base::difftime", |
| function(time1, time2, tz, units = "secs") { |
| if (units != "secs") { |
| arrow_not_supported("`difftime()` with units other than `secs`") |
| } |
| |
| if (!missing(tz)) { |
| warn("`tz` argument is not supported in Arrow, so it will be ignored") |
| } |
| |
| # cast to timestamp if time1 and time2 are not dates or timestamp expressions |
| # (the subtraction of which would output a `duration`) |
| if (!call_binding("is.instant", time1)) { |
| time1 <- cast(time1, timestamp()) |
| } |
| |
| if (!call_binding("is.instant", time2)) { |
| time2 <- cast(time2, timestamp()) |
| } |
| |
| # if time1 or time2 are timestamps they cannot be expressed in "s" /seconds |
| # otherwise they cannot be added subtracted with durations |
| # TODO delete the casting to "us" once |
| # https://issues.apache.org/jira/browse/ARROW-16060 is solved |
| if (inherits(time1, "Expression") && |
| time1$type_id() %in% Type[c("TIMESTAMP")] && time1$type()$unit() != 2L) { |
| time1 <- cast(time1, timestamp("us")) |
| } |
| |
| if (inherits(time2, "Expression") && |
| time2$type_id() %in% Type[c("TIMESTAMP")] && time2$type()$unit() != 2L) { |
| time2 <- cast(time2, timestamp("us")) |
| } |
| |
| # we need to go build the subtract expression instead of `time1 - time2` to |
| # prevent complaints when we try to subtract an R object from an Expression |
| cast(call_binding("-", time1, time2), duration("s")) |
| }, |
| notes = c( |
| 'only supports `units = "secs"` (the default);', |
| "`tz` argument not supported" |
| ) |
| ) |
| |
| register_binding( |
| "base::as.difftime", |
| function(x, format = "%X", units = "secs") { |
| # windows doesn't seem to like "%X" |
| if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") { |
| format <- "%H:%M:%S" |
| } |
| |
| if (units != "secs") { |
| arrow_not_supported("`as.difftime()` with units other than 'secs'") |
| } |
| |
| if (call_binding("is.character", x)) { |
| x <- Expression$create("strptime", x, options = list(format = format, unit = 0L)) |
| # we do a final cast to duration ("s") at the end |
| x <- make_duration(cast(x, time64("us")), unit = "us") |
| } |
| |
| # numeric -> duration not supported in Arrow yet so we use int64() as an |
| # intermediate step |
| # TODO: revisit after ARROW-15862 |
| |
| if (call_binding("is.numeric", x)) { |
| # coerce x to be int64(). it should work for integer-like doubles and fail |
| # for pure doubles |
| # if we abort for all doubles, we risk erroring in cases in which |
| # coercion to int64() would work |
| x <- cast(x, int64()) |
| } |
| |
| cast(x, duration(unit = "s")) |
| }, |
| notes = 'only supports `units = "secs"` (the default)' |
| ) |
| } |
| |
| register_bindings_duration_constructor <- function() { |
| register_binding( |
| "lubridate::make_difftime", |
| function(num = NULL, units = "secs", ...) { |
| if (units != "secs") { |
| arrow_not_supported("`make_difftime()` with units other than 'secs'") |
| } |
| |
| chunks <- list(...) |
| |
| # lubridate concatenates durations passed via the `num` argument with those |
| # passed via `...` resulting in a vector of length 2 - which is virtually |
| # unusable in a dplyr pipeline. Arrow errors in this situation |
| if (!is.null(num) && length(chunks) > 0) { |
| arrow_not_supported("`make_difftime()` with both `num` and `...`") |
| } |
| |
| if (!is.null(num)) { |
| # build duration from num if present |
| duration <- num |
| } else { |
| # build duration from chunks when nothing is passed via ... |
| duration <- duration_from_chunks(chunks) |
| } |
| |
| make_duration(duration, "s") |
| }, |
| notes = c( |
| 'only supports `units = "secs"` (the default);', |
| "providing both `num` and `...` is not supported" |
| ) |
| ) |
| } |
| |
| register_bindings_duration_helpers <- function() { |
| duration_factory <- function(value, unit) { |
| force(value) |
| force(unit) |
| function(x = 1) make_duration(x * value, unit) |
| } |
| |
| register_binding("lubridate::dminutes", duration_factory(60, "s")) |
| register_binding("lubridate::dhours", duration_factory(3600, "s")) |
| register_binding("lubridate::ddays", duration_factory(86400, "s")) |
| register_binding("lubridate::dweeks", duration_factory(604800, "s")) |
| register_binding("lubridate::dmonths", duration_factory(2629800, "s")) |
| register_binding("lubridate::dyears", duration_factory(31557600, "s")) |
| register_binding("lubridate::dseconds", duration_factory(1, "s")) |
| register_binding("lubridate::dmilliseconds", duration_factory(1, "ms")) |
| register_binding("lubridate::dmicroseconds", duration_factory(1, "us")) |
| register_binding("lubridate::dnanoseconds", duration_factory(1, "ns")) |
| register_binding( |
| "lubridate::dpicoseconds", |
| function(x = 1) { |
| abort("Duration in picoseconds not supported in Arrow.") |
| }, |
| notes = "not supported" |
| ) |
| } |
| |
| register_bindings_datetime_parsers <- function() { |
| register_binding( |
| "lubridate::parse_date_time", |
| function(x, |
| orders, |
| tz = "UTC", |
| truncated = 0, |
| quiet = TRUE, |
| exact = FALSE) { |
| if (!quiet) { |
| arrow_not_supported("`quiet = FALSE`") |
| } |
| |
| if (truncated > 0) { |
| if (truncated > (nchar(orders) - 3)) { |
| arrow_not_supported(paste0("a value for `truncated` > ", nchar(orders) - 3)) |
| } |
| # build several orders for truncated formats |
| orders <- map_chr(0:truncated, ~ substr(orders, start = 1, stop = nchar(orders) - .x)) |
| } |
| |
| if (!inherits(x, "Expression")) { |
| x <- Expression$scalar(x) |
| } |
| |
| if (exact == TRUE) { |
| # no data processing takes place & we don't derive formats |
| parse_attempts <- build_strptime_exprs(x, orders) |
| } else { |
| parse_attempts <- attempt_parsing(x, orders = orders) |
| } |
| |
| coalesce_output <- Expression$create("coalesce", args = parse_attempts) |
| |
| # we need this binding to be able to handle a NULL `tz`, which, in turn, |
| # will be used by bindings such as `ymd()` to return a date or timestamp, |
| # based on whether tz is NULL or not |
| if (!is.null(tz)) { |
| Expression$create("assume_timezone", coalesce_output, options = list(timezone = tz)) |
| } else { |
| coalesce_output |
| } |
| }, |
| notes = c( |
| "`quiet = FALSE` is not supported", |
| "Available formats are H, I, j, M, S, U, w, W, y, Y, R, T.", |
| "On Linux and OS X additionally a, A, b, B, Om, p, r are available." |
| ) |
| ) |
| |
| parser_vec <- c( |
| "ymd", "ydm", "mdy", "myd", "dmy", "dym", "ym", "my", "yq", |
| "ymd_HMS", "ymd_HM", "ymd_H", "dmy_HMS", "dmy_HM", "dmy_H", |
| "mdy_HMS", "mdy_HM", "mdy_H", "ydm_HMS", "ydm_HM", "ydm_H" |
| ) |
| |
| parser_map_factory <- function(order) { |
| force(order) |
| function(x, quiet = TRUE, tz = NULL, locale = NULL, truncated = 0) { |
| if (!is.null(locale)) { |
| arrow_not_supported("`locale`") |
| } |
| # Parsers returning datetimes return UTC by default and never return dates. |
| if (is.null(tz) && nchar(order) > 3) { |
| tz <- "UTC" |
| } |
| parse_x <- call_binding("parse_date_time", x, order, tz, truncated, quiet) |
| if (is.null(tz)) { |
| # we cast so we can mimic the behaviour of the `tz` argument in lubridate |
| # "If NULL (default), a Date object is returned. Otherwise a POSIXct with |
| # time zone attribute set to tz." |
| parse_x <- cast(parse_x, date32()) |
| } |
| parse_x |
| } |
| } |
| |
| for (order in parser_vec) { |
| register_binding( |
| paste0("lubridate::", tolower(order)), |
| parser_map_factory(order), |
| notes = "`locale` argument not supported" |
| ) |
| } |
| |
| register_binding( |
| "lubridate::fast_strptime", |
| function(x, format, tz = "UTC", lt = FALSE, cutoff_2000 = 68L) { |
| # `lt` controls the output `lt = TRUE` returns a POSIXlt (which doesn't play |
| # well with mutate, for example) |
| if (lt) { |
| arrow_not_supported("`lt = TRUE` argument") |
| } |
| |
| # TODO revisit after https://issues.apache.org/jira/browse/ARROW-16596 |
| if (cutoff_2000 != 68L) { |
| arrow_not_supported("`cutoff_2000` != 68L argument") |
| } |
| |
| parse_attempt_expressions <- list() |
| |
| parse_attempt_expressions <- map( |
| format, |
| ~ Expression$create( |
| "strptime", |
| x, |
| options = list( |
| format = .x, |
| unit = 0L, |
| error_is_null = TRUE |
| ) |
| ) |
| ) |
| |
| coalesce_output <- Expression$create("coalesce", args = parse_attempt_expressions) |
| |
| Expression$create("assume_timezone", coalesce_output, options = list(timezone = tz)) |
| }, |
| notes = "non-default values of `lt` and `cutoff_2000` not supported" |
| ) |
| } |
| |
| register_bindings_datetime_rounding <- function() { |
| register_binding( |
| "lubridate::round_date", |
| function(x, |
| unit = "second", |
| week_start = getOption("lubridate.week.start", 7)) { |
| opts <- parse_period_unit(unit) |
| if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start |
| return(shift_temporal_to_week("round_temporal", x, week_start, options = opts)) |
| } |
| |
| Expression$create("round_temporal", x, options = opts) |
| } |
| ) |
| |
| register_binding( |
| "lubridate::floor_date", |
| function(x, |
| unit = "second", |
| week_start = getOption("lubridate.week.start", 7)) { |
| opts <- parse_period_unit(unit) |
| if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start |
| return(shift_temporal_to_week("floor_temporal", x, week_start, options = opts)) |
| } |
| |
| Expression$create("floor_temporal", x, options = opts) |
| } |
| ) |
| |
| register_binding( |
| "lubridate::ceiling_date", |
| function(x, |
| unit = "second", |
| change_on_boundary = NULL, |
| week_start = getOption("lubridate.week.start", 7)) { |
| opts <- parse_period_unit(unit) |
| if (is.null(change_on_boundary)) { |
| change_on_boundary <- ifelse(call_binding("is.Date", x), TRUE, FALSE) |
| } |
| opts$ceil_is_strictly_greater <- change_on_boundary |
| |
| if (opts$unit == 7L) { # weeks (unit = 7L) need to accommodate week_start |
| return(shift_temporal_to_week("ceil_temporal", x, week_start, options = opts)) |
| } |
| |
| Expression$create("ceil_temporal", x, options = opts) |
| } |
| ) |
| } |