| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| #' Infer an R vector prototype |
| #' |
| #' Resolves the default `to` value to use in [convert_array()] and |
| #' [convert_array_stream()]. The default conversions are: |
| #' |
| #' - null to [vctrs::unspecified()] |
| #' - boolean to [logical()] |
| #' - int8, uint8, int16, uint16, and int32 to [integer()] |
| #' - uint32, int64, uint64, float, and double to [double()] |
| #' - string and large string to [character()] |
| #' - struct to [data.frame()] |
| #' - binary and large binary to [blob::blob()] |
| #' - list, large_list, and fixed_size_list to [vctrs::list_of()] |
| #' - time32 and time64 to [hms::hms()] |
| #' - duration to [difftime()] |
| #' - date32 to [as.Date()] |
| #' - timestamp to [as.POSIXct()] |
| #' |
| #' Additional conversions are possible by specifying an explicit value for |
| #' `to`. For details of each conversion, see [convert_array()]. |
| #' |
| #' @param x A [nanoarrow_schema][as_nanoarrow_schema], |
| #' [nanoarrow_array][as_nanoarrow_array], or |
| #' [nanoarrow_array_stream][as_nanoarrow_array_stream]. |
| #' |
| #' @return An R vector of zero size describing the target into which |
| #' the array should be materialized. |
| #' @export |
| #' |
| #' @examples |
| #' infer_nanoarrow_ptype(as_nanoarrow_array(1:10)) |
| #' |
infer_nanoarrow_ptype <- function(x) {
  # Reduce every accepted input to a schema: arrays and array streams are
  # asked (in C) for their schema, a schema is used as-is, and anything
  # else is rejected.
  schema <- if (inherits(x, "nanoarrow_array")) {
    .Call(nanoarrow_c_infer_schema_array, x)
  } else if (inherits(x, "nanoarrow_array_stream")) {
    .Call(nanoarrow_c_array_stream_get_schema, x)
  } else if (inherits(x, "nanoarrow_schema")) {
    x
  } else {
    stop("`x` must be a nanoarrow_schema(), nanoarrow_array(), or nanoarrow_array_stream()")
  }

  # The prototype itself is resolved in C from the schema
  .Call(nanoarrow_c_infer_ptype, schema)
}
| |
# Fallback used by nanoarrow_c_infer_ptype (i.e., invoked from C) once all of
# the conversions implemented in C have been tried. Some of these inferences
# could be ported to C for speed, but they are far less verbose to express
# in R.
infer_ptype_other <- function(schema) {
  # Use the raw parsed fields directly: the user-friendly versions are not
  # needed here and this code path is performance-sensitive
  fields <- .Call(nanoarrow_c_schema_parse, schema)

  if (!is.null(fields$extension_name)) {
    # Registered extension types get the first chance to resolve the ptype
    spec <- resolve_nanoarrow_extension(fields$extension_name)
    infer_nanoarrow_ptype_extension(spec, schema)
  } else {
    switch(
      fields$type,
      "na" = vctrs::unspecified(),
      "large_binary" = ,
      "binary" = new_blob_internal(),
      "time64" = ,
      "time32" = hms::hms(),
      "date32" = structure(numeric(), class = "Date"),
      "duration" = structure(numeric(), class = "difftime", units = "secs"),
      "timestamp" = ,
      "date64" = {
        tz <- fields$timezone
        if (is.null(tz) || tz == "") {
          # Passing "" along would silently assume the user's local timezone,
          # which is almost never wanted. Like readr, fall back to "UTC"
          # (DST-free) and let the user set something else explicitly later.
          tz <- getOption("nanoarrow.timezone_if_unspecified", "UTC")
        }

        structure(numeric(0), class = c("POSIXct", "POSIXt"), tzone = tz)
      },
      "fixed_size_list" = ,
      "list" = ,
      "large_list" = ,
      "map" = {
        # The element prototype comes from the first child schema
        vctrs::list_of(.ptype = infer_nanoarrow_ptype(schema$children[[1]]))
      },
      "dictionary" = {
        # R's factor could represent a dictionary of strings (probably the
        # most common case), but an array that arrives in chunks may carry a
        # different dictionary per chunk. Expanding the dictionary is the
        # best type-stable default available.
        infer_nanoarrow_ptype(schema$dictionary)
      },
      stop_cant_infer_ptype(schema, n = -1)
    )
  }
}
| |
# Signal a "can't infer" error for `schema`. The field name is included in
# the message when the schema has a non-empty one. `n` shifts which call is
# attached to the condition: the call is taken from sys.call(n - 1)
# evaluated in this function's frame.
stop_cant_infer_ptype <- function(schema, n = 0) {
  type_label <- nanoarrow_schema_formatted(schema)
  field_name <- schema$name
  has_name <- !is.null(field_name) && !identical(field_name, "")

  msg <- if (has_name) {
    sprintf(
      "Can't infer R vector type for `%s` <%s>",
      field_name,
      type_label
    )
  } else {
    sprintf(
      "Can't infer R vector type for <%s>",
      type_label
    )
  }

  # Resolve the reported call here, in this function's own frame
  reported_call <- sys.call(n - 1)
  stop(simpleError(msg, call = reported_call))
}
| |
# Build an empty blob prototype by hand. We attempt to load the blob
# namespace first, but the same object is returned whether or not that
# succeeds. This is not ideal, since the result may behave slightly
# differently when blob isn't installed; however, this conversion is used
# for printing buffers and is difficult to work around with the current
# conversion system.
new_blob_internal <- function() {
  # Best effort only: the return value is deliberately ignored
  requireNamespace("blob", quietly = TRUE)

  empty_blob <- list()
  attr(empty_blob, "ptype") <- raw(0)
  class(empty_blob) <- c("blob", "vctrs_list_of", "vctrs_vctr", "list")
  empty_blob
}