blob: 7348c210d28997fc0f6dbdf18303ff696193b8c8 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#' Create type objects
#'
#' In nanoarrow, types, fields, and schemas are all represented by a
#' [nanoarrow_schema][as_nanoarrow_schema]. These functions are convenience
#' constructors to create these objects in a readable way. Use [na_type()] to
#' construct types based on the constructor name, which is also the name that
#' prints/is returned by [nanoarrow_schema_parse()].
#'
#' @param type_name The name of the type (e.g., "int32"). This form of the
#' constructor is useful for writing tests that loop over many types.
#' @param byte_width For [na_fixed_size_binary()], the number of bytes
#' occupied by each item.
#' @param list_size The number of elements in each item in a
#' [na_fixed_size_list()].
#' @param precision The total number of digits representable by the decimal type
#' @param scale The number of digits after the decimal point in a decimal type
#' @param unit One of 's' (seconds), 'ms' (milliseconds), 'us' (microseconds),
#' or 'ns' (nanoseconds).
#' @param timezone A string representing a timezone name. The empty string ""
#' represents a naive point in time (i.e., one that has no associated
#' timezone).
#' @param column_types A `list()` of [nanoarrow_schema][as_nanoarrow_schema]s.
#' @param item_type For [na_list()], [na_large_list()], [na_fixed_size_list()],
#' and [na_map()], the [nanoarrow_schema][as_nanoarrow_schema] representing
#' the item type.
#' @param key_type The [nanoarrow_schema][as_nanoarrow_schema] representing the
#' [na_map()] key type.
#' @param index_type The [nanoarrow_schema][as_nanoarrow_schema] representing the
#' [na_dictionary()] index type.
#' @param value_type The [nanoarrow_schema][as_nanoarrow_schema] representing the
#' [na_dictionary()] or [na_map()] value type.
#' @param keys_sorted Use `TRUE` to assert that keys are sorted.
#' @param storage_type For [na_extension()], the underlying value type.
#' @param extension_name For [na_extension()], the extension name. This is
#' typically namespaced separated by dots (e.g., arrow.r.vctrs).
#' @param extension_metadata A string or raw vector defining extension metadata.
#' Most Arrow extension types define extension metadata as a JSON object.
#' @param nullable Use `FALSE` to assert that this field cannot contain
#' null values.
#' @param ordered Use `TRUE` to assert that the order of values in the
#' dictionary are meaningful.
#'
#' @return A [nanoarrow_schema][as_nanoarrow_schema]
#' @export
#'
#' @examples
#' na_int32()
#' na_struct(list(col1 = na_int32()))
#'
na_type <- function(type_name, byte_width = NULL, unit = NULL, timezone = NULL,
column_types = NULL, item_type = NULL, key_type = NULL,
value_type = NULL, index_type = NULL, ordered = NULL,
list_size = NULL, keys_sorted = NULL, storage_type = NULL,
extension_name = NULL, extension_metadata = NULL,
nullable = NULL) {
# Create a call and evaluate it. This leads to reasonable error messages
# regarding nonexistent type names and extraneous or missing parameters.
args <- list(
byte_width = byte_width,
unit = unit,
timezone = timezone,
column_types = column_types,
item_type = item_type,
key_type = key_type,
value_type = value_type,
index_type = index_type,
ordered = ordered,
list_size = list_size,
keys_sorted = keys_sorted,
storage_type = storage_type,
extension_name = extension_name,
extension_metadata = extension_metadata,
nullable = nullable
)
args <- args[!vapply(args, is.null, logical(1))]
constructor <- as.symbol(paste0("na_", type_name))
call_obj <- as.call(c(list(constructor), args))
eval(call_obj)
}
#' @rdname na_type
#' @export
na_na <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE[["NA"]], isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_bool <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$BOOL, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_int8 <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$INT8, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_uint8 <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$UINT8, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_int16 <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$INT16, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_uint16 <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$UINT16, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_int32 <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$INT32, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_uint32 <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$UINT32, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_int64 <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$INT64, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_uint64 <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$UINT64, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_half_float <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$HALF_FLOAT, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_float <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$FLOAT, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_double <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$DOUBLE, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_string <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$STRING, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_large_string <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$LARGE_STRING, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_binary <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$BINARY, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_large_binary <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$LARGE_BINARY, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_fixed_size_binary <- function(byte_width, nullable = TRUE) {
.Call(
nanoarrow_c_schema_init_fixed_size,
NANOARROW_TYPE$FIXED_SIZE_BINARY,
as.integer(byte_width)[1],
isTRUE(nullable)
)
}
#' @rdname na_type
#' @export
na_date32 <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$DATE32, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_date64 <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$DATE64, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_time32 <- function(unit = c("ms", "s"), nullable = TRUE) {
unit <- match.arg(unit)
.Call(
nanoarrow_c_schema_init_date_time,
NANOARROW_TYPE$TIME32,
time_unit_id(unit),
NULL,
isTRUE(nullable)
)
}
#' @rdname na_type
#' @export
na_time64 <- function(unit = c("us", "ns"), nullable = TRUE) {
unit <- match.arg(unit)
.Call(
nanoarrow_c_schema_init_date_time,
NANOARROW_TYPE$TIME64,
time_unit_id(unit),
NULL,
isTRUE(nullable)
)
}
#' @rdname na_type
#' @export
na_duration <- function(unit = c("ms", "s", "us", "ns"), nullable = TRUE) {
unit <- match.arg(unit)
.Call(
nanoarrow_c_schema_init_date_time,
NANOARROW_TYPE$DURATION,
time_unit_id(unit),
NULL,
isTRUE(nullable)
)
}
#' @rdname na_type
#' @export
na_interval_months <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$INTERVAL_MONTHS, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_interval_day_time <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$INTERVAL_DAY_TIME, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_interval_month_day_nano <- function(nullable = TRUE) {
.Call(nanoarrow_c_schema_init, NANOARROW_TYPE$INTERVAL_MONTH_DAY_NANO, isTRUE(nullable))
}
#' @rdname na_type
#' @export
na_timestamp <- function(unit = c("us", "ns", "s", "ms"), timezone = "", nullable = TRUE) {
unit <- match.arg(unit)
if (!is.character(timezone) || length(timezone) != 1 || is.na(timezone)) {
stop("`timezone` must be character(1)")
}
.Call(
nanoarrow_c_schema_init_date_time,
NANOARROW_TYPE$TIMESTAMP,
time_unit_id(unit),
timezone,
isTRUE(nullable)
)
}
#' @rdname na_type
#' @export
na_decimal128 <- function(precision, scale, nullable = TRUE) {
.Call(
nanoarrow_c_schema_init_decimal,
NANOARROW_TYPE$DECIMAL128,
as.integer(precision)[1],
as.integer(scale)[1],
isTRUE(nullable)
)
}
#' @rdname na_type
#' @export
na_decimal256 <- function(precision, scale, nullable = TRUE) {
.Call(
nanoarrow_c_schema_init_decimal,
NANOARROW_TYPE$DECIMAL256,
as.integer(precision)[1],
as.integer(scale)[1],
isTRUE(nullable)
)
}
#' @rdname na_type
#' @export
na_struct <- function(column_types = list(), nullable = FALSE) {
schema <- .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$STRUCT, isTRUE(nullable))
schema$children <- column_types
schema
}
#' @rdname na_type
#' @export
na_sparse_union <- function(column_types = list()) {
schema <- na_struct(column_types)
schema$format <- paste0("+us:", paste(seq_along(schema$children) - 1L, collapse = ","))
schema
}
#' @rdname na_type
#' @export
na_dense_union <- function(column_types = list()) {
schema <- na_struct(column_types)
schema$format <- paste0("+ud:", paste(seq_along(schema$children) - 1L, collapse = ","))
schema
}
#' @rdname na_type
#' @export
na_list <- function(item_type, nullable = TRUE) {
schema <- .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$LIST, isTRUE(nullable))
schema$children[[1]] <- item_type
schema
}
#' @rdname na_type
#' @export
na_large_list <- function(item_type, nullable = TRUE) {
schema <- .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$LARGE_LIST, isTRUE(nullable))
schema$children[[1]] <- item_type
schema
}
#' @rdname na_type
#' @export
na_fixed_size_list <- function(item_type, list_size, nullable = TRUE) {
schema <- .Call(
nanoarrow_c_schema_init_fixed_size,
NANOARROW_TYPE$FIXED_SIZE_LIST,
as.integer(list_size)[1],
isTRUE(nullable)
)
schema$children[[1]] <- item_type
schema
}
#' @rdname na_type
#' @export
na_map <- function(key_type, item_type, keys_sorted = FALSE, nullable = TRUE) {
schema <- .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$MAP, isTRUE(nullable))
schema$children[[1]]$children[[1]] <- key_type
schema$children[[1]]$children[[2]] <- item_type
schema
}
#' @rdname na_type
#' @export
na_dictionary <- function(value_type, index_type = na_int32(), ordered = FALSE) {
index_type <- as_nanoarrow_schema(index_type)
index_type$dictionary <- value_type
if (ordered) {
index_type$flags <- bitwOr(index_type$flags, ARROW_FLAG$DICTIONARY_ORDERED)
} else {
index_type$flags <- bitwAnd(
index_type$flags,
bitwNot(ARROW_FLAG$DICTIONARY_ORDERED)
)
}
index_type
}
#' @rdname na_type
#' @export
na_extension <- function(storage_type, extension_name, extension_metadata = "") {
storage_type <- as_nanoarrow_schema(storage_type)
new_metadata <- list(
"ARROW:extension:name" = extension_name,
"ARROW:extension:metadata" = extension_metadata
)
new_metadata <- c(new_metadata, storage_type$metadata)
storage_type$metadata <- new_metadata[unique(names(new_metadata))]
storage_type
}
time_unit_id <- function(time_unit) {
match(time_unit, c("s", "ms", "us", "ns")) - 1L
}
# These values aren't guaranteed to stay stable between nanoarrow versions,
# so we keep them internal but use them in these functions to simplify the
# number of C functions we need to build all the types.
NANOARROW_TYPE <- list(
UNINITIALIZED = 0,
"NA" = 1L,
BOOL = 2L,
UINT8 = 3L,
INT8 = 4L,
UINT16 = 5L,
INT16 = 6L,
UINT32 = 7L,
INT32 = 8L,
UINT64 = 9L,
INT64 = 10L,
HALF_FLOAT = 11L,
FLOAT = 12L,
DOUBLE = 13L,
STRING = 14L,
BINARY = 15L,
FIXED_SIZE_BINARY = 16L,
DATE32 = 17L,
DATE64 = 18L,
TIMESTAMP = 19L,
TIME32 = 20L,
TIME64 = 21L,
INTERVAL_MONTHS = 22L,
INTERVAL_DAY_TIME = 23L,
DECIMAL128 = 24L,
DECIMAL256 = 25L,
LIST = 26L,
STRUCT = 27L,
SPARSE_UNION = 28L,
DENSE_UNION = 29L,
DICTIONARY = 30L,
MAP = 31L,
EXTENSION = 32L,
FIXED_SIZE_LIST = 33L,
DURATION = 34L,
LARGE_STRING = 35L,
LARGE_BINARY = 36L,
LARGE_LIST = 37L,
INTERVAL_MONTH_DAY_NANO = 38L
)
ARROW_FLAG <- list(
DICTIONARY_ORDERED = 1L,
NULLABLE = 2L,
MAP_KEYS_SORTED = 4L
)