blob: 8e6d0fdb3ac7f695fb5b1e7b6ca6506e7abde97c [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#define R_NO_REMAP
#include <R.h>
#include <Rinternals.h>
#include "nanoarrow.h"
#include "altrep.h"
#include "array.h"
#include "array_view.h"
#include "convert.h"
#include "util.h"
// The common case of converting a single array into a single vector is
// defined here, powered by the generic conversion available via
// convert.h but special-casing the common case of "just use the defaults"
// (i.e., no need to allocate a zero-size ptype) and returning ALTREP
// where possible.
// borrow nanoarrow_c_infer_ptype() from infer_ptype.c
SEXP nanoarrow_c_infer_ptype(SEXP schema_xptr);
enum VectorType nanoarrow_infer_vector_type_array(SEXP array_xptr);
// This calls nanoarrow::convert_array() (via a package helper) to try S3
// dispatch to find a convert_array() method (or error if there
// isn't one)
static SEXP call_convert_array(SEXP array_xptr, SEXP ptype_sexp) {
SEXP fun = PROTECT(Rf_install("convert_fallback_other"));
// offset/length don't need to be modified in this case
SEXP call = PROTECT(Rf_lang5(fun, array_xptr, R_NilValue, R_NilValue, ptype_sexp));
SEXP result = PROTECT(Rf_eval(call, nanoarrow_ns_pkg));
UNPROTECT(3);
return result;
}
// Call stop_cant_convert_array(), which gives a more informative error
// message than we can provide in a reasonable amount of C code here.
// Because we opportunistically avoid allocating a ptype object, we might
// have to allocate one here.
static void call_stop_cant_convert_array(SEXP array_xptr, enum VectorType type,
SEXP ptype_sexp) {
SEXP fun = PROTECT(Rf_install("stop_cant_convert_array"));
if (ptype_sexp == R_NilValue) {
ptype_sexp = PROTECT(nanoarrow_alloc_type(type, 0));
SEXP call = PROTECT(Rf_lang3(fun, array_xptr, ptype_sexp));
Rf_eval(call, nanoarrow_ns_pkg);
UNPROTECT(3);
} else {
SEXP call = PROTECT(Rf_lang3(fun, array_xptr, ptype_sexp));
Rf_eval(call, nanoarrow_ns_pkg);
UNPROTECT(2);
}
}
static SEXP convert_array_default(SEXP array_xptr, enum VectorType vector_type,
SEXP ptype) {
SEXP converter_xptr;
if (ptype == R_NilValue) {
converter_xptr = PROTECT(nanoarrow_converter_from_type(vector_type));
} else {
converter_xptr = PROTECT(nanoarrow_converter_from_ptype(ptype));
}
if (nanoarrow_converter_set_schema(converter_xptr, array_xptr_get_schema(array_xptr)) !=
NANOARROW_OK) {
nanoarrow_converter_stop(converter_xptr);
}
if (nanoarrow_converter_set_array(converter_xptr, array_xptr) != NANOARROW_OK) {
nanoarrow_converter_stop(converter_xptr);
}
if (nanoarrow_converter_materialize_all(converter_xptr) != NANOARROW_OK) {
call_stop_cant_convert_array(array_xptr, vector_type, ptype);
}
if (nanoarrow_converter_finalize(converter_xptr) != NANOARROW_OK) {
nanoarrow_converter_stop(converter_xptr);
}
SEXP result = PROTECT(nanoarrow_converter_release_result(converter_xptr));
UNPROTECT(2);
return result;
}
static SEXP convert_array_chr(SEXP array_xptr, SEXP ptype_sexp) {
struct ArrowSchema* schema = schema_from_array_xptr(array_xptr);
struct ArrowSchemaView schema_view;
if (ArrowSchemaViewInit(&schema_view, schema, NULL) != NANOARROW_OK) {
Rf_error("Invalid schema");
}
// If array_xptr is an extension, use default conversion
int source_can_altrep;
switch (schema_view.type) {
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING:
source_can_altrep = 1;
break;
default:
source_can_altrep = 0;
}
if (!source_can_altrep || schema_view.extension_name.size_bytes > 0) {
// Default conversion requires a ptype: resolve it if not already specified
if (ptype_sexp == R_NilValue) {
ptype_sexp = PROTECT(nanoarrow_c_infer_ptype(array_xptr_get_schema(array_xptr)));
SEXP default_result =
PROTECT(convert_array_default(array_xptr, VECTOR_TYPE_CHR, ptype_sexp));
UNPROTECT(2);
return default_result;
} else {
return convert_array_default(array_xptr, VECTOR_TYPE_CHR, ptype_sexp);
}
}
struct ArrowArray* array = (struct ArrowArray*)R_ExternalPtrAddr(array_xptr);
if (array->dictionary == NULL) {
SEXP result = PROTECT(nanoarrow_c_make_altrep_chr(array_xptr));
if (result == R_NilValue) {
call_stop_cant_convert_array(array_xptr, VECTOR_TYPE_CHR, R_NilValue);
}
UNPROTECT(1);
return result;
} else {
return convert_array_default(array_xptr, VECTOR_TYPE_CHR, R_NilValue);
}
}
SEXP nanoarrow_c_convert_array(SEXP array_xptr, SEXP ptype_sexp);
static SEXP convert_array_data_frame(SEXP array_xptr, SEXP ptype_sexp) {
struct ArrowSchema* schema = schema_from_array_xptr(array_xptr);
struct ArrowSchemaView schema_view;
if (ArrowSchemaViewInit(&schema_view, schema, NULL) != NANOARROW_OK) {
Rf_error("Invalid schema");
}
// If array_xptr is an extension, union, or the ptype isn't a data.frame
// use convert/materialize convert behaviour.
// Default conversion requires a ptype: resolve it if not already specified
if (schema_view.storage_type != NANOARROW_TYPE_STRUCT ||
schema_view.extension_name.size_bytes > 0 ||
(ptype_sexp != R_NilValue && !Rf_inherits(ptype_sexp, "data.frame"))) {
if (ptype_sexp == R_NilValue) {
ptype_sexp = PROTECT(nanoarrow_c_infer_ptype(array_xptr_get_schema(array_xptr)));
SEXP default_result =
PROTECT(convert_array_default(array_xptr, VECTOR_TYPE_OTHER, ptype_sexp));
UNPROTECT(2);
return default_result;
} else {
return convert_array_default(array_xptr, VECTOR_TYPE_DATA_FRAME, ptype_sexp);
}
}
struct ArrowArray* array = nanoarrow_array_from_xptr(array_xptr);
R_xlen_t n_col = array->n_children;
SEXP result = PROTECT(Rf_allocVector(VECSXP, n_col));
if (ptype_sexp == R_NilValue) {
SEXP result_names = PROTECT(Rf_allocVector(STRSXP, n_col));
for (R_xlen_t i = 0; i < n_col; i++) {
SEXP child_xptr = PROTECT(borrow_array_child_xptr(array_xptr, i));
SET_VECTOR_ELT(result, i, nanoarrow_c_convert_array(child_xptr, R_NilValue));
UNPROTECT(1);
struct ArrowSchema* schema = schema_from_array_xptr(child_xptr);
if (schema->name != NULL) {
SET_STRING_ELT(result_names, i, Rf_mkCharCE(schema->name, CE_UTF8));
} else {
SET_STRING_ELT(result_names, i, Rf_mkChar(""));
}
}
Rf_setAttrib(result, R_NamesSymbol, result_names);
Rf_setAttrib(result, R_ClassSymbol, nanoarrow_cls_data_frame);
UNPROTECT(1);
} else {
if (n_col != Rf_xlength(ptype_sexp)) {
Rf_error("Expected data.frame() ptype with %ld column(s) but found %ld column(s)",
(long)n_col, (long)Rf_xlength(ptype_sexp));
}
for (R_xlen_t i = 0; i < n_col; i++) {
SEXP child_xptr = PROTECT(borrow_array_child_xptr(array_xptr, i));
SEXP child_ptype = VECTOR_ELT(ptype_sexp, i);
SET_VECTOR_ELT(result, i, nanoarrow_c_convert_array(child_xptr, child_ptype));
UNPROTECT(1);
}
Rf_setAttrib(result, R_NamesSymbol, Rf_getAttrib(ptype_sexp, R_NamesSymbol));
Rf_copyMostAttrib(ptype_sexp, result);
}
if (Rf_inherits(result, "data.frame")) {
nanoarrow_set_rownames(result, array->length);
}
UNPROTECT(1);
return result;
}
SEXP nanoarrow_c_convert_array(SEXP array_xptr, SEXP ptype_sexp) {
// See if we can skip any ptype resolution at all
if (ptype_sexp == R_NilValue) {
enum VectorType vector_type = nanoarrow_infer_vector_type_array(array_xptr);
switch (vector_type) {
case VECTOR_TYPE_LGL:
case VECTOR_TYPE_INT:
case VECTOR_TYPE_DBL:
return convert_array_default(array_xptr, vector_type, R_NilValue);
case VECTOR_TYPE_CHR:
return convert_array_chr(array_xptr, ptype_sexp);
case VECTOR_TYPE_DATA_FRAME:
return convert_array_data_frame(array_xptr, R_NilValue);
default:
break;
}
// Otherwise, resolve the ptype and use it (this will also error
// for ptypes that can't be resolved)
ptype_sexp = PROTECT(nanoarrow_c_infer_ptype(array_xptr_get_schema(array_xptr)));
SEXP result = nanoarrow_c_convert_array(array_xptr, ptype_sexp);
UNPROTECT(1);
return result;
}
// Handle some S3 objects internally to avoid S3 dispatch
// (e.g., when looping over a data frame with a lot of columns)
if (Rf_isObject(ptype_sexp)) {
if (nanoarrow_ptype_is_data_frame(ptype_sexp)) {
return convert_array_data_frame(array_xptr, ptype_sexp);
} else if (Rf_inherits(ptype_sexp, "vctrs_unspecified") ||
Rf_inherits(ptype_sexp, "blob") ||
Rf_inherits(ptype_sexp, "vctrs_list_of") ||
Rf_inherits(ptype_sexp, "Date") || Rf_inherits(ptype_sexp, "hms") ||
Rf_inherits(ptype_sexp, "POSIXct") ||
Rf_inherits(ptype_sexp, "difftime") ||
Rf_inherits(ptype_sexp, "integer64")) {
return convert_array_default(array_xptr, VECTOR_TYPE_UNINITIALIZED, ptype_sexp);
} else {
return call_convert_array(array_xptr, ptype_sexp);
}
}
// If we're here, these are non-S3 objects
switch (TYPEOF(ptype_sexp)) {
case LGLSXP:
return convert_array_default(array_xptr, VECTOR_TYPE_LGL, ptype_sexp);
case INTSXP:
return convert_array_default(array_xptr, VECTOR_TYPE_INT, ptype_sexp);
case REALSXP:
return convert_array_default(array_xptr, VECTOR_TYPE_DBL, ptype_sexp);
case STRSXP:
return convert_array_chr(array_xptr, ptype_sexp);
default:
return call_convert_array(array_xptr, ptype_sexp);
}
}