| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "./arrow_types.h" |
| |
| #include <arrow/array.h> |
| #include <arrow/chunk_resolver.h> |
| #include <arrow/chunked_array.h> |
| #include <arrow/compute/api.h> |
| #include <arrow/util/bitmap_reader.h> |
| #include <arrow/visit_data_inline.h> |
| |
| #include <cpp11/declarations.hpp> |
| |
| #include <R_ext/Altrep.h> |
| |
| #include "./r_task_group.h" |
| |
| // defined in array_to_vector.cpp |
| SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array); |
| |
| namespace arrow { |
| namespace r { |
| namespace altrep { |
| |
| namespace { |
| template <typename c_type> |
| R_xlen_t Standard_Get_region(SEXP data2, R_xlen_t i, R_xlen_t n, c_type* buf); |
| |
| template <> |
| R_xlen_t Standard_Get_region<double>(SEXP data2, R_xlen_t i, R_xlen_t n, double* buf) { |
| return REAL_GET_REGION(data2, i, n, buf); |
| } |
| |
| template <> |
| R_xlen_t Standard_Get_region<int>(SEXP data2, R_xlen_t i, R_xlen_t n, int* buf) { |
| return INTEGER_GET_REGION(data2, i, n, buf); |
| } |
| |
| template <typename T> |
| void DeletePointer(std::shared_ptr<T>* ptr) { |
| delete ptr; |
| } |
| |
| template <typename T> |
| using Pointer = cpp11::external_pointer<std::shared_ptr<T>, DeletePointer<T>>; |
| |
| using ChunkResolver = arrow::ChunkResolver; |
| using ChunkLocation = arrow::ChunkLocation; |
| |
| class ArrowAltrepData { |
| public: |
| explicit ArrowAltrepData(const std::shared_ptr<ChunkedArray>& chunked_array) |
| : chunked_array_(chunked_array), resolver_(chunked_array->chunks()) {} |
| |
| const std::shared_ptr<ChunkedArray>& chunked_array() { return chunked_array_; } |
| |
| ChunkLocation locate(int64_t index) { return resolver_.Resolve(index); } |
| |
| private: |
| std::shared_ptr<ChunkedArray> chunked_array_; |
| ChunkResolver resolver_; |
| }; |
| |
| // the ChunkedArray that is being wrapped by the altrep object |
| const std::shared_ptr<ChunkedArray>& GetChunkedArray(SEXP alt) { |
| auto array_data = |
| reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt))); |
| return array_data->chunked_array(); |
| } |
| |
| // base class for all altrep vectors |
| // |
| // data1: the Array as an external pointer; becomes NULL when |
| // materialization is needed. |
| // data2: starts as NULL, and becomes a standard R vector with the same |
| // data if necessary: if materialization is needed, e.g. if we need |
| // to access its data pointer, with INTEGER(), REAL(), etc. |
| template <typename Impl> |
| struct AltrepVectorBase { |
| // store the Array as an external pointer in data1, mark as immutable |
| static SEXP Make(const std::shared_ptr<ChunkedArray>& chunked_array) { |
| SEXP alt = R_new_altrep( |
| Impl::class_t, |
| external_pointer<ArrowAltrepData>(new ArrowAltrepData(chunked_array)), |
| R_NilValue); |
| MARK_NOT_MUTABLE(alt); |
| |
| return alt; |
| } |
| |
| // Is the vector materialized, i.e. does the data2 slot contain a |
| // standard R vector with the same data as the array. |
| static bool IsMaterialized(SEXP alt) { return !Rf_isNull(Impl::Representation(alt)); } |
| |
| static R_xlen_t Length(SEXP alt) { |
| if (IsMaterialized(alt)) { |
| return Rf_xlength(Representation(alt)); |
| } else { |
| return GetChunkedArray(alt)->length(); |
| } |
| } |
| |
| static int No_NA(SEXP alt) { |
| if (IsMaterialized(alt)) { |
| return false; |
| } |
| |
| return GetChunkedArray(alt)->null_count() == 0; |
| } |
| |
| static int Is_sorted(SEXP alt) { return UNKNOWN_SORTEDNESS; } |
| |
| // What gets printed on .Internal(inspect(<the altrep object>)) |
| static Rboolean Inspect(SEXP alt, int pre, int deep, int pvec, |
| void (*inspect_subtree)(SEXP, int, int, int)) { |
| SEXP class_sym = R_altrep_class_name(alt); |
| const char* class_name = CHAR(PRINTNAME(class_sym)); |
| |
| if (IsMaterialized(alt)) { |
| Rprintf("materialized %s len=%ld\n", class_name, |
| static_cast<long>(Rf_xlength(Representation(alt)))); // NOLINT: runtime/int |
| } else { |
| const auto& chunked_array = GetChunkedArray(alt); |
| Rprintf("%s<%p, %s, %d chunks, %ld nulls> len=%ld\n", class_name, |
| reinterpret_cast<void*>(chunked_array.get()), |
| chunked_array->type()->ToString().c_str(), chunked_array->num_chunks(), |
| static_cast<long>(chunked_array->null_count()), // NOLINT: runtime/int |
| static_cast<long>(chunked_array->length())); // NOLINT: runtime/int |
| } |
| |
| return TRUE; |
| } |
| |
| // Materialize and then duplicate data2 |
| static SEXP Duplicate(SEXP alt, Rboolean /* deep */) { |
| return Rf_duplicate(Impl::Materialize(alt)); |
| } |
| |
| static SEXP Coerce(SEXP alt, int type) { |
| return Rf_coerceVector(Impl::Materialize(alt), type); |
| } |
| |
| static SEXP Serialized_state(SEXP alt) { return Impl::Materialize(alt); } |
| |
| static SEXP Unserialize(SEXP /* class_ */, SEXP state) { return state; } |
| |
| // default methods used when data2 is the representation |
| // this is overridden when data2 needs to be richer (e.g. for factors) |
| static SEXP Representation(SEXP alt) { return R_altrep_data2(alt); } |
| |
| static void SetRepresentation(SEXP alt, SEXP x) { R_set_altrep_data2(alt, x); } |
| }; |
| |
| // altrep R vector shadowing an primitive (int or double) Array. |
| // |
| // This tries as much as possible to directly use the data |
| // from the Array and minimize data copies. |
| template <int sexp_type> |
| struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sexp_type>> { |
| using Base = AltrepVectorBase<AltrepVectorPrimitive<sexp_type>>; |
| |
| // singleton altrep class description |
| static R_altrep_class_t class_t; |
| |
| using c_type = typename std::conditional<sexp_type == REALSXP, double, int>::type; |
| using Base::IsMaterialized; |
| using Base::Representation; |
| using Base::SetRepresentation; |
| |
| // Force materialization. After calling this, the data2 slot of the altrep |
| // object contains a standard R vector with the same data, with |
| // R sentinels where the Array has nulls. This method also releases the |
| // reference to the original ChunkedArray. |
| static SEXP Materialize(SEXP alt) { |
| if (!IsMaterialized(alt)) { |
| auto size = Base::Length(alt); |
| |
| // create a standard R vector |
| SEXP copy = PROTECT(Rf_allocVector(sexp_type, size)); |
| |
| // copy the data from the array, through Get_region |
| if constexpr (std::is_same_v<c_type, double>) { |
| Get_region(alt, 0, size, REAL(copy)); |
| } else if constexpr (std::is_same_v<c_type, int>) { |
| Get_region(alt, 0, size, INTEGER(copy)); |
| } else { |
| static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, int>, |
| "ALTREP not implemented for this c_type"); |
| } |
| |
| // store as data2, this is now considered materialized |
| SetRepresentation(alt, copy); |
| |
| // we no longer need the original ChunkedArray (keeping it alive uses more |
| // memory than is required, since our methods can now use the |
| // materialized array) |
| R_set_altrep_data1(alt, R_NilValue); |
| |
| UNPROTECT(1); |
| } |
| return Representation(alt); |
| } |
| |
| // R calls this to get a pointer to the start of the vector data |
| // but only if this is possible without allocating (in the R sense). |
| static const void* Dataptr_or_null(SEXP alt) { |
| // data2 has been created, and so the R sentinels are in place where the array has |
| // nulls |
| if (IsMaterialized(alt)) { |
| return DATAPTR_RO(Representation(alt)); |
| } |
| |
| // there is only one chunk with no nulls, we can directly return the start of its data |
| auto chunked_array = GetChunkedArray(alt); |
| if (chunked_array->num_chunks() == 1 && chunked_array->null_count() == 0) { |
| return reinterpret_cast<const void*>( |
| chunked_array->chunk(0)->data()->template GetValues<c_type>(1)); |
| } |
| |
| // Otherwise: if the array has nulls and data2 has not been generated: give up |
| return nullptr; |
| } |
| |
| // R calls this to get a pointer to the start of the data, R allocations are allowed. |
| static void* Dataptr(SEXP alt, Rboolean writeable) { |
| // If the object hasn't been materialized, and the array has no |
| // nulls we can directly point to the array data. |
| if (!IsMaterialized(alt)) { |
| const auto& chunked_array = GetChunkedArray(alt); |
| |
| if (chunked_array->num_chunks() == 1 && chunked_array->null_count() == 0) { |
| return reinterpret_cast<void*>(const_cast<c_type*>( |
| chunked_array->chunk(0)->data()->template GetValues<c_type>(1))); |
| } |
| } |
| |
| // Otherwise we have to materialize and hand the pointer to data2 |
| if constexpr (std::is_same_v<c_type, double>) { |
| return REAL(Materialize(alt)); |
| } else if constexpr (std::is_same_v<c_type, int>) { |
| return INTEGER(Materialize(alt)); |
| } else { |
| static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, int>, |
| "ALTREP not implemented for this c_type"); |
| } |
| } |
| |
| // The value at position i |
| static c_type Elt(SEXP alt, R_xlen_t i) { |
| if (IsMaterialized(alt)) { |
| if constexpr (std::is_same_v<c_type, double>) { |
| return REAL(Representation(alt))[i]; |
| } else if constexpr (std::is_same_v<c_type, int>) { |
| return INTEGER(Representation(alt))[i]; |
| } else { |
| static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, int>, |
| "ALTREP not implemented for this c_type"); |
| } |
| } |
| |
| auto altrep_data = |
| reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt))); |
| auto resolve = altrep_data->locate(i); |
| const auto& array = |
| altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index)); |
| auto j = resolve.index_in_chunk; |
| |
| return array->IsNull(j) ? cpp11::na<c_type>() |
| : array->data()->template GetValues<c_type>(1)[j]; |
| } |
| |
| // R calls this when it wants data from position `i` to `i + n` copied into `buf` |
| // The returned value is the number of values that were really copied |
| // (this can be lower than n) |
| static R_xlen_t Get_region(SEXP alt, R_xlen_t i, R_xlen_t n, c_type* buf) { |
| // If we have data2, we can just copy the region into buf |
| // using the standard Get_region for this R type |
| if (IsMaterialized(alt)) { |
| return Standard_Get_region<c_type>(Representation(alt), i, n, buf); |
| } |
| |
| // The vector was not materialized, aka we don't have data2 |
| // |
| // In that case, we copy the data from the Array, and then |
| // do a second pass to force the R sentinels for where the |
| // array has nulls |
| // |
| // This only materializes the region into buf (not the entire vector). |
| auto slice = GetChunkedArray(alt)->Slice(i, n); |
| R_xlen_t ncopy = slice->length(); |
| |
| c_type* out = buf; |
| for (const auto& array : slice->chunks()) { |
| auto n_i = array->length(); |
| |
| // first copy the data buffer |
| memcpy(out, array->data()->template GetValues<c_type>(1), n_i * sizeof(c_type)); |
| |
| // then set the R NA sentinels if needed |
| if (array->null_count() > 0) { |
| internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), |
| array->offset(), n_i); |
| |
| for (R_xlen_t j = 0; j < n_i; j++, bitmap_reader.Next()) { |
| if (bitmap_reader.IsNotSet()) { |
| out[j] = cpp11::na<c_type>(); |
| } |
| } |
| } |
| |
| out += n_i; |
| } |
| |
| return ncopy; |
| } |
| |
| static std::shared_ptr<arrow::compute::ScalarAggregateOptions> NaRmOptions(bool na_rm) { |
| auto options = std::make_shared<arrow::compute::ScalarAggregateOptions>( |
| arrow::compute::ScalarAggregateOptions::Defaults()); |
| options->min_count = 0; |
| options->skip_nulls = na_rm; |
| return options; |
| } |
| |
| template <bool Min> |
| static SEXP MinMax(SEXP alt, Rboolean narm) { |
| if (IsMaterialized(alt)) { |
| return nullptr; |
| } |
| |
| using data_type = typename std::conditional<sexp_type == REALSXP, double, int>::type; |
| using scalar_type = |
| typename std::conditional<sexp_type == INTSXP, Int32Scalar, DoubleScalar>::type; |
| |
| const auto& chunked_array = GetChunkedArray(alt); |
| bool na_rm = narm == TRUE; |
| auto n = chunked_array->length(); |
| auto null_count = chunked_array->null_count(); |
| if ((na_rm || n == 0) && null_count == n) { |
| if (Min) { |
| Rf_warning("no non-missing arguments to min; returning Inf"); |
| return Rf_ScalarReal(R_PosInf); |
| } else { |
| Rf_warning("no non-missing arguments to max; returning -Inf"); |
| return Rf_ScalarReal(R_NegInf); |
| } |
| } |
| if (!na_rm && null_count > 0) { |
| return cpp11::as_sexp(cpp11::na<data_type>()); |
| } |
| |
| auto options = NaRmOptions(na_rm); |
| |
| const auto& minmax = ValueOrStop( |
| arrow::compute::CallFunction("min_max", {chunked_array}, options.get())); |
| const auto& minmax_scalar = |
| internal::checked_cast<const StructScalar&>(*minmax.scalar()); |
| |
| const auto& result_scalar = internal::checked_cast<const scalar_type&>( |
| *ValueOrStop(minmax_scalar.field(Min ? "min" : "max"))); |
| return cpp11::as_sexp(result_scalar.value); |
| } |
| |
| static SEXP Min(SEXP alt, Rboolean narm) { return MinMax<true>(alt, narm); } |
| |
| static SEXP Max(SEXP alt, Rboolean narm) { return MinMax<false>(alt, narm); } |
| |
| static SEXP Sum(SEXP alt, Rboolean narm) { |
| if (IsMaterialized(alt)) { |
| return nullptr; |
| } |
| |
| using data_type = typename std::conditional<sexp_type == REALSXP, double, int>::type; |
| |
| const auto& chunked_array = GetChunkedArray(alt); |
| bool na_rm = narm == TRUE; |
| auto null_count = chunked_array->null_count(); |
| |
| if (!na_rm && null_count > 0) { |
| return cpp11::as_sexp(cpp11::na<data_type>()); |
| } |
| auto options = NaRmOptions(na_rm); |
| |
| const auto& sum = |
| ValueOrStop(arrow::compute::CallFunction("sum", {chunked_array}, options.get())); |
| |
| if (sexp_type == INTSXP) { |
| // When calling the "sum" function on an int32 array, we get an Int64 scalar |
| // in case of overflow, make it a double like R |
| int64_t value = internal::checked_cast<const Int64Scalar&>(*sum.scalar()).value; |
| if (value <= INT32_MIN || value > INT32_MAX) { |
| return Rf_ScalarReal(static_cast<double>(value)); |
| } else { |
| return Rf_ScalarInteger(static_cast<int>(value)); |
| } |
| } else { |
| return Rf_ScalarReal( |
| internal::checked_cast<const DoubleScalar&>(*sum.scalar()).value); |
| } |
| } |
| }; |
| template <int sexp_type> |
| R_altrep_class_t AltrepVectorPrimitive<sexp_type>::class_t; |
| |
| struct AltrepFactor : public AltrepVectorBase<AltrepFactor> { |
| // singleton altrep class description |
| static R_altrep_class_t class_t; |
| |
| using Base = AltrepVectorBase<AltrepFactor>; |
| using Base::IsMaterialized; |
| |
| static R_xlen_t Length(SEXP alt) { |
| if (IsMaterialized(alt)) { |
| return Rf_xlength(Representation(alt)); |
| } else { |
| return GetChunkedArray(alt)->length(); |
| } |
| } |
| |
| // redefining because data2 is a paired list with the representation as the |
| // first node: the CAR |
| static SEXP Representation(SEXP alt) { return CAR(R_altrep_data2(alt)); } |
| |
| static void SetRepresentation(SEXP alt, SEXP x) { SETCAR(R_altrep_data2(alt), x); } |
| |
| // The CADR(data2) is used to store the transposed arrays when unification is needed |
| // In that case we store a vector of Buffers |
| using BufferVector = std::vector<std::shared_ptr<Buffer>>; |
| |
| static bool WasUnified(SEXP alt) { return !Rf_isNull(CADR(R_altrep_data2(alt))); } |
| |
| static const std::shared_ptr<Buffer>& GetArrayTransposed(SEXP alt, int i) { |
| const auto& arrays = *Pointer<BufferVector>(CADR(R_altrep_data2(alt))); |
| return (*arrays)[i]; |
| } |
| |
| static SEXP Make(const std::shared_ptr<ChunkedArray>& chunked_array) { |
| // only dealing with dictionaries of strings |
| if (internal::checked_cast<const DictionaryArray&>(*chunked_array->chunk(0)) |
| .dictionary() |
| ->type_id() != Type::STRING) { |
| return R_NilValue; |
| } |
| |
| bool need_unification = DictionaryChunkArrayNeedUnification(chunked_array); |
| |
| std::shared_ptr<Array> dictionary; |
| SEXP pointer_arrays_transpose; |
| |
| if (need_unification) { |
| const auto& arr_type = |
| internal::checked_cast<const DictionaryType&>(*chunked_array->type()); |
| std::unique_ptr<arrow::DictionaryUnifier> unifier_ = |
| ValueOrStop(DictionaryUnifier::Make(arr_type.value_type())); |
| |
| int n_arrays = chunked_array->num_chunks(); |
| BufferVector arrays_transpose(n_arrays); |
| |
| for (int i = 0; i < n_arrays; i++) { |
| const auto& dict_i = |
| *internal::checked_cast<const DictionaryArray&>(*chunked_array->chunk(i)) |
| .dictionary(); |
| StopIfNotOk(unifier_->Unify(dict_i, &arrays_transpose[i])); |
| } |
| |
| std::shared_ptr<DataType> out_type; |
| StopIfNotOk(unifier_->GetResult(&out_type, &dictionary)); |
| |
| Pointer<BufferVector> ptr( |
| new std::shared_ptr<BufferVector>(new BufferVector(arrays_transpose))); |
| pointer_arrays_transpose = PROTECT(ptr); |
| } else { |
| // just use the first one |
| const auto& dict_array = |
| internal::checked_cast<const DictionaryArray&>(*chunked_array->chunk(0)); |
| dictionary = dict_array.dictionary(); |
| |
| pointer_arrays_transpose = PROTECT(R_NilValue); |
| } |
| |
| // the chunked array as data1 |
| SEXP data1 = |
| PROTECT(external_pointer<ArrowAltrepData>(new ArrowAltrepData(chunked_array))); |
| |
| // a pairlist with the representation in the first node |
| SEXP data2 = PROTECT(Rf_list2(R_NilValue, // representation, empty at first |
| pointer_arrays_transpose)); |
| |
| SEXP alt = PROTECT(R_new_altrep(class_t, data1, data2)); |
| MARK_NOT_MUTABLE(alt); |
| |
| // set factor attributes |
| Rf_setAttrib(alt, R_LevelsSymbol, Array__as_vector(dictionary)); |
| |
| if (internal::checked_cast<const DictionaryType&>(*chunked_array->type()).ordered()) { |
| Rf_classgets(alt, arrow::r::data::classes_ordered); |
| } else { |
| Rf_classgets(alt, arrow::r::data::classes_factor); |
| } |
| |
| UNPROTECT(4); |
| return alt; |
| } |
| |
| // TODO: this is similar to the primitive Materialize |
| static SEXP Materialize(SEXP alt) { |
| if (!IsMaterialized(alt)) { |
| auto size = Base::Length(alt); |
| |
| // create a standard R vector |
| SEXP copy = PROTECT(Rf_allocVector(INTSXP, size)); |
| |
| // copy the data from the array, through Get_region |
| Get_region(alt, 0, size, INTEGER(copy)); |
| |
| // store as data2, this is now considered materialized |
| SetRepresentation(alt, copy); |
| |
| // remove the ChunkedArray reference |
| R_set_altrep_data1(alt, R_NilValue); |
| |
| UNPROTECT(1); |
| } |
| return Representation(alt); |
| } |
| |
| static const void* Dataptr_or_null(SEXP alt) { |
| if (IsMaterialized(alt)) { |
| return DATAPTR_RO(Representation(alt)); |
| } |
| |
| return nullptr; |
| } |
| |
| static void* Dataptr(SEXP alt, Rboolean writeable) { return INTEGER(Materialize(alt)); } |
| |
| static SEXP Duplicate(SEXP alt, Rboolean /* deep */) { |
| // the representation integer vector |
| SEXP dup = PROTECT(Rf_shallow_duplicate(Materialize(alt))); |
| |
| // copy attributes from the altrep object |
| DUPLICATE_ATTRIB(dup, alt); |
| |
| UNPROTECT(1); |
| return dup; |
| } |
| |
| // The value at position i as an int64_t (to make bounds checking less verbose) |
| static int64_t Elt64(SEXP alt, R_xlen_t i) { |
| auto altrep_data = |
| reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt))); |
| auto resolve = altrep_data->locate(i); |
| |
| const auto& array = |
| altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index)); |
| auto j = resolve.index_in_chunk; |
| |
| if (!array->IsNull(j)) { |
| const auto& indices = |
| internal::checked_cast<const DictionaryArray&>(*array).indices(); |
| |
| if (WasUnified(alt)) { |
| const auto* transpose_data = reinterpret_cast<const int32_t*>( |
| GetArrayTransposed(alt, static_cast<int>(resolve.chunk_index))->data()); |
| |
| switch (indices->type_id()) { |
| case Type::UINT8: |
| return transpose_data[indices->data()->GetValues<uint8_t>(1)[j]] + 1; |
| case Type::INT8: |
| return transpose_data[indices->data()->GetValues<int8_t>(1)[j]] + 1; |
| case Type::UINT16: |
| return transpose_data[indices->data()->GetValues<uint16_t>(1)[j]] + 1; |
| case Type::INT16: |
| return transpose_data[indices->data()->GetValues<int16_t>(1)[j]] + 1; |
| case Type::INT32: |
| return transpose_data[indices->data()->GetValues<int32_t>(1)[j]] + 1; |
| case Type::UINT32: |
| return transpose_data[indices->data()->GetValues<uint32_t>(1)[j]] + 1; |
| case Type::INT64: |
| return transpose_data[indices->data()->GetValues<int64_t>(1)[j]] + 1; |
| case Type::UINT64: |
| return transpose_data[indices->data()->GetValues<uint64_t>(1)[j]] + 1; |
| default: |
| break; |
| } |
| } else { |
| switch (indices->type_id()) { |
| case Type::UINT8: |
| return indices->data()->GetValues<uint8_t>(1)[j] + 1; |
| case Type::INT8: |
| return indices->data()->GetValues<int8_t>(1)[j] + 1; |
| case Type::UINT16: |
| return indices->data()->GetValues<uint16_t>(1)[j] + 1; |
| case Type::INT16: |
| return indices->data()->GetValues<int16_t>(1)[j] + 1; |
| case Type::INT32: |
| return indices->data()->GetValues<int32_t>(1)[j] + 1; |
| case Type::UINT32: |
| return indices->data()->GetValues<uint32_t>(1)[j] + 1; |
| case Type::INT64: |
| return indices->data()->GetValues<int64_t>(1)[j] + 1; |
| case Type::UINT64: |
| return static_cast<int64_t>(indices->data()->GetValues<uint64_t>(1)[j] + 1); |
| default: |
| break; |
| } |
| } |
| } |
| |
| // not reached |
| return NA_INTEGER; |
| } |
| |
| // The value at position i as an int (which R needs because this is a factor) |
| static int Elt(SEXP alt, R_xlen_t i) { |
| if (Base::IsMaterialized(alt)) { |
| return INTEGER_ELT(Representation(alt), i); |
| } |
| |
| int64_t elt64 = Elt64(alt, i); |
| ARROW_R_DCHECK(elt64 == NA_INTEGER || elt64 >= 1); |
| ARROW_R_DCHECK(elt64 <= std::numeric_limits<int>::max()); |
| return static_cast<int>(elt64); |
| } |
| |
| static R_xlen_t Get_region(SEXP alt, R_xlen_t start, R_xlen_t n, int* buf) { |
| // If we have data2, we can just copy the region into buf |
| // using the standard Get_region for this R type |
| if (Base::IsMaterialized(alt)) { |
| return Standard_Get_region<int>(Representation(alt), start, n, buf); |
| } |
| |
| auto chunked_array = GetChunkedArray(alt); |
| |
| // get out if there is nothing to do |
| auto chunked_array_size = chunked_array->length(); |
| if (start >= chunked_array_size) return 0; |
| |
| auto slice = GetChunkedArray(alt)->Slice(start, n); |
| |
| if (WasUnified(alt)) { |
| int j = 0; |
| |
| // find out which is the first chunk of the chunk array |
| // that is present in the slice, because the main loop |
| // needs to refer to the correct transpose buffers |
| int64_t k = 0; |
| for (; j < chunked_array->num_chunks(); j++) { |
| auto nj = chunked_array->chunk(j)->length(); |
| if (k + nj > start) { |
| break; |
| } |
| |
| k += nj; |
| } |
| |
| int* out = buf; |
| for (const auto& array : slice->chunks()) { |
| const auto& indices = |
| internal::checked_cast<const DictionaryArray&>(*array).indices(); |
| |
| // using the transpose data for this chunk |
| const auto* transpose_data = |
| reinterpret_cast<const int32_t*>(GetArrayTransposed(alt, j)->data()); |
| auto transpose = [transpose_data](int64_t x) { return transpose_data[x]; }; |
| |
| GetRegionDispatch(array, indices, transpose, out); |
| |
| out += array->length(); |
| j++; |
| } |
| |
| } else { |
| // simpler case, identity transpose |
| auto transpose = [](int64_t x) { return static_cast<int>(x); }; |
| |
| int* out = buf; |
| for (const auto& array : slice->chunks()) { |
| const auto& indices = |
| internal::checked_cast<const DictionaryArray&>(*array).indices(); |
| |
| GetRegionDispatch(array, indices, transpose, out); |
| |
| out += array->length(); |
| } |
| } |
| |
| return slice->length(); |
| } |
| |
| #define CALL_GET_REGION_TRANSPOSE(TYPE_CLASS) \ |
| case TYPE_CLASS##Type::type_id: \ |
| GetRegionTranspose<TYPE_CLASS##Type>(array, indices, \ |
| std::forward<Transpose>(transpose), out); \ |
| break |
| |
| template <typename Transpose> |
| static void GetRegionDispatch(const std::shared_ptr<Array>& array, |
| const std::shared_ptr<Array>& indices, |
| Transpose&& transpose, int* out) { |
| switch (indices->type_id()) { |
| ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(CALL_GET_REGION_TRANSPOSE); |
| default: |
| break; |
| } |
| } |
| |
| template <typename Type, typename Transpose> |
| static void GetRegionTranspose(const std::shared_ptr<Array>& array, |
| const std::shared_ptr<Array>& indices, |
| Transpose transpose, int* out) { |
| using index_type = typename Type::c_type; |
| |
| VisitArraySpanInline<Type>( |
| *array->data(), |
| /*valid_func=*/ |
| [&](index_type index) { |
| int64_t transposed = transpose(index) + 1; |
| ARROW_R_DCHECK(transposed >= 1); |
| ARROW_R_DCHECK(transposed <= std::numeric_limits<int>::max()); |
| *out++ = static_cast<int>(transposed); |
| }, |
| /*null_func=*/[&]() { *out++ = cpp11::na<int>(); }); |
| } |
| |
| static SEXP Min(SEXP alt, Rboolean narm) { return nullptr; } |
| static SEXP Max(SEXP alt, Rboolean narm) { return nullptr; } |
| static SEXP Sum(SEXP alt, Rboolean narm) { return nullptr; } |
| }; |
| R_altrep_class_t AltrepFactor::class_t; |
| |
| // Implementation for string arrays |
| template <typename Type> |
| struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> { |
| using Base = AltrepVectorBase<AltrepVectorString<Type>>; |
| |
| static R_altrep_class_t class_t; |
| using StringArrayType = typename TypeTraits<Type>::ArrayType; |
| |
| using Base::Representation; |
| using Base::SetRepresentation; |
| |
| static SEXP Make(const std::shared_ptr<ChunkedArray>& chunked_array) { |
| string_viewer().set_strip_out_nuls(GetBoolOption("arrow.skip_nul", false)); |
| return Base::Make(chunked_array); |
| } |
| |
| // Helper class to convert to R strings. We declare one of these for the |
| // class to avoid having to stack-allocate one for every STRING_ELT call. |
| // This class does not own a reference to any arrays: it is the caller's |
| // responsibility to ensure the Array lifetime exceeds that of the viewer. |
| struct RStringViewer { |
| RStringViewer() : strip_out_nuls_(false), nul_was_stripped_(false) {} |
| |
| void reset_null_was_stripped() { nul_was_stripped_ = false; } |
| |
| void set_strip_out_nuls(bool strip_out_nuls) { strip_out_nuls_ = strip_out_nuls; } |
| |
| // convert the i'th string of the Array to an R string (CHARSXP) |
| SEXP Convert(size_t i) { |
| if (array_->IsNull(i)) { |
| return NA_STRING; |
| } |
| |
| view_ = string_array_->GetView(i); |
| bool no_nul = std::find(view_.begin(), view_.end(), '\0') == view_.end(); |
| |
| if (no_nul) { |
| ARROW_R_DCHECK(view_.size() <= std::numeric_limits<int>::max()); |
| return Rf_mkCharLenCE(view_.data(), static_cast<int>(view_.size()), CE_UTF8); |
| } else if (strip_out_nuls_) { |
| return ConvertStripNul(); |
| } else { |
| Error(); |
| |
| // not reached |
| return R_NilValue; |
| } |
| } |
| |
| // strip the nuls and then convert to R string |
| SEXP ConvertStripNul() { |
| const char* old_string = view_.data(); |
| |
| size_t stripped_len = 0, nul_count = 0; |
| |
| for (size_t i = 0; i < view_.size(); i++) { |
| if (old_string[i] == '\0') { |
| ++nul_count; |
| |
| if (nul_count == 1) { |
| // first nul spotted: allocate stripped string storage |
| stripped_string_.assign(view_.begin(), view_.end()); |
| stripped_len = i; |
| } |
| |
| // don't copy old_string[i] (which is \0) into stripped_string |
| continue; |
| } |
| |
| if (nul_count > 0) { |
| stripped_string_[stripped_len++] = old_string[i]; |
| } |
| } |
| |
| nul_was_stripped_ = true; |
| ARROW_R_DCHECK(stripped_len <= std::numeric_limits<int>::max()); |
| return Rf_mkCharLenCE(stripped_string_.data(), static_cast<int>(stripped_len), |
| CE_UTF8); |
| } |
| |
| bool nul_was_stripped() const { return nul_was_stripped_; } |
| |
| // throw R error about embedded nul |
| void Error() { |
| stripped_string_ = "embedded nul in string: '"; |
| for (char c : view_) { |
| if (c) { |
| stripped_string_ += c; |
| } else { |
| stripped_string_ += "\\0"; |
| } |
| } |
| |
| stripped_string_ += |
| "'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul " |
| "= TRUE)"; |
| |
| Rf_error("%s", stripped_string_.c_str()); |
| } |
| |
| void SetArray(const std::shared_ptr<Array>& array) { |
| array_ = array.get(); |
| string_array_ = internal::checked_cast<const StringArrayType*>(array.get()); |
| } |
| |
| const Array* array_; |
| const StringArrayType* string_array_; |
| std::string stripped_string_; |
| bool strip_out_nuls_; |
| bool nul_was_stripped_; |
| std::string_view view_; |
| }; |
| |
| // Get a single string as a CHARSXP SEXP |
| static SEXP Elt(SEXP alt, R_xlen_t i) { |
| if (Base::IsMaterialized(alt)) { |
| return STRING_ELT(Representation(alt), i); |
| } |
| |
| auto altrep_data = |
| reinterpret_cast<ArrowAltrepData*>(R_ExternalPtrAddr(R_altrep_data1(alt))); |
| auto resolve = altrep_data->locate(i); |
| const auto& array = |
| altrep_data->chunked_array()->chunk(static_cast<int>(resolve.chunk_index)); |
| auto j = resolve.index_in_chunk; |
| |
| SEXP s = NA_STRING; |
| RStringViewer& r_string_viewer = string_viewer(); |
| r_string_viewer.SetArray(array); |
| // Note: we don't check GetBoolOption("arrow.skip_nul", false) here |
| // because it is too expensive to do so. We do set this value whenever |
| // an altrep string; however, there is a chance that this value could |
| // be out of date by the time a value in the vector is accessed. |
| r_string_viewer.reset_null_was_stripped(); |
| s = r_string_viewer.Convert(j); |
| if (r_string_viewer.nul_was_stripped()) { |
| Rf_warning("Stripping '\\0' (nul) from character vector"); |
| } |
| |
| return s; |
| } |
| |
| static void* Dataptr(SEXP alt, Rboolean writeable) { |
| return const_cast<void*>(DATAPTR_RO(Materialize(alt))); |
| } |
| |
| static SEXP Materialize(SEXP alt) { |
| if (Base::IsMaterialized(alt)) { |
| return Representation(alt); |
| } |
| |
| const auto& chunked_array = GetChunkedArray(alt); |
| SEXP data2 = PROTECT(Rf_allocVector(STRSXP, chunked_array->length())); |
| MARK_NOT_MUTABLE(data2); |
| |
| R_xlen_t i = 0; |
| RStringViewer& r_string_viewer = string_viewer(); |
| r_string_viewer.reset_null_was_stripped(); |
| r_string_viewer.set_strip_out_nuls(GetBoolOption("arrow.skip_nul", false)); |
| for (const auto& array : chunked_array->chunks()) { |
| r_string_viewer.SetArray(array); |
| |
| auto ni = array->length(); |
| for (R_xlen_t j = 0; j < ni; j++, i++) { |
| SET_STRING_ELT(data2, i, r_string_viewer.Convert(j)); |
| } |
| } |
| |
| if (r_string_viewer.nul_was_stripped()) { |
| Rf_warning("Stripping '\\0' (nul) from character vector"); |
| } |
| |
| // only set to data2 if all the values have been converted |
| SetRepresentation(alt, data2); |
| UNPROTECT(1); // data2 |
| |
| // remove reference to chunked array |
| R_set_altrep_data1(alt, R_NilValue); |
| |
| return data2; |
| } |
| |
| static const void* Dataptr_or_null(SEXP alt) { |
| if (Base::IsMaterialized(alt)) { |
| return DATAPTR_RO(alt); |
| } |
| |
| // otherwise give up |
| return nullptr; |
| } |
| |
| static void Set_elt(SEXP alt, R_xlen_t i, SEXP v) { |
| Rf_error("ALTSTRING objects of type <arrow::array_string_vector> are immutable"); |
| } |
| |
| static RStringViewer& string_viewer() { |
| static RStringViewer string_viewer; |
| return string_viewer; |
| } |
| }; |
| |
| template <typename Type> |
| R_altrep_class_t AltrepVectorString<Type>::class_t; |
| |
| // initialize altrep, altvec, altreal, and altinteger methods |
| template <typename AltrepClass> |
| void InitAltrepMethods(R_altrep_class_t class_t, DllInfo* dll) { |
| R_set_altrep_Length_method(class_t, AltrepClass::Length); |
| R_set_altrep_Inspect_method(class_t, AltrepClass::Inspect); |
| R_set_altrep_Duplicate_method(class_t, AltrepClass::Duplicate); |
| R_set_altrep_Serialized_state_method(class_t, AltrepClass::Serialized_state); |
| R_set_altrep_Unserialize_method(class_t, AltrepClass::Unserialize); |
| R_set_altrep_Coerce_method(class_t, AltrepClass::Coerce); |
| } |
| |
| template <typename AltrepClass> |
| void InitAltvecMethods(R_altrep_class_t class_t, DllInfo* dll) { |
| R_set_altvec_Dataptr_method(class_t, AltrepClass::Dataptr); |
| R_set_altvec_Dataptr_or_null_method(class_t, AltrepClass::Dataptr_or_null); |
| } |
| |
| template <typename AltrepClass> |
| void InitAltRealMethods(R_altrep_class_t class_t, DllInfo* dll) { |
| R_set_altreal_No_NA_method(class_t, AltrepClass::No_NA); |
| R_set_altreal_Is_sorted_method(class_t, AltrepClass::Is_sorted); |
| |
| R_set_altreal_Sum_method(class_t, AltrepClass::Sum); |
| R_set_altreal_Min_method(class_t, AltrepClass::Min); |
| R_set_altreal_Max_method(class_t, AltrepClass::Max); |
| |
| R_set_altreal_Elt_method(class_t, AltrepClass::Elt); |
| R_set_altreal_Get_region_method(class_t, AltrepClass::Get_region); |
| } |
| |
| template <typename AltrepClass> |
| void InitAltIntegerMethods(R_altrep_class_t class_t, DllInfo* dll) { |
| R_set_altinteger_No_NA_method(class_t, AltrepClass::No_NA); |
| R_set_altinteger_Is_sorted_method(class_t, AltrepClass::Is_sorted); |
| |
| R_set_altinteger_Sum_method(class_t, AltrepClass::Sum); |
| R_set_altinteger_Min_method(class_t, AltrepClass::Min); |
| R_set_altinteger_Max_method(class_t, AltrepClass::Max); |
| |
| R_set_altinteger_Elt_method(class_t, AltrepClass::Elt); |
| R_set_altinteger_Get_region_method(class_t, AltrepClass::Get_region); |
| } |
| |
| template <typename AltrepClass> |
| void InitAltRealClass(DllInfo* dll, const char* name) { |
| AltrepClass::class_t = R_make_altreal_class(name, "arrow", dll); |
| InitAltrepMethods<AltrepClass>(AltrepClass::class_t, dll); |
| InitAltvecMethods<AltrepClass>(AltrepClass::class_t, dll); |
| InitAltRealMethods<AltrepClass>(AltrepClass::class_t, dll); |
| } |
| |
| template <typename AltrepClass> |
| void InitAltIntegerClass(DllInfo* dll, const char* name) { |
| AltrepClass::class_t = R_make_altinteger_class(name, "arrow", dll); |
| InitAltrepMethods<AltrepClass>(AltrepClass::class_t, dll); |
| InitAltvecMethods<AltrepClass>(AltrepClass::class_t, dll); |
| InitAltIntegerMethods<AltrepClass>(AltrepClass::class_t, dll); |
| } |
| |
| template <typename AltrepClass> |
| void InitAltStringClass(DllInfo* dll, const char* name) { |
| AltrepClass::class_t = R_make_altstring_class(name, "arrow", dll); |
| R_set_altrep_Length_method(AltrepClass::class_t, AltrepClass::Length); |
| R_set_altrep_Inspect_method(AltrepClass::class_t, AltrepClass::Inspect); |
| R_set_altrep_Duplicate_method(AltrepClass::class_t, AltrepClass::Duplicate); |
| R_set_altrep_Serialized_state_method(AltrepClass::class_t, |
| AltrepClass::Serialized_state); |
| R_set_altrep_Unserialize_method(AltrepClass::class_t, AltrepClass::Unserialize); |
| R_set_altrep_Coerce_method(AltrepClass::class_t, AltrepClass::Coerce); |
| |
| R_set_altvec_Dataptr_method(AltrepClass::class_t, AltrepClass::Dataptr); |
| R_set_altvec_Dataptr_or_null_method(AltrepClass::class_t, AltrepClass::Dataptr_or_null); |
| |
| R_set_altstring_Elt_method(AltrepClass::class_t, AltrepClass::Elt); |
| R_set_altstring_Set_elt_method(AltrepClass::class_t, AltrepClass::Set_elt); |
| R_set_altstring_No_NA_method(AltrepClass::class_t, AltrepClass::No_NA); |
| R_set_altstring_Is_sorted_method(AltrepClass::class_t, AltrepClass::Is_sorted); |
| } |
| |
| } // namespace |
| |
| // initialize the altrep classes |
| void Init_Altrep_classes(DllInfo* dll) { |
| InitAltRealClass<AltrepVectorPrimitive<REALSXP>>(dll, "arrow::array_dbl_vector"); |
| InitAltIntegerClass<AltrepVectorPrimitive<INTSXP>>(dll, "arrow::array_int_vector"); |
| InitAltIntegerClass<AltrepFactor>(dll, "arrow::array_factor"); |
| |
| InitAltStringClass<AltrepVectorString<StringType>>(dll, "arrow::array_string_vector"); |
| InitAltStringClass<AltrepVectorString<LargeStringType>>( |
| dll, "arrow::array_large_string_vector"); |
| } |
| |
| // return an altrep R vector that shadows the array if possible |
| SEXP MakeAltrepVector(const std::shared_ptr<ChunkedArray>& chunked_array) { |
| // using altrep if |
| // - the arrow.use_altrep is set to TRUE or unset (implicit TRUE) |
| // - the chunked array has at least one element |
| if (arrow::r::GetBoolOption("arrow.use_altrep", true) && chunked_array->length() > 0) { |
| switch (chunked_array->type()->id()) { |
| case arrow::Type::DOUBLE: |
| return altrep::AltrepVectorPrimitive<REALSXP>::Make(chunked_array); |
| |
| case arrow::Type::INT32: |
| return altrep::AltrepVectorPrimitive<INTSXP>::Make(chunked_array); |
| |
| case arrow::Type::STRING: |
| return altrep::AltrepVectorString<StringType>::Make(chunked_array); |
| |
| case arrow::Type::LARGE_STRING: |
| return altrep::AltrepVectorString<LargeStringType>::Make(chunked_array); |
| |
| case arrow::Type::DICTIONARY: |
| return altrep::AltrepFactor::Make(chunked_array); |
| |
| default: |
| break; |
| } |
| } |
| |
| return R_NilValue; |
| } |
| |
| bool is_arrow_altrep(SEXP x) { |
| if (ALTREP(x)) { |
| SEXP pkg = R_altrep_class_package(x); |
| if (pkg == symbols::arrow) return true; |
| } |
| |
| return false; |
| } |
| |
| bool is_unmaterialized_arrow_altrep(SEXP x) { |
| return is_arrow_altrep(x) && R_altrep_data1(x) != R_NilValue; |
| } |
| |
| std::shared_ptr<ChunkedArray> vec_to_arrow_altrep_bypass(SEXP x) { |
| if (is_unmaterialized_arrow_altrep(x)) { |
| return GetChunkedArray(x); |
| } |
| |
| return nullptr; |
| } |
| |
| } // namespace altrep |
| } // namespace r |
| } // namespace arrow |
| |
| // [[arrow::export]] |
| bool is_arrow_altrep(cpp11::sexp x) { return arrow::r::altrep::is_arrow_altrep(x); } |
| |
| // [[arrow::export]] |
| void test_arrow_altrep_set_string_elt(sexp x, int i, std::string value) { |
| if (!is_arrow_altrep(x)) { |
| stop("x is not arrow ALTREP"); |
| } |
| |
| SET_STRING_ELT(x, i, Rf_mkChar(value.c_str())); |
| } |
| |
| // [[arrow::export]] |
| sexp test_arrow_altrep_is_materialized(sexp x) { |
| if (!is_arrow_altrep(x)) { |
| return Rf_ScalarLogical(NA_LOGICAL); |
| } |
| |
| SEXP class_sym = R_altrep_class_name(x); |
| std::string class_name(CHAR(PRINTNAME(class_sym))); |
| |
| int result = NA_LOGICAL; |
| if (class_name == "arrow::array_dbl_vector") { |
| result = arrow::r::altrep::AltrepVectorPrimitive<REALSXP>::IsMaterialized(x); |
| } else if (class_name == "arrow::array_int_vector") { |
| result = arrow::r::altrep::AltrepVectorPrimitive<INTSXP>::IsMaterialized(x); |
| } else if (class_name == "arrow::array_string_vector") { |
| result = arrow::r::altrep::AltrepVectorString<arrow::StringType>::IsMaterialized(x); |
| } else if (class_name == "arrow::array_large_string_vector") { |
| result = |
| arrow::r::altrep::AltrepVectorString<arrow::LargeStringType>::IsMaterialized(x); |
| } else if (class_name == "arrow::array_factor") { |
| result = arrow::r::altrep::AltrepFactor::IsMaterialized(x); |
| } |
| |
| return Rf_ScalarLogical(result); |
| } |
| |
| // [[arrow::export]] |
| bool test_arrow_altrep_force_materialize(sexp x) { |
| if (!is_arrow_altrep(x)) { |
| stop("x is not arrow ALTREP"); |
| } |
| |
| bool already_materialized = as_cpp<bool>(test_arrow_altrep_is_materialized(x)); |
| if (already_materialized) { |
| stop("x is already materialized"); |
| } |
| |
| SEXP class_sym = R_altrep_class_name(x); |
| std::string class_name(CHAR(PRINTNAME(class_sym))); |
| |
| if (class_name == "arrow::array_dbl_vector") { |
| arrow::r::altrep::AltrepVectorPrimitive<REALSXP>::Materialize(x); |
| } else if (class_name == "arrow::array_int_vector") { |
| arrow::r::altrep::AltrepVectorPrimitive<INTSXP>::Materialize(x); |
| } else if (class_name == "arrow::array_string_vector") { |
| arrow::r::altrep::AltrepVectorString<arrow::StringType>::Materialize(x); |
| } else if (class_name == "arrow::array_large_string_vector") { |
| arrow::r::altrep::AltrepVectorString<arrow::LargeStringType>::Materialize(x); |
| } else if (class_name == "arrow::array_factor") { |
| arrow::r::altrep::AltrepFactor::Materialize(x); |
| } else { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // [[arrow::export]] |
| sexp test_arrow_altrep_copy_by_element(sexp x) { |
| if (!is_arrow_altrep(x)) { |
| stop("x is not arrow ALTREP"); |
| } |
| |
| R_xlen_t n = Rf_xlength(x); |
| |
| if (TYPEOF(x) == INTSXP) { |
| cpp11::writable::integers out(Rf_xlength(x)); |
| for (R_xlen_t i = 0; i < n; i++) { |
| out[i] = INTEGER_ELT(x, i); |
| } |
| return out; |
| } else if (TYPEOF(x) == REALSXP) { |
| cpp11::writable::doubles out(Rf_xlength(x)); |
| for (R_xlen_t i = 0; i < n; i++) { |
| out[i] = REAL_ELT(x, i); |
| } |
| return out; |
| } else if (TYPEOF(x) == STRSXP) { |
| cpp11::writable::strings out(Rf_xlength(x)); |
| for (R_xlen_t i = 0; i < n; i++) { |
| out[i] = STRING_ELT(x, i); |
| } |
| return out; |
| } else { |
| return R_NilValue; |
| } |
| } |
| |
| // [[arrow::export]] |
| sexp test_arrow_altrep_copy_by_region(sexp x, R_xlen_t region_size) { |
| if (!is_arrow_altrep(x)) { |
| stop("x is not arrow ALTREP"); |
| } |
| |
| R_xlen_t n = Rf_xlength(x); |
| |
| if (TYPEOF(x) == INTSXP) { |
| cpp11::writable::integers out(Rf_xlength(x)); |
| cpp11::writable::integers buf_shelter(region_size); |
| int* buf = INTEGER(buf_shelter); |
| for (R_xlen_t i = 0; i < n; i++) { |
| if ((i % region_size) == 0) { |
| INTEGER_GET_REGION(x, i, region_size, buf); |
| } |
| out[i] = buf[i % region_size]; |
| } |
| return out; |
| } else if (TYPEOF(x) == REALSXP) { |
| cpp11::writable::doubles out(Rf_xlength(x)); |
| cpp11::writable::doubles buf_shelter(region_size); |
| double* buf = REAL(buf_shelter); |
| for (R_xlen_t i = 0; i < n; i++) { |
| if ((i % region_size) == 0) { |
| REAL_GET_REGION(x, i, region_size, buf); |
| } |
| out[i] = buf[i % region_size]; |
| } |
| return out; |
| } else { |
| return R_NilValue; |
| } |
| } |
| |
| // [[arrow::export]] |
| sexp test_arrow_altrep_copy_by_dataptr(sexp x) { |
| if (!is_arrow_altrep(x)) { |
| stop("x is not arrow ALTREP"); |
| } |
| |
| R_xlen_t n = Rf_xlength(x); |
| |
| if (TYPEOF(x) == INTSXP) { |
| cpp11::writable::integers out(Rf_xlength(x)); |
| int* ptr = INTEGER(x); |
| for (R_xlen_t i = 0; i < n; i++) { |
| out[i] = ptr[i]; |
| } |
| return out; |
| } else if (TYPEOF(x) == REALSXP) { |
| cpp11::writable::doubles out(Rf_xlength(x)); |
| double* ptr = REAL(x); |
| for (R_xlen_t i = 0; i < n; i++) { |
| out[i] = ptr[i]; |
| } |
| return out; |
| } else { |
| return R_NilValue; |
| } |
| } |