blob: aa8b248aec53942303be885a6493894c67d6376c [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <stdexcept>
#include <string>
#include <vector>
#include "nanoarrow.h"
#ifndef NANOARROW_HPP_INCLUDED
#define NANOARROW_HPP_INCLUDED
/// \defgroup nanoarrow_hpp Nanoarrow C++ Helpers
///
/// The utilities provided in this file are intended to support C++ users
/// of the nanoarrow C library such that C++-style resource allocation
/// and error handling can be used with nanoarrow data structures.
/// These utilities are not intended to mirror the nanoarrow C API.
namespace nanoarrow {
/// \defgroup nanoarrow_hpp-errors Error handling helpers
///
/// Most functions in the C API return an ArrowErrorCode to communicate
/// possible failure. Except where documented, it is usually not safe to
/// continue after a non-zero value has been returned. While the
/// nanoarrow C++ helpers do not throw any exceptions of their own,
/// these helpers are provided to facilitate using the nanoarrow C++ helpers
/// in frameworks where this is a useful error handling idiom.
///
/// @{
class Exception : public std::exception {
public:
Exception(const std::string& msg) : msg_(msg) {}
const char* what() const noexcept { return msg_.c_str(); }
private:
std::string msg_;
};
#if defined(NANOARROW_DEBUG)
#define _NANOARROW_THROW_NOT_OK_IMPL(NAME, EXPR, EXPR_STR) \
do { \
const int NAME = (EXPR); \
if (NAME) { \
throw nanoarrow::Exception( \
std::string(EXPR_STR) + std::string(" failed with errno ") + \
std::to_string(NAME) + std::string("\n * ") + std::string(__FILE__) + \
std::string(":") + std::to_string(__LINE__) + std::string("\n")); \
} \
} while (0)
#else
#define _NANOARROW_THROW_NOT_OK_IMPL(NAME, EXPR, EXPR_STR) \
do { \
const int NAME = (EXPR); \
if (NAME) { \
throw nanoarrow::Exception(std::string(EXPR_STR) + \
std::string(" failed with errno ") + \
std::to_string(NAME)); \
} \
} while (0)
#endif
#define NANOARROW_THROW_NOT_OK(EXPR) \
_NANOARROW_THROW_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, \
#EXPR)
/// @}
namespace internal {
/// \defgroup nanoarrow_hpp-unique_base Base classes for Unique wrappers
///
/// @{
template <typename T>
static inline void init_pointer(T* data);
template <typename T>
static inline void move_pointer(T* src, T* dst);
template <typename T>
static inline void release_pointer(T* data);
template <>
inline void init_pointer(struct ArrowSchema* data) {
data->release = nullptr;
}
template <>
inline void move_pointer(struct ArrowSchema* src, struct ArrowSchema* dst) {
ArrowSchemaMove(src, dst);
}
template <>
inline void release_pointer(struct ArrowSchema* data) {
if (data->release != nullptr) {
data->release(data);
}
}
template <>
inline void init_pointer(struct ArrowArray* data) {
data->release = nullptr;
}
template <>
inline void move_pointer(struct ArrowArray* src, struct ArrowArray* dst) {
ArrowArrayMove(src, dst);
}
template <>
inline void release_pointer(struct ArrowArray* data) {
if (data->release != nullptr) {
data->release(data);
}
}
template <>
inline void init_pointer(struct ArrowArrayStream* data) {
data->release = nullptr;
}
template <>
inline void move_pointer(struct ArrowArrayStream* src, struct ArrowArrayStream* dst) {
ArrowArrayStreamMove(src, dst);
}
template <>
inline void release_pointer(ArrowArrayStream* data) {
if (data->release != nullptr) {
data->release(data);
}
}
template <>
inline void init_pointer(struct ArrowBuffer* data) {
ArrowBufferInit(data);
}
template <>
inline void move_pointer(struct ArrowBuffer* src, struct ArrowBuffer* dst) {
ArrowBufferMove(src, dst);
}
template <>
inline void release_pointer(struct ArrowBuffer* data) {
ArrowBufferReset(data);
}
template <>
inline void init_pointer(struct ArrowBitmap* data) {
ArrowBitmapInit(data);
}
template <>
inline void move_pointer(struct ArrowBitmap* src, struct ArrowBitmap* dst) {
ArrowBitmapMove(src, dst);
}
template <>
inline void release_pointer(struct ArrowBitmap* data) {
ArrowBitmapReset(data);
}
template <>
inline void init_pointer(struct ArrowArrayView* data) {
ArrowArrayViewInitFromType(data, NANOARROW_TYPE_UNINITIALIZED);
}
template <>
inline void move_pointer(struct ArrowArrayView* src, struct ArrowArrayView* dst) {
ArrowArrayViewMove(src, dst);
}
template <>
inline void release_pointer(struct ArrowArrayView* data) {
ArrowArrayViewReset(data);
}
/// \brief A unique_ptr-like base class for stack-allocatable objects
/// \tparam T The object type
template <typename T>
class Unique {
public:
/// \brief Construct an invalid instance of T holding no resources
Unique() { init_pointer(&data_); }
/// \brief Move and take ownership of data
Unique(T* data) { move_pointer(data, &data_); }
/// \brief Move and take ownership of data wrapped by rhs
Unique(Unique&& rhs) : Unique(rhs.get()) {}
Unique& operator=(Unique&& rhs) {
reset(rhs.get());
return *this;
}
// These objects are not copyable
Unique(const Unique& rhs) = delete;
/// \brief Get a pointer to the data owned by this object
T* get() noexcept { return &data_; }
const T* get() const noexcept { return &data_; }
/// \brief Use the pointer operator to access fields of this object
T* operator->() noexcept { return &data_; }
const T* operator->() const noexcept { return &data_; }
/// \brief Call data's release callback if valid
void reset() { release_pointer(&data_); }
/// \brief Call data's release callback if valid and move ownership of the data
/// pointed to by data
void reset(T* data) {
reset();
move_pointer(data, &data_);
}
/// \brief Move ownership of this object to the data pointed to by out
void move(T* out) { move_pointer(&data_, out); }
~Unique() { reset(); }
protected:
T data_;
};
template <typename T>
static inline void DeallocateWrappedBuffer(struct ArrowBufferAllocator* allocator,
uint8_t* ptr, int64_t size) {
NANOARROW_UNUSED(ptr);
NANOARROW_UNUSED(size);
auto obj = reinterpret_cast<T*>(allocator->private_data);
delete obj;
}
/// @}
} // namespace internal
/// \defgroup nanoarrow_hpp-unique Unique object wrappers
///
/// The Arrow C Data interface, the Arrow C Stream interface, and the
/// nanoarrow C library use stack-allocatable objects, some of which
/// require initialization or cleanup.
///
/// @{
/// \brief Class wrapping a unique struct ArrowSchema
using UniqueSchema = internal::Unique<struct ArrowSchema>;
/// \brief Class wrapping a unique struct ArrowArray
using UniqueArray = internal::Unique<struct ArrowArray>;
/// \brief Class wrapping a unique struct ArrowArrayStream
using UniqueArrayStream = internal::Unique<struct ArrowArrayStream>;
/// \brief Class wrapping a unique struct ArrowBuffer
using UniqueBuffer = internal::Unique<struct ArrowBuffer>;
/// \brief Class wrapping a unique struct ArrowBitmap
using UniqueBitmap = internal::Unique<struct ArrowBitmap>;
/// \brief Class wrapping a unique struct ArrowArrayView
using UniqueArrayView = internal::Unique<struct ArrowArrayView>;
/// @}
/// \defgroup nanoarrow_hpp-buffer Buffer helpers
///
/// Helpers to wrap buffer-like C++ objects as ArrowBuffer objects that can
/// be used to build ArrowArray objects.
///
/// @{
/// \brief Initialize a buffer wrapping an arbitrary C++ object
///
/// Initializes a buffer with a release callback that deletes the moved obj
/// when ArrowBufferReset is called. This version is useful for wrapping
/// an object whose .data() member is missing or unrelated to the buffer
/// value that is destined for a the buffer of an ArrowArray. T must be movable.
template <typename T>
static inline void BufferInitWrapped(struct ArrowBuffer* buffer, T obj,
const uint8_t* data, int64_t size_bytes) {
T* obj_moved = new T(std::move(obj));
buffer->data = const_cast<uint8_t*>(data);
buffer->size_bytes = size_bytes;
buffer->capacity_bytes = 0;
buffer->allocator =
ArrowBufferDeallocator(&internal::DeallocateWrappedBuffer<T>, obj_moved);
}
/// \brief Initialize a buffer wrapping a C++ sequence
///
/// Specifically, this uses obj.data() to set the buffer address and
/// obj.size() * sizeof(T::value_type) to set the buffer size. This works
/// for STL containers like std::vector, std::array, and std::string.
/// This function moves obj and ensures it is deleted when ArrowBufferReset
/// is called.
template <typename T>
void BufferInitSequence(struct ArrowBuffer* buffer, T obj) {
// Move before calling .data() (matters sometimes).
T* obj_moved = new T(std::move(obj));
buffer->data =
const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(obj_moved->data()));
buffer->size_bytes = obj_moved->size() * sizeof(typename T::value_type);
buffer->capacity_bytes = 0;
buffer->allocator =
ArrowBufferDeallocator(&internal::DeallocateWrappedBuffer<T>, obj_moved);
}
/// @}
/// \defgroup nanoarrow_hpp-array-stream ArrayStream helpers
///
/// These classes provide simple ArrowArrayStream implementations that
/// can be extended to help simplify the process of creating a valid
/// ArrowArrayStream implementation or used as-is for testing.
///
/// @{
/// @brief Export an ArrowArrayStream from a standard C++ class
/// @tparam T A class with methods `int GetSchema(ArrowSchema*)`, `int
/// GetNext(ArrowArray*)`, and `const char* GetLastError()`
///
/// This class allows a standard C++ class to be exported to a generic ArrowArrayStream
/// consumer by mapping C callback invocations to method calls on an instance of the
/// object whose lifecycle is owned by the ArrowArrayStream. See VectorArrayStream for
/// minimal useful example of this pattern.
///
/// The methods must be accessible to the ArrayStreamFactory, either as public methods or
/// by declaring ArrayStreamFactory<ImplClass> a friend. Implementors are encouraged (but
/// not required) to implement a ToArrayStream(ArrowArrayStream*) that creates a new
/// instance owned by the ArrowArrayStream and moves the relevant data to that instance.
///
/// An example implementation might be:
///
/// \code
/// class StreamImpl {
/// public:
/// // Public methods (e.g., constructor) used from C++ to initialize relevant data
///
/// // Idiomatic exporter to move data + lifecycle responsibility to an instance
/// // managed by the ArrowArrayStream callbacks
/// void ToArrayStream(struct ArrowArrayStream* out) {
/// ArrayStreamFactory<StreamImpl>::InitArrayStream(new StreamImpl(...), out);
/// }
///
/// private:
/// // Make relevant methods available to the ArrayStreamFactory
/// friend class ArrayStreamFactory<StreamImpl>;
///
/// // Method implementations (called from C, not normally interacted with from C++)
/// int GetSchema(struct ArrowSchema* schema) { return ENOTSUP; }
/// int GetNext(struct ArrowArray* array) { return ENOTSUP; }
/// const char* GetLastError() { nullptr; }
/// };
/// \endcode
///
/// An example usage might be:
///
/// \code
/// // Call constructor and/or public methods to initialize relevant data
/// StreamImpl impl;
///
/// // Export to ArrowArrayStream after data are finalized
/// UniqueArrayStream stream;
/// impl.ToArrayStream(stream.get());
/// \endcode
template <typename T>
class ArrayStreamFactory {
public:
/// \brief Take ownership of instance and populate callbacks of out
static void InitArrayStream(T* instance, struct ArrowArrayStream* out) {
out->get_schema = &get_schema_wrapper;
out->get_next = &get_next_wrapper;
out->get_last_error = &get_last_error_wrapper;
out->release = &release_wrapper;
out->private_data = instance;
}
private:
static int get_schema_wrapper(struct ArrowArrayStream* stream,
struct ArrowSchema* schema) {
return reinterpret_cast<T*>(stream->private_data)->GetSchema(schema);
}
static int get_next_wrapper(struct ArrowArrayStream* stream, struct ArrowArray* array) {
return reinterpret_cast<T*>(stream->private_data)->GetNext(array);
}
static const char* get_last_error_wrapper(struct ArrowArrayStream* stream) {
return reinterpret_cast<T*>(stream->private_data)->GetLastError();
}
static void release_wrapper(struct ArrowArrayStream* stream) {
delete reinterpret_cast<T*>(stream->private_data);
stream->release = nullptr;
stream->private_data = nullptr;
}
};
/// \brief An empty array stream
///
/// This class can be constructed from an struct ArrowSchema and implements a default
/// get_next() method that always marks the output ArrowArray as released.
///
/// DEPRECATED (0.4.0): Early versions of nanoarrow allowed subclasses to override
/// get_schema(), get_next(), and get_last_error(). This functionality will be removed
/// in a future release: use the pattern documented in ArrayStreamFactory to create
/// custom ArrowArrayStream implementations.
class EmptyArrayStream {
public:
/// \brief Create an EmptyArrayStream from an ArrowSchema
///
/// Takes ownership of schema.
EmptyArrayStream(struct ArrowSchema* schema) : schema_(schema) {
ArrowErrorInit(&error_);
}
/// \brief Export to ArrowArrayStream
void ToArrayStream(struct ArrowArrayStream* out) {
EmptyArrayStream* impl = new EmptyArrayStream(schema_.get());
ArrayStreamFactory<EmptyArrayStream>::InitArrayStream(impl, out);
}
/// \brief Create an empty UniqueArrayStream from a struct ArrowSchema
///
/// DEPRECATED (0.4.0): Use the constructor + ToArrayStream() to export an
/// EmptyArrayStream to an ArrowArrayStream consumer.
static UniqueArrayStream MakeUnique(struct ArrowSchema* schema) {
UniqueArrayStream stream;
EmptyArrayStream(schema).ToArrayStream(stream.get());
return stream;
}
virtual ~EmptyArrayStream() {}
protected:
UniqueSchema schema_;
struct ArrowError error_;
void MakeStream(struct ArrowArrayStream* stream) { ToArrayStream(stream); }
virtual int get_schema(struct ArrowSchema* schema) {
return ArrowSchemaDeepCopy(schema_.get(), schema);
}
virtual int get_next(struct ArrowArray* array) {
array->release = nullptr;
return NANOARROW_OK;
}
virtual const char* get_last_error() { return error_.message; }
private:
friend class ArrayStreamFactory<EmptyArrayStream>;
int GetSchema(struct ArrowSchema* schema) { return get_schema(schema); }
int GetNext(struct ArrowArray* array) { return get_next(array); }
const char* GetLastError() { return get_last_error(); }
};
/// \brief Implementation of an ArrowArrayStream backed by a vector of UniqueArray objects
class VectorArrayStream {
public:
/// \brief Create a VectorArrayStream from an ArrowSchema + vector of UniqueArray
///
/// Takes ownership of schema and moves arrays if possible.
VectorArrayStream(struct ArrowSchema* schema, std::vector<UniqueArray> arrays)
: offset_(0), schema_(schema), arrays_(std::move(arrays)) {}
/// \brief Create a one-shot VectorArrayStream from an ArrowSchema + ArrowArray
///
/// Takes ownership of schema and array.
VectorArrayStream(struct ArrowSchema* schema, struct ArrowArray* array)
: offset_(0), schema_(schema) {
arrays_.emplace_back(array);
}
/// \brief Export to ArrowArrayStream
void ToArrayStream(struct ArrowArrayStream* out) {
VectorArrayStream* impl = new VectorArrayStream(schema_.get(), std::move(arrays_));
ArrayStreamFactory<VectorArrayStream>::InitArrayStream(impl, out);
}
/// \brief Create a UniqueArrowArrayStream from an existing array
///
/// DEPRECATED (0.4.0): Use the constructors + ToArrayStream() to export a
/// VectorArrayStream to an ArrowArrayStream consumer.
static UniqueArrayStream MakeUnique(struct ArrowSchema* schema,
struct ArrowArray* array) {
UniqueArrayStream stream;
VectorArrayStream(schema, array).ToArrayStream(stream.get());
return stream;
}
/// \brief Create a UniqueArrowArrayStream from existing arrays
///
/// DEPRECATED (0.4.0): Use the constructor + ToArrayStream() to export a
/// VectorArrayStream to an ArrowArrayStream consumer.
static UniqueArrayStream MakeUnique(struct ArrowSchema* schema,
std::vector<UniqueArray> arrays) {
UniqueArrayStream stream;
VectorArrayStream(schema, std::move(arrays)).ToArrayStream(stream.get());
return stream;
}
private:
int64_t offset_;
UniqueSchema schema_;
std::vector<UniqueArray> arrays_;
friend class ArrayStreamFactory<VectorArrayStream>;
int GetSchema(struct ArrowSchema* schema) {
return ArrowSchemaDeepCopy(schema_.get(), schema);
}
int GetNext(struct ArrowArray* array) {
if (offset_ < static_cast<int64_t>(arrays_.size())) {
arrays_[offset_++].move(array);
} else {
array->release = nullptr;
}
return NANOARROW_OK;
}
const char* GetLastError() { return ""; }
};
/// @}
} // namespace nanoarrow
#endif