// blob: 3999d60af97dbb7add26d88b8a33f860173bf26d [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Build/version identification macros for this copy of nanoarrow.
#ifndef NANOARROW_BUILD_ID_H_INCLUDED
#define NANOARROW_BUILD_ID_H_INCLUDED
// Individual numeric components of the library version.
#define NANOARROW_VERSION_MAJOR 0
#define NANOARROW_VERSION_MINOR 3
#define NANOARROW_VERSION_PATCH 0
// The version as a string literal.
#define NANOARROW_VERSION "0.3.0-SNAPSHOT"
// The version packed into one integer as
// major * 10000 + minor * 100 + patch, so versions can be compared numerically.
#define NANOARROW_VERSION_INT \
(NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \
NANOARROW_VERSION_PATCH)
// Optional symbol namespace; commented out, so NANOARROW_NAMESPACE is not
// defined by this header.
// #define NANOARROW_NAMESPACE YourNamespaceHere
#endif
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef NANOARROW_NANOARROW_TYPES_H_INCLUDED
#define NANOARROW_NANOARROW_TYPES_H_INCLUDED
#include <stdint.h>
#include <string.h>
#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE)
#include <stdio.h>
#include <stdlib.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
// Extra guard for versions of Arrow without the canonical guard
#ifndef ARROW_FLAG_DICTIONARY_ORDERED
/// \defgroup nanoarrow-arrow-cdata Arrow C Data interface
///
/// The Arrow C Data (https://arrow.apache.org/docs/format/CDataInterface.html)
/// and Arrow C Stream (https://arrow.apache.org/docs/format/CStreamInterface.html)
/// interfaces are part of the
/// Arrow Columnar Format specification
/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for
/// documentation of these structures.
///
/// @{
#ifndef ARROW_C_DATA_INTERFACE
#define ARROW_C_DATA_INTERFACE
#define ARROW_FLAG_DICTIONARY_ORDERED 1
#define ARROW_FLAG_NULLABLE 2
#define ARROW_FLAG_MAP_KEYS_SORTED 4
// Description of an array's data type, part of the Arrow C Data interface.
// NOTE(review): member order and types are presumably fixed by that
// specification - do not reorder or extend without confirming.
struct ArrowSchema {
// Array type description
const char* format;
const char* name;
const char* metadata;
// Presumably a bitfield of the ARROW_FLAG_* values defined above - confirm
// against the Arrow C Data interface documentation.
int64_t flags;
int64_t n_children;
struct ArrowSchema** children;
struct ArrowSchema* dictionary;
// Release callback
// (ArrowSchemaMove() sets this to NULL on the moved-from schema.)
void (*release)(struct ArrowSchema*);
// Opaque producer-specific data
void* private_data;
};
// Description of an array's data, part of the Arrow C Data interface.
// NOTE(review): member order and types are presumably fixed by that
// specification - do not reorder or extend without confirming.
struct ArrowArray {
// Array data description
int64_t length;
int64_t null_count;
int64_t offset;
int64_t n_buffers;
int64_t n_children;
const void** buffers;
struct ArrowArray** children;
struct ArrowArray* dictionary;
// Release callback
// (ArrowArrayMove() sets this to NULL on the moved-from array.)
void (*release)(struct ArrowArray*);
// Opaque producer-specific data
void* private_data;
};
#endif // ARROW_C_DATA_INTERFACE
#ifndef ARROW_C_STREAM_INTERFACE
#define ARROW_C_STREAM_INTERFACE
// A source of ArrowArray values that all share one ArrowSchema, part of the
// Arrow C Stream interface. Every member except private_data is a callback
// that receives the stream itself as its first argument.
struct ArrowArrayStream {
// Callback to get the stream type
// (will be the same for all arrays in the stream).
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowSchema must be released independently from the stream.
int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
// Callback to get the next array
// (if no error and the array is released, the stream has ended)
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowArray must be released independently from the stream.
int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
// Callback to get optional detailed error information.
// This must only be called if the last stream operation failed
// with a non-0 return code.
//
// Return value: pointer to a null-terminated character array describing
// the last error, or NULL if no description is available.
//
// The returned pointer is only valid until the next operation on this stream
// (including release).
const char* (*get_last_error)(struct ArrowArrayStream*);
// Release callback: release the stream's own resources.
// Note that arrays returned by `get_next` must be individually released.
// (ArrowArrayStreamMove() sets this to NULL on the moved-from stream.)
void (*release)(struct ArrowArrayStream*);
// Opaque producer-specific data
void* private_data;
};
#endif // ARROW_C_STREAM_INTERFACE
#endif // ARROW_FLAG_DICTIONARY_ORDERED
/// \brief Transfer the contents of a schema from src to dst
///
/// All bytes of src are copied into dst, after which src->release is set
/// to NULL. Any previous content of dst is overwritten.
static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst) {
  const size_t n_bytes = sizeof(*src);
  memcpy(dst, src, n_bytes);
  src->release = NULL;
}
/// \brief Transfer the contents of an array from src to dst
///
/// All bytes of src are copied into dst, after which src->release is set
/// to NULL. Any previous content of dst is overwritten.
static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst) {
  const size_t n_bytes = sizeof(*src);
  memcpy(dst, src, n_bytes);
  src->release = NULL;
}
/// \brief Transfer the contents of an array stream from src to dst
///
/// All bytes of src are copied into dst, after which src->release is set
/// to NULL. Any previous content of dst is overwritten.
static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src,
                                        struct ArrowArrayStream* dst) {
  const size_t n_bytes = sizeof(*src);
  memcpy(dst, src, n_bytes);
  src->release = NULL;
}
/// @}
// Utility macros
// Token-paste x and y exactly as written (arguments are not macro-expanded).
#define _NANOARROW_CONCAT(x, y) x##y
// Token-paste x and y after macro-expanding them (e.g., so that __COUNTER__
// is replaced by its value before pasting).
#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y)
// Evaluate EXPR once into a local int named NAME and, if it is non-zero,
// return it from the enclosing function.
#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \
do { \
const int NAME = (EXPR); \
if (NAME) return NAME; \
} while (0)
// Return EINVAL from the enclosing function unless min_ <= x_ <= max_.
//
// Every argument is parenthesized so that compound expressions (e.g., a
// conditional expression passed as max_) keep their intended meaning after
// expansion. Note that x_ is evaluated twice.
#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \
  NANOARROW_RETURN_NOT_OK(((x_) >= (min_) && (x_) <= (max_)) ? NANOARROW_OK : EINVAL)
// Return EINVAL from the enclosing function unless x_ <= max_.
#define _NANOARROW_CHECK_UPPER_LIMIT(x_, max_) \
  NANOARROW_RETURN_NOT_OK(((x_) <= (max_)) ? NANOARROW_OK : EINVAL)
// Like _NANOARROW_RETURN_NOT_OK_IMPL, but before returning a non-zero status
// this also calls ArrowErrorSet() on ERROR_PTR_EXPR with a message built from
// EXPR_STR and the status. When NANOARROW_DEBUG is defined, the message
// additionally contains __FILE__ and __LINE__.
#if defined(NANOARROW_DEBUG)
#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \
do { \
const int NAME = (EXPR); \
if (NAME) { \
ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d\n* %s:%d", EXPR_STR, \
NAME, __FILE__, __LINE__); \
return NAME; \
} \
} while (0)
#else
#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \
do { \
const int NAME = (EXPR); \
if (NAME) { \
ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d", EXPR_STR, NAME); \
return NAME; \
} \
} while (0)
#endif
/// \brief Return code for success.
/// \ingroup nanoarrow-errors
#define NANOARROW_OK 0
/// \brief Represents an errno-compatible error code
/// \ingroup nanoarrow-errors
typedef int ArrowErrorCode;
/// \brief Check the result of an expression and return it if not NANOARROW_OK
/// \ingroup nanoarrow-errors
///
/// EXPR is evaluated exactly once and its value is stored in a local int whose
/// name is generated with __COUNTER__. Because the expansion contains a
/// return statement, this can only be used inside a function whose return
/// type can hold an int.
#define NANOARROW_RETURN_NOT_OK(EXPR) \
_NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR)
/// \brief Check the result of an expression and return it if not NANOARROW_OK,
/// adding an auto-generated message to an ArrowError.
/// \ingroup nanoarrow-errors
///
/// This macro is used to ensure that functions that accept an ArrowError
/// as input always set its message when returning an error code (e.g., when calling
/// a nanoarrow function that does *not* accept ArrowError).
///
/// The auto-generated message contains the stringified EXPR and the error code.
#define NANOARROW_RETURN_NOT_OK_WITH_ERROR(EXPR, ERROR_EXPR) \
_NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL( \
_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, ERROR_EXPR, #EXPR)
// Default fatal-error handler: prints EXPR_STR, VALUE, and the source location
// of the macro use to stderr and then calls abort(). It is only defined when
// NANOARROW_DEBUG is defined and no NANOARROW_PRINT_AND_DIE was defined before
// this point.
#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE)
#define NANOARROW_PRINT_AND_DIE(VALUE, EXPR_STR) \
do { \
fprintf(stderr, "%s failed with errno %d\n* %s:%d\n", EXPR_STR, (int)(VALUE), \
__FILE__, (int)__LINE__); \
abort(); \
} while (0)
#endif
#if defined(NANOARROW_DEBUG)
// Debug-mode implementation: evaluate EXPR once into a local int named NAME
// and pass a non-zero result to NANOARROW_PRINT_AND_DIE.
#define _NANOARROW_ASSERT_OK_IMPL(NAME, EXPR, EXPR_STR) \
  do {                                                  \
    const int NAME = (EXPR);                            \
    if (NAME) NANOARROW_PRINT_AND_DIE(NAME, EXPR_STR);  \
  } while (0)

/// \brief Assert that an expression's value is NANOARROW_OK
/// \ingroup nanoarrow-errors
///
/// If nanoarrow was built in debug mode (i.e., defined(NANOARROW_DEBUG) is true)
/// and the expression's value is not NANOARROW_OK, print a message to stderr
/// and abort. If nanoarrow was built in release mode, the expression is still
/// evaluated but its result is discarded. You can customize fatal error
/// behaviour by defining the NANOARROW_PRINT_AND_DIE macro before including
/// nanoarrow.h. This macro is provided as a convenience for users and is not
/// used internally.
#define NANOARROW_ASSERT_OK(EXPR) \
  _NANOARROW_ASSERT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, #EXPR)
#else
// Release mode: evaluate EXPR for its side effects and explicitly discard the
// result. The parentheses keep compound expressions intact and the cast to
// void makes the discard intentional.
#define NANOARROW_ASSERT_OK(EXPR) (void)(EXPR)
#endif
// Returns 1 if the first byte in memory of a multi-byte integer is its least
// significant byte (i.e., the host is little endian) and 0 if that byte is
// zero.
//
// Declared static inline like the other helpers defined in this header.
static inline char _ArrowIsLittleEndian(void) {
  uint32_t check = 1;
  char first_byte;
  // Inspect only the lowest-addressed byte of the 32-bit value 1
  memcpy(&first_byte, &check, sizeof(char));
  return first_byte;
}
/// \brief Arrow type enumerator
/// \ingroup nanoarrow-utils
///
/// These names are intended to map to the corresponding arrow::Type::type
/// enumerator; however, the numeric values are specifically not equal
/// (i.e., do not rely on numeric comparison).
///
/// NANOARROW_TYPE_UNINITIALIZED is explicitly 0, so zero-filled storage reads
/// as uninitialized. The enumerators after NANOARROW_TYPE_NA have no explicit
/// value and therefore take consecutive values in declaration order.
enum ArrowType {
NANOARROW_TYPE_UNINITIALIZED = 0,
NANOARROW_TYPE_NA = 1,
NANOARROW_TYPE_BOOL,
NANOARROW_TYPE_UINT8,
NANOARROW_TYPE_INT8,
NANOARROW_TYPE_UINT16,
NANOARROW_TYPE_INT16,
NANOARROW_TYPE_UINT32,
NANOARROW_TYPE_INT32,
NANOARROW_TYPE_UINT64,
NANOARROW_TYPE_INT64,
NANOARROW_TYPE_HALF_FLOAT,
NANOARROW_TYPE_FLOAT,
NANOARROW_TYPE_DOUBLE,
NANOARROW_TYPE_STRING,
NANOARROW_TYPE_BINARY,
NANOARROW_TYPE_FIXED_SIZE_BINARY,
NANOARROW_TYPE_DATE32,
NANOARROW_TYPE_DATE64,
NANOARROW_TYPE_TIMESTAMP,
NANOARROW_TYPE_TIME32,
NANOARROW_TYPE_TIME64,
NANOARROW_TYPE_INTERVAL_MONTHS,
NANOARROW_TYPE_INTERVAL_DAY_TIME,
NANOARROW_TYPE_DECIMAL128,
NANOARROW_TYPE_DECIMAL256,
NANOARROW_TYPE_LIST,
NANOARROW_TYPE_STRUCT,
NANOARROW_TYPE_SPARSE_UNION,
NANOARROW_TYPE_DENSE_UNION,
NANOARROW_TYPE_DICTIONARY,
NANOARROW_TYPE_MAP,
NANOARROW_TYPE_EXTENSION,
NANOARROW_TYPE_FIXED_SIZE_LIST,
NANOARROW_TYPE_DURATION,
NANOARROW_TYPE_LARGE_STRING,
NANOARROW_TYPE_LARGE_BINARY,
NANOARROW_TYPE_LARGE_LIST,
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
};
/// \brief Look up the lowercase name of an enum ArrowType value
/// \ingroup nanoarrow-utils
///
/// The result is NULL when type is not one of the enumerators handled below
/// (this includes NANOARROW_TYPE_UNINITIALIZED).
static inline const char* ArrowTypeString(enum ArrowType type);

static inline const char* ArrowTypeString(enum ArrowType type) {
  // Stays NULL unless one of the cases below matches
  const char* label = NULL;

  switch (type) {
    case NANOARROW_TYPE_NA: label = "na"; break;
    case NANOARROW_TYPE_BOOL: label = "bool"; break;
    case NANOARROW_TYPE_UINT8: label = "uint8"; break;
    case NANOARROW_TYPE_INT8: label = "int8"; break;
    case NANOARROW_TYPE_UINT16: label = "uint16"; break;
    case NANOARROW_TYPE_INT16: label = "int16"; break;
    case NANOARROW_TYPE_UINT32: label = "uint32"; break;
    case NANOARROW_TYPE_INT32: label = "int32"; break;
    case NANOARROW_TYPE_UINT64: label = "uint64"; break;
    case NANOARROW_TYPE_INT64: label = "int64"; break;
    case NANOARROW_TYPE_HALF_FLOAT: label = "half_float"; break;
    case NANOARROW_TYPE_FLOAT: label = "float"; break;
    case NANOARROW_TYPE_DOUBLE: label = "double"; break;
    case NANOARROW_TYPE_STRING: label = "string"; break;
    case NANOARROW_TYPE_BINARY: label = "binary"; break;
    case NANOARROW_TYPE_FIXED_SIZE_BINARY: label = "fixed_size_binary"; break;
    case NANOARROW_TYPE_DATE32: label = "date32"; break;
    case NANOARROW_TYPE_DATE64: label = "date64"; break;
    case NANOARROW_TYPE_TIMESTAMP: label = "timestamp"; break;
    case NANOARROW_TYPE_TIME32: label = "time32"; break;
    case NANOARROW_TYPE_TIME64: label = "time64"; break;
    case NANOARROW_TYPE_INTERVAL_MONTHS: label = "interval_months"; break;
    case NANOARROW_TYPE_INTERVAL_DAY_TIME: label = "interval_day_time"; break;
    case NANOARROW_TYPE_DECIMAL128: label = "decimal128"; break;
    case NANOARROW_TYPE_DECIMAL256: label = "decimal256"; break;
    case NANOARROW_TYPE_LIST: label = "list"; break;
    case NANOARROW_TYPE_STRUCT: label = "struct"; break;
    case NANOARROW_TYPE_SPARSE_UNION: label = "sparse_union"; break;
    case NANOARROW_TYPE_DENSE_UNION: label = "dense_union"; break;
    case NANOARROW_TYPE_DICTIONARY: label = "dictionary"; break;
    case NANOARROW_TYPE_MAP: label = "map"; break;
    case NANOARROW_TYPE_EXTENSION: label = "extension"; break;
    case NANOARROW_TYPE_FIXED_SIZE_LIST: label = "fixed_size_list"; break;
    case NANOARROW_TYPE_DURATION: label = "duration"; break;
    case NANOARROW_TYPE_LARGE_STRING: label = "large_string"; break;
    case NANOARROW_TYPE_LARGE_BINARY: label = "large_binary"; break;
    case NANOARROW_TYPE_LARGE_LIST: label = "large_list"; break;
    case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: label = "interval_month_day_nano"; break;
    default: break;
  }

  return label;
}
/// \brief Arrow time unit enumerator
/// \ingroup nanoarrow-utils
///
/// These names and values map to the corresponding arrow::TimeUnit::type
/// enumerator. ArrowTimeUnitString() gives the abbreviation for each value.
enum ArrowTimeUnit {
/// \brief Seconds ("s")
NANOARROW_TIME_UNIT_SECOND = 0,
/// \brief Milliseconds ("ms")
NANOARROW_TIME_UNIT_MILLI = 1,
/// \brief Microseconds ("us")
NANOARROW_TIME_UNIT_MICRO = 2,
/// \brief Nanoseconds ("ns")
NANOARROW_TIME_UNIT_NANO = 3
};
/// \brief Validation level enumerator
/// \ingroup nanoarrow-array
///
/// Levels are listed from least to most thorough; each has an explicit value
/// from 0 to 3.
enum ArrowValidationLevel {
/// \brief Do not validate buffer sizes or content.
NANOARROW_VALIDATION_LEVEL_NONE = 0,
/// \brief Validate buffer sizes that depend on array length but do not validate buffer
/// sizes that depend on buffer data access.
NANOARROW_VALIDATION_LEVEL_MINIMAL = 1,
/// \brief Validate all buffer sizes, including those that require buffer data access,
/// but do not perform any checks that are O(n) along the length of the buffers
/// (i.e., buffer content is not validated at this level).
NANOARROW_VALIDATION_LEVEL_DEFAULT = 2,
/// \brief Validate all buffer sizes and all buffer content. This is useful in the
/// context of untrusted input or input that may have been corrupted in transit.
NANOARROW_VALIDATION_LEVEL_FULL = 3
};
/// \brief Look up the abbreviation of an enum ArrowTimeUnit value
/// \ingroup nanoarrow-utils
///
/// The result is "s", "ms", "us", or "ns", or NULL when time_unit is not one
/// of the four enumerators.
static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit);

static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
  // Stays NULL unless one of the cases below matches
  const char* abbreviation = NULL;

  switch (time_unit) {
    case NANOARROW_TIME_UNIT_SECOND: abbreviation = "s"; break;
    case NANOARROW_TIME_UNIT_MILLI: abbreviation = "ms"; break;
    case NANOARROW_TIME_UNIT_MICRO: abbreviation = "us"; break;
    case NANOARROW_TIME_UNIT_NANO: abbreviation = "ns"; break;
    default: break;
  }

  return abbreviation;
}
/// \brief Functional types of buffers as described in the Arrow Columnar Specification
/// \ingroup nanoarrow-array-view
///
/// NANOARROW_BUFFER_TYPE_NONE is the first enumerator and therefore has the
/// value 0, so a zero-filled ArrowLayout reports it for every buffer.
enum ArrowBufferType {
NANOARROW_BUFFER_TYPE_NONE,
NANOARROW_BUFFER_TYPE_VALIDITY,
NANOARROW_BUFFER_TYPE_TYPE_ID,
NANOARROW_BUFFER_TYPE_UNION_OFFSET,
NANOARROW_BUFFER_TYPE_DATA_OFFSET,
NANOARROW_BUFFER_TYPE_DATA
};
/// \brief A non-owning view of a string
/// \ingroup nanoarrow-utils
///
/// The length is carried explicitly in size_bytes rather than implied by a
/// null terminator.
struct ArrowStringView {
/// \brief A pointer to the start of the string
///
/// If size_bytes is 0, this value may be NULL.
const char* data;
/// \brief The size of the string in bytes
///
/// (Not including the null terminator.)
int64_t size_bytes;
};
/// \brief Return a view of a const C string
/// \ingroup nanoarrow-utils
static inline struct ArrowStringView ArrowCharView(const char* value);
static inline struct ArrowStringView ArrowCharView(const char* value) {
struct ArrowStringView out;
out.data = value;
if (value) {
out.size_bytes = (int64_t)strlen(value);
} else {
out.size_bytes = 0;
}
return out;
}
// The start of a buffer exposed as a const pointer to each of several element
// types. All members are pointers sharing the same storage, so a caller picks
// the member matching the element type it wants to read.
union ArrowBufferViewData {
const void* data;
const int8_t* as_int8;
const uint8_t* as_uint8;
const int16_t* as_int16;
const uint16_t* as_uint16;
const int32_t* as_int32;
const uint32_t* as_uint32;
const int64_t* as_int64;
const uint64_t* as_uint64;
const double* as_double;
const float* as_float;
const char* as_char;
};
/// \brief A non-owning view of a buffer
/// \ingroup nanoarrow-utils
struct ArrowBufferView {
/// \brief A pointer to the start of the buffer
///
/// If size_bytes is 0, this value may be NULL. The pointer is available
/// through any of the typed members of union ArrowBufferViewData.
union ArrowBufferViewData data;
/// \brief The size of the buffer in bytes
int64_t size_bytes;
};
/// \brief Array buffer allocation and deallocation
/// \ingroup nanoarrow-buffer
///
/// Container for reallocate and free methods that can be used
/// to customize allocation and deallocation of buffers when constructing
/// an ArrowArray. There is no separate allocate method in this struct.
struct ArrowBufferAllocator {
/// \brief Reallocate a buffer or return NULL if it cannot be reallocated
///
/// The allocator itself is passed as the first argument, followed by the
/// existing pointer and the old and new sizes.
uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
int64_t old_size, int64_t new_size);
/// \brief Deallocate a buffer allocated by this allocator
void (*free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size);
/// \brief Opaque data specific to the allocator
void* private_data;
};
/// \brief An owning mutable view of a buffer
/// \ingroup nanoarrow-buffer
///
/// The allocator is stored by value inside the buffer.
struct ArrowBuffer {
/// \brief A pointer to the start of the buffer
///
/// If capacity_bytes is 0, this value may be NULL.
uint8_t* data;
/// \brief The size of the buffer in bytes
int64_t size_bytes;
/// \brief The capacity of the buffer in bytes
int64_t capacity_bytes;
/// \brief The allocator that will be used to reallocate and/or free the buffer
struct ArrowBufferAllocator allocator;
};
/// \brief An owning mutable view of a bitmap
/// \ingroup nanoarrow-bitmap
struct ArrowBitmap {
/// \brief An ArrowBuffer to hold the allocated memory
struct ArrowBuffer buffer;
/// \brief The number of bits that have been appended to the bitmap
///
/// This counts bits, whereas buffer.size_bytes counts bytes.
int64_t size_bits;
};
/// \brief A description of an arrangement of buffers
/// \ingroup nanoarrow-utils
///
/// Contains the minimum amount of information required to
/// calculate the size of each buffer in an ArrowArray knowing only
/// the length and offset of the array. Each array member below has three
/// entries, one per buffer.
struct ArrowLayout {
/// \brief The function of each buffer
enum ArrowBufferType buffer_type[3];
/// \brief The data type of each buffer
enum ArrowType buffer_data_type[3];
/// \brief The size of an element in each buffer, in bits, or 0 if this size is
/// variable or unknown
int64_t element_size_bits[3];
/// \brief The number of elements in the child array per element in this array for a
/// fixed-size list
int64_t child_size_elements;
};
/// \brief A non-owning view of an ArrowArray
/// \ingroup nanoarrow-array-view
///
/// This data structure provides access to the values contained within
/// an ArrowArray with fields provided in a more readily-extractable
/// form. You can re-use an ArrowArrayView for multiple ArrowArrays
/// with the same storage type, use it to represent a hypothetical
/// ArrowArray that does not exist yet, or use it to validate the buffers
/// of a future ArrowArray.
struct ArrowArrayView {
/// \brief The underlying ArrowArray or NULL if it has not been set or
/// if the buffers in this ArrowArrayView are not backed by an ArrowArray.
struct ArrowArray* array;
/// \brief The number of elements from the physical start of the buffers.
int64_t offset;
/// \brief The number of elements in this view.
int64_t length;
/// \brief A cached null count or -1 to indicate that this value is unknown.
int64_t null_count;
/// \brief The type used to store values in this array
///
/// This type represents only the minimum required information to
/// extract values from the array buffers (e.g., for a Date32 array,
/// this value will be NANOARROW_TYPE_INT32). For dictionary-encoded
/// arrays, this will be the index type.
enum ArrowType storage_type;
/// \brief The buffer types, strides, and sizes of this Array's buffers
struct ArrowLayout layout;
/// \brief This Array's buffers as ArrowBufferView objects
struct ArrowBufferView buffer_views[3];
/// \brief The number of children of this view
int64_t n_children;
/// \brief Pointers to views of this array's children
struct ArrowArrayView** children;
/// \brief Pointer to a view of this array's dictionary
struct ArrowArrayView* dictionary;
/// \brief Union type id to child index mapping
///
/// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer
/// such that child_index == union_type_id_map[type_id] and
/// type_id == union_type_id_map[128 + child_index]. This value may be
/// NULL in the case where child_index == type_id.
int8_t* union_type_id_map;
};
// Used as the private data member for ArrowArrays allocated here and accessed
// internally within inline ArrowArray* helpers.
struct ArrowArrayPrivateData {
// Holder for the validity buffer (or first buffer for union types, which are
// the only type whose first buffer is not a validity buffer)
struct ArrowBitmap bitmap;
// Holder for additional buffers as required
struct ArrowBuffer buffers[2];
// The array of pointers to buffers. This must be updated after a sequence
// of appends to synchronize its values with the actual buffer addresses
// (which may have been reallocated during that time)
const void* buffer_data[3];
// The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
enum ArrowType storage_type;
// The buffer arrangement for the storage type
struct ArrowLayout layout;
// Flag to indicate if there are non-sequence union type ids.
// In the future this could be replaced with a type id<->child mapping
// to support constructing unions in append mode where type_id != child_index
// NOTE(review): the member name suggests that a non-zero value means
// type_id == child_index, which reads as the inverse of the first sentence
// above - confirm against the code that sets and reads this flag.
int8_t union_type_id_is_child_index;
};
/// \brief A representation of an interval.
/// \ingroup nanoarrow-utils
///
/// Use ArrowIntervalInit() to zero every member and set the type.
struct ArrowInterval {
/// \brief The type of interval being used
enum ArrowType type;
/// \brief The number of months represented by the interval
int32_t months;
/// \brief The number of days represented by the interval
int32_t days;
/// \brief The number of milliseconds (ms) represented by the interval
int32_t ms;
/// \brief The number of nanoseconds (ns) represented by the interval
int64_t ns;
};
/// \brief Reset an interval to all-zero and assign its interval type
/// \ingroup nanoarrow-utils
///
/// Every byte of the struct is zeroed first, so months, days, ms, and ns are
/// all 0 afterwards; the type member is then set to type.
static inline void ArrowIntervalInit(struct ArrowInterval* interval,
                                     enum ArrowType type) {
  const size_t n_bytes = sizeof(*interval);
  memset(interval, 0, n_bytes);
  interval->type = type;
}
/// \brief A representation of a fixed-precision decimal number
/// \ingroup nanoarrow-utils
///
/// This structure should be initialized with ArrowDecimalInit() once and
/// values set using ArrowDecimalSetInt() or ArrowDecimalSetBytes().
struct ArrowDecimal {
/// \brief An array of 64-bit integers of n_words length defined in native-endian order
///
/// The array has room for four words; only the first n_words are in use.
uint64_t words[4];
/// \brief The number of significant digits this decimal number can represent
int32_t precision;
/// \brief The number of digits after the decimal point. This can be negative.
int32_t scale;
/// \brief The number of words in the words array
int n_words;
/// \brief Cached value used by the implementation
///
/// Index into words of the most significant word (set by ArrowDecimalInit()).
int high_word_index;
/// \brief Cached value used by the implementation
///
/// Index into words of the least significant word (set by ArrowDecimalInit()).
int low_word_index;
};
/// \brief Prepare a decimal for use with the given type parameters
/// \ingroup nanoarrow-utils
///
/// Zeroes all words, stores precision and scale, derives the number of
/// 64-bit words from bitwidth, and caches which word index is the least and
/// most significant one for the host byte order.
static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwidth,
                                    int32_t precision, int32_t scale) {
  memset(decimal->words, 0, sizeof(decimal->words));
  decimal->precision = precision;
  decimal->scale = scale;
  // bits -> bytes -> number of 64-bit words
  decimal->n_words = bitwidth / 8 / sizeof(uint64_t);

  const int last_word = decimal->n_words - 1;
  const int is_little_endian = _ArrowIsLittleEndian();
  decimal->low_word_index = is_little_endian ? 0 : last_word;
  decimal->high_word_index = is_little_endian ? last_word : 0;
}
/// \brief Read the least significant word of a decimal as a signed integer
///
/// No check is made that the decimal's precision is small enough for the
/// value to fit in the signed 64-bit integer range (a precision less than or
/// equal to 18 is sufficiently small); the other words are ignored.
static inline int64_t ArrowDecimalGetIntUnsafe(struct ArrowDecimal* decimal) {
  const uint64_t low_word = decimal->words[decimal->low_word_index];
  return (int64_t)low_word;
}
/// \brief Write the in-use words of this decimal to a caller-provided buffer
/// \ingroup nanoarrow-utils
///
/// Exactly n_words * sizeof(uint64_t) bytes are copied, so out must be at
/// least that large.
static inline void ArrowDecimalGetBytes(struct ArrowDecimal* decimal, uint8_t* out) {
  const size_t n_bytes = decimal->n_words * sizeof(uint64_t);
  memcpy(out, decimal->words, n_bytes);
}
/// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise
/// \ingroup nanoarrow-utils
///
/// The sign is taken from the top bit of the most significant word
/// (words[high_word_index]).
static inline int64_t ArrowDecimalSign(struct ArrowDecimal* decimal) {
  // Test the sign bit on the unsigned word: right-shifting a negative signed
  // integer has an implementation-defined result in C, whereas shifting an
  // unsigned value is fully defined.
  const uint64_t high_word = decimal->words[decimal->high_word_index];
  return (high_word >> 63) ? -1 : 1;
}
/// \brief Assign a 64-bit signed integer to this decimal
/// \ingroup nanoarrow-utils
///
/// All in-use words are first filled with the sign extension of value (0xff
/// bytes when value is negative, zero bytes otherwise); the least significant
/// word then receives value itself.
static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t value) {
  const int fill_byte = (value < 0) ? 0xff : 0;
  memset(decimal->words, fill_byte, decimal->n_words * sizeof(uint64_t));
  decimal->words[decimal->low_word_index] = value;
}
/// \brief Load the in-use words of this decimal from a buffer
/// \ingroup nanoarrow-utils
///
/// Exactly n_words * sizeof(uint64_t) bytes are read from value.
static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal,
                                        const uint8_t* value) {
  const size_t n_bytes = decimal->n_words * sizeof(uint64_t);
  memcpy(decimal->words, value, n_bytes);
}
#ifdef __cplusplus
}
#endif
#endif
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef NANOARROW_H_INCLUDED
#define NANOARROW_H_INCLUDED
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
// If using CMake, optionally pass -DNANOARROW_NAMESPACE=MyNamespace which will set this
// define in nanoarrow_config.h. If not, you can optionally #define NANOARROW_NAMESPACE
// MyNamespace here.
// This section remaps the non-prefixed symbols to the prefixed symbols so that
// code written against this build can be used independent of the value of
// NANOARROW_NAMESPACE.
#ifdef NANOARROW_NAMESPACE
// NANOARROW_SYMBOL(A, B) pastes A and B after macro expansion, so that the
// value of NANOARROW_NAMESPACE (rather than its name) becomes the prefix.
#define NANOARROW_CAT(A, B) A##B
#define NANOARROW_SYMBOL(A, B) NANOARROW_CAT(A, B)

// Each library symbol is remapped exactly once.
#define ArrowNanoarrowVersion NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersion)
#define ArrowNanoarrowVersionInt \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersionInt)
#define ArrowErrorMessage NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorMessage)
#define ArrowMalloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMalloc)
#define ArrowRealloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowRealloc)
#define ArrowFree NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowFree)
#define ArrowBufferAllocatorDefault \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferAllocatorDefault)
#define ArrowBufferDeallocator \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferDeallocator)
#define ArrowErrorSet NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorSet)
#define ArrowLayoutInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowLayoutInit)
#define ArrowSchemaInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInit)
#define ArrowSchemaInitFromType \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInitFromType)
#define ArrowSchemaSetType NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetType)
#define ArrowSchemaSetTypeStruct \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeStruct)
#define ArrowSchemaSetTypeFixedSize \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize)
#define ArrowSchemaSetTypeDecimal \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal)
#define ArrowSchemaSetTypeDateTime \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime)
#define ArrowSchemaSetTypeUnion \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeUnion)
#define ArrowSchemaDeepCopy NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaDeepCopy)
#define ArrowSchemaSetFormat NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetFormat)
#define ArrowSchemaSetName NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetName)
#define ArrowSchemaSetMetadata \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetMetadata)
#define ArrowSchemaAllocateChildren \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateChildren)
#define ArrowSchemaAllocateDictionary \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateDictionary)
#define ArrowMetadataReaderInit \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderInit)
#define ArrowMetadataReaderRead \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderRead)
#define ArrowMetadataSizeOf NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataSizeOf)
#define ArrowMetadataHasKey NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataHasKey)
#define ArrowMetadataGetValue NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataGetValue)
#define ArrowMetadataBuilderInit \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderInit)
#define ArrowMetadataBuilderAppend \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderAppend)
#define ArrowMetadataBuilderSet \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderSet)
#define ArrowMetadataBuilderRemove \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderRemove)
#define ArrowSchemaViewInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaViewInit)
#define ArrowSchemaToString NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaToString)
#define ArrowArrayInitFromType \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromType)
#define ArrowArrayInitFromSchema \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromSchema)
#define ArrowArrayInitFromArrayView \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView)
#define ArrowArrayAllocateChildren \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateChildren)
#define ArrowArrayAllocateDictionary \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateDictionary)
#define ArrowArraySetValidityBitmap \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetValidityBitmap)
#define ArrowArraySetBuffer NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetBuffer)
#define ArrowArrayReserve NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayReserve)
#define ArrowArrayFinishBuilding \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuilding)
#define ArrowArrayFinishBuildingDefault \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuildingDefault)
#define ArrowArrayViewInitFromType \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromType)
#define ArrowArrayViewInitFromSchema \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema)
#define ArrowArrayViewAllocateChildren \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren)
#define ArrowArrayViewAllocateDictionary \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateDictionary)
#define ArrowArrayViewSetLength \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength)
#define ArrowArrayViewSetArray \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray)
#define ArrowArrayViewSetArrayMinimal \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArrayMinimal)
#define ArrowArrayViewValidate \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidate)
#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewReset)
#define ArrowBasicArrayStreamInit \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit)
#define ArrowBasicArrayStreamSetArray \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamSetArray)
#define ArrowBasicArrayStreamValidate \
  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamValidate)
#endif
#ifdef __cplusplus
extern "C" {
#endif
/// \defgroup nanoarrow Nanoarrow C library
///
/// Except where noted, objects are not thread-safe and clients should
/// take care to serialize accesses to methods.
///
/// Because this library is intended to be vendored, it provides full type
/// definitions and encourages clients to stack or statically allocate
/// where convenient.
/// \defgroup nanoarrow-malloc Memory management
///
/// Non-buffer members of a struct ArrowSchema and struct ArrowArray
/// must be allocated using ArrowMalloc() or ArrowRealloc() and freed
/// using ArrowFree() for schemas and arrays allocated here. Buffer members
/// are allocated using an ArrowBufferAllocator.
///
/// @{
/// \brief Allocate like malloc()
void* ArrowMalloc(int64_t size);
/// \brief Reallocate like realloc()
void* ArrowRealloc(void* ptr, int64_t size);
/// \brief Free a pointer allocated using ArrowMalloc() or ArrowRealloc().
void ArrowFree(void* ptr);
/// \brief Return the default allocator
///
/// The default allocator uses ArrowMalloc(), ArrowRealloc(), and
/// ArrowFree().
struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void);
/// \brief Create a custom deallocator
///
/// Creates a buffer allocator with only a free method that can be used to
/// attach a custom deallocator to an ArrowBuffer. This may be used to
/// avoid copying an existing buffer that was not allocated using the
/// infrastructure provided here (e.g., by an R or Python object).
struct ArrowBufferAllocator ArrowBufferDeallocator(
void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
int64_t size),
void* private_data);
/// @}
/// \defgroup nanoarrow-errors Error handling
///
/// Functions generally return an errno-compatible error code; functions that
/// need to communicate more verbose error information accept a pointer
/// to an ArrowError. This can be stack or statically allocated. The
/// content of the message is undefined unless an error code has been
/// returned. If a nanoarrow function is passed a non-null ArrowError pointer, the
/// ArrowError pointed to by the argument will be populated with a
/// null-terminated error message. It is safe to pass a NULL ArrowError anywhere
/// in the nanoarrow API.
///
/// Except where documented, it is generally not safe to continue after a
/// function has returned a non-zero ArrowErrorCode. The NANOARROW_RETURN_NOT_OK and
/// NANOARROW_ASSERT_OK macros are provided to help propagate errors. C++ clients can use
/// the helpers provided in the nanoarrow.hpp header to facilitate using C++ idioms
/// for memory management and error propagation.
///
/// @{
/// \brief Error type containing a UTF-8 encoded message.
///
/// The message is stored inline in a fixed-size array, so the struct itself
/// holds no pointers and can be stack or statically allocated.
struct ArrowError {
/// \brief A character buffer with space for an error message.
///
/// The buffer is 1024 bytes, so a null-terminated message can hold at most
/// 1023 bytes of text.
char message[1024];
};
/// \brief Reset an ArrowError so that it holds the empty string.
///
/// Writes a null terminator to the first byte of the message buffer; the
/// remaining bytes are left untouched. A NULL error is accepted and ignored.
static inline void ArrowErrorInit(struct ArrowError* error) {
  if (!error) {
    return;
  }

  error->message[0] = '\0';
}
/// \brief Set the contents of an error using printf syntax.
///
/// If error is NULL, this function does nothing and returns NANOARROW_OK.
ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...);
/// \brief Get the contents of an error
///
/// If error is NULL, returns "", or returns the contents of the error message
/// otherwise.
const char* ArrowErrorMessage(struct ArrowError* error);
/// @}
/// \defgroup nanoarrow-utils Utility data structures
///
/// @{
/// \brief Return a version string in the form "major.minor.patch"
const char* ArrowNanoarrowVersion(void);
/// \brief Return an integer that can be used to compare versions sequentially
int ArrowNanoarrowVersionInt(void);
/// \brief Initialize a description of buffer arrangements from a storage type
void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type);
/// \brief Create a string view from a null-terminated string
static inline struct ArrowStringView ArrowCharView(const char* value);
/// @}
/// \defgroup nanoarrow-schema Creating schemas
///
/// These functions allocate, copy, and destroy ArrowSchema structures
///
/// @{
/// \brief Initialize an ArrowSchema
///
/// Initializes the fields and release callback of schema. Caller
/// is responsible for calling the schema->release callback.
void ArrowSchemaInit(struct ArrowSchema* schema);
/// \brief Initialize an ArrowSchema from an ArrowType
///
/// A convenience constructor that calls ArrowSchemaInit() and
/// ArrowSchemaSetType() for the common case of constructing an
/// unparameterized type. The caller is responsible for calling the schema->release
/// callback if NANOARROW_OK is returned.
ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type);
/// \brief Get a human-readable summary of a Schema
///
/// Writes a summary of an ArrowSchema to out (up to n - 1 characters)
/// and returns the number of characters required for the output if
/// n were sufficiently large. If recursive is non-zero, the result will
/// also include children.
int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n,
char recursive);
/// \brief Set the format field of a schema from an ArrowType
///
/// Initializes the fields and release callback of schema_out. For
/// NANOARROW_TYPE_LIST, NANOARROW_TYPE_LARGE_LIST, and
/// NANOARROW_TYPE_MAP, the appropriate number of children are
/// allocated, initialized, and named; however, the caller must call
/// ArrowSchemaSetType() on the preinitialized children. Schema must have been initialized
/// using ArrowSchemaInit() or ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type);
/// \brief Set the format field and initialize children of a struct schema
///
/// The specified number of children are initialized; however, the caller is responsible
/// for calling ArrowSchemaSetType() and ArrowSchemaSetName() on each child.
/// Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children);
/// \brief Set the format field of a fixed-size schema
///
/// Returns EINVAL for fixed_size <= 0 or for type that is not
/// NANOARROW_TYPE_FIXED_SIZE_BINARY or NANOARROW_TYPE_FIXED_SIZE_LIST.
/// For NANOARROW_TYPE_FIXED_SIZE_LIST, the appropriate number of children are
/// allocated, initialized, and named; however, the caller must call
/// ArrowSchemaSetType() on the first child. Schema must have been initialized using
/// ArrowSchemaInit() or ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema,
enum ArrowType type, int32_t fixed_size);
/// \brief Set the format field of a decimal schema
///
/// Returns EINVAL for scale <= 0 or for type that is not
/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have been
/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type,
int32_t decimal_precision,
int32_t decimal_scale);
/// \brief Set the format field of a time, timestamp, or duration schema
///
/// Returns EINVAL for type that is not
/// NANOARROW_TYPE_TIME32, NANOARROW_TYPE_TIME64,
/// NANOARROW_TYPE_TIMESTAMP, or NANOARROW_TYPE_DURATION. The
/// timezone parameter must be NULL for a non-timestamp type. Schema must have been
/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type,
enum ArrowTimeUnit time_unit,
const char* timezone);
/// \brief Set the format field of a union schema
///
/// Returns EINVAL for a type that is not NANOARROW_TYPE_DENSE_UNION
/// or NANOARROW_TYPE_SPARSE_UNION. The specified number of children are
/// allocated, and initialized.
ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type,
int64_t n_children);
/// \brief Make a (recursive) copy of a schema
///
/// Allocates and copies fields of schema into schema_out.
ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema,
struct ArrowSchema* schema_out);
/// \brief Copy format into schema->format
///
/// schema must have been allocated using ArrowSchemaInitFromType() or
/// ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format);
/// \brief Copy name into schema->name
///
/// schema must have been allocated using ArrowSchemaInitFromType() or
/// ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name);
/// \brief Copy metadata into schema->metadata
///
/// schema must have been allocated using ArrowSchemaInitFromType() or
/// ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata);
/// \brief Allocate the schema->children array
///
/// Includes the memory for each child struct ArrowSchema.
/// schema must have been allocated using ArrowSchemaInitFromType() or
/// ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema,
int64_t n_children);
/// \brief Allocate the schema->dictionary member
///
/// schema must have been allocated using ArrowSchemaInitFromType() or
/// ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema);
/// @}
/// \defgroup nanoarrow-metadata Create, read, and modify schema metadata
///
/// @{
/// \brief Reader for key/value pairs in schema metadata
///
/// The ArrowMetadataReader does not own any data and is only valid
/// for the lifetime of the underlying metadata pointer. Set up a reader
/// with ArrowMetadataReaderInit() and retrieve pairs with
/// ArrowMetadataReaderRead().
struct ArrowMetadataReader {
/// \brief A metadata string from a schema->metadata field.
///
/// This pointer is borrowed: the reader does not own the data it points to.
const char* metadata;
/// \brief The current offset into the metadata string
///
/// NOTE(review): presumably a byte offset to the next unread key/value
/// pair; confirm against ArrowMetadataReaderRead().
int64_t offset;
/// \brief The number of remaining keys
int32_t remaining_keys;
};
/// \brief Initialize an ArrowMetadataReader
ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader,
const char* metadata);
/// \brief Read the next key/value pair from an ArrowMetadataReader
ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader,
struct ArrowStringView* key_out,
struct ArrowStringView* value_out);
/// \brief The number of bytes in a key/value metadata string
int64_t ArrowMetadataSizeOf(const char* metadata);
/// \brief Check for a key in schema metadata
char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key);
/// \brief Extract a value from schema metadata
///
/// If key does not exist in metadata, value_out is unmodified
ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key,
struct ArrowStringView* value_out);
/// \brief Initialize a builder for schema metadata from key/value pairs
///
/// metadata can be an existing metadata string or NULL to initialize
/// an empty metadata string.
ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, const char* metadata);
/// \brief Append a key/value pair to a buffer containing serialized metadata
ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer,
struct ArrowStringView key,
struct ArrowStringView value);
/// \brief Set a key/value pair to a buffer containing serialized metadata
///
/// Ensures that the only entry for key in the metadata is set to value.
/// This function maintains the existing position of (the first instance of)
/// key if present in the data.
ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer,
struct ArrowStringView key,
struct ArrowStringView value);
/// \brief Remove a key from a buffer containing serialized metadata
ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer,
struct ArrowStringView key);
/// @}
/// \defgroup nanoarrow-schema-view Reading schemas
///
/// @{
/// \brief A non-owning view of a parsed ArrowSchema
///
/// Contains more readily extractable values than a raw ArrowSchema.
/// Clients can stack or statically allocate this structure but are
/// encouraged to use the provided getters to ensure forward
/// compatibility. Use ArrowSchemaViewInit() to initialize a view from
/// an ArrowSchema.
struct ArrowSchemaView {
/// \brief A pointer to the schema represented by this view
///
/// The view does not own the schema.
struct ArrowSchema* schema;
/// \brief The data type represented by the schema
///
/// This value may be NANOARROW_TYPE_DICTIONARY if the schema has a
/// non-null dictionary member; datetime types are valid values.
/// This value will never be NANOARROW_TYPE_EXTENSION (see
/// extension_name and/or extension_metadata to check for
/// an extension type).
enum ArrowType type;
/// \brief The storage data type represented by the schema
///
/// This value will never be NANOARROW_TYPE_DICTIONARY, NANOARROW_TYPE_EXTENSION
/// or any datetime type. This value represents only the type required to
/// interpret the buffers in the array.
enum ArrowType storage_type;
/// \brief The storage layout represented by the schema
struct ArrowLayout layout;
/// \brief The extension type name if it exists
///
/// If the ARROW:extension:name key is present in schema.metadata,
/// extension_name.data will be non-NULL.
struct ArrowStringView extension_name;
/// \brief The extension type metadata if it exists
///
/// If the ARROW:extension:metadata key is present in schema.metadata,
/// extension_metadata.data will be non-NULL.
struct ArrowStringView extension_metadata;
/// \brief Format fixed size parameter
///
/// This value is set when parsing a fixed-size binary or fixed-size
/// list schema; this value is undefined for other types. For a
/// fixed-size binary schema this value is in bytes; for a fixed-size
/// list schema this value refers to the number of child elements for
/// each element of the parent.
int32_t fixed_size;
/// \brief Decimal bitwidth
///
/// This value is set when parsing a decimal type schema;
/// this value is undefined for other types.
int32_t decimal_bitwidth;
/// \brief Decimal precision
///
/// This value is set when parsing a decimal type schema;
/// this value is undefined for other types.
int32_t decimal_precision;
/// \brief Decimal scale
///
/// This value is set when parsing a decimal type schema;
/// this value is undefined for other types.
int32_t decimal_scale;
/// \brief Format time unit parameter
///
/// This value is set when parsing a date/time type. The value is
/// undefined for other types.
enum ArrowTimeUnit time_unit;
/// \brief Format timezone parameter
///
/// This value is set when parsing a timestamp type and represents
/// the timezone format parameter. This value points to
/// data within the schema and is undefined for other types.
const char* timezone;
/// \brief Union type ids parameter
///
/// This value is set when parsing a union type and represents
/// the type ids parameter. This value points to
/// data within the schema and is undefined for other types.
const char* union_type_ids;
};
/// \brief Initialize an ArrowSchemaView
ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
struct ArrowSchema* schema, struct ArrowError* error);
/// @}
/// \defgroup nanoarrow-buffer Owning, growable buffers
///
/// @{
/// \brief Initialize an ArrowBuffer
///
/// Initialize a buffer with a NULL, zero-size buffer using the default
/// buffer allocator.
static inline void ArrowBufferInit(struct ArrowBuffer* buffer);
/// \brief Set a newly-initialized buffer's allocator
///
/// Returns EINVAL if the buffer has already been allocated.
static inline ArrowErrorCode ArrowBufferSetAllocator(
struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator);
/// \brief Reset an ArrowBuffer
///
/// Releases the buffer using the allocator's free method if
/// the buffer's data member is non-null, sets the data member
/// to NULL, and sets the buffer's size and capacity to 0.
static inline void ArrowBufferReset(struct ArrowBuffer* buffer);
/// \brief Move an ArrowBuffer
///
/// Transfers the buffer data and lifecycle management to another
/// address and resets buffer.
static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst);
/// \brief Grow or shrink a buffer to a given capacity
///
/// When shrinking the capacity of the buffer, the buffer is only reallocated
/// if shrink_to_fit is non-zero. Calling ArrowBufferResize() does not
/// adjust the buffer's size member except to ensure that the invariant
/// capacity >= size remains true.
static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
int64_t new_capacity_bytes,
char shrink_to_fit);
/// \brief Ensure a buffer has at least a given additional capacity
///
/// Ensures that the buffer has space to append at least
/// additional_size_bytes, overallocating when required.
static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer,
int64_t additional_size_bytes);
/// \brief Write data to buffer and increment the buffer size
///
/// This function does not check that buffer has the required capacity
static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data,
int64_t size_bytes);
/// \brief Write data to buffer and increment the buffer size
///
/// This function writes and ensures that the buffer has the required capacity,
/// possibly by reallocating the buffer. Like ArrowBufferReserve, this will
/// overallocate when reallocation is required.
static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
const void* data, int64_t size_bytes);
/// \brief Write fill to buffer and increment the buffer size
///
/// This function writes the specified number of fill bytes and
/// ensures that the buffer has the required capacity.
static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer,
uint8_t value, int64_t size_bytes);
/// \brief Write an 8-bit integer to a buffer
static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer,
int8_t value);
/// \brief Write an unsigned 8-bit integer to a buffer
static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer,
uint8_t value);
/// \brief Write a 16-bit integer to a buffer
static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer,
int16_t value);
/// \brief Write an unsigned 16-bit integer to a buffer
static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer,
uint16_t value);
/// \brief Write a 32-bit integer to a buffer
static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer,
int32_t value);
/// \brief Write an unsigned 32-bit integer to a buffer
static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer,
uint32_t value);
/// \brief Write a 64-bit integer to a buffer
static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer,
int64_t value);
/// \brief Write an unsigned 64-bit integer to a buffer
static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer,
uint64_t value);
/// \brief Write a double to a buffer
static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer,
double value);
/// \brief Write a float to a buffer
static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer,
float value);
/// \brief Write an ArrowStringView to a buffer
static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer,
struct ArrowStringView value);
/// \brief Write an ArrowBufferView to a buffer
static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer,
struct ArrowBufferView value);
/// @}
/// \defgroup nanoarrow-bitmap Bitmap utilities
///
/// @{
/// \brief Extract a boolean value from a bitmap
static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i);
/// \brief Set a boolean value to a bitmap to true
static inline void ArrowBitSet(uint8_t* bits, int64_t i);
/// \brief Set a boolean value to a bitmap to false
static inline void ArrowBitClear(uint8_t* bits, int64_t i);
/// \brief Set a boolean value to a bitmap
static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t value);
/// \brief Set a boolean value to a range in a bitmap
static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length,
uint8_t bits_are_set);
/// \brief Count true values in a bitmap
static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to);
/// \brief Extract int8 boolean values from a range in a bitmap
static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset,
int64_t length, int8_t* out);
/// \brief Extract int32 boolean values from a range in a bitmap
static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset,
int64_t length, int32_t* out);
/// \brief Initialize an ArrowBitmap
///
/// Initialize the builder's buffer, empty its cache, and reset the size to zero
static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap);
/// \brief Move an ArrowBitmap
///
/// Transfers the underlying buffer data and lifecycle management to another
/// address and resets the bitmap.
static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst);
/// \brief Ensure a bitmap builder has at least a given additional capacity
///
/// Ensures that the buffer has space to append at least
/// additional_size_bits, overallocating when required.
static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
int64_t additional_size_bits);
/// \brief Grow or shrink a bitmap to a given capacity
///
/// When shrinking the capacity of the bitmap, the bitmap is only reallocated
/// if shrink_to_fit is non-zero. Calling ArrowBitmapResize() does not
/// adjust the buffer's size member except when shrinking new_capacity_bits
/// to a value less than the current number of bits in the bitmap.
static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
int64_t new_capacity_bits,
char shrink_to_fit);
/// \brief Reserve space for and append zero or more of the same boolean value to a bitmap
static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
uint8_t bits_are_set, int64_t length);
/// \brief Append zero or more of the same boolean value to a bitmap
static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
uint8_t bits_are_set, int64_t length);
/// \brief Append boolean values encoded as int8_t to a bitmap
///
/// The values must all be 0 or 1.
static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
const int8_t* values, int64_t n_values);
/// \brief Append boolean values encoded as int32_t to a bitmap
///
/// The values must all be 0 or 1.
static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap,
const int32_t* values, int64_t n_values);
/// \brief Reset a bitmap builder
///
/// Releases any memory held by buffer, empties the cache, and resets the size to zero
static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap);
/// @}
/// \defgroup nanoarrow-array Creating arrays
///
/// These functions allocate, copy, and destroy ArrowArray structures.
/// Once an ArrowArray has been initialized via ArrowArrayInitFromType()
/// or ArrowArrayInitFromSchema(), the caller is responsible for releasing
/// it using the embedded release callback.
///
/// @{
/// \brief Initialize the fields of an array
///
/// Initializes the fields and release callback of array. Caller
/// is responsible for calling the array->release callback if
/// NANOARROW_OK is returned.
ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array,
enum ArrowType storage_type);
/// \brief Initialize the contents of an ArrowArray from an ArrowSchema
///
/// Caller is responsible for calling the array->release callback if
/// NANOARROW_OK is returned.
ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
struct ArrowSchema* schema,
struct ArrowError* error);
/// \brief Initialize the contents of an ArrowArray from an ArrowArrayView
///
/// Caller is responsible for calling the array->release callback if
/// NANOARROW_OK is returned.
ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
struct ArrowArrayView* array_view,
struct ArrowError* error);
/// \brief Allocate the array->children array
///
/// Includes the memory for each child struct ArrowArray,
/// whose members are marked as released and may be subsequently initialized
/// with ArrowArrayInitFromType() or moved from an existing ArrowArray.
/// array must have been allocated using ArrowArrayInitFromType().
ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children);
/// \brief Allocate the array->dictionary member
///
/// Includes the memory for the struct ArrowArray, whose contents
/// is marked as released and may be subsequently initialized
/// with ArrowArrayInitFromType() or moved from an existing ArrowArray.
/// array must have been allocated using ArrowArrayInitFromType()
ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array);
/// \brief Set the validity bitmap of an ArrowArray
///
/// array must have been allocated using ArrowArrayInitFromType()
void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap);
/// \brief Set a buffer of an ArrowArray
///
/// array must have been allocated using ArrowArrayInitFromType()
ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i,
struct ArrowBuffer* buffer);
/// \brief Get the validity bitmap of an ArrowArray
///
/// array must have been allocated using ArrowArrayInitFromType()
static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array);
/// \brief Get a buffer of an ArrowArray
///
/// array must have been allocated using ArrowArrayInitFromType()
static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i);
/// \brief Start element-wise appending to an ArrowArray
///
/// Initializes any values needed to use ArrowArrayAppend*() functions.
/// All element-wise appenders append by value and return EINVAL if the exact value
/// cannot be represented by the underlying storage type.
/// array must have been allocated using ArrowArrayInitFromType()
static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array);
/// \brief Reserve space for future appends
///
/// For buffer sizes that can be calculated (i.e., not string data buffers or
/// child array sizes for non-fixed-size arrays), recursively reserve space for
/// additional elements. This is useful for reducing the number of reallocations
/// that occur using the item-wise appenders.
ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array,
int64_t additional_size_elements);
/// \brief Append a null value to an array
static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n);
/// \brief Append an empty, non-null value to an array
static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n);
/// \brief Append a signed integer value to an array
///
/// Returns NANOARROW_OK if value can be exactly represented by
/// the underlying storage type or EINVAL otherwise (e.g., value
/// is outside the valid array range).
static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, int64_t value);
/// \brief Append an unsigned integer value to an array
///
/// Returns NANOARROW_OK if value can be exactly represented by
/// the underlying storage type or EINVAL otherwise (e.g., value
/// is outside the valid array range).
static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array,
uint64_t value);
/// \brief Append a double value to an array
///
/// Returns NANOARROW_OK if value can be exactly represented by
/// the underlying storage type or EINVAL otherwise (e.g., value
/// is outside the valid array range or there is an attempt to append
/// a non-integer to an array with an integer storage type).
static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array,
double value);
/// \brief Append a string of bytes to an array
///
/// Returns NANOARROW_OK if value can be exactly represented by
/// the underlying storage type, EOVERFLOW if appending value would overflow
/// the offset type (e.g., if the data buffer would be larger than 2 GB for a
/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a
/// binary, string, large binary, large string, or fixed-size binary array, or value is
/// the wrong size for a fixed-size binary array).
static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
struct ArrowBufferView value);
/// \brief Append a string value to an array
///
/// Returns NANOARROW_OK if value can be exactly represented by
/// the underlying storage type, EOVERFLOW if appending value would overflow
/// the offset type (e.g., if the data buffer would be larger than 2 GB for a
/// non-large string type), or EINVAL otherwise (e.g., the underlying array is not a
/// string or large string array).
static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
struct ArrowStringView value);
/// \brief Append an interval value to an array
///
/// Returns NANOARROW_OK if value can be exactly represented by
/// the underlying storage type or EINVAL otherwise.
static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array,
struct ArrowInterval* value);
/// \brief Append a decimal value to an array
///
/// Returns NANOARROW_OK if array is a decimal array with the appropriate
/// bitwidth or EINVAL otherwise.
static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array,
struct ArrowDecimal* value);
/// \brief Finish a nested array element
///
/// Appends a non-null element to the array based on the first child's current
/// length. Returns NANOARROW_OK if the item was successfully added, EOVERFLOW
/// if the child of a list or map array would exceed INT_MAX elements, or EINVAL
/// if the underlying storage type is not a struct, list, large list, or fixed-size
/// list, or if there was an attempt to add a struct or fixed-size list element where the
/// length of the child array(s) did not match the expected length.
static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array);
/// \brief Finish a union array element
///
/// Appends an element to the union type ids buffer and increments array->length.
/// For sparse unions, up to one element is added to non type-id children. Returns
/// EINVAL if the underlying storage type is not a union, if type_id is not valid,
/// or if child sizes after appending are inconsistent.
static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array,
int8_t type_id);
/// \brief Shrink buffer capacity to the size required
///
/// Also applies shrinking to any child arrays. array must have been allocated using
/// ArrowArrayInitFromType().
static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array);
/// \brief Finish building an ArrowArray
///
/// Flushes any pointers from internal buffers that may have been reallocated
/// into array->buffers and checks the actual size of the buffers
/// against the expected size based on the final length.
/// array must have been allocated using ArrowArrayInitFromType()
ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array,
struct ArrowError* error);
/// \brief Finish building an ArrowArray with explicit validation
///
/// Finish building with an explicit validation level. This could perform less validation
/// (i.e. NANOARROW_VALIDATION_LEVEL_NONE or NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU
/// buffer data access is not possible or more validation (i.e.,
/// NANOARROW_VALIDATION_LEVEL_FULL) if buffer content was obtained from an untrusted or
/// corruptible source.
ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
enum ArrowValidationLevel validation_level,
struct ArrowError* error);
/// @}
/// \defgroup nanoarrow-array-view Reading arrays
///
/// These functions read and validate the contents of ArrowArray structures.
///
/// @{
/// \brief Initialize the contents of an ArrowArrayView
void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view,
enum ArrowType storage_type);
/// \brief Move an ArrowArrayView
///
/// Transfers the ArrowArrayView data and lifecycle management to another
/// address and resets the contents of src.
static inline void ArrowArrayViewMove(struct ArrowArrayView* src,
struct ArrowArrayView* dst);
/// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema
ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
struct ArrowSchema* schema,
struct ArrowError* error);
/// \brief Allocate the array_view->children array
///
/// Includes the memory for each child struct ArrowArrayView
ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
int64_t n_children);
/// \brief Allocate array_view->dictionary
ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view);
/// \brief Set data-independent buffer sizes from length
void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length);
/// \brief Set buffer sizes and data pointers from an ArrowArray
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
struct ArrowArray* array, struct ArrowError* error);
/// \brief Set buffer sizes and data pointers from an ArrowArray except for those
/// that require dereferencing buffer content.
ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view,
struct ArrowArray* array,
struct ArrowError* error);
/// \brief Performs checks on the content of an ArrowArrayView
///
/// If using ArrowArrayViewSetArray() to back array_view with an ArrowArray,
/// the buffer sizes and some content (first and last offset) have already
/// been validated at the "default" level. If setting the buffer pointers
/// and sizes otherwise, you may wish to perform checks at a different level. See
/// documentation for ArrowValidationLevel for the details of checks performed
/// at each level.
ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view,
enum ArrowValidationLevel validation_level,
struct ArrowError* error);
/// \brief Reset the contents of an ArrowArrayView and frees resources
void ArrowArrayViewReset(struct ArrowArrayView* array_view);
/// \brief Check for a null element in an ArrowArrayView
static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i);
/// \brief Get the type id of a union array element
static inline int8_t ArrowArrayViewUnionTypeId(struct ArrowArrayView* array_view,
int64_t i);
/// \brief Get the child index of a union array element
static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView* array_view,
int64_t i);
/// \brief Get the index to use into the relevant union child array
static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* array_view,
int64_t i);
/// \brief Get an element in an ArrowArrayView as an integer
///
/// This function does not check for null values, that values are actually integers, or
/// that values are within a valid range for an int64.
static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view,
int64_t i);
/// \brief Get an element in an ArrowArrayView as an unsigned integer
///
/// This function does not check for null values, that values are actually integers, or
/// that values are within a valid range for a uint64.
static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view,
int64_t i);
/// \brief Get an element in an ArrowArrayView as a double
///
/// This function does not check for null values, or
/// that values are within a valid range for a double.
static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view,
int64_t i);
/// \brief Get an element in an ArrowArrayView as an ArrowStringView
///
/// This function does not check for null values.
static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
struct ArrowArrayView* array_view, int64_t i);
/// \brief Get an element in an ArrowArrayView as an ArrowBufferView
///
/// This function does not check for null values.
static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
struct ArrowArrayView* array_view, int64_t i);
/// \brief Get an element in an ArrowArrayView as an ArrowDecimal
///
/// This function does not check for null values. The out parameter must
/// be initialized with ArrowDecimalInit() with the proper parameters for this
/// type before calling this for the first time.
static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView* array_view,
int64_t i, struct ArrowDecimal* out);
/// @}
/// \defgroup nanoarrow-basic-array-stream Basic ArrowArrayStream implementation
///
/// An implementation of an ArrowArrayStream based on a collection of
/// zero or more previously-existing ArrowArray objects. Users should
/// initialize and/or validate the contents before transferring the
/// responsibility of the ArrowArrayStream elsewhere.
///
/// @{
/// \brief Initialize an ArrowArrayStream backed by this implementation
///
/// This function moves the ownership of schema to the array_stream. If
/// this function returns NANOARROW_OK, the caller is responsible for
/// releasing the ArrowArrayStream.
ArrowErrorCode ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream,
struct ArrowSchema* schema, int64_t n_arrays);
/// \brief Set the ith ArrowArray in this ArrowArrayStream.
///
/// array_stream must have been initialized with ArrowBasicArrayStreamInit().
/// This function moves the ownership of array to the array_stream. i must
/// be greater than zero and less than the value of n_arrays passed in
/// ArrowBasicArrayStreamInit(). Callers are not required to fill all
/// n_arrays members (i.e., n_arrays is a maximum bound).
void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_t i,
struct ArrowArray* array);
/// \brief Validate the contents of this ArrowArrayStream
///
/// array_stream must have been initialized with ArrowBasicArrayStreamInit().
/// This function uses ArrowArrayViewInitFromSchema() and ArrowArrayViewSetArray()
/// to validate the contents of the arrays.
ArrowErrorCode ArrowBasicArrayStreamValidate(struct ArrowArrayStream* array_stream,
struct ArrowError* error);
/// @}
// Inline function definitions
#ifdef __cplusplus
}
#endif
#endif
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef NANOARROW_BUFFER_INLINE_H_INCLUDED
#define NANOARROW_BUFFER_INLINE_H_INCLUDED
#include <errno.h>
#include <stdint.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
// Choose the capacity to grow to: twice the current capacity, unless the
// requested capacity is at least that large, in which case the request wins.
static inline int64_t _ArrowGrowByFactor(int64_t current_capacity, int64_t new_capacity) {
  const int64_t twice_current = current_capacity * 2;
  return (twice_current > new_capacity) ? twice_current : new_capacity;
}
// Put buffer into the empty state: no allocation, zero size, zero capacity,
// and the default allocator installed.
static inline void ArrowBufferInit(struct ArrowBuffer* buffer) {
  buffer->allocator = ArrowBufferAllocatorDefault();
  buffer->capacity_bytes = 0;
  buffer->size_bytes = 0;
  buffer->data = NULL;
}
// Install allocator on a buffer that has no allocation yet.
//
// Returns EINVAL if buffer->data is already set (ArrowBufferReset() releases
// data through buffer->allocator, so it is not swapped while data is held);
// otherwise stores the allocator and returns NANOARROW_OK.
static inline ArrowErrorCode ArrowBufferSetAllocator(
    struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator) {
  if (buffer->data != NULL) {
    return EINVAL;
  }

  buffer->allocator = allocator;
  return NANOARROW_OK;
}
// Release any allocation held by buffer (through its own allocator) and
// return it to the zero-size, zero-capacity state. The allocator is kept.
static inline void ArrowBufferReset(struct ArrowBuffer* buffer) {
  const int has_allocation = buffer->data != NULL;
  if (has_allocation) {
    buffer->allocator.free(&buffer->allocator, (uint8_t*)buffer->data,
                           buffer->capacity_bytes);
    buffer->data = NULL;
  }

  buffer->size_bytes = 0;
  buffer->capacity_bytes = 0;
}
// Transfer the contents of src (data pointer, sizes and allocator) to dst
// and leave src empty so that only dst refers to the allocation.
static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst) {
  memcpy(dst, src, sizeof(*dst));

  // Forget the pointer before resetting so the moved allocation is not freed
  src->data = NULL;
  ArrowBufferReset(src);
}
// Change the capacity of buffer to new_capacity_bytes.
//
// The allocation is only touched when the buffer has to grow or when
// shrink_to_fit is non-zero. Returns EINVAL for a negative capacity and
// ENOMEM (with size and capacity zeroed) if the allocator returns NULL for
// a non-empty request. If the new capacity is smaller than the current
// size, the size is truncated to match.
static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
                                               int64_t new_capacity_bytes,
                                               char shrink_to_fit) {
  if (new_capacity_bytes < 0) {
    return EINVAL;
  }

  const int must_grow = new_capacity_bytes > buffer->capacity_bytes;
  if (must_grow || shrink_to_fit) {
    buffer->data = buffer->allocator.reallocate(
        &buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes);

    if (new_capacity_bytes > 0 && buffer->data == NULL) {
      buffer->size_bytes = 0;
      buffer->capacity_bytes = 0;
      return ENOMEM;
    }

    buffer->capacity_bytes = new_capacity_bytes;
  }

  // Keep the invariant size <= capacity when shrinking
  if (buffer->size_bytes > new_capacity_bytes) {
    buffer->size_bytes = new_capacity_bytes;
  }

  return NANOARROW_OK;
}
// Make sure buffer can hold additional_size_bytes beyond its current size.
//
// Does nothing when the capacity already suffices; otherwise grows the
// buffer to the larger of double its capacity and the required size, and
// returns the result of ArrowBufferResize().
static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer,
                                                int64_t additional_size_bytes) {
  const int64_t required_bytes = buffer->size_bytes + additional_size_bytes;

  if (required_bytes > buffer->capacity_bytes) {
    const int64_t grown_bytes =
        _ArrowGrowByFactor(buffer->capacity_bytes, required_bytes);
    return ArrowBufferResize(buffer, grown_bytes, 0);
  }

  return NANOARROW_OK;
}
// Copy size_bytes bytes from data to the end of buffer and advance its size.
// No capacity check is performed; non-positive sizes are a no-op.
static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data,
                                           int64_t size_bytes) {
  if (size_bytes <= 0) {
    return;
  }

  memcpy(buffer->data + buffer->size_bytes, data, size_bytes);
  buffer->size_bytes += size_bytes;
}
// Append size_bytes bytes from data to buffer, growing it first if needed.
// Propagates any error from ArrowBufferReserve(); otherwise NANOARROW_OK.
static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
                                               const void* data, int64_t size_bytes) {
  // Grow first so that the unchecked copy below is safe
  NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes));

  ArrowBufferAppendUnsafe(buffer, data, size_bytes);
  return NANOARROW_OK;
}
// Append the in-memory bytes of a single int8_t value to buffer.
static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer,
                                                   int8_t value) {
  return ArrowBufferAppend(buffer, &value, sizeof(value));
}
// Append the in-memory bytes of a single uint8_t value to buffer.
static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer,
                                                    uint8_t value) {
  return ArrowBufferAppend(buffer, &value, sizeof(value));
}
// Append the in-memory bytes of a single int16_t value to buffer.
static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer,
                                                    int16_t value) {
  return ArrowBufferAppend(buffer, &value, sizeof(value));
}
// Append the in-memory bytes of a single uint16_t value to buffer.
static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer,
                                                     uint16_t value) {
  return ArrowBufferAppend(buffer, &value, sizeof(value));
}
// Append the in-memory bytes of a single int32_t value to buffer.
static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer,
                                                    int32_t value) {
  return ArrowBufferAppend(buffer, &value, sizeof(value));
}
// Append the in-memory bytes of a single uint32_t value to buffer.
static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer,
                                                     uint32_t value) {
  return ArrowBufferAppend(buffer, &value, sizeof(value));
}
// Append the in-memory bytes of a single int64_t value to buffer.
static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer,
                                                    int64_t value) {
  return ArrowBufferAppend(buffer, &value, sizeof(value));
}
// Append the in-memory bytes of a single uint64_t value to buffer.
static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer,
                                                     uint64_t value) {
  return ArrowBufferAppend(buffer, &value, sizeof(value));
}
// Append the in-memory bytes of a single double value to buffer.
static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer,
                                                     double value) {
  return ArrowBufferAppend(buffer, &value, sizeof(value));
}
// Append the in-memory bytes of a single float value to buffer.
static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer,
                                                    float value) {
  return ArrowBufferAppend(buffer, &value, sizeof(value));
}
// Append the value.size_bytes bytes referenced by value.data to buffer.
static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer,
                                                         struct ArrowStringView value) {
  const void* bytes = value.data;
  return ArrowBufferAppend(buffer, bytes, value.size_bytes);
}
// Append the value.size_bytes bytes referenced by value.data.data to buffer.
static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer,
                                                         struct ArrowBufferView value) {
  const void* bytes = value.data.data;
  return ArrowBufferAppend(buffer, bytes, value.size_bytes);
}
// Append size_bytes copies of the byte value to buffer, growing it if needed.
//
// Returns EINVAL for a negative size_bytes, NANOARROW_OK without touching the
// buffer for a zero size_bytes, and otherwise propagates any error from
// ArrowBufferReserve().
static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer,
                                                   uint8_t value, int64_t size_bytes) {
  if (size_bytes < 0) {
    return EINVAL;
  }

  // Nothing to write. Returning here also avoids calling memset() on a buffer
  // that has never been allocated (buffer->data == NULL), which is undefined
  // behaviour even for a zero length.
  if (size_bytes == 0) {
    return NANOARROW_OK;
  }

  NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes));
  memset(buffer->data + buffer->size_bytes, value, (size_t)size_bytes);
  buffer->size_bytes += size_bytes;
  return NANOARROW_OK;
}
// Lookup tables used by the bit-level helpers in this file. The first four are
// indexed by a bit position within a byte (0-7, least-significant bit first).

// Entry i has only bit i set (1 << i).
static const uint8_t _ArrowkBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
// Entry i has every bit set except bit i (the complement of _ArrowkBitmask[i]).
static const uint8_t _ArrowkFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};
// Entry i has the bits below position i set (bits 0 .. i - 1).
static const uint8_t _ArrowkPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
// Entry i has bit i and all bits above it set (bits i .. 7).
static const uint8_t _ArrowkTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
// Entry b is the number of set bits in the byte value b (256 entries).
static const uint8_t _ArrowkBytePopcount[] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3,
4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4,
4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4,
5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,
4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2,
3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5,
5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4,
5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
// Round value up to the next multiple of 8 (values already on a multiple of
// 8 are returned unchanged).
static inline int64_t _ArrowRoundUpToMultipleOf8(int64_t value) {
  const int64_t low_bits = 7;
  return (value + low_bits) & ~low_bits;
}
// Round value toward zero to a multiple of 8.
static inline int64_t _ArrowRoundDownToMultipleOf8(int64_t value) {
  const int64_t excess = value % 8;
  return value - excess;
}
// Number of bytes needed to store the given number of bits.
static inline int64_t _ArrowBytesForBits(int64_t bits) {
  int64_t n_bytes = bits >> 3;
  // A partially used trailing byte still occupies a whole byte
  if ((bits & 7) != 0) {
    n_bytes += 1;
  }
  return n_bytes;
}
// Expand the 8 bits of word into out[0..7] as 0/1 values, least-significant
// bit first.
static inline void _ArrowBitsUnpackInt8(const uint8_t word, int8_t* out) {
  for (int bit = 0; bit < 8; bit++) {
    out[bit] = (word >> bit) & 1;
  }
}
// Expand the 8 bits of word into out[0..7] as 0/1 values, least-significant
// bit first.
static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) {
  for (int bit = 0; bit < 8; bit++) {
    out[bit] = (word >> bit) & 1;
  }
}
// Pack values[0..7] into one byte, with values[k] contributing bit k. The
// values are OR-ed in unmodified, so each one is expected to be 0 or 1.
static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
  int packed = 0;
  for (int bit = 0; bit < 8; bit++) {
    packed |= values[bit] << bit;
  }
  *out = (uint8_t)packed;
}
// Pack values[0..7] into one byte, with values[k] contributing bit k. The
// values are OR-ed in unmodified, so each one is expected to be 0 or 1.
static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) {
  int32_t packed = 0;
  for (int bit = 0; bit < 8; bit++) {
    packed |= values[bit] << bit;
  }
  *out = (uint8_t)packed;
}
// Return bit i of the bitmap bits as 0 or 1. Bits are numbered from the
// least-significant bit of bits[0].
static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) {
  const uint8_t containing_byte = bits[i >> 3];
  const int shift = (int)(i & 0x07);
  return (containing_byte >> shift) & 1;
}
// Unpack length bits of the bitmap bits, starting at bit start_offset, into
// out[0..length-1] as 0/1 values.
static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset,
                                       int64_t length, int8_t* out) {
  if (length == 0) {
    return;
  }

  const int64_t bit_end = start_offset + length;
  const int64_t first_byte = start_offset / 8;
  const int64_t last_byte = (bit_end - 1) / 8;
  const int64_t first_bit_in_byte = start_offset % 8;

  // The whole range lives inside one byte
  if (first_byte == last_byte) {
    for (int k = 0; k < length; k++) {
      out[k] = ArrowBitGet(&bits[first_byte], k + first_bit_in_byte);
    }
    return;
  }

  // Leading byte: from the start position up to the end of that byte
  for (int64_t bit = first_bit_in_byte; bit < 8; bit++) {
    *out++ = ArrowBitGet(&bits[first_byte], bit);
  }

  // Whole bytes strictly between the first and the last byte
  for (int64_t byte_i = first_byte + 1; byte_i < last_byte; byte_i++) {
    _ArrowBitsUnpackInt8(bits[byte_i], out);
    out += 8;
  }

  // Trailing byte: a range that ends on a byte boundary uses all 8 bits
  const int tail_bits = bit_end % 8 == 0 ? 8 : bit_end % 8;
  for (int bit = 0; bit < tail_bits; bit++) {
    *out++ = ArrowBitGet(&bits[last_byte], bit);
  }
}
// Unpack length bits of the bitmap bits, starting at bit start_offset, into
// out[0..length-1] as 0/1 values.
static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset,
                                        int64_t length, int32_t* out) {
  if (length == 0) {
    return;
  }

  const int64_t bit_end = start_offset + length;
  const int64_t first_byte = start_offset / 8;
  const int64_t last_byte = (bit_end - 1) / 8;
  const int64_t first_bit_in_byte = start_offset % 8;

  // The whole range lives inside one byte
  if (first_byte == last_byte) {
    for (int k = 0; k < length; k++) {
      out[k] = ArrowBitGet(&bits[first_byte], k + first_bit_in_byte);
    }
    return;
  }

  // Leading byte: from the start position up to the end of that byte
  for (int64_t bit = first_bit_in_byte; bit < 8; bit++) {
    *out++ = ArrowBitGet(&bits[first_byte], bit);
  }

  // Whole bytes strictly between the first and the last byte
  for (int64_t byte_i = first_byte + 1; byte_i < last_byte; byte_i++) {
    _ArrowBitsUnpackInt32(bits[byte_i], out);
    out += 8;
  }

  // Trailing byte: a range that ends on a byte boundary uses all 8 bits
  const int tail_bits = bit_end % 8 == 0 ? 8 : bit_end % 8;
  for (int bit = 0; bit < tail_bits; bit++) {
    *out++ = ArrowBitGet(&bits[last_byte], bit);
  }
}
// Set bit i of the bitmap bits to 1.
static inline void ArrowBitSet(uint8_t* bits, int64_t i) {
  uint8_t* containing_byte = &bits[i / 8];
  *containing_byte |= _ArrowkBitmask[i % 8];
}
// Set bit i of the bitmap bits to 0.
static inline void ArrowBitClear(uint8_t* bits, int64_t i) {
  uint8_t* containing_byte = &bits[i / 8];
  *containing_byte &= _ArrowkFlippedBitmask[i % 8];
}
// Set bit i of the bitmap bits to 1 if bit_is_set is non-zero, otherwise to 0.
// The other bits of the containing byte are left untouched.
static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t bit_is_set) {
  uint8_t* containing_byte = &bits[i / 8];
  const uint8_t mask = _ArrowkBitmask[i % 8];

  if (bit_is_set != 0) {
    *containing_byte |= mask;
  } else {
    *containing_byte &= (uint8_t)~mask;
  }
}
// Set length consecutive bits of the bitmap bits, starting at bit
// start_offset, to 1 if bits_are_set is non-zero and to 0 otherwise. Bits
// outside the range are preserved.
//
// A non-positive length is a no-op. Without this guard a zero-length call at
// a byte-aligned start_offset would overwrite the entire byte at
// bits[start_offset / 8], which may lie past the end of the allocation.
static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length,
                                  uint8_t bits_are_set) {
  if (length <= 0) {
    return;
  }

  const int64_t i_begin = start_offset;
  const int64_t i_end = start_offset + length;
  // Normalise any non-zero flag to 0xFF (same convention as ArrowBitSetTo)
  const uint8_t fill_byte = (uint8_t)(bits_are_set != 0 ? 0xFF : 0x00);

  const int64_t bytes_begin = i_begin / 8;
  // One past the byte that contains bit i_end
  const int64_t bytes_end = i_end / 8 + 1;

  // Bits to preserve: those before i_begin in the first byte and those at or
  // after i_end in the last byte.
  const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
  const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8];

  if (bytes_end == bytes_begin + 1) {
    // set bits within a single byte (i_end % 8 != 0 here because length > 0)
    const uint8_t only_byte_mask = (uint8_t)(first_byte_mask | last_byte_mask);
    bits[bytes_begin] &= only_byte_mask;
    bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask);
    return;
  }

  // set/clear trailing bits of first byte
  bits[bytes_begin] &= first_byte_mask;
  bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask);

  if (bytes_end - bytes_begin > 2) {
    // set/clear whole bytes
    memset(bits + bytes_begin + 1, fill_byte, (size_t)(bytes_end - bytes_begin - 2));
  }

  // The range ends exactly on a byte boundary: the last byte holds no bits of
  // the range and must not be touched.
  if (i_end % 8 == 0) {
    return;
  }

  // set/clear leading bits of last byte
  bits[bytes_end - 1] &= last_byte_mask;
  bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask);
}
// Count the bits that are set among the length bits of the bitmap bits
// starting at bit start_offset.
static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset,
                                       int64_t length) {
  if (length == 0) {
    return 0;
  }

  const int64_t bit_end = start_offset + length;
  const int64_t first_byte = start_offset / 8;
  const int64_t last_byte = (bit_end - 1) / 8;

  // Bits of the first byte at or after the start position
  const uint8_t keep_from_start = _ArrowkTrailingBitmask[start_offset % 8];
  // Bits of the last byte before the end position (all of them when the range
  // ends on a byte boundary)
  const uint8_t keep_before_end =
      bit_end % 8 == 0 ? (uint8_t)0xFF : _ArrowkPrecedingBitmask[bit_end % 8];

  // count bits within a single byte
  if (first_byte == last_byte) {
    const uint8_t masked = bits[first_byte] & keep_from_start & keep_before_end;
    return _ArrowkBytePopcount[masked];
  }

  // first byte
  int64_t n_set = _ArrowkBytePopcount[bits[first_byte] & keep_from_start];

  // middle bytes
  for (int64_t byte_i = first_byte + 1; byte_i < last_byte; byte_i++) {
    n_set += _ArrowkBytePopcount[bits[byte_i]];
  }

  // last byte
  n_set += _ArrowkBytePopcount[bits[last_byte] & keep_before_end];
  return n_set;
}
// Initialize bitmap to hold zero bits with an initialized, empty buffer.
static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap) {
  bitmap->size_bits = 0;
  ArrowBufferInit(&bitmap->buffer);
}
// Transfer the buffer and bit count of src to dst, leaving src with an empty
// buffer and a size of zero bits.
static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst) {
  const int64_t moved_bits = src->size_bits;

  ArrowBufferMove(&src->buffer, &dst->buffer);
  dst->size_bits = moved_bits;
  src->size_bits = 0;
}
// Make sure bitmap can hold additional_size_bits beyond its current size.
//
// Does nothing when the capacity already suffices. Otherwise grows the
// underlying buffer (propagating any error from ArrowBufferReserve()) and
// zeroes the last byte of the new capacity.
static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
                                                int64_t additional_size_bits) {
  const int64_t required_bits = bitmap->size_bits + additional_size_bits;
  const int64_t available_bits = bitmap->buffer.capacity_bytes * 8;

  if (required_bits > available_bits) {
    const int64_t additional_bytes = _ArrowBytesForBits(additional_size_bits);
    NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(&bitmap->buffer, additional_bytes));

    bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0;
  }

  return NANOARROW_OK;
}
// Change the capacity of bitmap to hold new_capacity_bits bits.
//
// Returns EINVAL for a negative capacity and propagates any error from
// ArrowBufferResize(). If the new capacity is below the current size in
// bits, the size is truncated to the new capacity.
static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
                                               int64_t new_capacity_bits,
                                               char shrink_to_fit) {
  if (new_capacity_bits < 0) {
    return EINVAL;
  }

  NANOARROW_RETURN_NOT_OK(ArrowBufferResize(
      &bitmap->buffer, _ArrowBytesForBits(new_capacity_bits), shrink_to_fit));

  if (bitmap->size_bits > new_capacity_bits) {
    bitmap->size_bits = new_capacity_bits;
  }

  return NANOARROW_OK;
}
// Append length bits, all set or all cleared according to bits_are_set,
// growing the bitmap first if needed. Propagates any error from
// ArrowBitmapReserve().
static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
                                               uint8_t bits_are_set, int64_t length) {
  // Grow first so that the unchecked append below is safe
  NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(bitmap, length));

  ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length);
  return NANOARROW_OK;
}
// Append length bits, all set or all cleared according to bits_are_set,
// without checking capacity. Updates both the bit count and the byte size of
// the underlying buffer.
static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
                                           uint8_t bits_are_set, int64_t length) {
  const int64_t first_new_bit = bitmap->size_bits;

  ArrowBitsSetTo(bitmap->buffer.data, first_new_bit, length, bits_are_set);
  bitmap->size_bits = first_new_bit + length;
  bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits);
}
// Append n_values bits to bitmap, one per element of values, without checking
// capacity. Bits that complete a partially filled byte or start a trailing
// partial byte are set from (value != 0); whole bytes in between are packed
// with _ArrowBitmapPackInt8(), which expects each value to be 0 or 1.
//
// A non-positive n_values is a no-op.
static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
                                               const int8_t* values, int64_t n_values) {
  if (n_values <= 0) {
    return;
  }

  const int8_t* values_cursor = values;
  int64_t n_remaining = n_values;
  int64_t out_i_cursor = bitmap->size_bits;
  uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8;

  // First byte
  if ((out_i_cursor % 8) != 0) {
    int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor;
    // Never consume more values than were provided: with fewer values than
    // free bits in this byte, reading on would run past the end of values.
    if (n_partial_bits > n_remaining) {
      n_partial_bits = n_remaining;
    }

    for (int64_t i = 0; i < n_partial_bits; i++) {
      ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]);
    }

    out_cursor++;
    values_cursor += n_partial_bits;
    n_remaining -= n_partial_bits;
  }

  // Middle bytes
  int64_t n_full_bytes = n_remaining / 8;
  for (int64_t i = 0; i < n_full_bytes; i++) {
    _ArrowBitmapPackInt8(values_cursor, out_cursor);
    values_cursor += 8;
    out_cursor++;
  }

  // Last byte
  out_i_cursor += n_full_bytes * 8;
  n_remaining -= n_full_bytes * 8;
  if (n_remaining > 0) {
    // Zero out the last byte
    *out_cursor = 0x00;
    for (int64_t i = 0; i < n_remaining; i++) {
      ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
    }
    out_cursor++;
  }

  bitmap->size_bits += n_values;
  bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data;
}
// Append n_values bits to bitmap, one per element of values, without checking
// capacity. Whole bytes are packed with _ArrowBitmapPackInt32(), which
// expects each value to be 0 or 1; bits in a partial first or last byte are
// written with ArrowBitSetTo().
//
// A non-positive n_values is a no-op.
static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap,
                                                const int32_t* values, int64_t n_values) {
  if (n_values <= 0) {
    return;
  }

  const int32_t* values_cursor = values;
  int64_t n_remaining = n_values;
  int64_t out_i_cursor = bitmap->size_bits;
  uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8;

  // First byte
  if ((out_i_cursor % 8) != 0) {
    int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor;
    // Never consume more values than were provided: with fewer values than
    // free bits in this byte, reading on would run past the end of values.
    if (n_partial_bits > n_remaining) {
      n_partial_bits = n_remaining;
    }

    for (int64_t i = 0; i < n_partial_bits; i++) {
      ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]);
    }

    out_cursor++;
    values_cursor += n_partial_bits;
    n_remaining -= n_partial_bits;
  }

  // Middle bytes
  int64_t n_full_bytes = n_remaining / 8;
  for (int64_t i = 0; i < n_full_bytes; i++) {
    _ArrowBitmapPackInt32(values_cursor, out_cursor);
    values_cursor += 8;
    out_cursor++;
  }

  // Last byte
  out_i_cursor += n_full_bytes * 8;
  n_remaining -= n_full_bytes * 8;
  if (n_remaining > 0) {
    // Zero out the last byte
    *out_cursor = 0x00;
    for (int64_t i = 0; i < n_remaining; i++) {
      ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
    }
    out_cursor++;
  }

  bitmap->size_bits += n_values;
  bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data;
}
// Release the buffer held by bitmap and set its size to zero bits.
static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) {
  bitmap->size_bits = 0;
  ArrowBufferReset(&bitmap->buffer);
}
#ifdef __cplusplus
}
#endif
#endif
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef NANOARROW_ARRAY_INLINE_H_INCLUDED
#define NANOARROW_ARRAY_INLINE_H_INCLUDED
#include <errno.h>
#include <float.h>
#include <limits.h>
#include <stdint.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
// Return the bitmap stored in the private data of array. array->private_data
// is interpreted as a struct ArrowArrayPrivateData.
static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array) {
  return &((struct ArrowArrayPrivateData*)array->private_data)->bitmap;
}
// Return the i-th buffer of array. Index 0 is the buffer backing the bitmap
// in the private data; index i > 0 maps to private_data->buffers[i - 1].
static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i) {
  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;

  if (i == 0) {
    return &priv->bitmap.buffer;
  }

  return &priv->buffers[i - 1];
}
// Unions whose type ids differ from their child indices are not currently
// supported. The type id <-> child index helpers exist so that every place
// relying on that assumption is easy to find.

// Map a union type id to the index of the corresponding child (currently the
// identity mapping; array is unused).
static inline int8_t _ArrowArrayUnionChildIndex(struct ArrowArray* array,
                                                int8_t type_id) {
  (void)array;
  return type_id;
}
// Map a union child index to its type id (currently the identity mapping;
// array is unused).
static inline int8_t _ArrowArrayUnionTypeId(struct ArrowArray* array,
                                            int8_t child_index) {
  (void)array;
  return child_index;
}
// Parse a comma-separated list of union type ids (e.g. "0,1,2").
//
// Each id must be an integer in [0, 127]. If out is not NULL, the parsed ids
// are written to out[0..n-1]; callers in this file pass a 128-element array.
//
// Returns the number of ids parsed (0 for an empty string), or -1 if the
// string is malformed, an id is out of range, or there are more than
// INT8_MAX ids. The cap keeps the count representable in the int8_t return
// type and keeps writes within a 128-element out array.
static inline int8_t _ArrowParseUnionTypeIds(const char* type_ids, int8_t* out) {
  if (*type_ids == '\0') {
    return 0;
  }

  int32_t i = 0;
  long type_id;
  char* end_ptr;
  for (;;) {
    // Refuse to parse (and store) more ids than the return type can count
    if (i >= INT8_MAX) {
      return -1;
    }

    type_id = strtol(type_ids, &end_ptr, 10);
    if (end_ptr == type_ids || type_id < 0 || type_id > 127) {
      return -1;
    }

    if (out != NULL) {
      out[i] = (int8_t)type_id;
    }

    i++;

    // An id must be followed by the end of the string or by a single comma
    type_ids = end_ptr;
    if (*type_ids == '\0') {
      return (int8_t)i;
    }
    if (*type_ids != ',') {
      return -1;
    }
    type_ids++;
  }
}
// Return 1 if the parsed type ids are exactly 0, 1, ..., n_children - 1 (so
// that every type id equals its child index), otherwise 0.
//
// type_ids must point to at least n_type_ids elements. A non-positive
// n_type_ids that equals n_children yields 1 because there is nothing to
// compare.
static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices(const int8_t* type_ids,
                                                                   int64_t n_type_ids,
                                                                   int64_t n_children) {
  if (n_type_ids != n_children) {
    return 0;
  }

  // The index has the same width as n_type_ids so it cannot overflow when
  // n_type_ids exceeds INT8_MAX; any index above 127 can never equal an
  // int8_t type id and is reported as a mismatch.
  for (int64_t i = 0; i < n_type_ids; i++) {
    if (type_ids[i] != i) {
      return 0;
    }
  }

  return 1;
}
// Parse type_id_str and report (1 or 0) whether the parsed type ids are
// exactly the child indices 0 .. n_children - 1.
static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id_str,
                                                             int64_t n_children) {
  int8_t parsed_ids[128];
  const int8_t n_parsed = _ArrowParseUnionTypeIds(type_id_str, parsed_ids);

  return _ArrowParsedUnionTypeIdsWillEqualChildIndices(parsed_ids, n_parsed, n_children);
}
// Prepare array (and, recursively, its children and dictionary) for
// element-wise appending.
//
// Returns EINVAL if the storage type is uninitialized or if array is a union
// whose union_type_id_is_child_index flag is not 1. Otherwise seeds every
// 32- or 64-bit data offset buffer with a single zero, propagating any
// append error.
static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) {
  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;

  if (priv->storage_type == NANOARROW_TYPE_UNINITIALIZED) {
    return EINVAL;
  }

  const int is_union = priv->storage_type == NANOARROW_TYPE_SPARSE_UNION ||
                       priv->storage_type == NANOARROW_TYPE_DENSE_UNION;
  // Note that this value could be -1 if the type_ids string was invalid
  if (is_union && priv->union_type_id_is_child_index != 1) {
    return EINVAL;
  }

  // Initialize any data offset buffer with a single zero
  for (int buffer_i = 0; buffer_i < 3; buffer_i++) {
    if (priv->layout.buffer_type[buffer_i] != NANOARROW_BUFFER_TYPE_DATA_OFFSET) {
      continue;
    }

    struct ArrowBuffer* offsets = ArrowArrayBuffer(array, buffer_i);
    if (priv->layout.element_size_bits[buffer_i] == 64) {
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(offsets, 0));
    } else if (priv->layout.element_size_bits[buffer_i] == 32) {
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets, 0));
    }
  }

  // Start building any child arrays or dictionaries
  for (int64_t child_i = 0; child_i < array->n_children; child_i++) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[child_i]));
  }

  if (array->dictionary != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->dictionary));
  }

  return NANOARROW_OK;
}
// Reduce the capacity of each of the three buffers of array to its current
// size, then do the same for every child and for the dictionary (if any).
// Stops at and returns the first error.
static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
  for (int64_t buffer_i = 0; buffer_i < 3; buffer_i++) {
    struct ArrowBuffer* owned = ArrowArrayBuffer(array, buffer_i);
    NANOARROW_RETURN_NOT_OK(ArrowBufferResize(owned, owned->size_bytes, 1));
  }

  int64_t child_i = 0;
  while (child_i < array->n_children) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[child_i]));
    child_i++;
  }

  if (array->dictionary != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->dictionary));
  }

  return NANOARROW_OK;
}
// Write n bits equal to value into buffer buffer_i of array, starting at bit
// array->length. The buffer is first extended with zero bytes so that it
// covers all array->length + n elements; array->length itself is not changed.
//
// Propagates any error from ArrowBufferAppendFill().
static inline ArrowErrorCode _ArrowArrayAppendBits(struct ArrowArray* array,
                                                   int64_t buffer_i, uint8_t value,
                                                   int64_t n) {
  struct ArrowArrayPrivateData* private_data =
      (struct ArrowArrayPrivateData*)array->private_data;
  struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);

  // Size the buffer for every element being appended, not just the next one:
  // ArrowBitsSetTo() below writes n bits and must stay inside the allocation.
  const int64_t bits_required =
      private_data->layout.element_size_bits[buffer_i] * (array->length + n);
  const int64_t bytes_required = _ArrowRoundUpToMultipleOf8(bits_required) / 8;

  if (bytes_required > buffer->size_bytes) {
    NANOARROW_RETURN_NOT_OK(
        ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes));
  }

  ArrowBitsSetTo(buffer->data, array->length, n, value);
  return NANOARROW_OK;
}
// Append n placeholder elements to array: nulls when is_valid is 0, "empty"
// (valid, zero-filled) elements otherwise. Shared implementation of
// ArrowArrayAppendNull() and ArrowArrayAppendEmpty().
//
// NA and union storage types are handled entirely by the switch below and
// return early. For the remaining types the validity bitmap (if any), the
// offset buffers and the fixed-width data buffers are each extended by n
// elements before array->length and array->null_count are updated.
//
// Returns NANOARROW_OK on success, an error propagated from a buffer, bitmap or
// child append, or EINVAL if a type id or union offset buffer is encountered in
// the layout of a non-union storage type.
static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* array,
int64_t n, uint8_t is_valid) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
if (n == 0) {
return NANOARROW_OK;
}
// Some type-specific handling
switch (private_data->storage_type) {
case NANOARROW_TYPE_NA:
// (An empty value for a null array *is* a null)
array->null_count += n;
array->length += n;
return NANOARROW_OK;
case NANOARROW_TYPE_DENSE_UNION: {
// Add one null to the first child and append n references to that child
int8_t type_id = _ArrowArrayUnionTypeId(array, 0);
NANOARROW_RETURN_NOT_OK(
_ArrowArrayAppendEmptyInternal(array->children[0], 1, is_valid));
// Buffer 0 receives the n type ids; buffer 1 receives n offsets that all
// point at the element just appended to the first child.
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n));
for (int64_t i = 0; i < n; i++) {
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(
ArrowArrayBuffer(array, 1), (int32_t)array->children[0]->length - 1));
}
// For the purposes of array->null_count, union elements are never considered "null"
// even if some children contain nulls.
array->length += n;
return NANOARROW_OK;
}
case NANOARROW_TYPE_SPARSE_UNION: {
// Add n nulls to the first child and append n references to that child
int8_t type_id = _ArrowArrayUnionTypeId(array, 0);
NANOARROW_RETURN_NOT_OK(
_ArrowArrayAppendEmptyInternal(array->children[0], n, is_valid));
// Every other child gets n empty elements as well
for (int64_t i = 1; i < array->n_children; i++) {
NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n));
}
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n));
// For the purposes of array->null_count, union elements are never considered "null"
// even if some children contain nulls.
array->length += n;
return NANOARROW_OK;
}
case NANOARROW_TYPE_FIXED_SIZE_LIST:
// Each list element owns child_size_elements entries of the child array
NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(
array->children[0], n * private_data->layout.child_size_elements));
break;
case NANOARROW_TYPE_STRUCT:
// Every child receives one empty element per appended struct element
for (int64_t i = 0; i < array->n_children; i++) {
NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n));
}
break;
default:
break;
}
// Append n is_valid bits to the validity bitmap. If we haven't allocated a bitmap yet
// and we need to append nulls, do it now.
if (!is_valid && private_data->bitmap.buffer.data == NULL) {
NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, array->length + n));
// Mark the array->length elements appended so far as valid, then add the new bits
ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length);
ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n);
} else if (private_data->bitmap.buffer.data != NULL) {
NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n));
ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n);
}
// Add appropriate buffer fill
struct ArrowBuffer* buffer;
int64_t size_bytes;
for (int i = 0; i < 3; i++) {
buffer = ArrowArrayBuffer(array, i);
size_bytes = private_data->layout.element_size_bits[i] / 8;
switch (private_data->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_NONE:
case NANOARROW_BUFFER_TYPE_VALIDITY:
continue;
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
// Append the current value at the end of the offset buffer for each element
NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n));
for (int64_t j = 0; j < n; j++) {
ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j),
size_bytes);
}
// Skip the data buffer
i++;
continue;
case NANOARROW_BUFFER_TYPE_DATA:
// Zero out the next bit of memory
if (private_data->layout.element_size_bits[i] % 8 == 0) {
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n));
} else {
// Element width is not a whole number of bytes: append as bits instead
NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n));
}
continue;
case NANOARROW_BUFFER_TYPE_TYPE_ID:
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
// These cases return above
return EINVAL;
}
}
array->length += n;
// Only null (not merely empty) elements count towards null_count
array->null_count += n * !is_valid;
return NANOARROW_OK;
}
// Appends n elements to array with the validity flag cleared (is_valid = 0).
//
// All of the work is done by _ArrowArrayAppendEmptyInternal(); its error code is
// returned to the caller unchanged.
static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n) {
  ArrowErrorCode result = _ArrowArrayAppendEmptyInternal(array, n, /*is_valid=*/0);
  return result;
}
// Appends n elements to array with the validity flag set (is_valid = 1).
//
// All of the work is done by _ArrowArrayAppendEmptyInternal(); its error code is
// returned to the caller unchanged.
static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n) {
  ArrowErrorCode result = _ArrowArrayAppendEmptyInternal(array, n, /*is_valid=*/1);
  return result;
}
// Appends one signed integer to array, converting it to the array's storage type.
//
// - Unsigned integer storage: value is checked with
//   _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX) and the append is handed to
//   ArrowArrayAppendUInt(), which also updates the validity bitmap and length.
// - Signed integer storage narrower than 64 bits: value is checked against the
//   limits of the target type before the narrowing cast.
// - Floating point storage: value is converted with a plain C cast.
// - Boolean storage: a single bit is appended, set when value is non-zero.
// - Any other storage type: EINVAL is returned.
//
// If a validity bitmap has already been allocated, one set bit is appended to it.
// On success array->length is incremented and NANOARROW_OK is returned.
static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array,
                                                 int64_t value) {
  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;
  struct ArrowBuffer* values = ArrowArrayBuffer(array, 1);

  switch (priv->storage_type) {
    // Unsigned storage is owned by the unsigned appender
    case NANOARROW_TYPE_UINT8:
    case NANOARROW_TYPE_UINT16:
    case NANOARROW_TYPE_UINT32:
    case NANOARROW_TYPE_UINT64:
      _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX);
      return ArrowArrayAppendUInt(array, (uint64_t)value);

    // Signed storage: the 64-bit case is a raw copy, narrower types are checked first
    case NANOARROW_TYPE_INT64:
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(values, &value, sizeof(value)));
      break;
    case NANOARROW_TYPE_INT32:
      _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX);
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(values, (int32_t)value));
      break;
    case NANOARROW_TYPE_INT16:
      _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX);
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(values, (int16_t)value));
      break;
    case NANOARROW_TYPE_INT8:
      _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX);
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(values, (int8_t)value));
      break;

    // Floating point storage
    case NANOARROW_TYPE_DOUBLE:
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(values, (double)value));
      break;
    case NANOARROW_TYPE_FLOAT:
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(values, (float)value));
      break;

    // Boolean storage is bit-packed in buffer 1
    case NANOARROW_TYPE_BOOL:
      NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
      break;

    default:
      return EINVAL;
  }

  // Only an already-allocated validity bitmap needs a bit for the new element
  if (priv->bitmap.buffer.data != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
  }

  array->length += 1;
  return NANOARROW_OK;
}
// Appends one unsigned integer to array, converting it to the array's storage type.
//
// - Signed integer storage: value is checked with
//   _NANOARROW_CHECK_UPPER_LIMIT(value, INT64_MAX) and the append is handed to
//   ArrowArrayAppendInt(), which also updates the validity bitmap and length.
// - Unsigned integer storage narrower than 64 bits: value is checked against the
//   maximum of the target type before the narrowing cast.
// - Floating point storage: value is converted with a plain C cast.
// - Boolean storage: a single bit is appended, set when value is non-zero.
// - Any other storage type: EINVAL is returned.
//
// If a validity bitmap has already been allocated, one set bit is appended to it.
// On success array->length is incremented and NANOARROW_OK is returned.
static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array,
                                                  uint64_t value) {
  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;
  struct ArrowBuffer* values = ArrowArrayBuffer(array, 1);

  switch (priv->storage_type) {
    // Signed storage is owned by the signed appender
    case NANOARROW_TYPE_INT8:
    case NANOARROW_TYPE_INT16:
    case NANOARROW_TYPE_INT32:
    case NANOARROW_TYPE_INT64:
      _NANOARROW_CHECK_UPPER_LIMIT(value, INT64_MAX);
      return ArrowArrayAppendInt(array, (int64_t)value);

    // Unsigned storage: the 64-bit case is a raw copy, narrower types are checked first
    case NANOARROW_TYPE_UINT64:
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(values, &value, sizeof(value)));
      break;
    case NANOARROW_TYPE_UINT32:
      _NANOARROW_CHECK_UPPER_LIMIT(value, UINT32_MAX);
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(values, (uint32_t)value));
      break;
    case NANOARROW_TYPE_UINT16:
      _NANOARROW_CHECK_UPPER_LIMIT(value, UINT16_MAX);
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(values, (uint16_t)value));
      break;
    case NANOARROW_TYPE_UINT8:
      _NANOARROW_CHECK_UPPER_LIMIT(value, UINT8_MAX);
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(values, (uint8_t)value));
      break;

    // Floating point storage
    case NANOARROW_TYPE_DOUBLE:
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(values, (double)value));
      break;
    case NANOARROW_TYPE_FLOAT:
      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(values, (float)value));
      break;

    // Boolean storage is bit-packed in buffer 1
    case NANOARROW_TYPE_BOOL:
      NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
      break;

    default:
      return EINVAL;
  }

  // Only an already-allocated validity bitmap needs a bit for the new element
  if (priv->bitmap.buffer.data != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
  }

  array->length += 1;
  return NANOARROW_OK;
}
// Appends one floating point value to array.
//
// Double storage receives the value's bytes as-is; float storage receives the value
// after a plain C cast to float. Every other storage type yields EINVAL.
//
// If a validity bitmap has already been allocated, one set bit is appended to it.
// On success array->length is incremented and NANOARROW_OK is returned.
static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array,
                                                    double value) {
  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;
  struct ArrowBuffer* values = ArrowArrayBuffer(array, 1);

  if (priv->storage_type == NANOARROW_TYPE_DOUBLE) {
    NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(values, &value, sizeof(value)));
  } else if (priv->storage_type == NANOARROW_TYPE_FLOAT) {
    NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(values, (float)value));
  } else {
    return EINVAL;
  }

  // Only an already-allocated validity bitmap needs a bit for the new element
  if (priv->bitmap.buffer.data != NULL) {
    NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
  }

  array->length += 1;
  return NANOARROW_OK;
}
static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
struct ArrowBufferView value) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1);
struct ArrowBuffer* data_buffer = ArrowArrayBuffer(
array, 1 + (private_data->storage_type != NANOARROW_TYPE_FIXED_SIZE_BINARY));
int32_t offset;
int64_t large_offset;
int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8;
switch (private_data->storage_type) {
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_BINARY:
offset = ((int32_t*)offset_buffer->data)[array->length];
if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) {
return EOVERFLOW;
}
offset += (int32_t)value.size_bytes;
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t)));
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes));
break;
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_LARGE_BINARY:
large_offset = ((int64_t*)offset_buffer->data)[array->length];
large_offset += value.size_bytes;
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppend(offset_buffer, &large_offset, sizeof(int64_t)));
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes));
break;
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
if (value.size_bytes != fixed_size_bytes) {
return EINVAL;
}
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes));
break;
default:
return EINVAL;
}