// blob: d814a0566da3f420adabd0064dd95ea3cd84de1b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef NANOARROW_NANOARROW_TYPES_H_INCLUDED
#define NANOARROW_NANOARROW_TYPES_H_INCLUDED
#include <stdint.h>
#include <string.h>
#include "nanoarrow_config.h"
#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE)
#include <stdio.h>
#include <stdlib.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
// Extra guard for versions of Arrow without the canonical guard
#ifndef ARROW_FLAG_DICTIONARY_ORDERED
/// \defgroup nanoarrow-arrow-cdata Arrow C Data interface
///
/// The Arrow C Data (https://arrow.apache.org/docs/format/CDataInterface.html)
/// and Arrow C Stream (https://arrow.apache.org/docs/format/CStreamInterface.html)
/// interfaces are part of the
/// Arrow Columnar Format specification
/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for
/// documentation of these structures.
///
/// @{
#ifndef ARROW_C_DATA_INTERFACE
#define ARROW_C_DATA_INTERFACE
// Bit flags for ArrowSchema::flags as defined by the Arrow C Data interface
// specification. Each flag occupies its own bit so they can be OR-ed together.
//
// For dictionary-encoded types: the ordering of dictionary indices is
// semantically meaningful.
#define ARROW_FLAG_DICTIONARY_ORDERED 1
// The field is semantically nullable.
#define ARROW_FLAG_NULLABLE 2
// For map types: the keys within each map value are sorted.
#define ARROW_FLAG_MAP_KEYS_SORTED 4
// Type description of an array or field as laid out by the Arrow C Data
// interface. The member order and types form a binary interface shared with
// other Arrow implementations and must not be changed here; the Arrow C Data
// interface specification is the authoritative description of each member.
struct ArrowSchema {
// Array type description
const char* format;
const char* name;
const char* metadata;
// Bitwise OR of the ARROW_FLAG_* constants
int64_t flags;
// Number of entries in children
int64_t n_children;
struct ArrowSchema** children;
struct ArrowSchema* dictionary;
// Release callback
// (the helpers in this file set it to NULL when moving out of a struct and
// expect it to be NULL after it has been called)
void (*release)(struct ArrowSchema*);
// Opaque producer-specific data
void* private_data;
};
// Array data as laid out by the Arrow C Data interface. The member order and
// types form a binary interface shared with other Arrow implementations and
// must not be changed here; the Arrow C Data interface specification is the
// authoritative description of each member.
struct ArrowArray {
// Array data description
int64_t length;
int64_t null_count;
int64_t offset;
// Number of entries in buffers
int64_t n_buffers;
// Number of entries in children
int64_t n_children;
const void** buffers;
struct ArrowArray** children;
struct ArrowArray* dictionary;
// Release callback
// (the helpers in this file set it to NULL when moving out of a struct and
// expect it to be NULL after it has been called)
void (*release)(struct ArrowArray*);
// Opaque producer-specific data
void* private_data;
};
#endif // ARROW_C_DATA_INTERFACE
#ifndef ARROW_C_STREAM_INTERFACE
#define ARROW_C_STREAM_INTERFACE
// A source of ArrowArray values that all share one ArrowSchema, as laid out by
// the Arrow C Stream interface. The member order and types form a binary
// interface shared with other Arrow implementations and must not be changed
// here. Every callback receives the stream itself as its first argument.
struct ArrowArrayStream {
// Callback to get the stream type
// (will be the same for all arrays in the stream).
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowSchema must be released independently from the stream.
int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
// Callback to get the next array
// (if no error and the array is released, the stream has ended)
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowArray must be released independently from the stream.
int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
// Callback to get optional detailed error information.
// This must only be called if the last stream operation failed
// with a non-0 return code.
//
// Return value: pointer to a null-terminated character array describing
// the last error, or NULL if no description is available.
//
// The returned pointer is only valid until the next operation on this stream
// (including release).
const char* (*get_last_error)(struct ArrowArrayStream*);
// Release callback: release the stream's own resources.
// Note that arrays returned by `get_next` must be individually released.
void (*release)(struct ArrowArrayStream*);
// Opaque producer-specific data
void* private_data;
};
#endif // ARROW_C_STREAM_INTERFACE
#endif // ARROW_FLAG_DICTIONARY_ORDERED
/// @}
// Utility macros
// Paste two tokens together exactly as written (arguments are not expanded).
#define _NANOARROW_CONCAT(x, y) x##y
// Paste two tokens together after macro-expanding both arguments. The extra
// level of indirection is what lets __COUNTER__ become a number before pasting.
#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y)
// Evaluate EXPR exactly once into a const int called NAME and, if the value is
// non-zero, return it from the enclosing function. NAME is supplied by the
// caller so that each expansion can use a distinct identifier.
#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \
do { \
const int NAME = (EXPR); \
if (NAME) return NAME; \
} while (0)
// Return EINVAL from the enclosing function unless min_ <= x_ <= max_.
//
// Every argument is parenthesized so that compound expressions (for example a
// ternary or a bitwise OR) are compared as a whole. x_ is evaluated up to two
// times, so it must not have side effects.
#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \
  NANOARROW_RETURN_NOT_OK(((x_) >= (min_) && (x_) <= (max_)) ? NANOARROW_OK : EINVAL)
// Return EINVAL from the enclosing function unless x_ <= max_.
//
// Both arguments are parenthesized so that compound expressions are compared
// as a whole.
#define _NANOARROW_CHECK_UPPER_LIMIT(x_, max_) \
  NANOARROW_RETURN_NOT_OK(((x_) <= (max_)) ? NANOARROW_OK : EINVAL)
// Evaluate EXPR exactly once into a const int called NAME. If the value is
// non-zero, pass a message built from EXPR_STR and the value to ArrowErrorSet()
// for the error at ERROR_PTR_EXPR, then return the value from the enclosing
// function. The debug variant also appends the file and line of the expansion.
#if defined(NANOARROW_DEBUG)
#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \
do { \
const int NAME = (EXPR); \
if (NAME) { \
ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d\n* %s:%d", EXPR_STR, \
NAME, __FILE__, __LINE__); \
return NAME; \
} \
} while (0)
#else
#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \
do { \
const int NAME = (EXPR); \
if (NAME) { \
ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d", EXPR_STR, NAME); \
return NAME; \
} \
} while (0)
#endif
// Compiler attributes used for extra diagnostics. They are only populated in
// debug builds; otherwise both macros expand to nothing.
#if defined(NANOARROW_DEBUG)
// For checking ArrowErrorSet() calls for valid printf format strings/arguments
// If using mingw's c99-compliant printf, we need a different format-checking attribute
// (the format string is argument 2 and the variadic arguments start at 3)
#if defined(__USE_MINGW_ANSI_STDIO) && defined(__MINGW_PRINTF_FORMAT)
#define NANOARROW_CHECK_PRINTF_ATTRIBUTE \
__attribute__((format(__MINGW_PRINTF_FORMAT, 2, 3)))
#elif defined(__GNUC__)
#define NANOARROW_CHECK_PRINTF_ATTRIBUTE __attribute__((format(printf, 2, 3)))
#else
#define NANOARROW_CHECK_PRINTF_ATTRIBUTE
#endif
// For checking calls to functions that return ArrowErrorCode
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define NANOARROW_CHECK_RETURN_ATTRIBUTE __attribute__((warn_unused_result))
#elif defined(_MSC_VER) && (_MSC_VER >= 1700)
#define NANOARROW_CHECK_RETURN_ATTRIBUTE _Check_return_
#else
#define NANOARROW_CHECK_RETURN_ATTRIBUTE
#endif
#else
#define NANOARROW_CHECK_RETURN_ATTRIBUTE
#define NANOARROW_CHECK_PRINTF_ATTRIBUTE
#endif
// Mark a variable or parameter as intentionally unused by evaluating it in a
// void context.
#define NANOARROW_UNUSED(x) (void)(x)
/// \brief Return code for success.
/// \ingroup nanoarrow-errors
#define NANOARROW_OK 0
/// \brief Represents an errno-compatible error code
/// \ingroup nanoarrow-errors
typedef int ArrowErrorCode;
// In debug builds, shadow the typedef name with a macro of the same name so
// that every later use of ArrowErrorCode is prefixed with
// NANOARROW_CHECK_RETURN_ATTRIBUTE (a warning for ignored return values on
// compilers that support one).
#if defined(NANOARROW_DEBUG)
#define ArrowErrorCode NANOARROW_CHECK_RETURN_ATTRIBUTE ArrowErrorCode
#endif
/// \brief Flags supported by ArrowSchemaViewInit()
/// \ingroup nanoarrow-schema-view
///
/// The bitwise OR of all three ARROW_FLAG_* constants.
#define NANOARROW_FLAG_ALL_SUPPORTED \
(ARROW_FLAG_DICTIONARY_ORDERED | ARROW_FLAG_NULLABLE | ARROW_FLAG_MAP_KEYS_SORTED)
/// \brief Error type containing a UTF-8 encoded message.
/// \ingroup nanoarrow-errors
struct ArrowError {
/// \brief A character buffer with space for an error message.
///
/// The 1024 bytes include the null terminator, so ArrowErrorSetString()
/// stores at most 1023 message bytes and truncates anything longer.
char message[1024];
};
/// \brief Reset an ArrowError to the empty message
/// \ingroup nanoarrow-errors
///
/// Writes a null terminator to the first byte of the message buffer; the
/// remaining bytes are left untouched. Passing NULL is allowed and is a no-op.
static inline void ArrowErrorInit(struct ArrowError* error) {
  if (error == NULL) {
    return;
  }

  error->message[0] = '\0';
}
/// \brief Get the contents of an error
/// \ingroup nanoarrow-errors
///
/// If error is NULL, returns "", or returns the contents of the error message
/// otherwise.
static inline const char* ArrowErrorMessage(struct ArrowError* error) {
if (error == NULL) {
return "";
} else {
return error->message;
}
}
/// \brief Set the contents of an error from an existing null-terminated string
/// \ingroup nanoarrow-errors
///
/// If error is NULL, this function does nothing.
static inline void ArrowErrorSetString(struct ArrowError* error, const char* src) {
if (error == NULL) {
return;
}
int64_t src_len = strlen(src);
if (src_len >= ((int64_t)sizeof(error->message))) {
memcpy(error->message, src, sizeof(error->message) - 1);
error->message[sizeof(error->message) - 1] = '\0';
} else {
memcpy(error->message, src, src_len);
error->message[src_len] = '\0';
}
}
/// \brief Check the result of an expression and return it if not NANOARROW_OK
/// \ingroup nanoarrow-errors
///
/// EXPR is evaluated exactly once. The temporary that holds its value gets a
/// name made unique per expansion with __COUNTER__.
#define NANOARROW_RETURN_NOT_OK(EXPR) \
_NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR)
/// \brief Check the result of an expression and return it if not NANOARROW_OK,
/// adding an auto-generated message to an ArrowError.
/// \ingroup nanoarrow-errors
///
/// This macro is used to ensure that functions that accept an ArrowError
/// as input always set its message when returning an error code (e.g., when calling
/// a nanoarrow function that does *not* accept ArrowError).
///
/// The message contains the source text of EXPR and the value it returned.
#define NANOARROW_RETURN_NOT_OK_WITH_ERROR(EXPR, ERROR_EXPR) \
_NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL( \
_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, ERROR_EXPR, #EXPR)
// Default fatal-error handler for debug builds: print the failed expression
// text, the code, and the source location to stderr, then abort(). It is only
// defined when the includer has not already provided its own
// NANOARROW_PRINT_AND_DIE.
#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE)
#define NANOARROW_PRINT_AND_DIE(VALUE, EXPR_STR) \
do { \
fprintf(stderr, "%s failed with code %d\n* %s:%d\n", EXPR_STR, (int)(VALUE), \
__FILE__, (int)__LINE__); \
abort(); \
} while (0)
#endif
#if defined(NANOARROW_DEBUG)
// Evaluate EXPR exactly once into a const int called NAME; if it is non-zero,
// hand the value and the expression text to NANOARROW_PRINT_AND_DIE.
#define _NANOARROW_ASSERT_OK_IMPL(NAME, EXPR, EXPR_STR) \
do { \
const int NAME = (EXPR); \
if (NAME) NANOARROW_PRINT_AND_DIE(NAME, EXPR_STR); \
} while (0)
/// \brief Assert that an expression's value is NANOARROW_OK
/// \ingroup nanoarrow-errors
///
/// If nanoarrow was built in debug mode (i.e., defined(NANOARROW_DEBUG) is true)
/// and the value is not NANOARROW_OK, print a message to stderr and abort.
/// If nanoarrow was built in release mode, the expression is still evaluated
/// but its value is discarded. You can customize fatal error behaviour
/// by defining the NANOARROW_PRINT_AND_DIE macro before including nanoarrow.h
/// This macro is provided as a convenience for users and is not used internally.
#define NANOARROW_ASSERT_OK(EXPR) \
_NANOARROW_ASSERT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, #EXPR)
// If EXPR is false, hand -1 and the expression text to NANOARROW_PRINT_AND_DIE.
#define _NANOARROW_DCHECK_IMPL(EXPR, EXPR_STR) \
do { \
if (!(EXPR)) NANOARROW_PRINT_AND_DIE(-1, EXPR_STR); \
} while (0)
// Debug-only assertion that EXPR is true. In release builds this expands to
// nothing, so EXPR is not evaluated at all and must not carry side effects
// that the program depends on.
#define NANOARROW_DCHECK(EXPR) _NANOARROW_DCHECK_IMPL(EXPR, #EXPR)
#else
#define NANOARROW_ASSERT_OK(EXPR) (void)(EXPR)
#define NANOARROW_DCHECK(EXPR)
#endif
/// \brief Move the contents of src into dst
/// \ingroup nanoarrow-arrow-cdata
///
/// Copies the whole struct byte-for-byte and then sets src->release to NULL.
/// Whatever dst held before is overwritten without being released. Both
/// pointers must be non-NULL (checked in debug builds only).
static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst) {
  NANOARROW_DCHECK(src != NULL);
  NANOARROW_DCHECK(dst != NULL);

  memcpy(dst, src, sizeof(*dst));
  src->release = NULL;
}
/// \brief Call the release callback of a schema
/// \ingroup nanoarrow-arrow-cdata
///
/// schema must be non-NULL and schema->release must be callable; neither is
/// checked in release builds. Debug builds additionally verify that the
/// callback reset schema->release to NULL.
static inline void ArrowSchemaRelease(struct ArrowSchema* schema) {
  NANOARROW_DCHECK(schema != NULL);

  schema->release(schema);

  NANOARROW_DCHECK(schema->release == NULL);
}
/// \brief Move the contents of src into dst
/// \ingroup nanoarrow-arrow-cdata
///
/// Copies the whole struct byte-for-byte and then sets src->release to NULL.
/// Whatever dst held before is overwritten without being released. Both
/// pointers must be non-NULL (checked in debug builds only).
static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst) {
  NANOARROW_DCHECK(src != NULL);
  NANOARROW_DCHECK(dst != NULL);

  memcpy(dst, src, sizeof(*dst));
  src->release = NULL;
}
/// \brief Call the release callback of an array
/// \ingroup nanoarrow-arrow-cdata
///
/// array must be non-NULL and array->release must be callable; neither is
/// checked in release builds. Debug builds additionally verify that the
/// callback reset array->release to NULL.
static inline void ArrowArrayRelease(struct ArrowArray* array) {
  NANOARROW_DCHECK(array != NULL);

  array->release(array);

  NANOARROW_DCHECK(array->release == NULL);
}
/// \brief Move the contents of src into dst
/// \ingroup nanoarrow-arrow-cdata
///
/// Copies the whole struct byte-for-byte and then sets src->release to NULL.
/// Whatever dst held before is overwritten without being released. Both
/// pointers must be non-NULL (checked in debug builds only).
static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src,
                                        struct ArrowArrayStream* dst) {
  NANOARROW_DCHECK(src != NULL);
  NANOARROW_DCHECK(dst != NULL);

  memcpy(dst, src, sizeof(*dst));
  src->release = NULL;
}
/// \brief Call the get_last_error callback of a stream
/// \ingroup nanoarrow-arrow-cdata
///
/// Returns the callback's result, except that a NULL result is replaced by ""
/// so that callers always receive a usable C string. array_stream must be
/// non-NULL (checked in debug builds only).
static inline const char* ArrowArrayStreamGetLastError(
    struct ArrowArrayStream* array_stream) {
  NANOARROW_DCHECK(array_stream != NULL);

  const char* message = array_stream->get_last_error(array_stream);
  return (message != NULL) ? message : "";
}
static inline ArrowErrorCode ArrowArrayStreamGetSchema(
struct ArrowArrayStream* array_stream, struct ArrowSchema* out,
struct ArrowError* error) {
NANOARROW_DCHECK(array_stream != NULL);
int result = array_stream->get_schema(array_stream, out);
if (result != NANOARROW_OK && error != NULL) {
ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream));
}
return result;
}
static inline ArrowErrorCode ArrowArrayStreamGetNext(
struct ArrowArrayStream* array_stream, struct ArrowArray* out,
struct ArrowError* error) {
NANOARROW_DCHECK(array_stream != NULL);
int result = array_stream->get_next(array_stream, out);
if (result != NANOARROW_OK && error != NULL) {
ArrowErrorSetString(error, ArrowArrayStreamGetLastError(array_stream));
}
return result;
}
/// \brief Call the release callback of a stream
/// \ingroup nanoarrow-arrow-cdata
///
/// array_stream must be non-NULL and array_stream->release must be callable;
/// neither is checked in release builds. Debug builds additionally verify that
/// the callback reset array_stream->release to NULL.
static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* array_stream) {
  NANOARROW_DCHECK(array_stream != NULL);

  array_stream->release(array_stream);

  NANOARROW_DCHECK(array_stream->release == NULL);
}
// Returns 1 when the byte at the lowest address of a 32-bit integer holding
// the value 1 is that 1 (i.e., the host is little endian) and 0 otherwise.
//
// Declared inline like the other helpers in this header so that translation
// units that include the header without calling it do not get an
// unused-function warning.
static inline char _ArrowIsLittleEndian(void) {
  const uint32_t probe = 1;
  char lowest_address_byte;
  memcpy(&lowest_address_byte, &probe, sizeof(lowest_address_byte));
  return lowest_address_byte;
}
/// \brief Arrow type enumerator
/// \ingroup nanoarrow-utils
///
/// These names are intended to map to the corresponding arrow::Type::type
/// enumerator; however, the numeric values are specifically not equal
/// (i.e., do not rely on numeric comparison).
///
/// Only the first two values are assigned explicitly; every later enumerator
/// takes the next integer, so inserting or reordering entries changes the
/// numeric value of everything that follows.
enum ArrowType {
// Placeholder for "no type yet"; ArrowTypeString() has no name for it.
NANOARROW_TYPE_UNINITIALIZED = 0,
NANOARROW_TYPE_NA = 1,
NANOARROW_TYPE_BOOL,
NANOARROW_TYPE_UINT8,
NANOARROW_TYPE_INT8,
NANOARROW_TYPE_UINT16,
NANOARROW_TYPE_INT16,
NANOARROW_TYPE_UINT32,
NANOARROW_TYPE_INT32,
NANOARROW_TYPE_UINT64,
NANOARROW_TYPE_INT64,
NANOARROW_TYPE_HALF_FLOAT,
NANOARROW_TYPE_FLOAT,
NANOARROW_TYPE_DOUBLE,
NANOARROW_TYPE_STRING,
NANOARROW_TYPE_BINARY,
NANOARROW_TYPE_FIXED_SIZE_BINARY,
NANOARROW_TYPE_DATE32,
NANOARROW_TYPE_DATE64,
NANOARROW_TYPE_TIMESTAMP,
NANOARROW_TYPE_TIME32,
NANOARROW_TYPE_TIME64,
NANOARROW_TYPE_INTERVAL_MONTHS,
NANOARROW_TYPE_INTERVAL_DAY_TIME,
NANOARROW_TYPE_DECIMAL128,
NANOARROW_TYPE_DECIMAL256,
NANOARROW_TYPE_LIST,
NANOARROW_TYPE_STRUCT,
NANOARROW_TYPE_SPARSE_UNION,
NANOARROW_TYPE_DENSE_UNION,
NANOARROW_TYPE_DICTIONARY,
NANOARROW_TYPE_MAP,
NANOARROW_TYPE_EXTENSION,
NANOARROW_TYPE_FIXED_SIZE_LIST,
NANOARROW_TYPE_DURATION,
NANOARROW_TYPE_LARGE_STRING,
NANOARROW_TYPE_LARGE_BINARY,
NANOARROW_TYPE_LARGE_LIST,
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
};
/// \brief Get the lowercase name of an enum ArrowType value
/// \ingroup nanoarrow-utils
///
/// The returned pointer refers to a string literal and must not be freed.
/// Returns NULL for NANOARROW_TYPE_UNINITIALIZED and for any value that is not
/// one of the named enumerators handled below.
static inline const char* ArrowTypeString(enum ArrowType type);

static inline const char* ArrowTypeString(enum ArrowType type) {
  // Stays NULL unless one of the cases below recognizes the value.
  const char* type_name = NULL;

  switch (type) {
    case NANOARROW_TYPE_NA:
      type_name = "na";
      break;
    case NANOARROW_TYPE_BOOL:
      type_name = "bool";
      break;
    case NANOARROW_TYPE_UINT8:
      type_name = "uint8";
      break;
    case NANOARROW_TYPE_INT8:
      type_name = "int8";
      break;
    case NANOARROW_TYPE_UINT16:
      type_name = "uint16";
      break;
    case NANOARROW_TYPE_INT16:
      type_name = "int16";
      break;
    case NANOARROW_TYPE_UINT32:
      type_name = "uint32";
      break;
    case NANOARROW_TYPE_INT32:
      type_name = "int32";
      break;
    case NANOARROW_TYPE_UINT64:
      type_name = "uint64";
      break;
    case NANOARROW_TYPE_INT64:
      type_name = "int64";
      break;
    case NANOARROW_TYPE_HALF_FLOAT:
      type_name = "half_float";
      break;
    case NANOARROW_TYPE_FLOAT:
      type_name = "float";
      break;
    case NANOARROW_TYPE_DOUBLE:
      type_name = "double";
      break;
    case NANOARROW_TYPE_STRING:
      type_name = "string";
      break;
    case NANOARROW_TYPE_BINARY:
      type_name = "binary";
      break;
    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
      type_name = "fixed_size_binary";
      break;
    case NANOARROW_TYPE_DATE32:
      type_name = "date32";
      break;
    case NANOARROW_TYPE_DATE64:
      type_name = "date64";
      break;
    case NANOARROW_TYPE_TIMESTAMP:
      type_name = "timestamp";
      break;
    case NANOARROW_TYPE_TIME32:
      type_name = "time32";
      break;
    case NANOARROW_TYPE_TIME64:
      type_name = "time64";
      break;
    case NANOARROW_TYPE_INTERVAL_MONTHS:
      type_name = "interval_months";
      break;
    case NANOARROW_TYPE_INTERVAL_DAY_TIME:
      type_name = "interval_day_time";
      break;
    case NANOARROW_TYPE_DECIMAL128:
      type_name = "decimal128";
      break;
    case NANOARROW_TYPE_DECIMAL256:
      type_name = "decimal256";
      break;
    case NANOARROW_TYPE_LIST:
      type_name = "list";
      break;
    case NANOARROW_TYPE_STRUCT:
      type_name = "struct";
      break;
    case NANOARROW_TYPE_SPARSE_UNION:
      type_name = "sparse_union";
      break;
    case NANOARROW_TYPE_DENSE_UNION:
      type_name = "dense_union";
      break;
    case NANOARROW_TYPE_DICTIONARY:
      type_name = "dictionary";
      break;
    case NANOARROW_TYPE_MAP:
      type_name = "map";
      break;
    case NANOARROW_TYPE_EXTENSION:
      type_name = "extension";
      break;
    case NANOARROW_TYPE_FIXED_SIZE_LIST:
      type_name = "fixed_size_list";
      break;
    case NANOARROW_TYPE_DURATION:
      type_name = "duration";
      break;
    case NANOARROW_TYPE_LARGE_STRING:
      type_name = "large_string";
      break;
    case NANOARROW_TYPE_LARGE_BINARY:
      type_name = "large_binary";
      break;
    case NANOARROW_TYPE_LARGE_LIST:
      type_name = "large_list";
      break;
    case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
      type_name = "interval_month_day_nano";
      break;
    default:
      break;
  }

  return type_name;
}
/// \brief Arrow time unit enumerator
/// \ingroup nanoarrow-utils
///
/// These names and values map to the corresponding arrow::TimeUnit::type
/// enumerator. ArrowTimeUnitString() gives the abbreviation of each unit
/// ("s", "ms", "us", "ns").
enum ArrowTimeUnit {
NANOARROW_TIME_UNIT_SECOND = 0,
NANOARROW_TIME_UNIT_MILLI = 1,
NANOARROW_TIME_UNIT_MICRO = 2,
NANOARROW_TIME_UNIT_NANO = 3
};
/// \brief Validation level enumerator
/// \ingroup nanoarrow-array
///
/// The levels are ordered from least to most thorough, and the numeric values
/// increase in the same order.
enum ArrowValidationLevel {
/// \brief Do not validate buffer sizes or content.
NANOARROW_VALIDATION_LEVEL_NONE = 0,
/// \brief Validate buffer sizes that depend on array length but do not validate buffer
/// sizes that depend on buffer data access.
NANOARROW_VALIDATION_LEVEL_MINIMAL = 1,
/// \brief Validate all buffer sizes, including those that require buffer data access,
/// but do not perform any checks that are O(n) along the length of the buffers.
NANOARROW_VALIDATION_LEVEL_DEFAULT = 2,
/// \brief Validate all buffer sizes and all buffer content. This is useful in the
/// context of untrusted input or input that may have been corrupted in transit.
NANOARROW_VALIDATION_LEVEL_FULL = 3
};
/// \brief Get the abbreviation of an enum ArrowTimeUnit value
/// \ingroup nanoarrow-utils
///
/// The returned pointer refers to a string literal and must not be freed.
/// Returns NULL when time_unit is not one of the four named enumerators.
static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit);

static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
  // Stays NULL unless one of the cases below recognizes the value.
  const char* abbreviation = NULL;

  switch (time_unit) {
    case NANOARROW_TIME_UNIT_SECOND:
      abbreviation = "s";
      break;
    case NANOARROW_TIME_UNIT_MILLI:
      abbreviation = "ms";
      break;
    case NANOARROW_TIME_UNIT_MICRO:
      abbreviation = "us";
      break;
    case NANOARROW_TIME_UNIT_NANO:
      abbreviation = "ns";
      break;
    default:
      break;
  }

  return abbreviation;
}
/// \brief Functional types of buffers as described in the Arrow Columnar Specification
/// \ingroup nanoarrow-array-view
///
/// No values are assigned explicitly, so NANOARROW_BUFFER_TYPE_NONE is 0 and
/// each later enumerator takes the next integer. ArrowLayout::buffer_type
/// stores one of these per buffer slot.
enum ArrowBufferType {
NANOARROW_BUFFER_TYPE_NONE,
NANOARROW_BUFFER_TYPE_VALIDITY,
NANOARROW_BUFFER_TYPE_TYPE_ID,
NANOARROW_BUFFER_TYPE_UNION_OFFSET,
NANOARROW_BUFFER_TYPE_DATA_OFFSET,
NANOARROW_BUFFER_TYPE_DATA
};
/// \brief The maximum number of buffers in an ArrowArrayView or ArrowLayout
/// \ingroup nanoarrow-array-view
///
/// All currently supported types have 3 buffers or fewer; however, future types
/// may involve a variable number of buffers (e.g., string view). These buffers
/// will be represented by separate members of the ArrowArrayView or ArrowLayout.
///
/// This constant is the length of the per-buffer arrays in ArrowLayout,
/// ArrowArrayView, and ArrowArrayPrivateData, so changing it changes the size
/// of those structs.
#define NANOARROW_MAX_FIXED_BUFFERS 3
/// \brief A non-owning view of a string
/// \ingroup nanoarrow-utils
///
/// The length is carried explicitly in size_bytes.
struct ArrowStringView {
/// \brief A pointer to the start of the string
///
/// If size_bytes is 0, this value may be NULL.
const char* data;
/// \brief The size of the string in bytes
///
/// (Not including the null terminator.)
int64_t size_bytes;
};
/// \brief Return a view of a const C string
/// \ingroup nanoarrow-utils
static inline struct ArrowStringView ArrowCharView(const char* value);
static inline struct ArrowStringView ArrowCharView(const char* value) {
struct ArrowStringView out;
out.data = value;
if (value) {
out.size_bytes = (int64_t)strlen(value);
} else {
out.size_bytes = 0;
}
return out;
}
/// \brief Typed aliases for the pointer to the start of a buffer
/// \ingroup nanoarrow-utils
///
/// Every member is a pointer to const data and all members occupy the same
/// storage, so a single address can be read back as whichever element type
/// the buffer holds without a cast. Used as the data member of
/// ArrowBufferView.
union ArrowBufferViewData {
const void* data;
const int8_t* as_int8;
const uint8_t* as_uint8;
const int16_t* as_int16;
const uint16_t* as_uint16;
const int32_t* as_int32;
const uint32_t* as_uint32;
const int64_t* as_int64;
const uint64_t* as_uint64;
const double* as_double;
const float* as_float;
const char* as_char;
};
/// \brief A non-owning view of a buffer
/// \ingroup nanoarrow-utils
struct ArrowBufferView {
/// \brief A pointer to the start of the buffer
///
/// If size_bytes is 0, this value may be NULL. The union exposes the same
/// address under several element types.
union ArrowBufferViewData data;
/// \brief The size of the buffer in bytes
int64_t size_bytes;
};
/// \brief Array buffer allocation and deallocation
/// \ingroup nanoarrow-buffer
///
/// Container for allocate, reallocate, and free methods that can be used
/// to customize allocation and deallocation of buffers when constructing
/// an ArrowArray.
///
/// Both callbacks receive the allocator itself as their first argument, which
/// gives them access to private_data.
struct ArrowBufferAllocator {
/// \brief Reallocate a buffer or return NULL if it cannot be reallocated
///
/// There is no separate allocate callback in this struct.
/// NOTE(review): presumably a first allocation is requested by passing a NULL
/// ptr with an old_size of 0 -- confirm against the allocator implementation.
uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
int64_t old_size, int64_t new_size);
/// \brief Deallocate a buffer allocated by this allocator
void (*free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size);
/// \brief Opaque data specific to the allocator
void* private_data;
};
/// \brief Function pointer type with the same signature as
/// ArrowBufferAllocator::free
/// \ingroup nanoarrow-buffer
typedef void (*ArrowBufferDeallocatorCallback)(struct ArrowBufferAllocator* allocator,
uint8_t* ptr, int64_t size);
/// \brief An owning mutable view of a buffer
/// \ingroup nanoarrow-buffer
struct ArrowBuffer {
/// \brief A pointer to the start of the buffer
///
/// If capacity_bytes is 0, this value may be NULL.
uint8_t* data;
/// \brief The size of the buffer in bytes
///
/// NOTE(review): presumably never larger than capacity_bytes -- confirm
/// against the buffer implementation.
int64_t size_bytes;
/// \brief The capacity of the buffer in bytes
int64_t capacity_bytes;
/// \brief The allocator that will be used to reallocate and/or free the buffer
///
/// Stored by value, so each buffer carries its own copy of the callbacks.
struct ArrowBufferAllocator allocator;
};
/// \brief An owning mutable view of a bitmap
/// \ingroup nanoarrow-bitmap
struct ArrowBitmap {
/// \brief An ArrowBuffer to hold the allocated memory
struct ArrowBuffer buffer;
/// \brief The number of bits that have been appended to the bitmap
///
/// This is counted in bits, whereas buffer.size_bytes is counted in bytes.
int64_t size_bits;
};
/// \brief A description of an arrangement of buffers
/// \ingroup nanoarrow-utils
///
/// Contains the minimum amount of information required to
/// calculate the size of each buffer in an ArrowArray knowing only
/// the length and offset of the array.
///
/// The three arrays are parallel: index i of each describes buffer i.
struct ArrowLayout {
/// \brief The function of each buffer
enum ArrowBufferType buffer_type[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The data type of each buffer
enum ArrowType buffer_data_type[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The size of an element in each buffer, in bits, or 0 if this size
/// is variable or unknown
int64_t element_size_bits[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The number of elements in the child array per element in this array for a
/// fixed-size list
int64_t child_size_elements;
};
/// \brief A non-owning view of an ArrowArray
/// \ingroup nanoarrow-array-view
///
/// This data structure provides access to the values contained within
/// an ArrowArray with fields provided in a more readily-extractible
/// form. You can re-use an ArrowArrayView for multiple ArrowArrays
/// with the same storage type, use it to represent a hypothetical
/// ArrowArray that does not exist yet, or use it to validate the buffers
/// of a future ArrowArray.
struct ArrowArrayView {
/// \brief The underlying ArrowArray or NULL if it has not been set or
/// if the buffers in this ArrowArrayView are not backed by an ArrowArray.
const struct ArrowArray* array;
/// \brief The number of elements from the physical start of the buffers.
int64_t offset;
/// \brief The number of elements in this view.
int64_t length;
/// \brief A cached null count or -1 to indicate that this value is unknown.
int64_t null_count;
/// \brief The type used to store values in this array
///
/// This type represents only the minimum required information to
/// extract values from the array buffers (e.g., for a Date32 array,
/// this value will be NANOARROW_TYPE_INT32). For dictionary-encoded
/// arrays, this will be the index type.
enum ArrowType storage_type;
/// \brief The buffer types, strides, and sizes of this Array's buffers
struct ArrowLayout layout;
/// \brief This Array's buffers as ArrowBufferView objects
struct ArrowBufferView buffer_views[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The number of children of this view
int64_t n_children;
/// \brief Pointers to views of this array's children
struct ArrowArrayView** children;
/// \brief Pointer to a view of this array's dictionary
struct ArrowArrayView* dictionary;
/// \brief Union type id to child index mapping
///
/// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer
/// such that child_index == union_type_id_map[type_id] and
/// type_id == union_type_id_map[128 + child_index]. This value may be
/// NULL in the case where child_index == type_id.
int8_t* union_type_id_map;
};
// Used as the private data member for ArrowArrays allocated here and accessed
// internally within inline ArrowArray* helpers.
struct ArrowArrayPrivateData {
// Holder for the validity buffer (or first buffer for union types, which are
// the only type whose first buffer is not a validity buffer)
struct ArrowBitmap bitmap;
// Holder for additional buffers as required
// (one fewer than NANOARROW_MAX_FIXED_BUFFERS because the first buffer lives
// in bitmap above)
struct ArrowBuffer buffers[NANOARROW_MAX_FIXED_BUFFERS - 1];
// The array of pointers to buffers. This must be updated after a sequence
// of appends to synchronize its values with the actual buffer addresses
// (which may have been reallocated during that time)
const void* buffer_data[NANOARROW_MAX_FIXED_BUFFERS];
// The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
enum ArrowType storage_type;
// The buffer arrangement for the storage type
struct ArrowLayout layout;
// Flag to indicate if there are non-sequence union type ids.
// In the future this could be replaced with a type id<->child mapping
// to support constructing unions in append mode where type_id != child_index
int8_t union_type_id_is_child_index;
};
/// \brief A representation of an interval.
/// \ingroup nanoarrow-utils
///
/// Use ArrowIntervalInit() to zero every member and set the type.
struct ArrowInterval {
/// \brief The type of interval being used
///
/// NOTE(review): presumably one of the NANOARROW_TYPE_INTERVAL_* values, with
/// only the members relevant to that type being meaningful -- confirm against
/// the code that reads and writes intervals.
enum ArrowType type;
/// \brief The number of months represented by the interval
int32_t months;
/// \brief The number of days represented by the interval
int32_t days;
/// \brief The number of ms represented by the interval
int32_t ms;
/// \brief The number of ns represented by the interval
int64_t ns;
};
/// \brief Reset an interval to all zeroes and assign its type
/// \ingroup nanoarrow-utils
///
/// Every byte of the struct is cleared first, so months, days, ms, and ns all
/// read as 0 afterwards; type is then set to the requested value. interval
/// must not be NULL.
static inline void ArrowIntervalInit(struct ArrowInterval* interval,
                                     enum ArrowType type) {
  memset(interval, 0, sizeof(*interval));

  interval->type = type;
}
/// \brief A representation of a fixed-precision decimal number
/// \ingroup nanoarrow-utils
///
/// This structure should be initialized with ArrowDecimalInit() once and
/// values set using ArrowDecimalSetInt() or ArrowDecimalSetBytes().
struct ArrowDecimal {
/// \brief An array of 64-bit integers of n_words length defined in native-endian order
///
/// The array always has room for four words; only the first n_words are
/// read and written by the value accessors.
uint64_t words[4];
/// \brief The number of significant digits this decimal number can represent
int32_t precision;
/// \brief The number of digits after the decimal point. This can be negative.
int32_t scale;
/// \brief The number of words in the words array
///
/// ArrowDecimalInit() sets this to the bit width divided by 64.
int n_words;
/// \brief Cached value used by the implementation
///
/// Index into words of the most significant word, as set by ArrowDecimalInit().
int high_word_index;
/// \brief Cached value used by the implementation
///
/// Index into words of the least significant word, as set by ArrowDecimalInit().
int low_word_index;
};
/// \brief Prepare a decimal for use with the given bit width, precision, and scale
/// \ingroup nanoarrow-utils
///
/// Clears all four words, stores precision and scale, derives the number of
/// 64-bit words from bitwidth, and caches which word index is least and most
/// significant on this host. bitwidth is not validated here; it is expected
/// to be a multiple of 64 that fits in the four available words.
static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwidth,
                                    int32_t precision, int32_t scale) {
  memset(decimal->words, 0, sizeof(decimal->words));
  decimal->precision = precision;
  decimal->scale = scale;
  decimal->n_words = (int)(bitwidth / 8 / sizeof(uint64_t));

  // Words are kept in native order: on little endian hosts the least
  // significant word comes first, on big endian hosts it comes last.
  const int last_word = decimal->n_words - 1;
  const int low_word_first = _ArrowIsLittleEndian();
  decimal->low_word_index = low_word_first ? 0 : last_word;
  decimal->high_word_index = low_word_first ? last_word : 0;
}
/// \brief Read the least significant word of a decimal as a signed integer
/// \ingroup nanoarrow-utils
///
/// Only the low 64-bit word is consulted; any higher words are ignored. This
/// does not check that the decimal's value actually fits within the signed
/// 64-bit integer range (a precision less than or equal to 18 is sufficiently
/// small).
static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal* decimal) {
  const uint64_t low_word = decimal->words[decimal->low_word_index];
  return (int64_t)low_word;
}
/// \brief Copy the words of this decimal into a caller-provided buffer
/// \ingroup nanoarrow-utils
///
/// Writes n_words * 8 bytes to out in the same byte order in which they are
/// stored in the decimal. out must have room for that many bytes.
static inline void ArrowDecimalGetBytes(const struct ArrowDecimal* decimal,
                                        uint8_t* out) {
  const size_t n_bytes = decimal->n_words * sizeof(uint64_t);
  memcpy(out, decimal->words, n_bytes);
}
/// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise
/// \ingroup nanoarrow-utils
///
/// The sign is taken from the top bit of the most significant word.
static inline int64_t ArrowDecimalSign(const struct ArrowDecimal* decimal) {
  // Test the sign bit on the unsigned word: right-shifting a negative signed
  // value is implementation-defined, whereas an unsigned shift is not.
  const uint64_t high_word = decimal->words[decimal->high_word_index];
  return ((high_word >> 63) != 0) ? -1 : 1;
}
/// \brief Store a 64-bit signed integer in this decimal
/// \ingroup nanoarrow-utils
///
/// The value is sign-extended across all n_words words: every word is first
/// filled with all-one bits for a negative value or all-zero bits otherwise,
/// and the least significant word is then overwritten with value itself.
static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t value) {
  const int fill_byte = (value < 0) ? 0xff : 0;
  memset(decimal->words, fill_byte, decimal->n_words * sizeof(uint64_t));

  decimal->words[decimal->low_word_index] = (uint64_t)value;
}
/// \brief Replace the value of this decimal with its two's complement negation
/// \ingroup nanoarrow-utils
///
/// Every word is bitwise inverted and 1 is added to the least significant
/// word, with the carry propagated towards the most significant word.
static inline void ArrowDecimalNegate(struct ArrowDecimal* decimal) {
  // Words are visited from least to most significant: ascending indices when
  // the low word is stored first, descending indices down to 0 otherwise.
  int word_index;
  int step;
  int n_remaining;
  if (decimal->low_word_index == 0) {
    word_index = 0;
    step = 1;
    n_remaining = decimal->n_words;
  } else {
    word_index = decimal->low_word_index;
    step = -1;
    n_remaining = decimal->low_word_index + 1;
  }

  uint64_t carry = 1;
  for (; n_remaining > 0; n_remaining--, word_index += step) {
    const uint64_t negated = ~decimal->words[word_index] + carry;
    decimal->words[word_index] = negated;

    // The added 1 keeps propagating only while the sum wraps around to zero.
    if (negated != 0) {
      carry = 0;
    }
  }
}
/// \brief Overwrite the words of this decimal from a caller-provided buffer
/// \ingroup nanoarrow-utils
///
/// Reads n_words * 8 bytes from value and stores them unchanged, so value is
/// expected to hold the number in the same byte order the decimal uses
/// internally. value must provide at least that many bytes.
static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal,
                                        const uint8_t* value) {
  const size_t n_bytes = decimal->n_words * sizeof(uint64_t);
  memcpy(decimal->words, value, n_bytes);
}
#ifdef __cplusplus
}
#endif
#endif