| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <errno.h> |
| #include <stdarg.h> |
| #include <stddef.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include "nanoarrow.h" |
| |
| const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; } |
| |
| int ArrowNanoarrowVersionInt(void) { return NANOARROW_VERSION_INT; } |
| |
| int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) { |
| if (error == NULL) { |
| return NANOARROW_OK; |
| } |
| |
| memset(error->message, 0, sizeof(error->message)); |
| |
| va_list args; |
| va_start(args, fmt); |
| int chars_needed = vsnprintf(error->message, sizeof(error->message), fmt, args); |
| va_end(args); |
| |
| if (chars_needed < 0) { |
| return EINVAL; |
| } else if (((size_t)chars_needed) >= sizeof(error->message)) { |
| return ERANGE; |
| } else { |
| return NANOARROW_OK; |
| } |
| } |
| |
| const char* ArrowErrorMessage(struct ArrowError* error) { |
| if (error == NULL) { |
| return ""; |
| } else { |
| return error->message; |
| } |
| } |
| |
| void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) { |
| layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY; |
| layout->buffer_data_type[0] = NANOARROW_TYPE_BOOL; |
| layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA; |
| layout->buffer_data_type[1] = storage_type; |
| layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE; |
| layout->buffer_data_type[2] = NANOARROW_TYPE_UNINITIALIZED; |
| |
| layout->element_size_bits[0] = 1; |
| layout->element_size_bits[1] = 0; |
| layout->element_size_bits[2] = 0; |
| |
| layout->child_size_elements = 0; |
| |
| switch (storage_type) { |
| case NANOARROW_TYPE_UNINITIALIZED: |
| case NANOARROW_TYPE_NA: |
| layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE; |
| layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED; |
| layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; |
| layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; |
| layout->element_size_bits[0] = 0; |
| break; |
| |
| case NANOARROW_TYPE_LIST: |
| case NANOARROW_TYPE_MAP: |
| layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; |
| layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; |
| layout->element_size_bits[1] = 32; |
| break; |
| |
| case NANOARROW_TYPE_LARGE_LIST: |
| layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; |
| layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; |
| layout->element_size_bits[1] = 64; |
| break; |
| |
| case NANOARROW_TYPE_STRUCT: |
| case NANOARROW_TYPE_FIXED_SIZE_LIST: |
| layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; |
| layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; |
| break; |
| |
| case NANOARROW_TYPE_BOOL: |
| layout->element_size_bits[1] = 1; |
| break; |
| |
| case NANOARROW_TYPE_UINT8: |
| case NANOARROW_TYPE_INT8: |
| layout->element_size_bits[1] = 8; |
| break; |
| |
| case NANOARROW_TYPE_UINT16: |
| case NANOARROW_TYPE_INT16: |
| case NANOARROW_TYPE_HALF_FLOAT: |
| layout->element_size_bits[1] = 16; |
| break; |
| |
| case NANOARROW_TYPE_UINT32: |
| case NANOARROW_TYPE_INT32: |
| case NANOARROW_TYPE_FLOAT: |
| layout->element_size_bits[1] = 32; |
| break; |
| case NANOARROW_TYPE_INTERVAL_MONTHS: |
| layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; |
| layout->element_size_bits[1] = 32; |
| break; |
| |
| case NANOARROW_TYPE_UINT64: |
| case NANOARROW_TYPE_INT64: |
| case NANOARROW_TYPE_DOUBLE: |
| case NANOARROW_TYPE_INTERVAL_DAY_TIME: |
| layout->element_size_bits[1] = 64; |
| break; |
| |
| case NANOARROW_TYPE_DECIMAL128: |
| case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: |
| layout->element_size_bits[1] = 128; |
| break; |
| |
| case NANOARROW_TYPE_DECIMAL256: |
| layout->element_size_bits[1] = 256; |
| break; |
| |
| case NANOARROW_TYPE_FIXED_SIZE_BINARY: |
| layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY; |
| break; |
| |
| case NANOARROW_TYPE_DENSE_UNION: |
| layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID; |
| layout->buffer_data_type[0] = NANOARROW_TYPE_INT8; |
| layout->element_size_bits[0] = 8; |
| layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET; |
| layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; |
| layout->element_size_bits[1] = 32; |
| break; |
| |
| case NANOARROW_TYPE_SPARSE_UNION: |
| layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID; |
| layout->buffer_data_type[0] = NANOARROW_TYPE_INT8; |
| layout->element_size_bits[0] = 8; |
| layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE; |
| layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED; |
| break; |
| |
| case NANOARROW_TYPE_STRING: |
| case NANOARROW_TYPE_BINARY: |
| layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; |
| layout->buffer_data_type[1] = NANOARROW_TYPE_INT32; |
| layout->element_size_bits[1] = 32; |
| layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; |
| layout->buffer_data_type[2] = storage_type; |
| break; |
| |
| case NANOARROW_TYPE_LARGE_STRING: |
| layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; |
| layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; |
| layout->element_size_bits[1] = 64; |
| layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; |
| layout->buffer_data_type[2] = NANOARROW_TYPE_STRING; |
| break; |
| case NANOARROW_TYPE_LARGE_BINARY: |
| layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET; |
| layout->buffer_data_type[1] = NANOARROW_TYPE_INT64; |
| layout->element_size_bits[1] = 64; |
| layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA; |
| layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY; |
| break; |
| |
| default: |
| break; |
| } |
| } |
| |
// Allocates `size` bytes with malloc().
//
// Returns NULL when `size` is negative or does not fit in a size_t (possible
// where size_t is narrower than 64 bits). Without this check the implicit
// int64_t -> size_t conversion would wrap and request an unrelated size.
void* ArrowMalloc(int64_t size) {
  if (size < 0 || (uint64_t)size > (uint64_t)SIZE_MAX) {
    return NULL;
  }

  return malloc((size_t)size);
}
| |
// Resizes the allocation at `ptr` to `size` bytes with realloc().
//
// Returns NULL and leaves `ptr` untouched when `size` is negative or does not
// fit in a size_t. Without this check the implicit int64_t -> size_t
// conversion would wrap and could shrink the buffer to an unrelated size.
void* ArrowRealloc(void* ptr, int64_t size) {
  if (size < 0 || (uint64_t)size > (uint64_t)SIZE_MAX) {
    return NULL;
  }

  return realloc(ptr, (size_t)size);
}
| |
// Releases memory obtained from ArrowMalloc()/ArrowRealloc() by handing it to free().
void ArrowFree(void* ptr) {
  free(ptr);
}
| |
// Reallocate callback of the default allocator: forwards to ArrowRealloc().
// `allocator` and `old_size` are not used.
static uint8_t* ArrowBufferAllocatorMallocReallocate(
    struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size,
    int64_t new_size) {
  void* resized = ArrowRealloc(ptr, new_size);
  return (uint8_t*)resized;
}
| |
// Free callback of the default allocator: forwards to ArrowFree().
// `allocator` and `size` are not used.
static void ArrowBufferAllocatorMallocFree(struct ArrowBufferAllocator* allocator,
                                           uint8_t* ptr, int64_t size) {
  void* to_release = ptr;
  ArrowFree(to_release);
}
| |
| static struct ArrowBufferAllocator ArrowBufferAllocatorMalloc = { |
| &ArrowBufferAllocatorMallocReallocate, &ArrowBufferAllocatorMallocFree, NULL}; |
| |
| struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void) { |
| return ArrowBufferAllocatorMalloc; |
| } |
| |
// Reallocate callback that always fails: it ignores every argument and
// returns NULL.
static uint8_t* ArrowBufferAllocatorNeverReallocate(
    struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size,
    int64_t new_size) {
  uint8_t* no_buffer = NULL;
  return no_buffer;
}
| |
| struct ArrowBufferAllocator ArrowBufferDeallocator( |
| void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, |
| int64_t size), |
| void* private_data) { |
| struct ArrowBufferAllocator allocator; |
| allocator.reallocate = &ArrowBufferAllocatorNeverReallocate; |
| allocator.free = custom_free; |
| allocator.private_data = private_data; |
| return allocator; |
| } |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <errno.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include "nanoarrow.h" |
| |
| static void ArrowSchemaRelease(struct ArrowSchema* schema) { |
| if (schema->format != NULL) ArrowFree((void*)schema->format); |
| if (schema->name != NULL) ArrowFree((void*)schema->name); |
| if (schema->metadata != NULL) ArrowFree((void*)schema->metadata); |
| |
| // This object owns the memory for all the children, but those |
| // children may have been generated elsewhere and might have |
| // their own release() callback. |
| if (schema->children != NULL) { |
| for (int64_t i = 0; i < schema->n_children; i++) { |
| if (schema->children[i] != NULL) { |
| if (schema->children[i]->release != NULL) { |
| schema->children[i]->release(schema->children[i]); |
| } |
| |
| ArrowFree(schema->children[i]); |
| } |
| } |
| |
| ArrowFree(schema->children); |
| } |
| |
| // This object owns the memory for the dictionary but it |
| // may have been generated somewhere else and have its own |
| // release() callback. |
| if (schema->dictionary != NULL) { |
| if (schema->dictionary->release != NULL) { |
| schema->dictionary->release(schema->dictionary); |
| } |
| |
| ArrowFree(schema->dictionary); |
| } |
| |
| // private data not currently used |
| if (schema->private_data != NULL) { |
| ArrowFree(schema->private_data); |
| } |
| |
| schema->release = NULL; |
| } |
| |
| static const char* ArrowSchemaFormatTemplate(enum ArrowType type) { |
| switch (type) { |
| case NANOARROW_TYPE_UNINITIALIZED: |
| return NULL; |
| case NANOARROW_TYPE_NA: |
| return "n"; |
| case NANOARROW_TYPE_BOOL: |
| return "b"; |
| |
| case NANOARROW_TYPE_UINT8: |
| return "C"; |
| case NANOARROW_TYPE_INT8: |
| return "c"; |
| case NANOARROW_TYPE_UINT16: |
| return "S"; |
| case NANOARROW_TYPE_INT16: |
| return "s"; |
| case NANOARROW_TYPE_UINT32: |
| return "I"; |
| case NANOARROW_TYPE_INT32: |
| return "i"; |
| case NANOARROW_TYPE_UINT64: |
| return "L"; |
| case NANOARROW_TYPE_INT64: |
| return "l"; |
| |
| case NANOARROW_TYPE_HALF_FLOAT: |
| return "e"; |
| case NANOARROW_TYPE_FLOAT: |
| return "f"; |
| case NANOARROW_TYPE_DOUBLE: |
| return "g"; |
| |
| case NANOARROW_TYPE_STRING: |
| return "u"; |
| case NANOARROW_TYPE_LARGE_STRING: |
| return "U"; |
| case NANOARROW_TYPE_BINARY: |
| return "z"; |
| case NANOARROW_TYPE_LARGE_BINARY: |
| return "Z"; |
| |
| case NANOARROW_TYPE_DATE32: |
| return "tdD"; |
| case NANOARROW_TYPE_DATE64: |
| return "tdm"; |
| case NANOARROW_TYPE_INTERVAL_MONTHS: |
| return "tiM"; |
| case NANOARROW_TYPE_INTERVAL_DAY_TIME: |
| return "tiD"; |
| case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: |
| return "tin"; |
| |
| case NANOARROW_TYPE_LIST: |
| return "+l"; |
| case NANOARROW_TYPE_LARGE_LIST: |
| return "+L"; |
| case NANOARROW_TYPE_STRUCT: |
| return "+s"; |
| case NANOARROW_TYPE_MAP: |
| return "+m"; |
| |
| default: |
| return NULL; |
| } |
| } |
| |
| static int ArrowSchemaInitChildrenIfNeeded(struct ArrowSchema* schema, |
| enum ArrowType type) { |
| switch (type) { |
| case NANOARROW_TYPE_LIST: |
| case NANOARROW_TYPE_LARGE_LIST: |
| case NANOARROW_TYPE_FIXED_SIZE_LIST: |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1)); |
| ArrowSchemaInit(schema->children[0]); |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "item")); |
| break; |
| case NANOARROW_TYPE_MAP: |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1)); |
| NANOARROW_RETURN_NOT_OK( |
| ArrowSchemaInitFromType(schema->children[0], NANOARROW_TYPE_STRUCT)); |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "entries")); |
| schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE; |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema->children[0], 2)); |
| ArrowSchemaInit(schema->children[0]->children[0]); |
| ArrowSchemaInit(schema->children[0]->children[1]); |
| NANOARROW_RETURN_NOT_OK( |
| ArrowSchemaSetName(schema->children[0]->children[0], "key")); |
| schema->children[0]->children[0]->flags &= ~ARROW_FLAG_NULLABLE; |
| NANOARROW_RETURN_NOT_OK( |
| ArrowSchemaSetName(schema->children[0]->children[1], "value")); |
| break; |
| default: |
| break; |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| void ArrowSchemaInit(struct ArrowSchema* schema) { |
| schema->format = NULL; |
| schema->name = NULL; |
| schema->metadata = NULL; |
| schema->flags = ARROW_FLAG_NULLABLE; |
| schema->n_children = 0; |
| schema->children = NULL; |
| schema->dictionary = NULL; |
| schema->private_data = NULL; |
| schema->release = &ArrowSchemaRelease; |
| } |
| |
| ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type) { |
| // We don't allocate the dictionary because it has to be nullptr |
| // for non-dictionary-encoded arrays. |
| |
| // Set the format to a valid format string for type |
| const char* template_format = ArrowSchemaFormatTemplate(type); |
| |
| // If type isn't recognized and not explicitly unset |
| if (template_format == NULL && type != NANOARROW_TYPE_UNINITIALIZED) { |
| return EINVAL; |
| } |
| |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, template_format)); |
| |
| // For types with an umabiguous child structure, allocate children |
| return ArrowSchemaInitChildrenIfNeeded(schema, type); |
| } |
| |
| ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children) { |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRUCT)); |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, n_children)); |
| for (int64_t i = 0; i < n_children; i++) { |
| ArrowSchemaInit(schema->children[i]); |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type) { |
| ArrowSchemaInit(schema); |
| |
| int result = ArrowSchemaSetType(schema, type); |
| if (result != NANOARROW_OK) { |
| schema->release(schema); |
| return result; |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, |
| enum ArrowType type, int32_t fixed_size) { |
| if (fixed_size <= 0) { |
| return EINVAL; |
| } |
| |
| char buffer[64]; |
| int n_chars; |
| switch (type) { |
| case NANOARROW_TYPE_FIXED_SIZE_BINARY: |
| n_chars = snprintf(buffer, sizeof(buffer), "w:%d", (int)fixed_size); |
| break; |
| case NANOARROW_TYPE_FIXED_SIZE_LIST: |
| n_chars = snprintf(buffer, sizeof(buffer), "+w:%d", (int)fixed_size); |
| break; |
| default: |
| return EINVAL; |
| } |
| |
| buffer[n_chars] = '\0'; |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, buffer)); |
| |
| if (type == NANOARROW_TYPE_FIXED_SIZE_LIST) { |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaInitChildrenIfNeeded(schema, type)); |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, |
| int32_t decimal_precision, |
| int32_t decimal_scale) { |
| if (decimal_precision <= 0) { |
| return EINVAL; |
| } |
| |
| char buffer[64]; |
| int n_chars; |
| switch (type) { |
| case NANOARROW_TYPE_DECIMAL128: |
| n_chars = |
| snprintf(buffer, sizeof(buffer), "d:%d,%d", decimal_precision, decimal_scale); |
| break; |
| case NANOARROW_TYPE_DECIMAL256: |
| n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,256", decimal_precision, |
| decimal_scale); |
| break; |
| default: |
| return EINVAL; |
| } |
| |
| buffer[n_chars] = '\0'; |
| return ArrowSchemaSetFormat(schema, buffer); |
| } |
| |
| static const char* ArrowTimeUnitFormatString(enum ArrowTimeUnit time_unit) { |
| switch (time_unit) { |
| case NANOARROW_TIME_UNIT_SECOND: |
| return "s"; |
| case NANOARROW_TIME_UNIT_MILLI: |
| return "m"; |
| case NANOARROW_TIME_UNIT_MICRO: |
| return "u"; |
| case NANOARROW_TIME_UNIT_NANO: |
| return "n"; |
| default: |
| return NULL; |
| } |
| } |
| |
| ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type, |
| enum ArrowTimeUnit time_unit, |
| const char* timezone) { |
| const char* time_unit_str = ArrowTimeUnitFormatString(time_unit); |
| if (time_unit_str == NULL) { |
| return EINVAL; |
| } |
| |
| char buffer[128]; |
| int n_chars; |
| switch (type) { |
| case NANOARROW_TYPE_TIME32: |
| case NANOARROW_TYPE_TIME64: |
| if (timezone != NULL) { |
| return EINVAL; |
| } |
| n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str); |
| break; |
| case NANOARROW_TYPE_TIMESTAMP: |
| if (timezone == NULL) { |
| timezone = ""; |
| } |
| n_chars = snprintf(buffer, sizeof(buffer), "ts%s:%s", time_unit_str, timezone); |
| break; |
| case NANOARROW_TYPE_DURATION: |
| if (timezone != NULL) { |
| return EINVAL; |
| } |
| n_chars = snprintf(buffer, sizeof(buffer), "tD%s", time_unit_str); |
| break; |
| default: |
| return EINVAL; |
| } |
| |
| if (((size_t)n_chars) >= sizeof(buffer)) { |
| return ERANGE; |
| } |
| |
| buffer[n_chars] = '\0'; |
| |
| return ArrowSchemaSetFormat(schema, buffer); |
| } |
| |
| ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type, |
| int64_t n_children) { |
| if (n_children < 0 || n_children > 127) { |
| return EINVAL; |
| } |
| |
| // Max valid size would be +ud:0,1,...126 = 401 characters + null terminator |
| char format_out[512]; |
| int64_t format_out_size = 512; |
| memset(format_out, 0, format_out_size); |
| int n_chars; |
| char* format_cursor = format_out; |
| |
| switch (type) { |
| case NANOARROW_TYPE_SPARSE_UNION: |
| n_chars = snprintf(format_cursor, format_out_size, "+us:"); |
| format_cursor += n_chars; |
| format_out_size -= n_chars; |
| break; |
| case NANOARROW_TYPE_DENSE_UNION: |
| n_chars = snprintf(format_cursor, format_out_size, "+ud:"); |
| format_cursor += n_chars; |
| format_out_size -= n_chars; |
| break; |
| default: |
| return EINVAL; |
| } |
| |
| if (n_children > 0) { |
| n_chars = snprintf(format_cursor, format_out_size, "0"); |
| format_cursor += n_chars; |
| format_out_size -= n_chars; |
| |
| for (int64_t i = 1; i < n_children; i++) { |
| n_chars = snprintf(format_cursor, format_out_size, ",%d", (int)i); |
| format_cursor += n_chars; |
| format_out_size -= n_chars; |
| } |
| } |
| |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, format_out)); |
| |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, n_children)); |
| for (int64_t i = 0; i < n_children; i++) { |
| ArrowSchemaInit(schema->children[i]); |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format) { |
| if (schema->format != NULL) { |
| ArrowFree((void*)schema->format); |
| } |
| |
| if (format != NULL) { |
| size_t format_size = strlen(format) + 1; |
| schema->format = (const char*)ArrowMalloc(format_size); |
| if (schema->format == NULL) { |
| return ENOMEM; |
| } |
| |
| memcpy((void*)schema->format, format, format_size); |
| } else { |
| schema->format = NULL; |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name) { |
| if (schema->name != NULL) { |
| ArrowFree((void*)schema->name); |
| } |
| |
| if (name != NULL) { |
| size_t name_size = strlen(name) + 1; |
| schema->name = (const char*)ArrowMalloc(name_size); |
| if (schema->name == NULL) { |
| return ENOMEM; |
| } |
| |
| memcpy((void*)schema->name, name, name_size); |
| } else { |
| schema->name = NULL; |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata) { |
| if (schema->metadata != NULL) { |
| ArrowFree((void*)schema->metadata); |
| } |
| |
| if (metadata != NULL) { |
| size_t metadata_size = ArrowMetadataSizeOf(metadata); |
| schema->metadata = (const char*)ArrowMalloc(metadata_size); |
| if (schema->metadata == NULL) { |
| return ENOMEM; |
| } |
| |
| memcpy((void*)schema->metadata, metadata, metadata_size); |
| } else { |
| schema->metadata = NULL; |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, |
| int64_t n_children) { |
| if (schema->children != NULL) { |
| return EEXIST; |
| } |
| |
| if (n_children > 0) { |
| schema->children = |
| (struct ArrowSchema**)ArrowMalloc(n_children * sizeof(struct ArrowSchema*)); |
| |
| if (schema->children == NULL) { |
| return ENOMEM; |
| } |
| |
| schema->n_children = n_children; |
| |
| memset(schema->children, 0, n_children * sizeof(struct ArrowSchema*)); |
| |
| for (int64_t i = 0; i < n_children; i++) { |
| schema->children[i] = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema)); |
| |
| if (schema->children[i] == NULL) { |
| return ENOMEM; |
| } |
| |
| schema->children[i]->release = NULL; |
| } |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema) { |
| if (schema->dictionary != NULL) { |
| return EEXIST; |
| } |
| |
| schema->dictionary = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema)); |
| if (schema->dictionary == NULL) { |
| return ENOMEM; |
| } |
| |
| schema->dictionary->release = NULL; |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema, |
| struct ArrowSchema* schema_out) { |
| ArrowSchemaInit(schema_out); |
| |
| int result = ArrowSchemaSetFormat(schema_out, schema->format); |
| if (result != NANOARROW_OK) { |
| schema_out->release(schema_out); |
| return result; |
| } |
| |
| schema_out->flags = schema->flags; |
| |
| result = ArrowSchemaSetName(schema_out, schema->name); |
| if (result != NANOARROW_OK) { |
| schema_out->release(schema_out); |
| return result; |
| } |
| |
| result = ArrowSchemaSetMetadata(schema_out, schema->metadata); |
| if (result != NANOARROW_OK) { |
| schema_out->release(schema_out); |
| return result; |
| } |
| |
| result = ArrowSchemaAllocateChildren(schema_out, schema->n_children); |
| if (result != NANOARROW_OK) { |
| schema_out->release(schema_out); |
| return result; |
| } |
| |
| for (int64_t i = 0; i < schema->n_children; i++) { |
| result = ArrowSchemaDeepCopy(schema->children[i], schema_out->children[i]); |
| if (result != NANOARROW_OK) { |
| schema_out->release(schema_out); |
| return result; |
| } |
| } |
| |
| if (schema->dictionary != NULL) { |
| result = ArrowSchemaAllocateDictionary(schema_out); |
| if (result != NANOARROW_OK) { |
| schema_out->release(schema_out); |
| return result; |
| } |
| |
| result = ArrowSchemaDeepCopy(schema->dictionary, schema_out->dictionary); |
| if (result != NANOARROW_OK) { |
| schema_out->release(schema_out); |
| return result; |
| } |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static void ArrowSchemaViewSetPrimitive(struct ArrowSchemaView* schema_view, |
| enum ArrowType type) { |
| schema_view->type = type; |
| schema_view->storage_type = type; |
| } |
| |
| static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view, |
| const char* format, |
| const char** format_end_out, |
| struct ArrowError* error) { |
| *format_end_out = format; |
| |
| // needed for decimal parsing |
| const char* parse_start; |
| char* parse_end; |
| |
| switch (format[0]) { |
| case 'n': |
| schema_view->type = NANOARROW_TYPE_NA; |
| schema_view->storage_type = NANOARROW_TYPE_NA; |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'b': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BOOL); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'c': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT8); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'C': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT8); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 's': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT16); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'S': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT16); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'i': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'I': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT32); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'l': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'L': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT64); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'e': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_HALF_FLOAT); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'f': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_FLOAT); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'g': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DOUBLE); |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| |
| // decimal |
| case 'd': |
| if (format[1] != ':' || format[2] == '\0') { |
| ArrowErrorSet(error, "Expected ':precision,scale[,bitwidth]' following 'd'", |
| format + 3); |
| return EINVAL; |
| } |
| |
| parse_start = format + 2; |
| schema_view->decimal_precision = (int32_t)strtol(parse_start, &parse_end, 10); |
| if (parse_end == parse_start || parse_end[0] != ',') { |
| ArrowErrorSet(error, "Expected 'precision,scale[,bitwidth]' following 'd:'"); |
| return EINVAL; |
| } |
| |
| parse_start = parse_end + 1; |
| schema_view->decimal_scale = (int32_t)strtol(parse_start, &parse_end, 10); |
| if (parse_end == parse_start) { |
| ArrowErrorSet(error, "Expected 'scale[,bitwidth]' following 'd:precision,'"); |
| return EINVAL; |
| } else if (parse_end[0] != ',') { |
| schema_view->decimal_bitwidth = 128; |
| } else { |
| parse_start = parse_end + 1; |
| schema_view->decimal_bitwidth = (int32_t)strtol(parse_start, &parse_end, 10); |
| if (parse_start == parse_end) { |
| ArrowErrorSet(error, "Expected precision following 'd:precision,scale,'"); |
| return EINVAL; |
| } |
| } |
| |
| *format_end_out = parse_end; |
| |
| switch (schema_view->decimal_bitwidth) { |
| case 128: |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128); |
| return NANOARROW_OK; |
| case 256: |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL256); |
| return NANOARROW_OK; |
| default: |
| ArrowErrorSet(error, "Expected decimal bitwidth of 128 or 256 but found %d", |
| (int)schema_view->decimal_bitwidth); |
| return EINVAL; |
| } |
| |
| // validity + data |
| case 'w': |
| schema_view->type = NANOARROW_TYPE_FIXED_SIZE_BINARY; |
| schema_view->storage_type = NANOARROW_TYPE_FIXED_SIZE_BINARY; |
| if (format[1] != ':' || format[2] == '\0') { |
| ArrowErrorSet(error, "Expected ':<width>' following 'w'"); |
| return EINVAL; |
| } |
| |
| schema_view->fixed_size = (int32_t)strtol(format + 2, (char**)format_end_out, 10); |
| return NANOARROW_OK; |
| |
| // validity + offset + data |
| case 'z': |
| schema_view->type = NANOARROW_TYPE_BINARY; |
| schema_view->storage_type = NANOARROW_TYPE_BINARY; |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'u': |
| schema_view->type = NANOARROW_TYPE_STRING; |
| schema_view->storage_type = NANOARROW_TYPE_STRING; |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| |
| // validity + large_offset + data |
| case 'Z': |
| schema_view->type = NANOARROW_TYPE_LARGE_BINARY; |
| schema_view->storage_type = NANOARROW_TYPE_LARGE_BINARY; |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| case 'U': |
| schema_view->type = NANOARROW_TYPE_LARGE_STRING; |
| schema_view->storage_type = NANOARROW_TYPE_LARGE_STRING; |
| *format_end_out = format + 1; |
| return NANOARROW_OK; |
| |
| // nested types |
| case '+': |
| switch (format[1]) { |
| // list has validity + offset or offset |
| case 'l': |
| schema_view->storage_type = NANOARROW_TYPE_LIST; |
| schema_view->type = NANOARROW_TYPE_LIST; |
| *format_end_out = format + 2; |
| return NANOARROW_OK; |
| |
| // large list has validity + large_offset or large_offset |
| case 'L': |
| schema_view->storage_type = NANOARROW_TYPE_LARGE_LIST; |
| schema_view->type = NANOARROW_TYPE_LARGE_LIST; |
| *format_end_out = format + 2; |
| return NANOARROW_OK; |
| |
| // just validity buffer |
| case 'w': |
| if (format[2] != ':' || format[3] == '\0') { |
| ArrowErrorSet(error, "Expected ':<width>' following '+w'"); |
| return EINVAL; |
| } |
| |
| schema_view->storage_type = NANOARROW_TYPE_FIXED_SIZE_LIST; |
| schema_view->type = NANOARROW_TYPE_FIXED_SIZE_LIST; |
| schema_view->fixed_size = |
| (int32_t)strtol(format + 3, (char**)format_end_out, 10); |
| return NANOARROW_OK; |
| case 's': |
| schema_view->storage_type = NANOARROW_TYPE_STRUCT; |
| schema_view->type = NANOARROW_TYPE_STRUCT; |
| *format_end_out = format + 2; |
| return NANOARROW_OK; |
| case 'm': |
| schema_view->storage_type = NANOARROW_TYPE_MAP; |
| schema_view->type = NANOARROW_TYPE_MAP; |
| *format_end_out = format + 2; |
| return NANOARROW_OK; |
| |
| // unions |
| case 'u': |
| switch (format[2]) { |
| case 'd': |
| schema_view->storage_type = NANOARROW_TYPE_DENSE_UNION; |
| schema_view->type = NANOARROW_TYPE_DENSE_UNION; |
| break; |
| case 's': |
| schema_view->storage_type = NANOARROW_TYPE_SPARSE_UNION; |
| schema_view->type = NANOARROW_TYPE_SPARSE_UNION; |
| break; |
| default: |
| ArrowErrorSet(error, |
| "Expected union format string +us:<type_ids> or " |
| "+ud:<type_ids> but found '%s'", |
| format); |
| return EINVAL; |
| } |
| |
| if (format[3] == ':') { |
| schema_view->union_type_ids = format + 4; |
| int64_t n_type_ids = |
| _ArrowParseUnionTypeIds(schema_view->union_type_ids, NULL); |
| if (n_type_ids != schema_view->schema->n_children) { |
| ArrowErrorSet( |
| error, |
| "Expected union type_ids parameter to be a comma-separated list of %ld " |
| "values between 0 and 127 but found '%s'", |
| (long)schema_view->schema->n_children, schema_view->union_type_ids); |
| return EINVAL; |
| } |
| *format_end_out = format + strlen(format); |
| return NANOARROW_OK; |
| } else { |
| ArrowErrorSet(error, |
| "Expected union format string +us:<type_ids> or +ud:<type_ids> " |
| "but found '%s'", |
| format); |
| return EINVAL; |
| } |
| |
| default: |
| ArrowErrorSet(error, "Expected nested type format string but found '%s'", |
| format); |
| return EINVAL; |
| } |
| |
| // date/time types |
| case 't': |
| switch (format[1]) { |
| // date |
| case 'd': |
| switch (format[2]) { |
| case 'D': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); |
| schema_view->type = NANOARROW_TYPE_DATE32; |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| case 'm': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_DATE64; |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| default: |
| ArrowErrorSet(error, "Expected 'D' or 'm' following 'td' but found '%s'", |
| format + 2); |
| return EINVAL; |
| } |
| |
| // time of day |
| case 't': |
| switch (format[2]) { |
| case 's': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); |
| schema_view->type = NANOARROW_TYPE_TIME32; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND; |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| case 'm': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32); |
| schema_view->type = NANOARROW_TYPE_TIME32; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI; |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| case 'u': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_TIME64; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO; |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| case 'n': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_TIME64; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| default: |
| ArrowErrorSet( |
| error, "Expected 's', 'm', 'u', or 'n' following 'tt' but found '%s'", |
| format + 2); |
| return EINVAL; |
| } |
| |
| // timestamp |
| case 's': |
| switch (format[2]) { |
| case 's': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_TIMESTAMP; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND; |
| break; |
| case 'm': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_TIMESTAMP; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI; |
| break; |
| case 'u': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_TIMESTAMP; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO; |
| break; |
| case 'n': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_TIMESTAMP; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; |
| break; |
| default: |
| ArrowErrorSet( |
| error, "Expected 's', 'm', 'u', or 'n' following 'ts' but found '%s'", |
| format + 2); |
| return EINVAL; |
| } |
| |
| if (format[3] != ':') { |
| ArrowErrorSet(error, "Expected ':' following '%.3s' but found '%s'", format, |
| format + 3); |
| return EINVAL; |
| } |
| |
| schema_view->timezone = format + 4; |
| *format_end_out = format + strlen(format); |
| return NANOARROW_OK; |
| |
| // duration |
| case 'D': |
| switch (format[2]) { |
| case 's': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_DURATION; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND; |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| case 'm': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_DURATION; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI; |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| case 'u': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_DURATION; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO; |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| case 'n': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64); |
| schema_view->type = NANOARROW_TYPE_DURATION; |
| schema_view->time_unit = NANOARROW_TIME_UNIT_NANO; |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| default: |
| ArrowErrorSet(error, |
| "Expected 's', 'm', u', or 'n' following 'tD' but found '%s'", |
| format + 2); |
| return EINVAL; |
| } |
| |
| // interval |
| case 'i': |
| switch (format[2]) { |
| case 'M': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_MONTHS); |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| case 'D': |
| ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_DAY_TIME); |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| case 'n': |
| ArrowSchemaViewSetPrimitive(schema_view, |
| NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO); |
| *format_end_out = format + 3; |
| return NANOARROW_OK; |
| default: |
| ArrowErrorSet(error, |
| "Expected 'M', 'D', or 'n' following 'ti' but found '%s'", |
| format + 2); |
| return EINVAL; |
| } |
| |
| default: |
| ArrowErrorSet( |
| error, "Expected 'd', 't', 's', 'D', or 'i' following 't' but found '%s'", |
| format + 1); |
| return EINVAL; |
| } |
| |
| default: |
| ArrowErrorSet(error, "Unknown format: '%s'", format); |
| return EINVAL; |
| } |
| } |
| |
| static ArrowErrorCode ArrowSchemaViewValidateNChildren( |
| struct ArrowSchemaView* schema_view, int64_t n_children, struct ArrowError* error) { |
| if (n_children != -1 && schema_view->schema->n_children != n_children) { |
| ArrowErrorSet(error, "Expected schema with %d children but found %d children", |
| (int)n_children, (int)schema_view->schema->n_children); |
| return EINVAL; |
| } |
| |
| // Don't do a full validation of children but do check that they won't |
| // segfault if inspected |
| struct ArrowSchema* child; |
| for (int64_t i = 0; i < schema_view->schema->n_children; i++) { |
| child = schema_view->schema->children[i]; |
| if (child == NULL) { |
| ArrowErrorSet(error, "Expected valid schema at schema->children[%d] but found NULL", |
| i); |
| return EINVAL; |
| } else if (child->release == NULL) { |
| ArrowErrorSet( |
| error, |
| "Expected valid schema at schema->children[%d] but found a released schema", i); |
| return EINVAL; |
| } |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static ArrowErrorCode ArrowSchemaViewValidateUnion(struct ArrowSchemaView* schema_view, |
| struct ArrowError* error) { |
| return ArrowSchemaViewValidateNChildren(schema_view, -1, error); |
| } |
| |
| static ArrowErrorCode ArrowSchemaViewValidateMap(struct ArrowSchemaView* schema_view, |
| struct ArrowError* error) { |
| NANOARROW_RETURN_NOT_OK(ArrowSchemaViewValidateNChildren(schema_view, 1, error)); |
| |
| if (schema_view->schema->children[0]->n_children != 2) { |
| ArrowErrorSet(error, "Expected child of map type to have 2 children but found %d", |
| (int)schema_view->schema->children[0]->n_children); |
| return EINVAL; |
| } |
| |
| if (strcmp(schema_view->schema->children[0]->format, "+s") != 0) { |
| ArrowErrorSet(error, "Expected format of child of map type to be '+s' but found '%s'", |
| schema_view->schema->children[0]->format); |
| return EINVAL; |
| } |
| |
| if (schema_view->schema->children[0]->flags & ARROW_FLAG_NULLABLE) { |
| ArrowErrorSet(error, |
| "Expected child of map type to be non-nullable but was nullable"); |
| return EINVAL; |
| } |
| |
| if (schema_view->schema->children[0]->children[0]->flags & ARROW_FLAG_NULLABLE) { |
| ArrowErrorSet(error, "Expected key of map type to be non-nullable but was nullable"); |
| return EINVAL; |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static ArrowErrorCode ArrowSchemaViewValidateDictionary( |
| struct ArrowSchemaView* schema_view, struct ArrowError* error) { |
| // check for valid index type |
| switch (schema_view->storage_type) { |
| case NANOARROW_TYPE_UINT8: |
| case NANOARROW_TYPE_INT8: |
| case NANOARROW_TYPE_UINT16: |
| case NANOARROW_TYPE_INT16: |
| case NANOARROW_TYPE_UINT32: |
| case NANOARROW_TYPE_INT32: |
| case NANOARROW_TYPE_UINT64: |
| case NANOARROW_TYPE_INT64: |
| break; |
| default: |
| ArrowErrorSet( |
| error, |
| "Expected dictionary schema index type to be an integral type but found '%s'", |
| schema_view->schema->format); |
| return EINVAL; |
| } |
| |
| struct ArrowSchemaView dictionary_schema_view; |
| return ArrowSchemaViewInit(&dictionary_schema_view, schema_view->schema->dictionary, |
| error); |
| } |
| |
| static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_view, |
| enum ArrowType type, |
| struct ArrowError* error) { |
| switch (type) { |
| case NANOARROW_TYPE_NA: |
| case NANOARROW_TYPE_BOOL: |
| case NANOARROW_TYPE_UINT8: |
| case NANOARROW_TYPE_INT8: |
| case NANOARROW_TYPE_UINT16: |
| case NANOARROW_TYPE_INT16: |
| case NANOARROW_TYPE_UINT32: |
| case NANOARROW_TYPE_INT32: |
| case NANOARROW_TYPE_UINT64: |
| case NANOARROW_TYPE_INT64: |
| case NANOARROW_TYPE_HALF_FLOAT: |
| case NANOARROW_TYPE_FLOAT: |
| case NANOARROW_TYPE_DOUBLE: |
| case NANOARROW_TYPE_DECIMAL128: |
| case NANOARROW_TYPE_DECIMAL256: |
| case NANOARROW_TYPE_STRING: |
| case NANOARROW_TYPE_LARGE_STRING: |
| case NANOARROW_TYPE_BINARY: |
| case NANOARROW_TYPE_LARGE_BINARY: |
| case NANOARROW_TYPE_DATE32: |
| case NANOARROW_TYPE_DATE64: |
| case NANOARROW_TYPE_INTERVAL_MONTHS: |
| case NANOARROW_TYPE_INTERVAL_DAY_TIME: |
| case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: |
| case NANOARROW_TYPE_TIMESTAMP: |
| case NANOARROW_TYPE_TIME32: |
| case NANOARROW_TYPE_TIME64: |
| case NANOARROW_TYPE_DURATION: |
| return ArrowSchemaViewValidateNChildren(schema_view, 0, error); |
| |
| case NANOARROW_TYPE_FIXED_SIZE_BINARY: |
| if (schema_view->fixed_size <= 0) { |
| ArrowErrorSet(error, "Expected size > 0 for fixed size binary but found size %d", |
| schema_view->fixed_size); |
| return EINVAL; |
| } |
| return ArrowSchemaViewValidateNChildren(schema_view, 0, error); |
| |
| case NANOARROW_TYPE_LIST: |
| case NANOARROW_TYPE_LARGE_LIST: |
| case NANOARROW_TYPE_FIXED_SIZE_LIST: |
| return ArrowSchemaViewValidateNChildren(schema_view, 1, error); |
| |
| case NANOARROW_TYPE_STRUCT: |
| return ArrowSchemaViewValidateNChildren(schema_view, -1, error); |
| |
| case NANOARROW_TYPE_SPARSE_UNION: |
| case NANOARROW_TYPE_DENSE_UNION: |
| return ArrowSchemaViewValidateUnion(schema_view, error); |
| |
| case NANOARROW_TYPE_MAP: |
| return ArrowSchemaViewValidateMap(schema_view, error); |
| |
| case NANOARROW_TYPE_DICTIONARY: |
| return ArrowSchemaViewValidateDictionary(schema_view, error); |
| |
| default: |
| ArrowErrorSet(error, "Expected a valid enum ArrowType value but found %d", |
| (int)schema_view->type); |
| return EINVAL; |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, |
| struct ArrowSchema* schema, struct ArrowError* error) { |
| if (schema == NULL) { |
| ArrowErrorSet(error, "Expected non-NULL schema"); |
| return EINVAL; |
| } |
| |
| if (schema->release == NULL) { |
| ArrowErrorSet(error, "Expected non-released schema"); |
| return EINVAL; |
| } |
| |
| schema_view->schema = schema; |
| |
| const char* format = schema->format; |
| if (format == NULL) { |
| ArrowErrorSet( |
| error, |
| "Error parsing schema->format: Expected a null-terminated string but found NULL"); |
| return EINVAL; |
| } |
| |
| size_t format_len = strlen(format); |
| if (format_len == 0) { |
| ArrowErrorSet(error, "Error parsing schema->format: Expected a string with size > 0"); |
| return EINVAL; |
| } |
| |
| const char* format_end_out; |
| ArrowErrorCode result = |
| ArrowSchemaViewParse(schema_view, format, &format_end_out, error); |
| |
| if (result != NANOARROW_OK) { |
| if (error != NULL) { |
| char child_error[1024]; |
| memcpy(child_error, ArrowErrorMessage(error), 1024); |
| ArrowErrorSet(error, "Error parsing schema->format: %s", child_error); |
| } |
| |
| return result; |
| } |
| |
| if ((format + format_len) != format_end_out) { |
| ArrowErrorSet(error, "Error parsing schema->format '%s': parsed %d/%d characters", |
| format, (int)(format_end_out - format), (int)(format_len)); |
| return EINVAL; |
| } |
| |
| if (schema->dictionary != NULL) { |
| schema_view->type = NANOARROW_TYPE_DICTIONARY; |
| } |
| |
| result = ArrowSchemaViewValidate(schema_view, schema_view->storage_type, error); |
| if (result != NANOARROW_OK) { |
| return result; |
| } |
| |
| if (schema_view->storage_type != schema_view->type) { |
| result = ArrowSchemaViewValidate(schema_view, schema_view->type, error); |
| if (result != NANOARROW_OK) { |
| return result; |
| } |
| } |
| |
| ArrowLayoutInit(&schema_view->layout, schema_view->storage_type); |
| if (schema_view->storage_type == NANOARROW_TYPE_FIXED_SIZE_BINARY) { |
| schema_view->layout.element_size_bits[1] = schema_view->fixed_size * 8; |
| } else if (schema_view->storage_type == NANOARROW_TYPE_FIXED_SIZE_LIST) { |
| schema_view->layout.child_size_elements = schema_view->fixed_size; |
| } |
| |
| schema_view->extension_name = ArrowCharView(NULL); |
| schema_view->extension_metadata = ArrowCharView(NULL); |
| ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:name"), |
| &schema_view->extension_name); |
| ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:metadata"), |
| &schema_view->extension_metadata); |
| |
| return NANOARROW_OK; |
| } |
| |
| static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_view, |
| char* out, int64_t n) { |
| const char* type_string = ArrowTypeString(schema_view->type); |
| switch (schema_view->type) { |
| case NANOARROW_TYPE_DECIMAL128: |
| case NANOARROW_TYPE_DECIMAL256: |
| return snprintf(out, n, "%s(%d, %d)", type_string, |
| (int)schema_view->decimal_precision, |
| (int)schema_view->decimal_scale); |
| case NANOARROW_TYPE_TIMESTAMP: |
| return snprintf(out, n, "%s('%s', '%s')", type_string, |
| ArrowTimeUnitString(schema_view->time_unit), schema_view->timezone); |
| case NANOARROW_TYPE_TIME32: |
| case NANOARROW_TYPE_TIME64: |
| case NANOARROW_TYPE_DURATION: |
| return snprintf(out, n, "%s('%s')", type_string, |
| ArrowTimeUnitString(schema_view->time_unit)); |
| case NANOARROW_TYPE_FIXED_SIZE_BINARY: |
| case NANOARROW_TYPE_FIXED_SIZE_LIST: |
| return snprintf(out, n, "%s(%ld)", type_string, (long)schema_view->fixed_size); |
| case NANOARROW_TYPE_SPARSE_UNION: |
| case NANOARROW_TYPE_DENSE_UNION: |
| return snprintf(out, n, "%s([%s])", type_string, schema_view->union_type_ids); |
| default: |
| return snprintf(out, n, "%s", type_string); |
| } |
| } |
| |
// Helper for bookkeeping to emulate snprintf()-like behaviour spread
// among multiple snprintf() calls.
//
// out:          cursor into the output buffer (may point to NULL)
// n_chars_last: return value of the most recent snprintf() call
// n_remaining:  bytes still available at *out; never left below 0
// n_chars:      running total of characters the full output requires
static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last,
                                         int64_t* n_remaining, int64_t* n_chars) {
  // snprintf() returns a negative value on an encoding error; treat that as
  // "nothing written" so the cursor never moves backwards and the totals are
  // not corrupted.
  if (n_chars_last < 0) {
    n_chars_last = 0;
  }

  *n_chars += n_chars_last;

  // n_remaining is never less than 0
  if (*n_remaining < 0) {
    *n_remaining = 0;
  }

  // When the last write was truncated, n_chars_last exceeds the space that
  // was available. Advance by at most the remaining space so the cursor stays
  // within (or one past the end of) the caller's buffer.
  int64_t n_advance = n_chars_last;
  if (n_advance > *n_remaining) {
    n_advance = *n_remaining;
  }

  *n_remaining -= n_advance;

  // Can't do math on a NULL pointer
  if (*out != NULL) {
    *out += n_advance;
  }
}
| |
| int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n, |
| char recursive) { |
| if (schema == NULL) { |
| return snprintf(out, n, "[invalid: pointer is null]"); |
| } |
| |
| if (schema->release == NULL) { |
| return snprintf(out, n, "[invalid: schema is released]"); |
| } |
| |
| struct ArrowSchemaView schema_view; |
| struct ArrowError error; |
| |
| if (ArrowSchemaViewInit(&schema_view, schema, &error) != NANOARROW_OK) { |
| return snprintf(out, n, "[invalid: %s]", ArrowErrorMessage(&error)); |
| } |
| |
| // Extension type and dictionary should include both the top-level type |
| // and the storage type. |
| int is_extension = schema_view.extension_name.size_bytes > 0; |
| int is_dictionary = schema->dictionary != NULL; |
| int64_t n_chars = 0; |
| int64_t n_chars_last = 0; |
| |
| // Uncommon but not technically impossible that both are true |
| if (is_extension && is_dictionary) { |
| n_chars_last = snprintf( |
| out, n, "%.*s{dictionary(%s)<", (int)schema_view.extension_name.size_bytes, |
| schema_view.extension_name.data, ArrowTypeString(schema_view.storage_type)); |
| } else if (is_extension) { |
| n_chars_last = snprintf(out, n, "%.*s{", (int)schema_view.extension_name.size_bytes, |
| schema_view.extension_name.data); |
| } else if (is_dictionary) { |
| n_chars_last = |
| snprintf(out, n, "dictionary(%s)<", ArrowTypeString(schema_view.storage_type)); |
| } |
| |
| ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); |
| |
| if (!is_dictionary) { |
| n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out, n); |
| } else { |
| n_chars_last = ArrowSchemaToString(schema->dictionary, out, n, recursive); |
| } |
| |
| ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); |
| |
| if (recursive && schema->format[0] == '+') { |
| n_chars_last = snprintf(out, n, "<"); |
| ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); |
| |
| for (int64_t i = 0; i < schema->n_children; i++) { |
| if (i > 0) { |
| n_chars_last = snprintf(out, n, ", "); |
| ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); |
| } |
| |
| // ArrowSchemaToStringInternal() will validate the child and print the error, |
| // but we need the name first |
| if (schema->children[i] != NULL && schema->children[i]->release != NULL && |
| schema->children[i]->name != NULL) { |
| n_chars_last = snprintf(out, n, "%s: ", schema->children[i]->name); |
| ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); |
| } |
| |
| n_chars_last = ArrowSchemaToString(schema->children[i], out, n, recursive); |
| ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); |
| } |
| |
| n_chars_last = snprintf(out, n, ">"); |
| ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars); |
| } |
| |
| if (is_extension && is_dictionary) { |
| n_chars += snprintf(out, n, ">}"); |
| } else if (is_extension) { |
| n_chars += snprintf(out, n, "}"); |
| } else if (is_dictionary) { |
| n_chars += snprintf(out, n, ">"); |
| } |
| |
| return n_chars; |
| } |
| |
| ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, |
| const char* metadata) { |
| reader->metadata = metadata; |
| |
| if (reader->metadata == NULL) { |
| reader->offset = 0; |
| reader->remaining_keys = 0; |
| } else { |
| memcpy(&reader->remaining_keys, reader->metadata, sizeof(int32_t)); |
| reader->offset = sizeof(int32_t); |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader, |
| struct ArrowStringView* key_out, |
| struct ArrowStringView* value_out) { |
| if (reader->remaining_keys <= 0) { |
| return EINVAL; |
| } |
| |
| int64_t pos = 0; |
| |
| int32_t key_size; |
| memcpy(&key_size, reader->metadata + reader->offset + pos, sizeof(int32_t)); |
| pos += sizeof(int32_t); |
| |
| key_out->data = reader->metadata + reader->offset + pos; |
| key_out->size_bytes = key_size; |
| pos += key_size; |
| |
| int32_t value_size; |
| memcpy(&value_size, reader->metadata + reader->offset + pos, sizeof(int32_t)); |
| pos += sizeof(int32_t); |
| |
| value_out->data = reader->metadata + reader->offset + pos; |
| value_out->size_bytes = value_size; |
| pos += value_size; |
| |
| reader->offset += pos; |
| reader->remaining_keys--; |
| return NANOARROW_OK; |
| } |
| |
| int64_t ArrowMetadataSizeOf(const char* metadata) { |
| if (metadata == NULL) { |
| return 0; |
| } |
| |
| struct ArrowMetadataReader reader; |
| struct ArrowStringView key; |
| struct ArrowStringView value; |
| ArrowMetadataReaderInit(&reader, metadata); |
| |
| int64_t size = sizeof(int32_t); |
| while (ArrowMetadataReaderRead(&reader, &key, &value) == NANOARROW_OK) { |
| size += sizeof(int32_t) + key.size_bytes + sizeof(int32_t) + value.size_bytes; |
| } |
| |
| return size; |
| } |
| |
| static ArrowErrorCode ArrowMetadataGetValueInternal(const char* metadata, |
| struct ArrowStringView* key, |
| struct ArrowStringView* value_out) { |
| struct ArrowMetadataReader reader; |
| struct ArrowStringView existing_key; |
| struct ArrowStringView existing_value; |
| ArrowMetadataReaderInit(&reader, metadata); |
| |
| while (ArrowMetadataReaderRead(&reader, &existing_key, &existing_value) == |
| NANOARROW_OK) { |
| int key_equal = key->size_bytes == existing_key.size_bytes && |
| strncmp(key->data, existing_key.data, existing_key.size_bytes) == 0; |
| if (key_equal) { |
| value_out->data = existing_value.data; |
| value_out->size_bytes = existing_value.size_bytes; |
| break; |
| } |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key, |
| struct ArrowStringView* value_out) { |
| if (value_out == NULL) { |
| return EINVAL; |
| } |
| |
| return ArrowMetadataGetValueInternal(metadata, &key, value_out); |
| } |
| |
| char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key) { |
| struct ArrowStringView value = ArrowCharView(NULL); |
| ArrowMetadataGetValue(metadata, key, &value); |
| return value.data != NULL; |
| } |
| |
| ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, |
| const char* metadata) { |
| ArrowBufferInit(buffer); |
| return ArrowBufferAppend(buffer, metadata, ArrowMetadataSizeOf(metadata)); |
| } |
| |
| static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buffer, |
| struct ArrowStringView* key, |
| struct ArrowStringView* value) { |
| if (value == NULL) { |
| return NANOARROW_OK; |
| } |
| |
| if (buffer->capacity_bytes == 0) { |
| NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(buffer, 0)); |
| } |
| |
| if (((size_t)buffer->capacity_bytes) < sizeof(int32_t)) { |
| return EINVAL; |
| } |
| |
| int32_t n_keys; |
| memcpy(&n_keys, buffer->data, sizeof(int32_t)); |
| |
| int32_t key_size = (int32_t)key->size_bytes; |
| int32_t value_size = (int32_t)value->size_bytes; |
| NANOARROW_RETURN_NOT_OK(ArrowBufferReserve( |
| buffer, sizeof(int32_t) + key_size + sizeof(int32_t) + value_size)); |
| |
| ArrowBufferAppendUnsafe(buffer, &key_size, sizeof(int32_t)); |
| ArrowBufferAppendUnsafe(buffer, key->data, key_size); |
| ArrowBufferAppendUnsafe(buffer, &value_size, sizeof(int32_t)); |
| ArrowBufferAppendUnsafe(buffer, value->data, value_size); |
| |
| n_keys++; |
| memcpy(buffer->data, &n_keys, sizeof(int32_t)); |
| |
| return NANOARROW_OK; |
| } |
| |
| static ArrowErrorCode ArrowMetadataBuilderSetInternal(struct ArrowBuffer* buffer, |
| struct ArrowStringView* key, |
| struct ArrowStringView* value) { |
| // Inspect the current value to see if we can avoid copying the buffer |
| struct ArrowStringView current_value = ArrowCharView(NULL); |
| NANOARROW_RETURN_NOT_OK( |
| ArrowMetadataGetValueInternal((const char*)buffer->data, key, ¤t_value)); |
| |
| // The key should be removed but no key exists |
| if (value == NULL && current_value.data == NULL) { |
| return NANOARROW_OK; |
| } |
| |
| // The key/value can be appended because no key exists |
| if (value != NULL && current_value.data == NULL) { |
| return ArrowMetadataBuilderAppendInternal(buffer, key, value); |
| } |
| |
| struct ArrowMetadataReader reader; |
| struct ArrowStringView existing_key; |
| struct ArrowStringView existing_value; |
| NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, (const char*)buffer->data)); |
| |
| struct ArrowBuffer new_buffer; |
| NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(&new_buffer, NULL)); |
| |
| while (reader.remaining_keys > 0) { |
| int result = ArrowMetadataReaderRead(&reader, &existing_key, &existing_value); |
| if (result != NANOARROW_OK) { |
| ArrowBufferReset(&new_buffer); |
| return result; |
| } |
| |
| if (key->size_bytes == existing_key.size_bytes && |
| strncmp((const char*)key->data, (const char*)existing_key.data, |
| existing_key.size_bytes) == 0) { |
| result = ArrowMetadataBuilderAppendInternal(&new_buffer, key, value); |
| value = NULL; |
| } else { |
| result = |
| ArrowMetadataBuilderAppendInternal(&new_buffer, &existing_key, &existing_value); |
| } |
| |
| if (result != NANOARROW_OK) { |
| ArrowBufferReset(&new_buffer); |
| return result; |
| } |
| } |
| |
| ArrowBufferReset(buffer); |
| ArrowBufferMove(&new_buffer, buffer); |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, |
| struct ArrowStringView key, |
| struct ArrowStringView value) { |
| return ArrowMetadataBuilderAppendInternal(buffer, &key, &value); |
| } |
| |
| ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, |
| struct ArrowStringView key, |
| struct ArrowStringView value) { |
| return ArrowMetadataBuilderSetInternal(buffer, &key, &value); |
| } |
| |
| ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, |
| struct ArrowStringView key) { |
| return ArrowMetadataBuilderSetInternal(buffer, &key, NULL); |
| } |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <errno.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include "nanoarrow.h" |
| |
| static void ArrowArrayRelease(struct ArrowArray* array) { |
| // Release buffers held by this array |
| struct ArrowArrayPrivateData* private_data = |
| (struct ArrowArrayPrivateData*)array->private_data; |
| if (private_data != NULL) { |
| ArrowBitmapReset(&private_data->bitmap); |
| ArrowBufferReset(&private_data->buffers[0]); |
| ArrowBufferReset(&private_data->buffers[1]); |
| ArrowFree(private_data); |
| } |
| |
| // This object owns the memory for all the children, but those |
| // children may have been generated elsewhere and might have |
| // their own release() callback. |
| if (array->children != NULL) { |
| for (int64_t i = 0; i < array->n_children; i++) { |
| if (array->children[i] != NULL) { |
| if (array->children[i]->release != NULL) { |
| array->children[i]->release(array->children[i]); |
| } |
| |
| ArrowFree(array->children[i]); |
| } |
| } |
| |
| ArrowFree(array->children); |
| } |
| |
| // This object owns the memory for the dictionary but it |
| // may have been generated somewhere else and have its own |
| // release() callback. |
| if (array->dictionary != NULL) { |
| if (array->dictionary->release != NULL) { |
| array->dictionary->release(array->dictionary); |
| } |
| |
| ArrowFree(array->dictionary); |
| } |
| |
| // Mark released |
| array->release = NULL; |
| } |
| |
| static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, |
| enum ArrowType storage_type) { |
| switch (storage_type) { |
| case NANOARROW_TYPE_UNINITIALIZED: |
| case NANOARROW_TYPE_NA: |
| array->n_buffers = 0; |
| break; |
| |
| case NANOARROW_TYPE_FIXED_SIZE_LIST: |
| case NANOARROW_TYPE_STRUCT: |
| case NANOARROW_TYPE_SPARSE_UNION: |
| array->n_buffers = 1; |
| break; |
| |
| case NANOARROW_TYPE_LIST: |
| case NANOARROW_TYPE_LARGE_LIST: |
| case NANOARROW_TYPE_MAP: |
| case NANOARROW_TYPE_BOOL: |
| case NANOARROW_TYPE_UINT8: |
| case NANOARROW_TYPE_INT8: |
| case NANOARROW_TYPE_UINT16: |
| case NANOARROW_TYPE_INT16: |
| case NANOARROW_TYPE_UINT32: |
| case NANOARROW_TYPE_INT32: |
| case NANOARROW_TYPE_UINT64: |
| case NANOARROW_TYPE_INT64: |
| case NANOARROW_TYPE_HALF_FLOAT: |
| case NANOARROW_TYPE_FLOAT: |
| case NANOARROW_TYPE_DOUBLE: |
| case NANOARROW_TYPE_DECIMAL128: |
| case NANOARROW_TYPE_DECIMAL256: |
| case NANOARROW_TYPE_INTERVAL_MONTHS: |
| case NANOARROW_TYPE_INTERVAL_DAY_TIME: |
| case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: |
| case NANOARROW_TYPE_FIXED_SIZE_BINARY: |
| case NANOARROW_TYPE_DENSE_UNION: |
| array->n_buffers = 2; |
| break; |
| |
| case NANOARROW_TYPE_STRING: |
| case NANOARROW_TYPE_LARGE_STRING: |
| case NANOARROW_TYPE_BINARY: |
| case NANOARROW_TYPE_LARGE_BINARY: |
| array->n_buffers = 3; |
| break; |
| |
| default: |
| return EINVAL; |
| |
| return NANOARROW_OK; |
| } |
| |
| struct ArrowArrayPrivateData* private_data = |
| (struct ArrowArrayPrivateData*)array->private_data; |
| private_data->storage_type = storage_type; |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, |
| enum ArrowType storage_type) { |
| array->length = 0; |
| array->null_count = 0; |
| array->offset = 0; |
| array->n_buffers = 0; |
| array->n_children = 0; |
| array->buffers = NULL; |
| array->children = NULL; |
| array->dictionary = NULL; |
| array->release = &ArrowArrayRelease; |
| array->private_data = NULL; |
| |
| struct ArrowArrayPrivateData* private_data = |
| (struct ArrowArrayPrivateData*)ArrowMalloc(sizeof(struct ArrowArrayPrivateData)); |
| if (private_data == NULL) { |
| array->release = NULL; |
| return ENOMEM; |
| } |
| |
| ArrowBitmapInit(&private_data->bitmap); |
| ArrowBufferInit(&private_data->buffers[0]); |
| ArrowBufferInit(&private_data->buffers[1]); |
| private_data->buffer_data[0] = NULL; |
| private_data->buffer_data[1] = NULL; |
| private_data->buffer_data[2] = NULL; |
| |
| array->private_data = private_data; |
| array->buffers = (const void**)(&private_data->buffer_data); |
| |
| int result = ArrowArraySetStorageType(array, storage_type); |
| if (result != NANOARROW_OK) { |
| array->release(array); |
| return result; |
| } |
| |
| ArrowLayoutInit(&private_data->layout, storage_type); |
| // We can only know this not to be true when initializing based on a schema |
| // so assume this to be true. |
| private_data->union_type_id_is_child_index = 1; |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array, |
| struct ArrowArrayView* array_view, |
| struct ArrowError* error) { |
| NANOARROW_RETURN_NOT_OK_WITH_ERROR( |
| ArrowArrayInitFromType(array, array_view->storage_type), error); |
| int result; |
| |
| struct ArrowArrayPrivateData* private_data = |
| (struct ArrowArrayPrivateData*)array->private_data; |
| private_data->layout = array_view->layout; |
| |
| if (array_view->n_children > 0) { |
| result = ArrowArrayAllocateChildren(array, array_view->n_children); |
| if (result != NANOARROW_OK) { |
| array->release(array); |
| return result; |
| } |
| |
| for (int64_t i = 0; i < array_view->n_children; i++) { |
| result = |
| ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error); |
| if (result != NANOARROW_OK) { |
| array->release(array); |
| return result; |
| } |
| } |
| } |
| |
| if (array_view->dictionary != NULL) { |
| result = ArrowArrayAllocateDictionary(array); |
| if (result != NANOARROW_OK) { |
| array->release(array); |
| return result; |
| } |
| |
| result = |
| ArrowArrayInitFromArrayView(array->dictionary, array_view->dictionary, error); |
| if (result != NANOARROW_OK) { |
| array->release(array); |
| return result; |
| } |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, |
| struct ArrowSchema* schema, |
| struct ArrowError* error) { |
| struct ArrowArrayView array_view; |
| NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(&array_view, schema, error)); |
| NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromArrayView(array, &array_view, error)); |
| if (array_view.storage_type == NANOARROW_TYPE_DENSE_UNION || |
| array_view.storage_type == NANOARROW_TYPE_SPARSE_UNION) { |
| struct ArrowArrayPrivateData* private_data = |
| (struct ArrowArrayPrivateData*)array->private_data; |
| // We can still build arrays if this isn't true; however, the append |
| // functions won't work. Instead, we store this value and error only |
| // when StartAppending is called. |
| private_data->union_type_id_is_child_index = |
| _ArrowUnionTypeIdsWillEqualChildIndices(schema->format + 4, schema->n_children); |
| } |
| |
| ArrowArrayViewReset(&array_view); |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children) { |
| if (array->children != NULL) { |
| return EINVAL; |
| } |
| |
| if (n_children == 0) { |
| return NANOARROW_OK; |
| } |
| |
| array->children = |
| (struct ArrowArray**)ArrowMalloc(n_children * sizeof(struct ArrowArray*)); |
| if (array->children == NULL) { |
| return ENOMEM; |
| } |
| |
| memset(array->children, 0, n_children * sizeof(struct ArrowArray*)); |
| |
| for (int64_t i = 0; i < n_children; i++) { |
| array->children[i] = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray)); |
| if (array->children[i] == NULL) { |
| return ENOMEM; |
| } |
| array->children[i]->release = NULL; |
| } |
| |
| array->n_children = n_children; |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array) { |
| if (array->dictionary != NULL) { |
| return EINVAL; |
| } |
| |
| array->dictionary = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray)); |
| if (array->dictionary == NULL) { |
| return ENOMEM; |
| } |
| |
| array->dictionary->release = NULL; |
| return NANOARROW_OK; |
| } |
| |
| void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap) { |
| struct ArrowArrayPrivateData* private_data = |
| (struct ArrowArrayPrivateData*)array->private_data; |
| ArrowBufferMove(&bitmap->buffer, &private_data->bitmap.buffer); |
| private_data->bitmap.size_bits = bitmap->size_bits; |
| bitmap->size_bits = 0; |
| private_data->buffer_data[0] = private_data->bitmap.buffer.data; |
| array->null_count = -1; |
| } |
| |
| ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, |
| struct ArrowBuffer* buffer) { |
| struct ArrowArrayPrivateData* private_data = |
| (struct ArrowArrayPrivateData*)array->private_data; |
| |
| switch (i) { |
| case 0: |
| ArrowBufferMove(buffer, &private_data->bitmap.buffer); |
| private_data->buffer_data[i] = private_data->bitmap.buffer.data; |
| break; |
| case 1: |
| case 2: |
| ArrowBufferMove(buffer, &private_data->buffers[i - 1]); |
| private_data->buffer_data[i] = private_data->buffers[i - 1].data; |
| break; |
| default: |
| return EINVAL; |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_view, |
| struct ArrowArray* array) { |
| struct ArrowArrayPrivateData* private_data = |
| (struct ArrowArrayPrivateData*)array->private_data; |
| |
| ArrowArrayViewInitFromType(array_view, private_data->storage_type); |
| array_view->layout = private_data->layout; |
| array_view->array = array; |
| array_view->length = array->length; |
| array_view->offset = array->offset; |
| array_view->null_count = array->null_count; |
| |
| array_view->buffer_views[0].data.as_uint8 = private_data->bitmap.buffer.data; |
| array_view->buffer_views[0].size_bytes = private_data->bitmap.buffer.size_bytes; |
| array_view->buffer_views[1].data.as_uint8 = private_data->buffers[0].data; |
| array_view->buffer_views[1].size_bytes = private_data->buffers[0].size_bytes; |
| array_view->buffer_views[2].data.as_uint8 = private_data->buffers[1].data; |
| array_view->buffer_views[2].size_bytes = private_data->buffers[1].size_bytes; |
| |
| int result = ArrowArrayViewAllocateChildren(array_view, array->n_children); |
| if (result != NANOARROW_OK) { |
| ArrowArrayViewReset(array_view); |
| return result; |
| } |
| |
| for (int64_t i = 0; i < array->n_children; i++) { |
| result = ArrowArrayViewInitFromArray(array_view->children[i], array->children[i]); |
| if (result != NANOARROW_OK) { |
| ArrowArrayViewReset(array_view); |
| return result; |
| } |
| } |
| |
| if (array->dictionary != NULL) { |
| result = ArrowArrayViewAllocateDictionary(array_view); |
| if (result != NANOARROW_OK) { |
| ArrowArrayViewReset(array_view); |
| return result; |
| } |
| |
| result = ArrowArrayViewInitFromArray(array_view->dictionary, array->dictionary); |
| if (result != NANOARROW_OK) { |
| ArrowArrayViewReset(array_view); |
| return result; |
| } |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static ArrowErrorCode ArrowArrayReserveInternal(struct ArrowArray* array, |
| struct ArrowArrayView* array_view) { |
| // Loop through buffers and reserve the extra space that we know about |
| for (int64_t i = 0; i < array->n_buffers; i++) { |
| // Don't reserve on a validity buffer that hasn't been allocated yet |
| if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY && |
| ArrowArrayBuffer(array, i)->data == NULL) { |
| continue; |
| } |
| |
| int64_t additional_size_bytes = |
| array_view->buffer_views[i].size_bytes - ArrowArrayBuffer(array, i)->size_bytes; |
| |
| if (additional_size_bytes > 0) { |
| NANOARROW_RETURN_NOT_OK( |
| ArrowBufferReserve(ArrowArrayBuffer(array, i), additional_size_bytes)); |
| } |
| } |
| |
| // Recursively reserve children |
| for (int64_t i = 0; i < array->n_children; i++) { |
| NANOARROW_RETURN_NOT_OK( |
| ArrowArrayReserveInternal(array->children[i], array_view->children[i])); |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array, |
| int64_t additional_size_elements) { |
| struct ArrowArrayView array_view; |
| NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array)); |
| |
| // Calculate theoretical buffer sizes (recursively) |
| ArrowArrayViewSetLength(&array_view, array->length + additional_size_elements); |
| |
| // Walk the structure (recursively) |
| int result = ArrowArrayReserveInternal(array, &array_view); |
| ArrowArrayViewReset(&array_view); |
| if (result != NANOARROW_OK) { |
| return result; |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) { |
| struct ArrowArrayPrivateData* private_data = |
| (struct ArrowArrayPrivateData*)array->private_data; |
| |
| // The only buffer finalizing this currently does is make sure the data |
| // buffer for (Large)String|Binary is never NULL |
| switch (private_data->storage_type) { |
| case NANOARROW_TYPE_BINARY: |
| case NANOARROW_TYPE_STRING: |
| case NANOARROW_TYPE_LARGE_BINARY: |
| case NANOARROW_TYPE_LARGE_STRING: |
| if (ArrowArrayBuffer(array, 2)->data == NULL) { |
| ArrowBufferAppendUInt8(ArrowArrayBuffer(array, 2), 0); |
| } |
| break; |
| default: |
| break; |
| } |
| |
| for (int64_t i = 0; i < array->n_children; i++) { |
| NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->children[i])); |
| } |
| |
| if (array->dictionary != NULL) { |
| NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->dictionary)); |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) { |
| struct ArrowArrayPrivateData* private_data = |
| (struct ArrowArrayPrivateData*)array->private_data; |
| |
| for (int64_t i = 0; i < 3; i++) { |
| private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data; |
| } |
| |
| for (int64_t i = 0; i < array->n_children; i++) { |
| ArrowArrayFlushInternalPointers(array->children[i]); |
| } |
| |
| if (array->dictionary != NULL) { |
| ArrowArrayFlushInternalPointers(array->dictionary); |
| } |
| } |
| |
| ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, |
| enum ArrowValidationLevel validation_level, |
| struct ArrowError* error) { |
| // Even if the data buffer is size zero, the pointer value needed to be non-null |
| // in some implementations (at least one version of Arrow C++ at the time this |
| // was added). Only do this fix if we can assume CPU data access. |
| if (validation_level >= NANOARROW_VALIDATION_LEVEL_DEFAULT) { |
| NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayFinalizeBuffers(array), error); |
| } |
| |
| // Make sure the value we get with array->buffers[i] is set to the actual |
| // pointer (which may have changed from the original due to reallocation) |
| ArrowArrayFlushInternalPointers(array); |
| |
| if (validation_level == NANOARROW_VALIDATION_LEVEL_NONE) { |
| return NANOARROW_OK; |
| } |
| |
| // For validation, initialize an ArrowArrayView with our known buffer sizes |
| struct ArrowArrayView array_view; |
| NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayViewInitFromArray(&array_view, array), |
| error); |
| int result = ArrowArrayViewValidate(&array_view, validation_level, error); |
| ArrowArrayViewReset(&array_view); |
| return result; |
| } |
| |
| ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, |
| struct ArrowError* error) { |
| return ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_DEFAULT, error); |
| } |
| |
| void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view, |
| enum ArrowType storage_type) { |
| memset(array_view, 0, sizeof(struct ArrowArrayView)); |
| array_view->storage_type = storage_type; |
| ArrowLayoutInit(&array_view->layout, storage_type); |
| } |
| |
| ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view, |
| int64_t n_children) { |
| if (array_view->children != NULL) { |
| return EINVAL; |
| } |
| |
| array_view->children = |
| (struct ArrowArrayView**)ArrowMalloc(n_children * sizeof(struct ArrowArrayView*)); |
| if (array_view->children == NULL) { |
| return ENOMEM; |
| } |
| |
| for (int64_t i = 0; i < n_children; i++) { |
| array_view->children[i] = NULL; |
| } |
| |
| array_view->n_children = n_children; |
| |
| for (int64_t i = 0; i < n_children; i++) { |
| array_view->children[i] = |
| (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView)); |
| if (array_view->children[i] == NULL) { |
| return ENOMEM; |
| } |
| ArrowArrayViewInitFromType(array_view->children[i], NANOARROW_TYPE_UNINITIALIZED); |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view) { |
| if (array_view->dictionary != NULL) { |
| return EINVAL; |
| } |
| |
| array_view->dictionary = |
| (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView)); |
| if (array_view->dictionary == NULL) { |
| return ENOMEM; |
| } |
| |
| ArrowArrayViewInitFromType(array_view->dictionary, NANOARROW_TYPE_UNINITIALIZED); |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, |
| struct ArrowSchema* schema, |
| struct ArrowError* error) { |
| struct ArrowSchemaView schema_view; |
| int result = ArrowSchemaViewInit(&schema_view, schema, error); |
| if (result != NANOARROW_OK) { |
| return result; |
| } |
| |
| ArrowArrayViewInitFromType(array_view, schema_view.storage_type); |
| array_view->layout = schema_view.layout; |
| |
| result = ArrowArrayViewAllocateChildren(array_view, schema->n_children); |
| if (result != NANOARROW_OK) { |
| ArrowErrorSet(error, "ArrowArrayViewAllocateChildren() failed"); |
| ArrowArrayViewReset(array_view); |
| return result; |
| } |
| |
| for (int64_t i = 0; i < schema->n_children; i++) { |
| result = |
| ArrowArrayViewInitFromSchema(array_view->children[i], schema->children[i], error); |
| if (result != NANOARROW_OK) { |
| ArrowArrayViewReset(array_view); |
| return result; |
| } |
| } |
| |
| if (schema->dictionary != NULL) { |
| result = ArrowArrayViewAllocateDictionary(array_view); |
| if (result != NANOARROW_OK) { |
| ArrowArrayViewReset(array_view); |
| return result; |
| } |
| |
| result = |
| ArrowArrayViewInitFromSchema(array_view->dictionary, schema->dictionary, error); |
| if (result != NANOARROW_OK) { |
| ArrowArrayViewReset(array_view); |
| return result; |
| } |
| } |
| |
| if (array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION || |
| array_view->storage_type == NANOARROW_TYPE_DENSE_UNION) { |
| array_view->union_type_id_map = (int8_t*)ArrowMalloc(256 * sizeof(int8_t)); |
| if (array_view->union_type_id_map == NULL) { |
| return ENOMEM; |
| } |
| |
| memset(array_view->union_type_id_map, -1, 256); |
| int8_t n_type_ids = _ArrowParseUnionTypeIds(schema_view.union_type_ids, |
| array_view->union_type_id_map + 128); |
| for (int8_t child_index = 0; child_index < n_type_ids; child_index++) { |
| int8_t type_id = array_view->union_type_id_map[128 + child_index]; |
| array_view->union_type_id_map[type_id] = child_index; |
| } |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| void ArrowArrayViewReset(struct ArrowArrayView* array_view) { |
| if (array_view->children != NULL) { |
| for (int64_t i = 0; i < array_view->n_children; i++) { |
| if (array_view->children[i] != NULL) { |
| ArrowArrayViewReset(array_view->children[i]); |
| ArrowFree(array_view->children[i]); |
| } |
| } |
| |
| ArrowFree(array_view->children); |
| } |
| |
| if (array_view->dictionary != NULL) { |
| ArrowArrayViewReset(array_view->dictionary); |
| ArrowFree(array_view->dictionary); |
| } |
| |
| if (array_view->union_type_id_map != NULL) { |
| ArrowFree(array_view->union_type_id_map); |
| } |
| |
| ArrowArrayViewInitFromType(array_view, NANOARROW_TYPE_UNINITIALIZED); |
| } |
| |
| void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) { |
| for (int i = 0; i < 3; i++) { |
| int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8; |
| |
| switch (array_view->layout.buffer_type[i]) { |
| case NANOARROW_BUFFER_TYPE_VALIDITY: |
| array_view->buffer_views[i].size_bytes = _ArrowBytesForBits(length); |
| continue; |
| case NANOARROW_BUFFER_TYPE_DATA_OFFSET: |
| // Probably don't want/need to rely on the producer to have allocated an |
| // offsets buffer of length 1 for a zero-size array |
| array_view->buffer_views[i].size_bytes = |
| (length != 0) * element_size_bytes * (length + 1); |
| continue; |
| case NANOARROW_BUFFER_TYPE_DATA: |
| array_view->buffer_views[i].size_bytes = |
| _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * length) / |
| 8; |
| continue; |
| case NANOARROW_BUFFER_TYPE_TYPE_ID: |
| case NANOARROW_BUFFER_TYPE_UNION_OFFSET: |
| array_view->buffer_views[i].size_bytes = element_size_bytes * length; |
| continue; |
| case NANOARROW_BUFFER_TYPE_NONE: |
| array_view->buffer_views[i].size_bytes = 0; |
| continue; |
| } |
| } |
| |
| switch (array_view->storage_type) { |
| case NANOARROW_TYPE_STRUCT: |
| case NANOARROW_TYPE_SPARSE_UNION: |
| for (int64_t i = 0; i < array_view->n_children; i++) { |
| ArrowArrayViewSetLength(array_view->children[i], length); |
| } |
| break; |
| case NANOARROW_TYPE_FIXED_SIZE_LIST: |
| if (array_view->n_children >= 1) { |
| ArrowArrayViewSetLength(array_view->children[0], |
| length * array_view->layout.child_size_elements); |
| } |
| default: |
| break; |
| } |
| } |
| |
| // This version recursively extracts information from the array and stores it |
| // in the array view, performing any checks that require the original array. |
| static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view, |
| struct ArrowArray* array, |
| struct ArrowError* error) { |
| // Check length and offset |
| if (array->offset < 0) { |
| ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of %ld", |
| (long)array->offset); |
| return EINVAL; |
| } |
| |
| if (array->length < 0) { |
| ArrowErrorSet(error, "Expected array length >= 0 but found array length of %ld", |
| (long)array->length); |
| return EINVAL; |
| } |
| |
| array_view->array = array; |
| array_view->offset = array->offset; |
| array_view->length = array->length; |
| array_view->null_count = array->null_count; |
| |
| int64_t buffers_required = 0; |
| for (int i = 0; i < 3; i++) { |
| if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) { |
| break; |
| } |
| |
| buffers_required++; |
| |
| // Set buffer pointer |
| array_view->buffer_views[i].data.data = array->buffers[i]; |
| |
| // If non-null, set buffer size to unknown. |
| if (array->buffers[i] == NULL) { |
| array_view->buffer_views[i].size_bytes = 0; |
| } else { |
| array_view->buffer_views[i].size_bytes = -1; |
| } |
| } |
| |
| // Check the number of buffers |
| if (buffers_required != array->n_buffers) { |
| ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d buffer(s)", |
| (int)buffers_required, (int)array->n_buffers); |
| return EINVAL; |
| } |
| |
| // Check number of children |
| if (array_view->n_children != array->n_children) { |
| ArrowErrorSet(error, "Expected %ld children but found %ld children", |
| (long)array_view->n_children, (long)array->n_children); |
| return EINVAL; |
| } |
| |
| // Recurse for children |
| for (int64_t i = 0; i < array_view->n_children; i++) { |
| NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view->children[i], |
| array->children[i], error)); |
| } |
| |
| // Check dictionary |
| if (array->dictionary == NULL && array_view->dictionary != NULL) { |
| ArrowErrorSet(error, "Expected dictionary but found NULL"); |
| return EINVAL; |
| } |
| |
| if (array->dictionary != NULL && array_view->dictionary == NULL) { |
| ArrowErrorSet(error, "Expected NULL dictionary but found dictionary member"); |
| return EINVAL; |
| } |
| |
| if (array->dictionary != NULL) { |
| NANOARROW_RETURN_NOT_OK( |
| ArrowArrayViewSetArrayInternal(array_view->dictionary, array->dictionary, error)); |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view, |
| struct ArrowError* error) { |
| // Calculate buffer sizes that do not require buffer access. If marked as |
| // unknown, assign the buffer size; otherwise, validate it. |
| int64_t offset_plus_length = array_view->offset + array_view->length; |
| |
| // Only loop over the first two buffers because the size of the third buffer |
| // is always data dependent for all current Arrow types. |
| for (int i = 0; i < 2; i++) { |
| int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8; |
| // Initialize with a value that will cause an error if accidentally used uninitialized |
| int64_t min_buffer_size_bytes = array_view->buffer_views[i].size_bytes + 1; |
| |
| switch (array_view->layout.buffer_type[i]) { |
| case NANOARROW_BUFFER_TYPE_VALIDITY: |
| if (array_view->null_count == 0 && array_view->buffer_views[i].size_bytes == 0) { |
| continue; |
| } |
| |
| min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length); |
| break; |
| case NANOARROW_BUFFER_TYPE_DATA_OFFSET: |
| // Probably don't want/need to rely on the producer to have allocated an |
| // offsets buffer of length 1 for a zero-size array |
| min_buffer_size_bytes = |
| (offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1); |
| break; |
| case NANOARROW_BUFFER_TYPE_DATA: |
| min_buffer_size_bytes = |
| _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * |
| offset_plus_length) / |
| 8; |
| break; |
| case NANOARROW_BUFFER_TYPE_TYPE_ID: |
| case NANOARROW_BUFFER_TYPE_UNION_OFFSET: |
| min_buffer_size_bytes = element_size_bytes * offset_plus_length; |
| break; |
| case NANOARROW_BUFFER_TYPE_NONE: |
| continue; |
| } |
| |
| // Assign or validate buffer size |
| if (array_view->buffer_views[i].size_bytes == -1) { |
| array_view->buffer_views[i].size_bytes = min_buffer_size_bytes; |
| } else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes) { |
| ArrowErrorSet(error, |
| "Expected %s array buffer %d to have size >= %ld bytes but found " |
| "buffer with %ld bytes", |
| ArrowTypeString(array_view->storage_type), (int)i, |
| (long)min_buffer_size_bytes, |
| (long)array_view->buffer_views[i].size_bytes); |
| return EINVAL; |
| } |
| } |
| |
| // For list, fixed-size list and map views, we can validate the number of children |
| switch (array_view->storage_type) { |
| case NANOARROW_TYPE_LIST: |
| case NANOARROW_TYPE_LARGE_LIST: |
| case NANOARROW_TYPE_FIXED_SIZE_LIST: |
| case NANOARROW_TYPE_MAP: |
| if (array_view->n_children != 1) { |
| ArrowErrorSet(error, "Expected 1 child of %s array but found %ld child arrays", |
| ArrowTypeString(array_view->storage_type), |
| (long)array_view->n_children); |
| return EINVAL; |
| } |
| default: |
| break; |
| } |
| |
| // For struct, the sparse union, and the fixed-size list views, we can validate child |
| // lengths. |
| int64_t child_min_length; |
| switch (array_view->storage_type) { |
| case NANOARROW_TYPE_SPARSE_UNION: |
| case NANOARROW_TYPE_STRUCT: |
| child_min_length = (array_view->offset + array_view->length); |
| for (int64_t i = 0; i < array_view->n_children; i++) { |
| if (array_view->children[i]->length < child_min_length) { |
| ArrowErrorSet( |
| error, |
| "Expected struct child %d to have length >= %ld but found child with " |
| "length %ld", |
| (int)(i + 1), (long)(child_min_length), |
| (long)array_view->children[i]->length); |
| return EINVAL; |
| } |
| } |
| break; |
| |
| case NANOARROW_TYPE_FIXED_SIZE_LIST: |
| child_min_length = (array_view->offset + array_view->length) * |
| array_view->layout.child_size_elements; |
| if (array_view->children[0]->length < child_min_length) { |
| ArrowErrorSet(error, |
| "Expected child of fixed_size_list array to have length >= %ld but " |
| "found array with length %ld", |
| (long)child_min_length, (long)array_view->children[0]->length); |
| return EINVAL; |
| } |
| break; |
| default: |
| break; |
| } |
| |
| // Recurse for children |
| for (int64_t i = 0; i < array_view->n_children; i++) { |
| NANOARROW_RETURN_NOT_OK( |
| ArrowArrayViewValidateMinimal(array_view->children[i], error)); |
| } |
| |
| // Recurse for dictionary |
| if (array_view->dictionary != NULL) { |
| NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view->dictionary, error)); |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view, |
| struct ArrowError* error) { |
| // Perform minimal validation. This will validate or assign |
| // buffer sizes as long as buffer access is not required. |
| NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error)); |
| |
| // Calculate buffer sizes or child lengths that require accessing the offsets |
| // buffer. Where appropriate, validate that the first offset is >= 0. |
| // If a buffer size is marked as unknown, assign it; otherwise, validate it. |
| int64_t offset_plus_length = array_view->offset + array_view->length; |
| |
| int64_t first_offset; |
| int64_t last_offset; |
| switch (array_view->storage_type) { |
| case NANOARROW_TYPE_STRING: |
| case NANOARROW_TYPE_BINARY: |
| if (array_view->buffer_views[1].size_bytes != 0) { |
| first_offset = array_view->buffer_views[1].data.as_int32[0]; |
| if (first_offset < 0) { |
| ArrowErrorSet(error, "Expected first offset >= 0 but found %ld", |
| (long)first_offset); |
| return EINVAL; |
| } |
| |
| last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length]; |
| |
| // If the data buffer size is unknown, assign it; otherwise, check it |
| if (array_view->buffer_views[2].size_bytes == -1) { |
| array_view->buffer_views[2].size_bytes = last_offset; |
| } else if (array_view->buffer_views[2].size_bytes < last_offset) { |
| ArrowErrorSet(error, |
| "Expected %s array buffer 2 to have size >= %ld bytes but found " |
| "buffer with %ld bytes", |
| ArrowTypeString(array_view->storage_type), (long)last_offset, |
| (long)array_view->buffer_views[2].size_bytes); |
| return EINVAL; |
| } |
| } |
| break; |
| |
| case NANOARROW_TYPE_LARGE_STRING: |
| case NANOARROW_TYPE_LARGE_BINARY: |
| if (array_view->buffer_views[1].size_bytes != 0) { |
| first_offset = array_view->buffer_views[1].data.as_int64[0]; |
| if (first_offset < 0) { |
| ArrowErrorSet(error, "Expected first offset >= 0 but found %ld", |
| (long)first_offset); |
| return EINVAL; |
| } |
| |
| last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length]; |
| |
| // If the data buffer size is unknown, assign it; otherwise, check it |
| if (array_view->buffer_views[2].size_bytes == -1) { |
| array_view->buffer_views[2].size_bytes = last_offset; |
| } else if (array_view->buffer_views[2].size_bytes < last_offset) { |
| ArrowErrorSet(error, |
| "Expected %s array buffer 2 to have size >= %ld bytes but found " |
| "buffer with %ld bytes", |
| ArrowTypeString(array_view->storage_type), (long)last_offset, |
| (long)array_view->buffer_views[2].size_bytes); |
| return EINVAL; |
| } |
| } |
| break; |
| |
| case NANOARROW_TYPE_STRUCT: |
| for (int64_t i = 0; i < array_view->n_children; i++) { |
| if (array_view->children[i]->length < offset_plus_length) { |
| ArrowErrorSet( |
| error, |
| "Expected struct child %d to have length >= %ld but found child with " |
| "length %ld", |
| (int)(i + 1), (long)offset_plus_length, |
| (long)array_view->children[i]->length); |
| return EINVAL; |
| } |
| } |
| break; |
| |
| case NANOARROW_TYPE_LIST: |
| case NANOARROW_TYPE_MAP: |
| if (array_view->buffer_views[1].size_bytes != 0) { |
| first_offset = array_view->buffer_views[1].data.as_int32[0]; |
| if (first_offset < 0) { |
| ArrowErrorSet(error, "Expected first offset >= 0 but found %ld", |
| (long)first_offset); |
| return EINVAL; |
| } |
| |
| last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length]; |
| if (array_view->children[0]->length < last_offset) { |
| ArrowErrorSet( |
| error, |
| "Expected child of %s array to have length >= %ld but found array with " |
| "length %ld", |
| ArrowTypeString(array_view->storage_type), (long)last_offset, |
| (long)array_view->children[0]->length); |
| return EINVAL; |
| } |
| } |
| break; |
| |
| case NANOARROW_TYPE_LARGE_LIST: |
| if (array_view->buffer_views[1].size_bytes != 0) { |
| first_offset = array_view->buffer_views[1].data.as_int64[0]; |
| if (first_offset < 0) { |
| ArrowErrorSet(error, "Expected first offset >= 0 but found %ld", |
| (long)first_offset); |
| return EINVAL; |
| } |
| |
| last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length]; |
| if (array_view->children[0]->length < last_offset) { |
| ArrowErrorSet( |
| error, |
| "Expected child of large list array to have length >= %ld but found array " |
| "with length %ld", |
| (long)last_offset, (long)array_view->children[0]->length); |
| return EINVAL; |
| } |
| } |
| break; |
| default: |
| break; |
| } |
| |
| // Recurse for children |
| for (int64_t i = 0; i < array_view->n_children; i++) { |
| NANOARROW_RETURN_NOT_OK( |
| ArrowArrayViewValidateDefault(array_view->children[i], error)); |
| } |
| |
| // Recurse for dictionary |
| if (array_view->dictionary != NULL) { |
| NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view->dictionary, error)); |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, |
| struct ArrowArray* array, |
| struct ArrowError* error) { |
| // Extract information from the array into the array view |
| NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error)); |
| |
| // Run default validation. Because we've marked all non-NULL buffers as having unknown |
| // size, validation will also update the buffer sizes as it goes. |
| NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error)); |
| |
| return NANOARROW_OK; |
| } |
| |
| ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view, |
| struct ArrowArray* array, |
| struct ArrowError* error) { |
| // Extract information from the array into the array view |
| NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error)); |
| |
| // Run default validation. Because we've marked all non-NULL buffers as having unknown |
| // size, validation will also update the buffer sizes as it goes. |
| NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error)); |
| |
| return NANOARROW_OK; |
| } |
| |
| static int ArrowAssertIncreasingInt32(struct ArrowBufferView view, |
| struct ArrowError* error) { |
| if (view.size_bytes <= (int64_t)sizeof(int32_t)) { |
| return NANOARROW_OK; |
| } |
| |
| for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int32_t); i++) { |
| if (view.data.as_int32[i] < view.data.as_int32[i - 1]) { |
| ArrowErrorSet(error, "[%ld] Expected element size >= 0", (long)i); |
| return EINVAL; |
| } |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static int ArrowAssertIncreasingInt64(struct ArrowBufferView view, |
| struct ArrowError* error) { |
| if (view.size_bytes <= (int64_t)sizeof(int64_t)) { |
| return NANOARROW_OK; |
| } |
| |
| for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int64_t); i++) { |
| if (view.data.as_int64[i] < view.data.as_int64[i - 1]) { |
| ArrowErrorSet(error, "[%ld] Expected element size >= 0", (long)i); |
| return EINVAL; |
| } |
| } |
| |
| return NANOARROW_OK; |
| } |
| |
| static int ArrowAssertRangeInt8(struct ArrowBufferView view, int8_t min_value, |
| int8_t max_value, struct ArrowError* error) { |
| for (int64_t i = 0; i < view.size_bytes; i++) { |
| if (view.data.as_int8[i] < min_value || view.data.as_int8[i] > max_value) { |
| ArrowErrorSet(error, |
| "[%ld] Expected buffer value between %d and %d but found value %d", |
| (long)i, |