blob: c1deaec72857daea6c42457d976ac4d0578a8024 [file]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: language_level = 3
from libc.stdint cimport int32_t, int64_t, uintptr_t
from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AsString, PyBytes_Size
from cpython.pycapsule cimport PyCapsule_GetPointer
from nanoarrow_c cimport (
ARROW_FLAG_DICTIONARY_ORDERED,
ARROW_FLAG_MAP_KEYS_SORTED,
ARROW_FLAG_NULLABLE,
ArrowFree,
ArrowLayout,
ArrowMalloc,
ArrowMetadataBuilderAppend,
ArrowMetadataBuilderInit,
ArrowMetadataReaderInit,
ArrowMetadataReaderRead,
ArrowSchema,
ArrowSchemaAllocateChildren,
ArrowSchemaAllocateDictionary,
ArrowSchemaDeepCopy,
ArrowSchemaInit,
ArrowSchemaMove,
ArrowSchemaRelease,
ArrowSchemaSetMetadata,
ArrowSchemaSetType,
ArrowSchemaSetTypeDateTime,
ArrowSchemaSetTypeDecimal,
ArrowSchemaSetTypeFixedSize,
ArrowSchemaSetFormat,
ArrowSchemaSetName,
ArrowSchemaToString,
ArrowSchemaViewInit,
ArrowStringView,
ArrowTimeUnit,
ArrowTimeUnitString,
ArrowType,
ArrowTypeString,
NANOARROW_BUFFER_TYPE_NONE,
NANOARROW_MAX_FIXED_BUFFERS,
NANOARROW_TIME_UNIT_SECOND,
NANOARROW_TIME_UNIT_MILLI,
NANOARROW_TIME_UNIT_MICRO,
NANOARROW_TIME_UNIT_NANO,
)
from nanoarrow cimport _types
from nanoarrow._buffer cimport CBuffer
from nanoarrow._utils cimport alloc_c_schema, Error
from typing import Iterable, List, Mapping, Tuple, Union
from nanoarrow import _repr_utils
# This is likely a better fit for a dedicated testing module; however, we need
# it here to produce nice error messages when ensuring that one or
# more arrays conform to a given or inferred schema.
cpdef assert_type_equal(actual, expected, bint check_nullability):
    """Raise unless two schemas describe the same data type

    Compares two CSchema objects for type equality (i.e., an array with
    schema ``actual`` contains elements with the same logical meaning as an
    array with schema ``expected``). Metadata is excluded from the comparison
    at every node of the schema.

    Parameters
    ----------
    actual : CSchema
        The schema to be tested for equality
    expected : CSchema
        The schema against which to test
    check_nullability : bool
        If True, the marked nullability of each node must match in addition
        to the data type information.

    Raises
    ------
    TypeError
        If ``actual`` or ``expected`` is not a CSchema.
    ValueError
        If the two schemas are not type-equal. The message contains a
        (possibly truncated) string representation of both schemas.
    """
    # Validate the arguments in declaration order so that a bad ``actual``
    # is reported before a bad ``expected``.
    for label, schema in (("actual", actual), ("expected", expected)):
        if not isinstance(schema, CSchema):
            raise TypeError(f"{label} is {type(schema).__name__}, not CSchema")

    if actual.type_equals(expected, check_nullability=check_nullability):
        return

    # Labels are limited to 80 characters to keep the error message readable
    actual_label = actual._to_string(max_chars=80, recursive=True)
    expected_label = expected._to_string(max_chars=80, recursive=True)
    message = (
        f"Expected schema\n  '{expected_label}'"
        f"\nbut got\n  '{actual_label}'"
    )
    raise ValueError(message)
cdef class CArrowTimeUnit:
    """
    Python-visible holder for the nanoarrow ``ArrowTimeUnit`` values

    Exposes the ``NANOARROW_TIME_UNIT_*`` constants as class attributes so
    that implementations written in Python can refer to them.
    """
    # One class attribute per NANOARROW_TIME_UNIT_* constant cimported
    # from nanoarrow_c.
    SECOND = NANOARROW_TIME_UNIT_SECOND
    MILLI = NANOARROW_TIME_UNIT_MILLI
    MICRO = NANOARROW_TIME_UNIT_MICRO
    NANO = NANOARROW_TIME_UNIT_NANO
cdef class CLayout:
    """Abstract buffer information for Arrow types

    Provides accessors for buffer counts, types, and attributes. The
    wrapped ``ArrowLayout`` is referenced by pointer; ``base`` is stored
    alongside it (presumably to keep the owner of that memory alive).
    """

    def __cinit__(self, base, uintptr_t ptr):
        self._base = base
        self._layout = <ArrowLayout*>ptr

        # The number of buffers is the index of the first slot marked
        # NANOARROW_BUFFER_TYPE_NONE, or the maximum if every slot is used.
        self._n_buffers = NANOARROW_MAX_FIXED_BUFFERS
        for i in range(NANOARROW_MAX_FIXED_BUFFERS):
            if self._layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE:
                self._n_buffers = i
                break

    @property
    def n_buffers(self) -> int:
        """The number of buffers described by this layout"""
        return self._n_buffers

    @property
    def buffer_data_type_id(self) -> Tuple[int, ...]:
        """The ``buffer_data_type`` value of each buffer (one item per buffer)"""
        return tuple(self._layout.buffer_data_type[i] for i in range(self._n_buffers))

    @property
    def element_size_bits(self) -> Tuple[int, ...]:
        """The ``element_size_bits`` value of each buffer (one item per buffer)"""
        return tuple(self._layout.element_size_bits[i] for i in range(self._n_buffers))

    @property
    def child_size_elements(self) -> int:
        """The ``child_size_elements`` value of the underlying ArrowLayout"""
        return self._layout.child_size_elements
cdef class SchemaMetadata:
    """Dictionary-like wrapper around a lazily-parsed CSchema.metadata string

    The Arrow C Data interface encodes key/value metadata as a bytes-to-bytes
    mapping using a specific packed binary encoding. This class maintains a
    reference to the underlying storage and parses it as required. Note that
    unlike a Python dictionary, ``SchemaMetadata`` can contain duplicate
    keys.
    """

    def __cinit__(self, object base, uintptr_t ptr):
        self._base = base
        self._metadata = <const char*>ptr

    @staticmethod
    def empty():
        """Create an empty SchemaMetadata with no keys or values"""
        return SchemaMetadata(None, 0)

    cdef _init_reader(self):
        # (Re)start parsing from the beginning of the encoded metadata
        cdef int code = ArrowMetadataReaderInit(&self._reader, self._metadata)
        Error.raise_error_not_ok("ArrowMetadataReaderInit()", code)

    def __len__(self):
        self._init_reader()
        return self._reader.remaining_keys

    def __contains__(self, item):
        for key, _ in self.items():
            if item == key:
                return True

        return False

    def __getitem__(self, k) -> bytes:
        """Get the value associated with a unique key

        Retrieves the unique value associated with k. Raises KeyError if
        k does not point to exactly one value in the metadata.
        """
        out = None

        for key, value in self.items():
            if k == key:
                if out is None:
                    out = value
                else:
                    raise KeyError(f"key {k} matches more than one value in metadata")

        if out is None:
            raise KeyError(f"Key {k} not found")

        return out

    def __iter__(self):
        for key, _ in self.items():
            yield key

    def keys(self) -> List[bytes]:
        """List metadata keys

        The result may contain duplicate keys if they exist in the metadata.
        """
        return list(self)

    def values(self) -> List[bytes]:
        """List metadata values"""
        return [value for _, value in self.items()]

    def items(self) -> Iterable[Tuple[bytes, bytes]]:
        """Iterate over key/value pairs

        The result may contain duplicate keys if they exist in the metadata.

        All pairs are parsed before the iterator is returned. The parser
        state lives on this object, so parsing lazily would let any other
        access (e.g., ``metadata[key]`` or ``len(metadata)`` inside a loop
        over the keys) restart the reader and cut the iteration short.

        Raises
        ------
        Exception
            Raised via ``Error.raise_error_not_ok()`` if the reader cannot
            be initialized or a key/value pair cannot be read.
        """
        cdef ArrowStringView key
        cdef ArrowStringView value
        cdef int code

        self._init_reader()
        pairs = []
        while self._reader.remaining_keys > 0:
            code = ArrowMetadataReaderRead(&self._reader, &key, &value)
            # Without this check a failed read would leave key/value
            # unpopulated and the loop condition unchanged.
            Error.raise_error_not_ok("ArrowMetadataReaderRead()", code)
            key_obj = PyBytes_FromStringAndSize(key.data, key.size_bytes)
            value_obj = PyBytes_FromStringAndSize(value.data, value.size_bytes)
            pairs.append((key_obj, value_obj))

        return iter(pairs)

    def __repr__(self) -> str:
        lines = [
            f"<{_repr_utils.make_class_label(self)}>",
            _repr_utils.metadata_repr(self)
        ]
        return "\n".join(lines)
cdef class CSchema:
    """Low-level ArrowSchema wrapper

    This object is a literal wrapper around a read-only ArrowSchema. It provides field accessors
    that return Python objects and handles the C Data interface lifecycle (i.e., initialized
    ArrowSchema structures are always released).

    See ``nanoarrow.c_schema()`` for construction and usage examples.
    """

    @staticmethod
    def allocate() -> CSchema:
        """Allocate a released CSchema"""
        cdef ArrowSchema* c_schema_out
        base = alloc_c_schema(&c_schema_out)
        return CSchema(base, <uintptr_t>(c_schema_out))

    def __cinit__(self, object base, uintptr_t addr):
        self._base = base
        self._ptr = <ArrowSchema*>addr

    def __deepcopy__(self, memo=None) -> CSchema:
        """Create an independent copy using ArrowSchemaDeepCopy()

        Raises RuntimeError if this schema is NULL or released.
        """
        # Copying reads through self._ptr, so refuse NULL/released schemas
        # in the same way the other accessors do.
        self._assert_valid()

        cdef CSchema out = CSchema.allocate()
        cdef int code = ArrowSchemaDeepCopy(self._ptr, out._ptr)
        Error.raise_error_not_ok("ArrowSchemaDeepCopy()", code)

        return out

    @staticmethod
    def _import_from_c_capsule(schema_capsule) -> CSchema:
        """Import from a ArrowSchema PyCapsule

        Parameters
        ----------
        schema_capsule : PyCapsule
            A valid PyCapsule with name 'arrow_schema' containing an
            ArrowSchema pointer.
        """
        return CSchema(
            schema_capsule,
            <uintptr_t>PyCapsule_GetPointer(schema_capsule, "arrow_schema")
        )

    def __arrow_c_schema__(self):
        """
        Export to a ArrowSchema PyCapsule

        The capsule holds a deep copy of this schema.
        """
        self._assert_valid()

        cdef ArrowSchema* c_schema_out
        schema_capsule = alloc_c_schema(&c_schema_out)

        cdef int code = ArrowSchemaDeepCopy(self._ptr, c_schema_out)
        Error.raise_error_not_ok("ArrowSchemaDeepCopy", code)
        return schema_capsule

    @property
    def _capsule(self):
        """
        Returns the capsule backing this CSchema or None if it does not exist
        or points to a parent ArrowSchema.
        """
        cdef ArrowSchema* maybe_capsule_ptr
        maybe_capsule_ptr = <ArrowSchema*>PyCapsule_GetPointer(self._base, 'arrow_schema')

        # The pointers differ if this is a child CSchema whose capsule holds
        # the parent ArrowSchema
        if maybe_capsule_ptr == self._ptr:
            return self._base

        return None

    def _addr(self) -> int:
        """The address of the wrapped ArrowSchema as an integer"""
        return <uintptr_t>self._ptr

    def is_valid(self) -> bool:
        """Check for a non-null and non-released underlying ArrowSchema"""
        return self._ptr != NULL and self._ptr.release != NULL

    def _assert_valid(self):
        """Raise RuntimeError if the underlying ArrowSchema is NULL or released"""
        if self._ptr == NULL:
            raise RuntimeError("schema is NULL")
        if self._ptr.release == NULL:
            raise RuntimeError("schema is released")

    def _to_string(self, int64_t max_chars=0, recursive=False) -> str:
        """Format this schema using ArrowSchemaToString()

        Parameters
        ----------
        max_chars : int
            The size of the output buffer in characters (excluding the
            terminator). If zero or negative, the size is taken from a
            first ArrowSchemaToString() call made without an output buffer.
        recursive : bool
            Passed on to ArrowSchemaToString().
        """
        cdef int64_t n_chars
        cdef char* out

        if max_chars <= 0:
            n_chars = ArrowSchemaToString(self._ptr, NULL, 0, recursive)
        else:
            n_chars = max_chars

        out = <char*>ArrowMalloc(n_chars + 1)
        if not out:
            raise MemoryError()

        # Decoding can fail (e.g., if the output was cut in the middle of a
        # multi-byte character), so make sure the buffer is always freed.
        try:
            ArrowSchemaToString(self._ptr, out, n_chars + 1, recursive)
            return out.decode("UTF-8")
        finally:
            ArrowFree(out)

    def __repr__(self) -> str:
        return _repr_utils.schema_repr(self)

    def type_equals(self, CSchema other, check_nullability: bool=False) -> bool:
        """Test two schemas for data type equality

        Checks two CSchema objects for type equality (i.e., that an array with
        schema ``self`` contains elements with the same logical meaning as an
        array with schema ``other``). Notably, this excludes metadata from
        all nodes in the schema.

        Parameters
        ----------
        other : CSchema
            The schema against which to test
        check_nullability : bool
            If True, self and other will only be considered equal if their
            data type information and marked nullability are identical.

        Raises
        ------
        TypeError
            If ``other`` is None.
        RuntimeError
            If either schema is NULL or released.
        """
        self._assert_valid()

        # A typed argument still accepts None; reading other._ptr from None
        # would be undefined behaviour, so fail with a clear error instead.
        if other is None:
            raise TypeError("other must be a CSchema, not None")

        if self._ptr == other._ptr:
            return True

        if self.format != other.format:
            return False

        # Nullability is not strictly part of the "type"; however, performing
        # this check recursively is verbose to otherwise accomplish and
        # sometimes this does matter.
        cdef int64_t flags = self.flags
        cdef int64_t other_flags = other.flags
        if not check_nullability:
            flags &= ~ARROW_FLAG_NULLABLE
            other_flags &= ~ARROW_FLAG_NULLABLE

        if flags != other_flags:
            return False

        if self.n_children != other.n_children:
            return False

        for child, other_child in zip(self.children, other.children):
            if not child.type_equals(other_child, check_nullability=check_nullability):
                return False

        # Each access to .dictionary creates a new wrapper: look it up once
        dictionary = self.dictionary
        other_dictionary = other.dictionary
        if (dictionary is None) != (other_dictionary is None):
            return False

        if dictionary is not None:
            if not dictionary.type_equals(
                other_dictionary,
                check_nullability=check_nullability
            ):
                return False

        return True

    @property
    def format(self) -> Union[str, None]:
        """The format string, or None if the format pointer is NULL"""
        self._assert_valid()
        if self._ptr.format != NULL:
            return self._ptr.format.decode()
        else:
            return None

    @property
    def name(self) -> Union[str, None]:
        """The field name, or None if the name pointer is NULL"""
        self._assert_valid()
        if self._ptr.name != NULL:
            return self._ptr.name.decode()
        else:
            return None

    @property
    def flags(self) -> int:
        """The raw ArrowSchema flags field"""
        # Checked like the other accessors so that a NULL schema raises
        # instead of dereferencing a NULL pointer.
        self._assert_valid()
        return self._ptr.flags

    @property
    def metadata(self) -> Union[SchemaMetadata, None]:
        """A SchemaMetadata view, or None if the metadata pointer is NULL"""
        self._assert_valid()
        if self._ptr.metadata != NULL:
            return SchemaMetadata(self._base, <uintptr_t>self._ptr.metadata)
        else:
            return None

    @property
    def n_children(self) -> int:
        """The number of child schemas"""
        self._assert_valid()
        return self._ptr.n_children

    def child(self, int64_t i) -> CSchema:
        """Wrap the i-th child schema

        Raises IndexError if i is outside ``[0, n_children)``. The result
        shares this object's base.
        """
        self._assert_valid()
        if i < 0 or i >= self._ptr.n_children:
            raise IndexError(f"{i} out of range [0, {self._ptr.n_children})")

        return CSchema(self._base, <uintptr_t>self._ptr.children[i])

    @property
    def children(self) -> Iterable[CSchema]:
        """Iterate over the child schemas in order"""
        for i in range(self.n_children):
            yield self.child(i)

    @property
    def dictionary(self) -> Union[CSchema, None]:
        """The dictionary schema, or None if the dictionary pointer is NULL"""
        self._assert_valid()
        if self._ptr.dictionary != NULL:
            return CSchema(self, <uintptr_t>self._ptr.dictionary)
        else:
            return None

    def modify(self, *, format=None, name=None, flags=None, nullable=None,
               metadata=None, children=None, dictionary=None, validate=True) -> CSchema:
        """Build a new CSchema from this one with some components replaced

        Every component left as None is copied from this schema.

        Parameters
        ----------
        format : str, optional
            Replacement format string.
        name : str or False, optional
            Replacement name. Pass False to leave the name of the new
            schema unset.
        flags : int, optional
            Replacement flags.
        nullable : bool, optional
            If not None, sets or clears the nullable flag after the flags
            have been applied.
        metadata : mapping, optional
            Key/value metadata for the new schema (used instead of the
            existing metadata).
        children : mapping or sequence of CSchema, optional
            Replacement children. If the object has an ``items()`` method
            its keys are used as child names; otherwise the children keep
            their own names.
        dictionary : CSchema or False, optional
            Replacement dictionary. Pass False to omit the dictionary.
        validate : bool
            If True, a CSchemaView of the result is constructed before
            returning so that an invalid combination raises.
        """
        cdef CSchemaBuilder builder = CSchemaBuilder.allocate()

        if format is None:
            builder.set_format(self.format)
        else:
            builder.set_format(format)

        if name is None:
            builder.set_name(self.name)
        elif name is not False:
            builder.set_name(name)

        if flags is None:
            builder.set_flags(self.flags)
        else:
            builder.set_flags(flags)

        if nullable is not None:
            builder.set_nullable(nullable)

        if metadata is None:
            if self.metadata is not None:
                builder.append_metadata(self.metadata)
        else:
            builder.append_metadata(metadata)

        if children is None:
            if self.n_children > 0:
                builder.allocate_children(self.n_children)
                for i, child in enumerate(self.children):
                    builder.set_child(i, None, child)
        elif hasattr(children, "items"):
            builder.allocate_children(len(children))
            # A distinct loop variable avoids rebinding the name argument
            for i, (child_name, child) in enumerate(children.items()):
                builder.set_child(i, child_name, child)
        else:
            builder.allocate_children(len(children))
            for i, child in enumerate(children):
                builder.set_child(i, None, child)

        if dictionary is None:
            existing_dictionary = self.dictionary
            if existing_dictionary is not None:
                builder.set_dictionary(existing_dictionary)
        elif dictionary is not False:
            builder.set_dictionary(dictionary)

        if validate:
            builder.validate()

        return builder.finish()
cdef class CSchemaView:
    """Low-level ArrowSchemaView wrapper

    This object is a literal wrapper around a read-only ArrowSchemaView. It provides field accessors
    that return Python objects and handles structure lifecycle. Compared to an ArrowSchema,
    the nanoarrow ArrowSchemaView facilitates access to the deserialized content of an ArrowSchema
    (e.g., parameter values for parameterized types).

    See `nanoarrow.c_schema_view()` for construction and usage examples.
    """

    def __cinit__(self, CSchema schema):
        self._base = schema
        self._schema_view.type = <ArrowType>_types.UNINITIALIZED
        self._schema_view.storage_type = <ArrowType>_types.UNINITIALIZED

        cdef Error error = Error()
        cdef int code = ArrowSchemaViewInit(&self._schema_view, schema._ptr, &error.c_error)
        error.raise_message_not_ok("ArrowSchemaViewInit()", code)

        # Cache the masked flag bits; the accessors below report them as bool
        self._dictionary_ordered = schema._ptr.flags & ARROW_FLAG_DICTIONARY_ORDERED
        self._nullable = schema._ptr.flags & ARROW_FLAG_NULLABLE
        self._map_keys_sorted = schema._ptr.flags & ARROW_FLAG_MAP_KEYS_SORTED

    @property
    def layout(self) -> CLayout:
        """A CLayout wrapping the layout member of the schema view"""
        return CLayout(self, <uintptr_t>&self._schema_view.layout)

    @property
    def type_id(self) -> int:
        """The integer type identifier of this schema"""
        return self._schema_view.type

    @property
    def storage_type_id(self) -> int:
        """The integer type identifier of the storage type"""
        return self._schema_view.storage_type

    @property
    def storage_buffer_format(self) -> Union[str, None]:
        """A Python struct format for an element of the storage, if any

        Falls back to 'i' for date32, 'q' for timestamp/date64/duration and
        to the format of the storage for extension types when
        ``buffer_format`` is None. Returns None if none of these apply.
        """
        if self.buffer_format is not None:
            return self.buffer_format
        elif _types.equal(self._schema_view.type, _types.DATE32):
            return 'i'
        elif _types.one_of(
            self._schema_view.type,
            (_types.TIMESTAMP, _types.DATE64, _types.DURATION)
        ):
            return 'q'
        elif self.extension_name:
            return self._get_buffer_format()
        else:
            return None

    @property
    def buffer_format(self) -> Union[str, None]:
        """The Python struct format representing an element of this type
        or None if there is no Python format string that can represent this
        type without losing information.
        """
        if self.extension_name:
            return None
        else:
            return self._get_buffer_format()

    def _get_buffer_format(self):
        """Compute the struct format ignoring any extension type

        Returns None when the type differs from its storage type, when the
        layout does not have exactly two buffers, or when
        ``_types.to_format()`` raises ValueError.
        """
        if self._schema_view.type != self._schema_view.storage_type:
            return None

        # String/binary types do not have format strings as far as the Python
        # buffer protocol is concerned
        if self.layout.n_buffers != 2:
            return None

        cdef char out[128]
        cdef int element_size_bits = 0
        if _types.equal(self._schema_view.type, _types.FIXED_SIZE_BINARY):
            element_size_bits = self._schema_view.fixed_size * 8

        try:
            _types.to_format(self._schema_view.type, element_size_bits, sizeof(out), out)
            return out.decode()
        except ValueError:
            return None

    @property
    def type(self) -> str:
        """The type name as reported by ArrowTypeString()"""
        cdef const char* type_str = ArrowTypeString(self._schema_view.type)
        if type_str != NULL:
            return type_str.decode()
        else:
            raise ValueError("ArrowTypeString() returned NULL")

    @property
    def storage_type(self) -> str:
        """The storage type name as reported by ArrowTypeString()"""
        cdef const char* type_str = ArrowTypeString(self._schema_view.storage_type)
        if type_str != NULL:
            return type_str.decode()
        else:
            raise ValueError("ArrowTypeString() returned NULL")

    @property
    def dictionary_ordered(self) -> Union[bool, None]:
        """The dictionary-ordered flag, or None for non-dictionary types"""
        if _types.equal(self._schema_view.type, _types.DICTIONARY):
            return self._dictionary_ordered != 0
        else:
            return None

    @property
    def nullable(self) -> bool:
        """Whether the nullable flag is set"""
        return self._nullable != 0

    @property
    def map_keys_sorted(self) -> Union[bool, None]:
        """The map-keys-sorted flag, or None for non-map types"""
        if _types.equal(self._schema_view.type, _types.MAP):
            return self._map_keys_sorted != 0
        else:
            return None

    @property
    def fixed_size(self) -> Union[int, None]:
        """The fixed size, or None for types that are not fixed-size"""
        if _types.is_fixed_size(self._schema_view.type):
            return self._schema_view.fixed_size
        else:
            return None

    @property
    def decimal_bitwidth(self) -> Union[int, None]:
        """The decimal bit width, or None for non-decimal types"""
        if _types.is_decimal(self._schema_view.type):
            return self._schema_view.decimal_bitwidth
        else:
            return None

    @property
    def decimal_precision(self) -> Union[int, None]:
        """The decimal precision, or None for non-decimal types"""
        if _types.is_decimal(self._schema_view.type):
            return self._schema_view.decimal_precision
        else:
            return None

    @property
    def decimal_scale(self) -> Union[int, None]:
        """The decimal scale, or None for non-decimal types"""
        if _types.is_decimal(self._schema_view.type):
            return self._schema_view.decimal_scale
        else:
            return None

    @property
    def time_unit_id(self) -> Union[int, None]:
        """The integer time unit, or None for types without a time unit"""
        if _types.has_time_unit(self._schema_view.type):
            return self._schema_view.time_unit
        else:
            return None

    @property
    def time_unit(self) -> Union[str, None]:
        """The time unit as reported by ArrowTimeUnitString(), or None for
        types without a time unit
        """
        if _types.has_time_unit(self._schema_view.type):
            return ArrowTimeUnitString(self._schema_view.time_unit).decode()
        else:
            return None

    @property
    def timezone(self) -> Union[str, None]:
        """The timezone string, or None for non-timestamp types"""
        if _types.equal(self._schema_view.type, _types.TIMESTAMP):
            return self._schema_view.timezone.decode()
        else:
            return None

    @property
    def union_type_ids(self) -> Union[Tuple[int, ...], None]:
        """The union type identifiers, or None for non-union types

        The identifiers are parsed from the comma-separated string stored
        in the schema view. An empty string gives an empty tuple.
        """
        if not _types.is_union(self._schema_view.type):
            return None

        type_ids_str = self._schema_view.union_type_ids.decode()
        if not type_ids_str:
            # "".split(",") would yield [""], which int() cannot parse
            return ()

        # Materialize a tuple (as annotated) instead of a single-use generator
        return tuple(int(type_id) for type_id in type_ids_str.split(','))

    @property
    def extension_name(self) -> Union[str, None]:
        """The extension name, or None if the schema view has none"""
        if self._schema_view.extension_name.data != NULL:
            name_bytes = PyBytes_FromStringAndSize(
                self._schema_view.extension_name.data,
                self._schema_view.extension_name.size_bytes
            )
            return name_bytes.decode()
        else:
            return None

    @property
    def extension_metadata(self) -> Union[bytes, None]:
        """The extension metadata bytes, or None if the schema view has no
        extension name
        """
        if self._schema_view.extension_name.data != NULL:
            return PyBytes_FromStringAndSize(
                self._schema_view.extension_metadata.data,
                self._schema_view.extension_metadata.size_bytes
            )
        else:
            return None

    def __repr__(self) -> str:
        return _repr_utils.schema_view_repr(self)
cdef class CSchemaBuilder:
    """Helper for constructing an ArrowSchema

    The primary function of this class is to wrap the nanoarrow C library calls
    that build up the components of an ArrowSchema. The setters return the
    builder itself so that calls can be chained.
    """

    def __cinit__(self, CSchema schema):
        self.c_schema = schema
        self._ptr = schema._ptr
        # A released schema is initialized so that it can be populated
        if self._ptr.release == NULL:
            ArrowSchemaInit(self._ptr)

    @staticmethod
    def allocate() -> CSchemaBuilder:
        """Create a CSchemaBuilder

        Allocates memory for an ArrowSchema and populates it with nanoarrow's
        ArrowSchema private_data/release callback implementation. This should
        usually be followed by :meth:`set_type` or :meth:`set_format`.
        """
        return CSchemaBuilder(CSchema.allocate())

    def append_metadata(self, metadata: Mapping[bytes, bytes]) -> CSchemaBuilder:
        """Append key/value metadata

        Keys and values given as ``str`` are encoded; anything else is
        converted with ``bytes()``. The pairs are appended to whatever
        metadata the schema already has. If ``metadata`` has no items the
        schema is left untouched.
        """
        cdef CBuffer buffer = CBuffer.empty()

        # Start from the existing metadata so that new pairs are appended
        cdef const char* existing_metadata = self.c_schema._ptr.metadata
        cdef int code = ArrowMetadataBuilderInit(buffer._ptr, existing_metadata)
        Error.raise_error_not_ok("ArrowMetadataBuilderInit()", code)

        cdef ArrowStringView key
        cdef ArrowStringView value
        cdef int32_t keys_added = 0

        for k, v in metadata.items():
            k = k.encode() if isinstance(k, str) else bytes(k)
            key.data = PyBytes_AsString(k)
            key.size_bytes = PyBytes_Size(k)

            v = v.encode() if isinstance(v, str) else bytes(v)
            value.data = PyBytes_AsString(v)
            value.size_bytes = PyBytes_Size(v)

            code = ArrowMetadataBuilderAppend(buffer._ptr, key, value)
            Error.raise_error_not_ok("ArrowMetadataBuilderAppend()", code)
            keys_added += 1

        if keys_added > 0:
            code = ArrowSchemaSetMetadata(self.c_schema._ptr, <const char*>buffer._ptr.data)
            Error.raise_error_not_ok("ArrowSchemaSetMetadata()", code)

        return self

    def child(self, int64_t i) -> CSchemaBuilder:
        """Create a builder that wraps the i-th child of the schema being built"""
        return CSchemaBuilder(self.c_schema.child(i))

    def set_type(self, int type_id) -> CSchemaBuilder:
        """Set the type using ArrowSchemaSetType()"""
        self.c_schema._assert_valid()

        cdef int code = ArrowSchemaSetType(self._ptr, <ArrowType>type_id)
        Error.raise_error_not_ok("ArrowSchemaSetType()", code)

        return self

    def set_type_decimal(self, int type_id, int precision, int scale) -> CSchemaBuilder:
        """Set a decimal type using ArrowSchemaSetTypeDecimal()"""
        self.c_schema._assert_valid()

        cdef int code = ArrowSchemaSetTypeDecimal(self._ptr, <ArrowType>type_id, precision, scale)
        # Report the function that was actually called
        Error.raise_error_not_ok("ArrowSchemaSetTypeDecimal()", code)

        # Return the builder like every other setter so that calls chain
        return self

    def set_type_fixed_size(self, int type_id, int fixed_size) -> CSchemaBuilder:
        """Set a fixed-size type using ArrowSchemaSetTypeFixedSize()"""
        self.c_schema._assert_valid()

        cdef int code = ArrowSchemaSetTypeFixedSize(self._ptr, <ArrowType>type_id, fixed_size)
        Error.raise_error_not_ok("ArrowSchemaSetTypeFixedSize()", code)

        return self

    def set_type_date_time(self, int type_id, int time_unit, timezone) -> CSchemaBuilder:
        """Set a date/time type using ArrowSchemaSetTypeDateTime()

        A ``timezone`` of None is passed on as NULL; any other value is
        converted with ``str()`` and encoded as UTF-8.
        """
        self.c_schema._assert_valid()

        cdef int code
        if timezone is None:
            code = ArrowSchemaSetTypeDateTime(self._ptr, <ArrowType>type_id, <ArrowTimeUnit>time_unit, NULL)
        else:
            timezone = str(timezone)
            code = ArrowSchemaSetTypeDateTime(self._ptr, <ArrowType>type_id, <ArrowTimeUnit>time_unit, timezone.encode("UTF-8"))

        Error.raise_error_not_ok("ArrowSchemaSetTypeDateTime()", code)

        return self

    def set_format(self, str format) -> CSchemaBuilder:
        """Set the format string using ArrowSchemaSetFormat()"""
        self.c_schema._assert_valid()

        cdef int code = ArrowSchemaSetFormat(self._ptr, format.encode("UTF-8"))
        Error.raise_error_not_ok("ArrowSchemaSetFormat()", code)

        return self

    def set_name(self, name) -> CSchemaBuilder:
        """Set the name using ArrowSchemaSetName()

        A ``name`` of None is passed on as NULL; any other value is
        converted with ``str()`` and encoded as UTF-8.
        """
        self.c_schema._assert_valid()

        cdef int code
        if name is None:
            code = ArrowSchemaSetName(self._ptr, NULL)
        else:
            name = str(name)
            code = ArrowSchemaSetName(self._ptr, name.encode("UTF-8"))

        Error.raise_error_not_ok("ArrowSchemaSetName()", code)

        return self

    def allocate_children(self, int n) -> CSchemaBuilder:
        """Allocate n children using ArrowSchemaAllocateChildren()"""
        self.c_schema._assert_valid()

        cdef int code = ArrowSchemaAllocateChildren(self._ptr, n)
        Error.raise_error_not_ok("ArrowSchemaAllocateChildren()", code)

        return self

    def set_child(self, int64_t i, name, CSchema child_src) -> CSchemaBuilder:
        """Replace the i-th child with a deep copy of ``child_src``

        Raises IndexError if i is outside ``[0, n_children)``. If ``name``
        is not None it overrides the name copied from ``child_src``.
        """
        self.c_schema._assert_valid()

        if i < 0 or i >= self._ptr.n_children:
            raise IndexError(f"Index out of range: {i}")

        # Release whatever currently occupies the slot before copying into it
        if self._ptr.children[i].release != NULL:
            ArrowSchemaRelease(self._ptr.children[i])

        cdef int code = ArrowSchemaDeepCopy(child_src._ptr, self._ptr.children[i])
        Error.raise_error_not_ok("ArrowSchemaDeepCopy()", code)

        if name is not None:
            name = str(name)
            code = ArrowSchemaSetName(self._ptr.children[i], name.encode("UTF-8"))
            Error.raise_error_not_ok("ArrowSchemaSetName()", code)

        return self

    def set_dictionary(self, CSchema dictionary) -> CSchemaBuilder:
        """Set the dictionary to a deep copy of ``dictionary``

        The dictionary member is allocated first if it does not exist; an
        existing, unreleased dictionary is released before the copy.
        """
        self.c_schema._assert_valid()

        cdef int code
        if self._ptr.dictionary == NULL:
            code = ArrowSchemaAllocateDictionary(self._ptr)
            Error.raise_error_not_ok("ArrowSchemaAllocateDictionary()", code)

        if self._ptr.dictionary.release != NULL:
            ArrowSchemaRelease(self._ptr.dictionary)

        code = ArrowSchemaDeepCopy(dictionary._ptr, self._ptr.dictionary)
        Error.raise_error_not_ok("ArrowSchemaDeepCopy()", code)

        return self

    def set_flags(self, flags) -> CSchemaBuilder:
        """Overwrite the flags field"""
        self._ptr.flags = flags
        return self

    def set_nullable(self, nullable) -> CSchemaBuilder:
        """Set or clear ARROW_FLAG_NULLABLE, leaving other flags unchanged"""
        if nullable:
            self._ptr.flags = self._ptr.flags | ARROW_FLAG_NULLABLE
        else:
            self._ptr.flags = self._ptr.flags & ~ARROW_FLAG_NULLABLE

        return self

    def set_dictionary_ordered(self, dictionary_ordered) -> CSchemaBuilder:
        """Set or clear ARROW_FLAG_DICTIONARY_ORDERED, leaving other flags unchanged"""
        if dictionary_ordered:
            self._ptr.flags = self._ptr.flags | ARROW_FLAG_DICTIONARY_ORDERED
        else:
            self._ptr.flags = self._ptr.flags & ~ARROW_FLAG_DICTIONARY_ORDERED

        return self

    def set_map_keys_sorted(self, map_keys_sorted) -> CSchemaBuilder:
        """Set or clear ARROW_FLAG_MAP_KEYS_SORTED, leaving other flags unchanged"""
        if map_keys_sorted:
            self._ptr.flags = self._ptr.flags | ARROW_FLAG_MAP_KEYS_SORTED
        else:
            self._ptr.flags = self._ptr.flags & ~ARROW_FLAG_MAP_KEYS_SORTED

        return self

    def validate(self) -> CSchemaView:
        """Construct a CSchemaView of the schema being built

        Constructing the view runs ArrowSchemaViewInit(), which raises if
        the schema cannot be interpreted.
        """
        return CSchemaView(self.c_schema)

    def finish(self) -> CSchema:
        """Move the built schema into a new CSchema and return it

        The builder's own schema is re-initialized afterwards.
        """
        self.c_schema._assert_valid()

        cdef CSchema out = CSchema.allocate()
        ArrowSchemaMove(self.c_schema._ptr, out._ptr)
        # Leave the builder holding a freshly initialized schema
        ArrowSchemaInit(self.c_schema._ptr)
        return out