python/src/nanoarrow/_schema.pyx - arrow-nanoarrow - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 # cython: language_level = 3

 from libc.stdint cimport int32_t, int64_t, uintptr_t
 from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AsString, PyBytes_Size
 from cpython.pycapsule cimport PyCapsule_GetPointer

 from nanoarrow_c cimport (
     ARROW_FLAG_DICTIONARY_ORDERED,
     ARROW_FLAG_MAP_KEYS_SORTED,
     ARROW_FLAG_NULLABLE,
     ArrowFree,
     ArrowLayout,
     ArrowMalloc,
     ArrowMetadataBuilderAppend,
     ArrowMetadataBuilderInit,
     ArrowMetadataReaderInit,
     ArrowMetadataReaderRead,
     ArrowSchema,
     ArrowSchemaAllocateChildren,
     ArrowSchemaAllocateDictionary,
     ArrowSchemaDeepCopy,
     ArrowSchemaInit,
     ArrowSchemaMove,
     ArrowSchemaRelease,
     ArrowSchemaSetMetadata,
     ArrowSchemaSetType,
     ArrowSchemaSetTypeDateTime,
     ArrowSchemaSetTypeDecimal,
     ArrowSchemaSetTypeFixedSize,
     ArrowSchemaSetFormat,
     ArrowSchemaSetName,
     ArrowSchemaToString,
     ArrowSchemaViewInit,
     ArrowStringView,
     ArrowTimeUnit,
     ArrowTimeUnitString,
     ArrowType,
     ArrowTypeString,
     NANOARROW_BUFFER_TYPE_NONE,
     NANOARROW_MAX_FIXED_BUFFERS,
     NANOARROW_TIME_UNIT_SECOND,
     NANOARROW_TIME_UNIT_MILLI,
     NANOARROW_TIME_UNIT_MICRO,
     NANOARROW_TIME_UNIT_NANO,
 )

 from nanoarrow cimport _types
 from nanoarrow._buffer cimport CBuffer
 from nanoarrow._utils cimport alloc_c_schema, Error

 from typing import Iterable, List, Mapping, Tuple, Union

 from nanoarrow import _repr_utils


 # This is likely a better fit for a dedicated testing module; however, we need
 # it here to produce nice error messages when ensuring that one or
 # more arrays conform to a given or inferred schema.
 cpdef assert_type_equal(actual, expected, bint check_nullability):
     """Test two schemas for data type equality

     Checks two CSchema objects for type equality (i.e., that an array with
     schema ``actual`` contains elements with the same logical meaning as and
     array with schema ``expected``). Notably, this excludes metadata from
     all nodes in the schema.

     Parameters
     ----------
     actual : CSchema
         The schema to be tested for equality
     expected : CSchema
         The schema against which to test
     check_nullability : bool
         If True, actual and expected will be considered equal if their
         data type information and marked nullability are identical.
     """
     if not isinstance(actual, CSchema):
         raise TypeError(f"actual is {type(actual).__name__}, not CSchema")

     if not isinstance(expected, CSchema):
         raise TypeError(f"expected is {type(expected).__name__}, not CSchema")

     if not actual.type_equals(expected, check_nullability=check_nullability):
         actual_label = actual._to_string(max_chars=80, recursive=True)
         expected_label = expected._to_string(max_chars=80, recursive=True)
         raise ValueError(
             f"Expected schema\n  '{expected_label}'"
             f"\nbut got\n  '{actual_label}'"
         )


 cdef class CArrowTimeUnit:
     """
     Wrapper around ArrowTimeUnit that gives implementations written in Python
     access to the time unit values.
     """

     # Each class attribute is an alias for the cimported nanoarrow constant
     # with the corresponding NANOARROW_TIME_UNIT_* name.
     SECOND = NANOARROW_TIME_UNIT_SECOND
     MILLI = NANOARROW_TIME_UNIT_MILLI
     MICRO = NANOARROW_TIME_UNIT_MICRO
     NANO = NANOARROW_TIME_UNIT_NANO


 cdef class CLayout:
     """Abstract buffer information for Arrow types

     Provides accessors for buffer counts, types, and attributes.
     """

     def __cinit__(self, base, uintptr_t ptr):
         self._base = base
         self._layout = <ArrowLayout*>ptr

         self._n_buffers = NANOARROW_MAX_FIXED_BUFFERS
         for i in range(NANOARROW_MAX_FIXED_BUFFERS):
             if self._layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE:
                 self._n_buffers = i
                 break

     @property
     def n_buffers(self) -> int:
         return self._n_buffers

     @property
     def buffer_data_type_id(self) -> int:
         return tuple(self._layout.buffer_data_type[i] for i in range(self._n_buffers))

     @property
     def element_size_bits(self) -> int:
         return tuple(self._layout.element_size_bits[i] for i in range(self._n_buffers))

     @property
     def child_size_elements(self) -> int:
         return self._layout.child_size_elements


 cdef class SchemaMetadata:
     """Dictionary-like wrapper around a lazily-parsed CSchema.metadata string

     The Arrow C Data interface encodes key/value metadata as a bytes-to-bytes
     mapping using a specific packed binary encoding. This class maintains a
     reference to the underlying storage and parses it as required. Note that
     unlike a Python dictionary, ``SchemaMetadata`` can contain duplicate
     keys.
     """

     def __cinit__(self, object base, uintptr_t ptr):
         self._base = base
         self._metadata = <const char*>ptr

     @staticmethod
     def empty():
         """Create an empty SchemaMetadata with no keys or values"""
         return SchemaMetadata(None, 0)

     cdef _init_reader(self):
         cdef int code = ArrowMetadataReaderInit(&self._reader, self._metadata)
         Error.raise_error_not_ok("ArrowMetadataReaderInit()", code)

     def __len__(self):
         self._init_reader()
         return self._reader.remaining_keys

     def __contains__(self, item):
         for key, _ in self.items():
             if item == key:
                 return True

         return False

     def __getitem__(self, k) -> bytes:
         """Get the value associated with a unique key

         Retrieves the unique value associated with k. Raises KeyError if
         k does not point to exactly one value in the metadata.
         """
         out = None

         for key, value in self.items():
             if k == key:
                 if out is None:
                     out = value
                 else:
                     raise KeyError(f"key {k} matches more than one value in metadata")

         if out is None:
             raise KeyError(f"Key {k} not found")

         return out

     def __iter__(self):
         for key, _ in self.items():
             yield key

     def keys(self) -> List[bytes]:
         """List meadata keys

         The result may contain duplicate keys if they exist in the metadata.
         """
         return list(self)

     def values(self) -> List[bytes]:
         """List metadata values"""
         return [value for _, value in self.items()]

     def items(self) -> Iterable[bytes, bytes]:
         """Iterate over key/value pairs

         The result may contain duplicate keys if they exist in the metadata."""
         cdef ArrowStringView key
         cdef ArrowStringView value
         self._init_reader()
         while self._reader.remaining_keys > 0:
             ArrowMetadataReaderRead(&self._reader, &key, &value)
             key_obj = PyBytes_FromStringAndSize(key.data, key.size_bytes)
             value_obj = PyBytes_FromStringAndSize(value.data, value.size_bytes)
             yield key_obj, value_obj

     def __repr__(self) -> str:
         lines = [
             f"<{_repr_utils.make_class_label(self)}>",
             _repr_utils.metadata_repr(self)
         ]
         return "\n".join(lines)


 cdef class CSchema:
     """Low-level ArrowSchema wrapper

     This object is a literal wrapper around a read-only ArrowSchema. It provides field accessors
     that return Python objects and handles the C Data interface lifecycle (i.e., initialized
     ArrowSchema structures are always released).

     See ``nanoarrow.c_schema()`` for construction and usage examples.
     """

     @staticmethod
     def allocate() -> CSchema:
         """Allocate a released CSchema"""
         cdef ArrowSchema* c_schema_out
         base = alloc_c_schema(&c_schema_out)
         return CSchema(base, <uintptr_t>(c_schema_out))

     def __cinit__(self, object base, uintptr_t addr):
         self._base = base
         self._ptr = <ArrowSchema*>addr

     def __deepcopy__(self, memo=None) -> CSchema:
         cdef CSchema out = CSchema.allocate()
         cdef int code = ArrowSchemaDeepCopy(self._ptr, out._ptr)
         Error.raise_error_not_ok("ArrowSchemaDeepCopy()", code)

         return out

     @staticmethod
     def _import_from_c_capsule(schema_capsule) -> CSchema:
         """Import from a ArrowSchema PyCapsule

         Parameters
         ----------
         schema_capsule : PyCapsule
             A valid PyCapsule with name 'arrow_schema' containing an
             ArrowSchema pointer.
         """
         return CSchema(
             schema_capsule,
             <uintptr_t>PyCapsule_GetPointer(schema_capsule, "arrow_schema")
         )

     def __arrow_c_schema__(self):
         """
         Export to a ArrowSchema PyCapsule
         """
         self._assert_valid()

         cdef ArrowSchema* c_schema_out
         schema_capsule = alloc_c_schema(&c_schema_out)

         cdef int code = ArrowSchemaDeepCopy(self._ptr, c_schema_out)
         Error.raise_error_not_ok("ArrowSchemaDeepCopy", code)
         return schema_capsule

     @property
     def _capsule(self):
         """
         Returns the capsule backing this CSchema or None if it does not exist
         or points to a parent ArrowSchema.
         """
         cdef ArrowSchema* maybe_capsule_ptr
         maybe_capsule_ptr = <ArrowSchema*>PyCapsule_GetPointer(self._base, 'arrow_schema')

         # This will return False if this is a child CSchema whose capsule holds
         # the parent ArrowSchema
         if maybe_capsule_ptr == self._ptr:
             return self._base

         return None

     def _addr(self) -> int:
         return <uintptr_t>self._ptr

     def is_valid(self) -> bool:
         """Check for a non-null and non-released underlying ArrowSchema"""
         return self._ptr != NULL and self._ptr.release != NULL

     def _assert_valid(self):
         if self._ptr == NULL:
             raise RuntimeError("schema is NULL")
         if self._ptr.release == NULL:
             raise RuntimeError("schema is released")

     def _to_string(self, int64_t max_chars=0, recursive=False) -> str:
         cdef int64_t n_chars
         if max_chars == 0:
             n_chars = ArrowSchemaToString(self._ptr, NULL, 0, recursive)
         else:
             n_chars = max_chars

         cdef char* out = <char*>ArrowMalloc(n_chars + 1)
         if not out:
             raise MemoryError()

         ArrowSchemaToString(self._ptr, out, n_chars + 1, recursive)
         out_str = out.decode("UTF-8")
         ArrowFree(out)

         return out_str

     def __repr__(self) -> str:
         return _repr_utils.schema_repr(self)

     def type_equals(self, CSchema other, check_nullability: bool=False) -> bool:
         """Test two schemas for data type equality

         Checks two CSchema objects for type equality (i.e., that an array with
         schema ``actual`` contains elements with the same logical meaning as and
         array with schema ``expected``). Notably, this excludes metadata from
         all nodes in the schema.

         Parameters
         ----------
         other : CSchema
             The schema against which to test
         check_nullability : bool
             If True, actual and expected will be considered equal if their
             data type information and marked nullability are identical.
         """
         self._assert_valid()

         if self._ptr == other._ptr:
             return True

         if self.format != other.format:
             return False

         # Nullability is not strictly part of the "type"; however, performing
         # this check recursively is verbose to otherwise accomplish and
         # sometimes this does matter.
         cdef int64_t flags = self.flags
         cdef int64_t other_flags = other.flags
         if not check_nullability:
             flags &= ~ARROW_FLAG_NULLABLE
             other_flags &= ~ARROW_FLAG_NULLABLE

         if flags != other_flags:
             return False

         if self.n_children != other.n_children:
             return False

         for child, other_child in zip(self.children, other.children):
             if not child.type_equals(other_child, check_nullability=check_nullability):
                 return False

         if (self.dictionary is None) != (other.dictionary is None):
             return False

         if self.dictionary is not None:
             if not self.dictionary.type_equals(
                 other.dictionary,
                 check_nullability=check_nullability
             ):
                 return False

         return True


     @property
     def format(self) -> str:
         self._assert_valid()
         if self._ptr.format != NULL:
             return self._ptr.format.decode()

     @property
     def name(self) -> Union[str, None]:
         self._assert_valid()
         if self._ptr.name != NULL:
             return self._ptr.name.decode()
         else:
             return None

     @property
     def flags(self) -> int:
         return self._ptr.flags

     @property
     def metadata(self) -> SchemaMetadata:
         self._assert_valid()
         if self._ptr.metadata != NULL:
             return SchemaMetadata(self._base, <uintptr_t>self._ptr.metadata)
         else:
             return None

     @property
     def n_children(self) -> int:
         self._assert_valid()
         return self._ptr.n_children

     def child(self, int64_t i):
         self._assert_valid()
         if i < 0 or i >= self._ptr.n_children:
             raise IndexError(f"{i} out of range [0, {self._ptr.n_children})")

         return CSchema(self._base, <uintptr_t>self._ptr.children[i])

     @property
     def children(self) -> Iterable[CSchema]:
         for i in range(self.n_children):
             yield self.child(i)

     @property
     def dictionary(self) -> Union[CSchema, None]:
         self._assert_valid()
         if self._ptr.dictionary != NULL:
             return CSchema(self, <uintptr_t>self._ptr.dictionary)
         else:
             return None

     def modify(self, *, format=None, name=None, flags=None, nullable=None,
                metadata=None, children=None, dictionary=None, validate=True) -> CSchema:
         cdef CSchemaBuilder builder = CSchemaBuilder.allocate()

         if format is None:
             builder.set_format(self.format)
         else:
             builder.set_format(format)

         if name is None:
             builder.set_name(self.name)
         elif name is not False:
             builder.set_name(name)

         if flags is None:
             builder.set_flags(self.flags)
         else:
             builder.set_flags(flags)

         if nullable is not None:
             builder.set_nullable(nullable)

         if metadata is None:
             if self.metadata is not None:
                 builder.append_metadata(self.metadata)
         else:
             builder.append_metadata(metadata)

         if children is None:
             if self.n_children > 0:
                 builder.allocate_children(self.n_children)
                 for i, child in enumerate(self.children):
                     builder.set_child(i, None, child)
         elif hasattr(children, "items"):
             builder.allocate_children(len(children))
             for i, item in enumerate(children.items()):
                 name, child = item
                 builder.set_child(i, name, child)
         else:
             builder.allocate_children(len(children))
             for i, child in enumerate(children):
                 builder.set_child(i, None, child)

         if dictionary is None:
             if self.dictionary:
                 builder.set_dictionary(self.dictionary)
         elif dictionary is not False:
             builder.set_dictionary(dictionary)

         if validate:
             builder.validate()

         return builder.finish()


 cdef class CSchemaView:
     """Low-level ArrowSchemaView wrapper

     This object is a literal wrapper around a read-only ArrowSchemaView. It provides field accessors
     that return Python objects and handles structure lifecycle. Compared to an ArrowSchema,
     the nanoarrow ArrowSchemaView facilitates access to the deserialized content of an ArrowSchema
     (e.g., parameter values for parameterized types).

     See `nanoarrow.c_schema_view()` for construction and usage examples.
     """

     def __cinit__(self, CSchema schema):
         self._base = schema
         self._schema_view.type = <ArrowType>_types.UNINITIALIZED
         self._schema_view.storage_type = <ArrowType>_types.UNINITIALIZED

         cdef Error error = Error()
         cdef int code = ArrowSchemaViewInit(&self._schema_view, schema._ptr, &error.c_error)
         error.raise_message_not_ok("ArrowSchemaViewInit()", code)

         self._dictionary_ordered = schema._ptr.flags & ARROW_FLAG_DICTIONARY_ORDERED
         self._nullable = schema._ptr.flags & ARROW_FLAG_NULLABLE
         self._map_keys_sorted = schema._ptr.flags & ARROW_FLAG_MAP_KEYS_SORTED

     @property
     def layout(self) -> CLayout:
         return CLayout(self, <uintptr_t>&self._schema_view.layout)

     @property
     def type_id(self) -> int:
         return self._schema_view.type

     @property
     def storage_type_id(self) -> int:
         return self._schema_view.storage_type

     @property
     def storage_buffer_format(self) -> Union[str, None]:
         if self.buffer_format is not None:
             return self.buffer_format
         elif _types.equal(self._schema_view.type, _types.DATE32):
             return 'i'
         elif _types.one_of(
             self._schema_view.type,
             (_types.TIMESTAMP, _types.DATE64, _types.DURATION)
         ):
             return 'q'
         elif self.extension_name:
             return self._get_buffer_format()
         else:
             return None

     @property
     def buffer_format(self) -> Union[str, None]:
         """The Python struct format representing an element of this type
         or None if there is no Python format string that can represent this
         type without loosing information.
         """
         if self.extension_name:
             return None
         else:
             return self._get_buffer_format()

     def _get_buffer_format(self):
         if self._schema_view.type != self._schema_view.storage_type:
             return None

         # String/binary types do not have format strings as far as the Python
         # buffer protocol is concerned
         if self.layout.n_buffers != 2:
             return None

         cdef char out[128]
         cdef int element_size_bits = 0
         if _types.equal(self._schema_view.type, _types.FIXED_SIZE_BINARY):
             element_size_bits = self._schema_view.fixed_size * 8

         try:
             _types.to_format(self._schema_view.type, element_size_bits, sizeof(out), out)
             return out.decode()
         except ValueError:
             return None

     @property
     def type(self) -> str:
         cdef const char* type_str = ArrowTypeString(self._schema_view.type)
         if type_str != NULL:
             return type_str.decode()
         else:
             raise ValueError("ArrowTypeString() returned NULL")

     @property
     def storage_type(self) -> str:
         cdef const char* type_str = ArrowTypeString(self._schema_view.storage_type)
         if type_str != NULL:
             return type_str.decode()
         else:
             raise ValueError("ArrowTypeString() returned NULL")

     @property
     def dictionary_ordered(self) -> Union[bool, None]:
         if _types.equal(self._schema_view.type, _types.DICTIONARY):
             return self._dictionary_ordered != 0
         else:
             return None

     @property
     def nullable(self) -> bool:
         return self._nullable != 0

     @property
     def map_keys_sorted(self) -> Union[bool, None]:
         if _types.equal(self._schema_view.type, _types.MAP):
             return self._map_keys_sorted != 0
         else:
             return None

     @property
     def fixed_size(self) -> Union[bool, None]:
         if _types.is_fixed_size(self._schema_view.type):
             return self._schema_view.fixed_size
         else:
             return None

     @property
     def decimal_bitwidth(self) -> Union[int, None]:
         if _types.is_decimal(self._schema_view.type):
             return self._schema_view.decimal_bitwidth
         else:
             return None

     @property
     def decimal_precision(self) -> Union[int, None]:
         if _types.is_decimal(self._schema_view.type):
             return self._schema_view.decimal_precision
         else:
             return None

     @property
     def decimal_scale(self) -> Union[int, None]:
         if _types.is_decimal(self._schema_view.type):
             return self._schema_view.decimal_scale
         else:
             return None

     @property
     def time_unit_id(self) -> Union[int, None]:
         if _types.has_time_unit(self._schema_view.type):
             return self._schema_view.time_unit
         else:
             return None

     @property
     def time_unit(self) -> Union[str, None]:
         if _types.has_time_unit(self._schema_view.type):
             return ArrowTimeUnitString(self._schema_view.time_unit).decode()
         else:
             return None

     @property
     def timezone(self) -> Union[str, None]:
         if _types.equal(self._schema_view.type, _types.TIMESTAMP):
             return self._schema_view.timezone.decode()
         else:
             return None

     @property
     def union_type_ids(self) -> Union[Tuple[int, ...], None]:
         if _types.is_union(self._schema_view.type):
             type_ids_str = self._schema_view.union_type_ids.decode().split(',')
             return (int(type_id) for type_id in type_ids_str)
         else:
             return None

     @property
     def extension_name(self) -> Union[str, None]:
         if self._schema_view.extension_name.data != NULL:
             name_bytes = PyBytes_FromStringAndSize(
                 self._schema_view.extension_name.data,
                 self._schema_view.extension_name.size_bytes
             )
             return name_bytes.decode()
         else:
             return None

     @property
     def extension_metadata(self) -> Union[bytes, None]:
         if self._schema_view.extension_name.data != NULL:
             return PyBytes_FromStringAndSize(
                 self._schema_view.extension_metadata.data,
                 self._schema_view.extension_metadata.size_bytes
             )
         else:
             return None

     def __repr__(self) -> str:
         return _repr_utils.schema_view_repr(self)


 cdef class CSchemaBuilder:
     """Helper for constructing an ArrowSchema

     The primary function of this class is to wrap the nanoarrow C library calls
     that build up the components of an ArrowSchema.
     """

     def __cinit__(self, CSchema schema):
         self.c_schema = schema
         self._ptr = schema._ptr
         if self._ptr.release == NULL:
             ArrowSchemaInit(self._ptr)

     @staticmethod
     def allocate() -> CSchemaBuilder:
         """Create a CSchemaBuilder

         Allocates memory for an ArrowSchema and populates it with nanoarrow's
         ArrowSchema private_data/release callback implementation. This should
         usually be followed by :meth:`set_type` or :meth:`set_format`.
         """
         return CSchemaBuilder(CSchema.allocate())

     def append_metadata(self, metadata: Mapping[bytes, bytes]) -> CSchemaBuilder:
         """Append key/value metadata"""
         cdef CBuffer buffer = CBuffer.empty()

         cdef const char* existing_metadata = self.c_schema._ptr.metadata
         cdef int code = ArrowMetadataBuilderInit(buffer._ptr, existing_metadata)
         Error.raise_error_not_ok("ArrowMetadataBuilderInit()", code)

         cdef ArrowStringView key
         cdef ArrowStringView value
         cdef int32_t keys_added = 0

         for k, v in metadata.items():
             k = k.encode() if isinstance(k, str) else bytes(k)
             key.data = PyBytes_AsString(k)
             key.size_bytes = PyBytes_Size(k)

             v = v.encode() if isinstance(v, str) else bytes(v)
             value.data = PyBytes_AsString(v)
             value.size_bytes = PyBytes_Size(v)

             code = ArrowMetadataBuilderAppend(buffer._ptr, key, value)
             Error.raise_error_not_ok("ArrowMetadataBuilderAppend()", code)

             keys_added += 1

         if keys_added > 0:
             code = ArrowSchemaSetMetadata(self.c_schema._ptr, <const char*>buffer._ptr.data)
             Error.raise_error_not_ok("ArrowSchemaSetMetadata()", code)

         return self

     def child(self, int64_t i) -> CSchemaBuilder:
         return CSchemaBuilder(self.c_schema.child(i))

     def set_type(self, int type_id) -> CSchemaBuilder:
         self.c_schema._assert_valid()

         cdef int code = ArrowSchemaSetType(self._ptr, <ArrowType>type_id)
         Error.raise_error_not_ok("ArrowSchemaSetType()", code)

         return self

     def set_type_decimal(self, int type_id, int precision, int scale) -> CSchemaBuilder:
         self.c_schema._assert_valid()

         cdef int code = ArrowSchemaSetTypeDecimal(self._ptr, <ArrowType>type_id, precision, scale)
         Error.raise_error_not_ok("ArrowSchemaSetType()", code)

     def set_type_fixed_size(self, int type_id, int fixed_size) -> CSchemaBuilder:
         self.c_schema._assert_valid()

         cdef int code = ArrowSchemaSetTypeFixedSize(self._ptr, <ArrowType>type_id, fixed_size)
         Error.raise_error_not_ok("ArrowSchemaSetTypeFixedSize()", code)

         return self

     def set_type_date_time(self, int type_id, int time_unit, timezone) -> CSchemaBuilder:
         self.c_schema._assert_valid()

         cdef int code
         if timezone is None:
             code = ArrowSchemaSetTypeDateTime(self._ptr, <ArrowType>type_id, <ArrowTimeUnit>time_unit, NULL)
         else:
             timezone = str(timezone)
             code = ArrowSchemaSetTypeDateTime(self._ptr, <ArrowType>type_id, <ArrowTimeUnit>time_unit, timezone.encode("UTF-8"))

         Error.raise_error_not_ok("ArrowSchemaSetTypeDateTime()", code)

         return self

     def set_format(self, str format) -> CSchemaBuilder:
         self.c_schema._assert_valid()

         cdef int code = ArrowSchemaSetFormat(self._ptr, format.encode("UTF-8"))
         Error.raise_error_not_ok("ArrowSchemaSetFormat()", code)

         return self

     def set_name(self, name) -> CSchemaBuilder:
         self.c_schema._assert_valid()

         cdef int code
         if name is None:
             code = ArrowSchemaSetName(self._ptr, NULL)
         else:
             name = str(name)
             code = ArrowSchemaSetName(self._ptr, name.encode("UTF-8"))

         Error.raise_error_not_ok("ArrowSchemaSetName()", code)

         return self

     def allocate_children(self, int n) -> CSchemaBuilder:
         self.c_schema._assert_valid()

         cdef int code = ArrowSchemaAllocateChildren(self._ptr, n)
         Error.raise_error_not_ok("ArrowSchemaAllocateChildren()", code)

         return self

     def set_child(self, int64_t i, name, CSchema child_src) -> CSchemaBuilder:
         self.c_schema._assert_valid()

         if i < 0 or i >= self._ptr.n_children:
             raise IndexError(f"Index out of range: {i}")

         if self._ptr.children[i].release != NULL:
             ArrowSchemaRelease(self._ptr.children[i])

         cdef int code = ArrowSchemaDeepCopy(child_src._ptr, self._ptr.children[i])
         Error.raise_error_not_ok("ArrowSchemaDeepCopy()", code)

         if name is not None:
             name = str(name)
             code = ArrowSchemaSetName(self._ptr.children[i], name.encode("UTF-8"))
             Error.raise_error_not_ok("ArrowSchemaSetName()", code)

         return self

     def set_dictionary(self, CSchema dictionary) -> CSchemaBuilder:
         self.c_schema._assert_valid()

         cdef int code
         if self._ptr.dictionary == NULL:
             code = ArrowSchemaAllocateDictionary(self._ptr)
             Error.raise_error_not_ok("ArrowSchemaAllocateDictionary()", code)

         if self._ptr.dictionary.release != NULL:
             ArrowSchemaRelease(self._ptr.dictionary)

         code = ArrowSchemaDeepCopy(dictionary._ptr, self._ptr.dictionary)
         Error.raise_error_not_ok("ArrowSchemaDeepCopy()", code)

         return self

     def set_flags(self, flags) -> CSchemaBuilder:
         self._ptr.flags = flags
         return self

     def set_nullable(self, nullable) -> CSchemaBuilder:
         if nullable:
             self._ptr.flags = self._ptr.flags | ARROW_FLAG_NULLABLE
         else:
             self._ptr.flags = self._ptr.flags & ~ARROW_FLAG_NULLABLE

         return self

     def set_dictionary_ordered(self, dictionary_ordered) -> CSchemaBuilder:
         if dictionary_ordered:
             self._ptr.flags = self._ptr.flags | ARROW_FLAG_DICTIONARY_ORDERED
         else:
             self._ptr.flags = self._ptr.flags & ~ARROW_FLAG_DICTIONARY_ORDERED

         return self

     def set_map_keys_sorted(self, map_keys_sorted) -> CSchemaBuilder:
         if map_keys_sorted:
             self._ptr.flags = self._ptr.flags | ARROW_FLAG_MAP_KEYS_SORTED
         else:
             self._ptr.flags = self._ptr.flags & ~ARROW_FLAG_MAP_KEYS_SORTED

         return self

     def validate(self) -> CSchemaView:
         return CSchemaView(self.c_schema)

     def finish(self) -> CSchema:
         self.c_schema._assert_valid()
         cdef CSchema out = CSchema.allocate()
         ArrowSchemaMove(self.c_schema._ptr, out._ptr)
         ArrowSchemaInit(self.c_schema._ptr)
         return out