| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import atexit |
| from collections.abc import Mapping |
| import re |
| import sys |
| import warnings |
| |
| |
# Arrow type id -> NumPy dtype, as looked up by DataType.to_pandas_dtype().
# The mapping is only approximate: in pandas 0.x the dtype actually used
# also depends on whether the data contains nulls.
cdef dict _pandas_type_map = {
    # Null type
    _Type_NA: np.object_,  # NaNs

    # Boolean and integers
    _Type_BOOL: np.bool_,
    _Type_INT8: np.int8,
    _Type_INT16: np.int16,
    _Type_INT32: np.int32,
    _Type_INT64: np.int64,
    _Type_UINT8: np.uint8,
    _Type_UINT16: np.uint16,
    _Type_UINT32: np.uint32,
    _Type_UINT64: np.uint64,

    # Floating point
    _Type_HALF_FLOAT: np.float16,
    _Type_FLOAT: np.float32,
    _Type_DOUBLE: np.float64,

    # Temporal types all map to nanosecond resolution
    _Type_DATE32: np.dtype('datetime64[ns]'),
    _Type_DATE64: np.dtype('datetime64[ns]'),
    _Type_TIMESTAMP: np.dtype('datetime64[ns]'),
    _Type_DURATION: np.dtype('timedelta64[ns]'),

    # Everything else is represented as Python objects
    _Type_BINARY: np.object_,
    _Type_FIXED_SIZE_BINARY: np.object_,
    _Type_STRING: np.object_,
    _Type_LIST: np.object_,
    _Type_DECIMAL: np.object_,
}
| |
# Arrow type id -> single-character PEP 3118 format code (as bytes).
# Consumed by _datatype_to_pep3118(); types missing here have no format.
cdef dict _pep3118_type_map = {
    # Signed integers
    _Type_INT8: b'b',
    _Type_INT16: b'h',
    _Type_INT32: b'i',
    _Type_INT64: b'q',

    # Unsigned integers
    _Type_UINT8: b'B',
    _Type_UINT16: b'H',
    _Type_UINT32: b'I',
    _Type_UINT64: b'Q',

    # Floating point
    _Type_HALF_FLOAT: b'e',
    _Type_FLOAT: b'f',
    _Type_DOUBLE: b'd',
}
| |
| |
cdef bytes _datatype_to_pep3118(CDataType* type):
    """
    Build the PEP 3118 format string for the given datatype.

    The type id is looked up in ``_pep3118_type_map``; ``None`` is
    returned when the type has no entry there.
    """
    fmt = _pep3118_type_map.get(type.id())
    if fmt is None:
        # Unsupported type
        return None
    if fmt in b'bBhHiIqQ':
        # Use "standard" int widths, not native
        return b'=' + fmt
    return fmt
| |
| |
def _is_primitive(Type type):
    """
    Thin forwarder to ``is_primitive``.

    The official API for this check lives in pyarrow.types.
    """
    return is_primitive(type)
| |
| |
# Pointer alias used with dynamic_cast[...] in DataType.bit_width.
# It works around a Cython parsing bug, see
# https://github.com/cython/cython/issues/2143
ctypedef CFixedWidthType* _CFixedWidthTypePtr
| |
| |
cdef class DataType:
    """
    Base class of all Arrow data types.

    Each data type is an *instance* of this class.
    """

    def __cinit__(self):
        # The wrapped C++ type is attached later, through init().
        pass

    def __init__(self):
        # Direct construction from Python is always refused.
        message = ("Do not call {}'s constructor directly, use public "
                   "functions like pyarrow.int64, pyarrow.list_, etc. "
                   "instead.")
        raise TypeError(message.format(self.__class__.__name__))

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Store the owning shared pointer and a raw pointer to the same
        # object, then precompute the PEP 3118 format (None if unsupported).
        assert type != nullptr
        self.sp_type = type
        self.type = type.get()
        self.pep3118_format = _datatype_to_pep3118(self.type)

    cdef Field field(self, int i):
        # Return child field number ``i``; the index first goes through
        # _normalize_index together with the number of fields.
        cdef int position = <int> _normalize_index(
            i, self.type.num_fields())
        return pyarrow_wrap_field(self.type.field(position))

    @property
    def id(self):
        """
        The type id reported by the wrapped C++ type.
        """
        return self.type.id()

    @property
    def bit_width(self):
        """
        The bit width of a fixed-width type.

        Raises ValueError if the type is not a fixed-width type.
        """
        cdef _CFixedWidthTypePtr fixed_width = (
            dynamic_cast[_CFixedWidthTypePtr](self.type))
        if fixed_width == nullptr:
            raise ValueError("Non-fixed width type")
        return fixed_width.bit_width()

    @property
    def num_children(self):
        """
        The number of child fields (deprecated alias of ``num_fields``).
        """
        warnings.warn("num_children is deprecated, use num_fields",
                      FutureWarning)
        return self.num_fields

    @property
    def num_fields(self):
        """
        The number of child fields.
        """
        return self.type.num_fields()

    @property
    def num_buffers(self):
        """
        Number of data buffers required to construct Array type
        excluding children.
        """
        return self.type.layout().buffers.size()

    def __str__(self):
        return frombytes(self.type.ToString())

    def __hash__(self):
        # Hash of the string form of the type.
        return hash(str(self))

    def __reduce__(self):
        # Pickle as a call to type_for_alias with the string form.
        return type_for_alias, (str(self),)

    def __repr__(self):
        return '{0.__class__.__name__}({0})'.format(self)

    def __eq__(self, other):
        # Operands that equals() rejects are left to the other side.
        try:
            return self.equals(other)
        except (TypeError, ValueError):
            return NotImplemented

    def equals(self, other):
        """
        Return true if type is equivalent to passed value.

        Parameters
        ----------
        other : DataType or string convertible to DataType

        Returns
        -------
        is_equal : bool
        """
        cdef DataType other_type = ensure_type(other)
        return self.type.Equals(deref(other_type.type))

    def to_pandas_dtype(self):
        """
        Return the equivalent NumPy / Pandas dtype.

        Raises NotImplementedError when the type id has no entry in
        ``_pandas_type_map``.
        """
        cdef Type type_id = self.type.id()
        if type_id not in _pandas_type_map:
            raise NotImplementedError(str(self))
        return _pandas_type_map[type_id]

    def _export_to_c(self, uintptr_t out_ptr):
        """
        Export to a C ArrowSchema struct, given its pointer.

        Be careful: if you don't pass the ArrowSchema struct to a consumer,
        its memory will leak.  This is a low-level function intended for
        expert users.
        """
        check_status(ExportType(deref(self.type), <ArrowSchema*> out_ptr))

    @staticmethod
    def _import_from_c(uintptr_t in_ptr):
        """
        Import DataType from a C ArrowSchema struct, given its pointer.

        This is a low-level function intended for expert users.
        """
        return pyarrow_wrap_data_type(
            GetResultValue(ImportType(<ArrowSchema*> in_ptr)))
| |
| |
cdef class DictionaryMemo:
    """
    Tracking container for dictionary-encoded fields.
    """

    def __cinit__(self):
        # Allocate a fresh C++ memo owned by the shared pointer and keep a
        # raw pointer to it alongside.
        self.sp_memo.reset(new CDictionaryMemo())
        self.memo = self.sp_memo.get()
| |
| |
cdef class DictionaryType(DataType):
    """
    Concrete class for dictionary data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.dict_type = <const CDictionaryType*> type.get()

    def __reduce__(self):
        # Pickle as a call to dictionary() with the defining properties.
        return dictionary, (self.index_type, self.value_type, self.ordered)

    @property
    def ordered(self):
        """
        Whether the dictionary is ordered, i.e. whether the ordering of
        values in the dictionary is important.
        """
        return self.dict_type.ordered()

    @property
    def index_type(self):
        """
        The data type of dictionary indices (a signed integer type).
        """
        return pyarrow_wrap_data_type(self.dict_type.index_type())

    @property
    def value_type(self):
        """
        The dictionary value type.

        The dictionary values are found in an instance of DictionaryArray.
        """
        return pyarrow_wrap_data_type(self.dict_type.value_type())
| |
| |
cdef class ListType(DataType):
    """
    Concrete class for list data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.list_type = <const CListType*> type.get()

    def __reduce__(self):
        # Pickle as a call to list_() with the value type.
        return list_, (self.value_type,)

    @property
    def value_field(self):
        """
        The field describing the list values.
        """
        return pyarrow_wrap_field(self.list_type.value_field())

    @property
    def value_type(self):
        """
        The data type of list values.
        """
        return pyarrow_wrap_data_type(self.list_type.value_type())
| |
| |
cdef class LargeListType(DataType):
    """
    Concrete class for large list data types
    (like ListType, but with 64-bit offsets).
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.list_type = <const CLargeListType*> type.get()

    def __reduce__(self):
        # Pickle as a call to large_list() with the value type.
        return large_list, (self.value_type,)

    @property
    def value_type(self):
        """
        The data type of large list values.
        """
        return pyarrow_wrap_data_type(self.list_type.value_type())
| |
| |
cdef class MapType(DataType):
    """
    Concrete class for map data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.map_type = <const CMapType*> type.get()

    def __reduce__(self):
        # Pickle as a call to map_() with the key and item types.
        return map_, (self.key_type, self.item_type)

    @property
    def key_type(self):
        """
        The data type of keys in the map entries.
        """
        return pyarrow_wrap_data_type(self.map_type.key_type())

    @property
    def item_type(self):
        """
        The data type of items in the map entries.
        """
        return pyarrow_wrap_data_type(self.map_type.item_type())
| |
| |
cdef class FixedSizeListType(DataType):
    """
    Concrete class for fixed size list data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.list_type = <const CFixedSizeListType*> type.get()

    def __reduce__(self):
        # Pickle as a call to list_() with the value type and list size.
        return list_, (self.value_type, self.list_size)

    @property
    def value_type(self):
        """
        The data type of fixed size list values.
        """
        return pyarrow_wrap_data_type(self.list_type.value_type())

    @property
    def list_size(self):
        """
        The size of the fixed size lists.
        """
        return self.list_type.list_size()
| |
| |
cdef class StructType(DataType):
    """
    Concrete class for struct data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.struct_type = <const CStructType*> type.get()

    cdef Field field_by_name(self, name):
        """
        Return a child field by its name rather than its index.

        Raises KeyError when no field has that name, and also (after a
        UserWarning) when more than one field has it.
        """
        cdef vector[shared_ptr[CField]] matches

        matches = self.struct_type.GetAllFieldsByName(tobytes(name))
        if matches.size() == 0:
            raise KeyError(name)
        if matches.size() > 1:
            warnings.warn("Struct field name corresponds to more "
                          "than one field", UserWarning)
            raise KeyError(name)
        return pyarrow_wrap_field(matches[0])

    def get_field_index(self, name):
        """
        Return index of field with given unique name. Returns -1 if not
        found or if duplicated
        """
        return self.struct_type.GetFieldIndex(tobytes(name))

    def get_all_field_indices(self, name):
        """
        Return sorted list of indices for fields with the given name
        """
        return self.struct_type.GetAllFieldIndices(tobytes(name))

    def __len__(self):
        """
        Like num_fields().
        """
        return self.type.num_fields()

    def __iter__(self):
        """
        Iterate over struct fields, in order.
        """
        for position in range(len(self)):
            yield self[position]

    def __getitem__(self, i):
        """
        Return the struct field with the given index or name.
        """
        if isinstance(i, (bytes, str)):
            return self.field_by_name(i)
        if isinstance(i, int):
            return self.field(i)
        raise TypeError('Expected integer or string index')

    def __reduce__(self):
        # Pickle as a call to struct() with the list of child fields.
        return struct, (list(self),)
| |
| |
cdef class UnionType(DataType):
    """
    Concrete class for union data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        DataType.init(self, type)

    @property
    def mode(self):
        """
        The mode of the union ("dense" or "sparse").

        Raises AssertionError if the underlying type reports any other mode.
        """
        cdef CUnionType* type = <CUnionType*> self.sp_type.get()
        cdef int mode = type.mode()
        if mode == _UnionMode_DENSE:
            return 'dense'
        if mode == _UnionMode_SPARSE:
            return 'sparse'
        # Raise explicitly rather than ``assert 0`` so that this cannot
        # silently return None when assertions are compiled out.
        raise AssertionError("Unexpected union mode: {}".format(mode))

    @property
    def type_codes(self):
        """
        The type code to indicate each data type in this union.
        """
        cdef CUnionType* type = <CUnionType*> self.sp_type.get()
        return type.type_codes()

    def __len__(self):
        """
        Like num_fields().
        """
        return self.type.num_fields()

    def __iter__(self):
        """
        Iterate over union members, in order.
        """
        for i in range(len(self)):
            yield self[i]

    def __getitem__(self, i):
        """
        Return a child field by its index.
        """
        return self.field(i)

    def __reduce__(self):
        # Pickle as a call to union() with the fields, mode and type codes.
        return union, (list(self), self.mode, self.type_codes)
| |
| |
cdef class TimestampType(DataType):
    """
    Concrete class for timestamp data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.ts_type = <const CTimestampType*> type.get()

    @property
    def unit(self):
        """
        The timestamp unit ('s', 'ms', 'us' or 'ns').
        """
        return timeunit_to_string(self.ts_type.unit())

    @property
    def tz(self):
        """
        The timestamp time zone, if any, or None.
        """
        # An empty timezone string means "no time zone".
        if self.ts_type.timezone().size() == 0:
            return None
        return frombytes(self.ts_type.timezone())

    def to_pandas_dtype(self):
        """
        Return the equivalent NumPy / Pandas dtype.

        Without a time zone this is the ``_pandas_type_map`` entry for
        timestamps; with one, the result of ``make_datetimetz``.
        """
        timezone = self.tz
        if timezone is None:
            return _pandas_type_map[_Type_TIMESTAMP]

        # Return DatetimeTZ
        from pyarrow.pandas_compat import make_datetimetz
        return make_datetimetz(timezone)

    def __reduce__(self):
        # Pickle as a call to timestamp() with the unit and time zone.
        return timestamp, (self.unit, self.tz)
| |
| |
cdef class Time32Type(DataType):
    """
    Concrete class for time32 data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.time_type = <const CTime32Type*> type.get()

    @property
    def unit(self):
        """
        The time unit ('s' or 'ms').
        """
        return timeunit_to_string(self.time_type.unit())
| |
| |
cdef class Time64Type(DataType):
    """
    Concrete class for time64 data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.time_type = <const CTime64Type*> type.get()

    @property
    def unit(self):
        """
        The time unit ('us' or 'ns').
        """
        return timeunit_to_string(self.time_type.unit())
| |
| |
cdef class DurationType(DataType):
    """
    Concrete class for duration data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.duration_type = <const CDurationType*> type.get()

    @property
    def unit(self):
        """
        The duration unit ('s', 'ms', 'us' or 'ns').
        """
        return timeunit_to_string(self.duration_type.unit())
| |
| |
cdef class FixedSizeBinaryType(DataType):
    """
    Concrete class for fixed-size binary data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.fixed_size_binary_type = (
            <const CFixedSizeBinaryType*> type.get())

    def __reduce__(self):
        # Pickle as a call to binary() with the byte width.
        return binary, (self.byte_width,)

    @property
    def byte_width(self):
        """
        The binary size in bytes.
        """
        return self.fixed_size_binary_type.byte_width()
| |
| |
cdef class Decimal128Type(FixedSizeBinaryType):
    """
    Concrete class for decimal128 data types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Parent initialisation, plus a typed view of the same C++ object.
        FixedSizeBinaryType.init(self, type)
        self.decimal128_type = <const CDecimal128Type*> type.get()

    def __reduce__(self):
        # Pickle as a call to decimal128() with precision and scale.
        return decimal128, (self.precision, self.scale)

    @property
    def precision(self):
        """
        The decimal precision, in number of decimal digits (an integer).
        """
        return self.decimal128_type.precision()

    @property
    def scale(self):
        """
        The decimal scale (an integer).
        """
        return self.decimal128_type.scale()
| |
| |
cdef class BaseExtensionType(DataType):
    """
    Concrete base class for extension types.
    """

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        # Base initialisation, plus a typed view of the same C++ object.
        DataType.init(self, type)
        self.ext_type = <const CExtensionType*> type.get()

    @property
    def extension_name(self):
        """
        The extension type name.
        """
        return frombytes(self.ext_type.extension_name())

    @property
    def storage_type(self):
        """
        The underlying storage type.
        """
        return pyarrow_wrap_data_type(self.ext_type.storage_type())
| |
| |
cdef class ExtensionType(BaseExtensionType):
    """
    Concrete base class for Python-defined extension types.

    Subclasses must override ``__arrow_ext_serialize__`` and
    ``__arrow_ext_deserialize__``; the implementations here raise
    NotImplementedError.
    """

    def __cinit__(self):
        # Only subclasses may be instantiated.
        if type(self) is ExtensionType:
            raise TypeError("Can only instantiate subclasses of "
                            "ExtensionType")

    def __init__(self, DataType storage_type, extension_name):
        """
        Initialize an extension type instance.

        This should be called at the end of the subclass'
        ``__init__`` method.

        Parameters
        ----------
        storage_type : DataType
        extension_name : str
        """
        cdef:
            shared_ptr[CExtensionType] cpy_ext_type
            c_string c_extension_name

        c_extension_name = tobytes(extension_name)

        assert storage_type is not None
        check_status(CPyExtensionType.FromClass(
            storage_type.sp_type, c_extension_name, type(self),
            &cpy_ext_type))
        self.init(<shared_ptr[CDataType]> cpy_ext_type)

    cdef void init(self, const shared_ptr[CDataType]& type) except *:
        BaseExtensionType.init(self, type)
        self.cpy_ext_type = <const CPyExtensionType*> type.get()
        # Store weakref and serialized version of self on C++ type instance
        check_status(self.cpy_ext_type.SetInstance(self))

    def __eq__(self, other):
        # Default implementation to avoid infinite recursion through
        # DataType.__eq__ -> ExtensionType::ExtensionEquals -> DataType.__eq__
        if isinstance(other, ExtensionType):
            return (type(self) == type(other) and
                    self.extension_name == other.extension_name and
                    self.storage_type == other.storage_type)
        else:
            return NotImplemented

    def __arrow_ext_serialize__(self):
        """
        Serialized representation of metadata to reconstruct the type object.

        This method should return a bytes object, and those serialized bytes
        are stored in the custom metadata of the Field holding an extension
        type in an IPC message.
        The bytes are passed to ``__arrow_ext_deserialize__`` and should hold
        sufficient information to reconstruct the data type instance.

        Raises
        ------
        NotImplementedError
            Always, unless overridden by the subclass.
        """
        # Raise (rather than return the exception class) so that a missing
        # override is reported as such.
        raise NotImplementedError(
            "{0} must implement __arrow_ext_serialize__"
            .format(type(self).__name__))

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        """
        Return an extension type instance from the storage type and serialized
        metadata.

        This method should return an instance of the ExtensionType subclass
        that matches the passed storage type and serialized metadata (the
        return value of ``__arrow_ext_serialize__``).

        Raises
        ------
        NotImplementedError
            Always, unless overridden by the subclass.
        """
        raise NotImplementedError(
            "{0} must implement __arrow_ext_deserialize__"
            .format(cls.__name__))

    def __arrow_ext_class__(self):
        """Return an extension array class to be used for building or
        deserializing arrays with this extension type.

        This method should return a subclass of the ExtensionArray class. By
        default, if not specialized in the extension implementation, an
        extension type array will be a built-in ExtensionArray instance.
        """
        return ExtensionArray
| |
| |
cdef class PyExtensionType(ExtensionType):
    """
    Concrete base class for Python-defined extension types based on pickle
    for (de)serialization.
    """

    def __cinit__(self):
        # Only subclasses may be instantiated.
        if type(self) is PyExtensionType:
            raise TypeError("Can only instantiate subclasses of "
                            "PyExtensionType")

    def __init__(self, DataType storage_type):
        # Every pickle-based type shares one fixed extension name.
        ExtensionType.__init__(self, storage_type, "arrow.py_extension_type")

    def __reduce__(self):
        # Subclasses have to provide their own pickling support.
        message = "Please implement {0}.__reduce__"
        raise NotImplementedError(message.format(type(self).__name__))

    def __arrow_ext_serialize__(self):
        # The serialized form is simply the pickle of the instance.
        return builtin_pickle.dumps(self)

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        # NOTE(review): this unpickles the bytes it is handed; unpickling
        # data from an untrusted source can execute arbitrary code.
        try:
            restored = builtin_pickle.loads(serialized)
        except Exception:
            # For some reason, it's impossible to deserialize the
            # ExtensionType instance.  Perhaps the serialized data is
            # corrupt, or more likely the type is being deserialized
            # in an environment where the original Python class or module
            # is not available.  Fall back on a generic BaseExtensionType.
            return UnknownExtensionType(storage_type, serialized)

        # The unpickled type must agree with the storage type we were given.
        if restored.storage_type != storage_type:
            message = "Expected storage type {0} but got {1}"
            raise TypeError(
                message.format(restored.storage_type, storage_type))
        return restored
| |
| |
cdef class UnknownExtensionType(PyExtensionType):
    """
    A concrete class for Python-defined extension types that refer to
    an unknown Python implementation.
    """

    cdef:
        # Original serialized payload, handed back unchanged on serialize.
        bytes serialized

    def __init__(self, DataType storage_type, serialized):
        # The payload must be stored before the parent initialiser runs.
        self.serialized = serialized
        PyExtensionType.__init__(self, storage_type)

    def __arrow_ext_serialize__(self):
        return self.serialized
| |
| |
# Extension type instances registered from Python, in registration order.
_python_extension_types_registry = []


def register_extension_type(ext_type):
    """
    Register a Python extension type.

    Registration is keyed on the extension name, so distinct registered
    types need distinct extension names.  An instance is required to
    register, but the registration then applies to every instance of the
    same subclass, whatever its parametrization.

    Parameters
    ----------
    ext_type : BaseExtensionType instance
        The ExtensionType subclass to register.

    """
    cdef:
        DataType ext = ensure_type(ext_type, allow_none=False)

    if not isinstance(ext, BaseExtensionType):
        raise TypeError("Only extension types can be registered")

    # C++ side registration
    check_status(
        RegisterPyExtensionType(<shared_ptr[CDataType]> ext.sp_type))

    # Python side registration
    _python_extension_types_registry.append(ext)
| |
| |
def unregister_extension_type(type_name):
    """
    Unregister a Python extension type.

    Parameters
    ----------
    type_name : str
        The name of the ExtensionType subclass to unregister.

    """
    # Convert the name to a C++ string before handing it to the C++ side.
    cdef c_string c_type_name = tobytes(type_name)
    check_status(UnregisterPyExtensionType(c_type_name))
| |
| |
cdef class KeyValueMetadata(_Metadata, Mapping):
    """
    Read-only mapping of metadata keys to values, wrapping a C++
    ``CKeyValueMetadata``.

    Keys and values are converted with ``tobytes`` on construction.
    Duplicate keys are allowed when the items are given as an iterable of
    pairs; see ``get_all`` and ``to_dict``.
    """

    def __init__(self, __arg0__=None, **kwargs):
        """
        Parameters
        ----------
        __arg0__ : Mapping or iterable of (key, value) pairs, optional
        **kwargs
            Additional items.  A keyword must not repeat a key already
            given through ``__arg0__``.

        Raises
        ------
        KeyError
            If a keyword repeats a key given through ``__arg0__``.
        """
        cdef:
            vector[c_string] keys, values
            shared_ptr[const CKeyValueMetadata] result

        items = []
        if __arg0__ is not None:
            other = (__arg0__.items() if isinstance(__arg0__, Mapping)
                     else __arg0__)
            items.extend((tobytes(k), v) for k, v in other)

        prior_keys = {k for k, v in items}
        for k, v in kwargs.items():
            k = tobytes(k)
            if k in prior_keys:
                raise KeyError("Duplicate key {}, "
                               "pass all items as a list of tuples if you "
                               "intend to have duplicate keys".format(k))
            items.append((k, v))

        # Both vectors receive exactly one entry per item.
        keys.reserve(len(items))
        values.reserve(len(items))
        for key, value in items:
            keys.push_back(tobytes(key))
            values.push_back(tobytes(value))
        result.reset(new CKeyValueMetadata(move(keys), move(values)))
        self.init(result)

    cdef void init(self, const shared_ptr[const CKeyValueMetadata]& wrapped):
        # Keep the owning shared pointer and a raw pointer to the same
        # object.
        self.wrapped = wrapped
        self.metadata = wrapped.get()

    @staticmethod
    cdef wrap(const shared_ptr[const CKeyValueMetadata]& sp):
        # Build an instance around an existing C++ object, bypassing
        # __init__.
        cdef KeyValueMetadata self = KeyValueMetadata.__new__(KeyValueMetadata)
        self.init(sp)
        return self

    cdef inline shared_ptr[const CKeyValueMetadata] unwrap(self) nogil:
        return self.wrapped

    def equals(self, KeyValueMetadata other not None):
        """
        Test whether this metadata equals another KeyValueMetadata.

        Passing None (or any other type) raises TypeError, which ``__eq__``
        turns into NotImplemented.
        """
        return self.metadata.Equals(deref(other.wrapped))

    def __repr__(self):
        return str(self)

    def __str__(self):
        return frombytes(self.metadata.ToString())

    def __eq__(self, other):
        try:
            return self.equals(other)
        except TypeError:
            pass

        # Other mappings are compared after conversion to KeyValueMetadata.
        if isinstance(other, Mapping):
            try:
                other = KeyValueMetadata(other)
                return self.equals(other)
            except TypeError:
                pass

        return NotImplemented

    def __len__(self):
        return self.metadata.size()

    def __contains__(self, key):
        return self.metadata.Contains(tobytes(key))

    def __getitem__(self, key):
        return GetResultValue(self.metadata.Get(tobytes(key)))

    def __iter__(self):
        return self.keys()

    def __reduce__(self):
        # Pickle as a list of pairs so that duplicate keys survive.
        return KeyValueMetadata, (list(self.items()),)

    def key(self, i):
        """
        Return the key at position ``i``.
        """
        return self.metadata.key(i)

    def value(self, i):
        """
        Return the value at position ``i``.
        """
        return self.metadata.value(i)

    def keys(self):
        """
        Iterate over the keys, in stored order (duplicates included).
        """
        for i in range(self.metadata.size()):
            yield self.metadata.key(i)

    def values(self):
        """
        Iterate over the values, in stored order.
        """
        for i in range(self.metadata.size()):
            yield self.metadata.value(i)

    def items(self):
        """
        Iterate over (key, value) pairs, in stored order.
        """
        for i in range(self.metadata.size()):
            yield (self.metadata.key(i), self.metadata.value(i))

    def get_all(self, key):
        """
        Return the list of all values stored under ``key``.
        """
        key = tobytes(key)
        return [v for k, v in self.items() if k == key]

    def to_dict(self):
        """
        Convert KeyValueMetadata to dict. If a key occurs twice, the value for
        the first one is returned
        """
        cdef object key  # to force coercion to Python
        result = ordered_dict()
        for i in range(self.metadata.size()):
            key = self.metadata.key(i)
            if key not in result:
                result[key] = self.metadata.value(i)
        return result
| |
| |
cdef KeyValueMetadata ensure_metadata(object meta, c_bool allow_none=False):
    # Coerce ``meta`` to a KeyValueMetadata.  None is passed through only
    # when ``allow_none`` is set; an existing KeyValueMetadata is returned
    # as is; anything else goes through the KeyValueMetadata constructor.
    if meta is None and allow_none:
        return None
    if isinstance(meta, KeyValueMetadata):
        return meta
    return KeyValueMetadata(meta)
| |
| |
cdef class Field:
    """
    A named field, with a data type, nullability, and optional metadata.

    Notes
    -----
    Do not use this class's constructor directly; use pyarrow.field
    """

    def __cinit__(self):
        pass

    def __init__(self):
        raise TypeError("Do not call Field's constructor directly, use "
                        "`pyarrow.field` instead.")

    cdef void init(self, const shared_ptr[CField]& field):
        # Keep the owning shared pointer, a raw pointer to the same object,
        # and a Python wrapper for the field's data type.
        self.sp_field = field
        self.field = field.get()
        self.type = pyarrow_wrap_data_type(field.get().type())

    def equals(self, Field other not None, bint check_metadata=False):
        """
        Test if this field is equal to the other

        Parameters
        ----------
        other : pyarrow.Field
        check_metadata : bool, default False
            Whether Field metadata equality should be checked as well.

        Returns
        -------
        is_equal : bool

        Raises
        ------
        TypeError
            If ``other`` is None or not a Field.
        """
        return self.field.Equals(deref(other.field), check_metadata)

    def __eq__(self, other):
        # Operands that equals() rejects (including None) are left to the
        # other side.
        try:
            return self.equals(other)
        except TypeError:
            return NotImplemented

    def __reduce__(self):
        # Pickle as a call to field() with the defining properties.
        return field, (self.name, self.type, self.nullable, self.metadata)

    def __str__(self):
        return 'pyarrow.Field<{0}>'.format(frombytes(self.field.ToString()))

    def __repr__(self):
        return self.__str__()

    def __hash__(self):
        # Metadata is left out, matching the default behaviour of equals().
        return hash((self.field.name(), self.type, self.field.nullable()))

    @property
    def nullable(self):
        """
        Whether the field is nullable.
        """
        return self.field.nullable()

    @property
    def name(self):
        """
        The field name, as a string.
        """
        return frombytes(self.field.name())

    @property
    def metadata(self):
        """
        The field metadata converted to a dict, or None if there is none.
        """
        wrapped = pyarrow_wrap_metadata(self.field.metadata())
        if wrapped is None:
            return None
        return wrapped.to_dict()

    def add_metadata(self, metadata):
        """
        Deprecated alias of ``with_metadata``; emits a FutureWarning.
        """
        warnings.warn("The 'add_metadata' method is deprecated, use "
                      "'with_metadata' instead", FutureWarning, stacklevel=2)
        return self.with_metadata(metadata)

    def with_metadata(self, metadata):
        """
        Add metadata as dict of string keys and values to Field

        Parameters
        ----------
        metadata : dict
            Keys and values must be string-like / coercible to bytes

        Returns
        -------
        field : pyarrow.Field
        """
        cdef shared_ptr[CField] c_field

        meta = ensure_metadata(metadata, allow_none=False)
        with nogil:
            c_field = self.field.WithMetadata(meta.unwrap())

        return pyarrow_wrap_field(c_field)

    def remove_metadata(self):
        """
        Create new field without metadata, if any

        Returns
        -------
        field : pyarrow.Field
        """
        cdef shared_ptr[CField] new_field
        with nogil:
            new_field = self.field.RemoveMetadata()
        return pyarrow_wrap_field(new_field)

    def with_type(self, DataType new_type):
        """
        A copy of this field with the replaced type

        Parameters
        ----------
        new_type : pyarrow.DataType

        Returns
        -------
        field : pyarrow.Field
        """
        cdef:
            shared_ptr[CField] c_field
            shared_ptr[CDataType] c_datatype

        c_datatype = pyarrow_unwrap_data_type(new_type)
        with nogil:
            c_field = self.field.WithType(c_datatype)

        return pyarrow_wrap_field(c_field)

    def with_name(self, name):
        """
        A copy of this field with the replaced name

        Parameters
        ----------
        name : str

        Returns
        -------
        field : pyarrow.Field
        """
        cdef:
            shared_ptr[CField] c_field

        c_field = self.field.WithName(tobytes(name))

        return pyarrow_wrap_field(c_field)

    def with_nullable(self, nullable):
        """
        A copy of this field with the replaced nullability

        Parameters
        ----------
        nullable : bool

        Returns
        -------
        field: pyarrow.Field
        """
        cdef:
            # Declared explicitly: this is assigned inside the nogil block.
            shared_ptr[CField] c_field
            c_bool c_nullable

        c_nullable = bool(nullable)
        with nogil:
            c_field = self.field.WithNullable(c_nullable)

        return pyarrow_wrap_field(c_field)

    def flatten(self):
        """
        Flatten this field.  If a struct field, individual child fields
        will be returned with their names prefixed by the parent's name.

        Returns
        -------
        fields : List[pyarrow.Field]
        """
        cdef vector[shared_ptr[CField]] flattened
        with nogil:
            flattened = self.field.Flatten()
        return [pyarrow_wrap_field(f) for f in flattened]
| |
| |
| cdef class Schema: |
| |
def __cinit__(self):
    # Nothing to set up here; init() / init_schema() attach the C++ schema.
    pass
| |
def __init__(self):
    # Direct construction from Python is always refused.
    message = ("Do not call Schema's constructor directly, use "
               "`pyarrow.schema` instead.")
    raise TypeError(message)
| |
def __len__(self):
    # The length of a schema is its number of fields.
    return self.schema.num_fields()
| |
def __getitem__(self, key):
    # Indexing (by integer index) is delegated to _field().
    return self._field(key)
| |
def __iter__(self):
    # Yield the fields one by one, in index order.
    for position in range(len(self)):
        yield self[position]
| |
cdef void init(self, const vector[shared_ptr[CField]]& fields):
    # Allocate a new C++ schema from the fields and hand ownership of it
    # to the shared pointer.
    self.schema = new CSchema(fields)
    self.sp_schema.reset(self.schema)
| |
cdef void init_schema(self, const shared_ptr[CSchema]& schema):
    # Wrap an existing C++ schema: keep a raw pointer and share ownership.
    self.schema = schema.get()
    self.sp_schema = schema
| |
def __reduce__(self):
    # Pickle as a call to schema() with the fields and the metadata.
    fields = list(self)
    return schema, (fields, self.metadata)
| |
def __hash__(self):
    # Hash the fields only.  ``self.metadata`` is a dict whenever metadata
    # is present, and a dict is unhashable, so including it made hashing
    # fail for such schemas.  Leaving it out also keeps the hash
    # consistent with ``__eq__``, which ignores metadata by default.
    return hash(tuple(self))
| |
def __sizeof__(self):
    # Base object size plus the sizes of all metadata keys and values.
    metadata = self.metadata
    extra = 0
    if metadata:
        extra = sum(sys.getsizeof(key) + sys.getsizeof(value)
                    for key, value in metadata.items())

    return extra + super(Schema, self).__sizeof__()
| |
@property
def pandas_metadata(self):
    """
    The pandas metadata decoded from JSON, or None if the schema has no
    ``b'pandas'`` metadata entry.
    """
    metadata = self.metadata
    if metadata is None:
        return None
    if b'pandas' not in metadata:
        return None

    import json
    raw = metadata[b'pandas']
    return json.loads(raw.decode('utf8'))
| |
@property
def names(self):
    """
    The schema's field names.

    Returns
    -------
    list of str
    """
    cdef int position
    collected = []
    for position in range(self.schema.num_fields()):
        collected.append(
            frombytes(self.schema.field(position).get().name()))
    return collected
| |
| @property |
| def types(self): |
| """ |
| The schema's field types. |
| |
| Returns |
| ------- |
| list of DataType |
| """ |
| return [field.type for field in self] |
| |
| @property |
| def metadata(self): |
| wrapped = pyarrow_wrap_metadata(self.schema.metadata()) |
| if wrapped is not None: |
| return wrapped.to_dict() |
| else: |
| return wrapped |
| |
| def __eq__(self, other): |
| try: |
| return self.equals(other) |
| except TypeError: |
| return NotImplemented |
| |
| def empty_table(self): |
| """ |
| Provide an empty table according to the schema. |
| |
| Returns |
| ------- |
| table: pyarrow.Table |
| """ |
| arrays = [] |
| names = [] |
| for field in self: |
| arrays.append(_empty_array(field.type)) |
| names.append(field.name) |
| return Table.from_arrays( |
| arrays=arrays, |
| names=names, |
| metadata=self.metadata |
| ) |
| |
| def equals(self, Schema other not None, bint check_metadata=False): |
| """ |
| Test if this schema is equal to the other |
| |
| Parameters |
| ---------- |
| other : pyarrow.Schema |
| check_metadata : bool, default False |
| Key/value metadata must be equal too |
| |
| Returns |
| ------- |
| is_equal : bool |
| """ |
| return self.sp_schema.get().Equals(deref(other.schema), |
| check_metadata) |
| |
| @classmethod |
| def from_pandas(cls, df, preserve_index=None): |
| """ |
| Returns implied schema from dataframe |
| |
| Parameters |
| ---------- |
| df : pandas.DataFrame |
| preserve_index : bool, default True |
| Whether to store the index as an additional column (or columns, for |
| MultiIndex) in the resulting `Table`. |
| The default of None will store the index as a column, except for |
| RangeIndex which is stored as metadata only. Use |
| ``preserve_index=True`` to force it to be stored as a column. |
| |
| Returns |
| ------- |
| pyarrow.Schema |
| |
| Examples |
| -------- |
| |
| >>> import pandas as pd |
| >>> import pyarrow as pa |
| >>> df = pd.DataFrame({ |
| ... 'int': [1, 2], |
| ... 'str': ['a', 'b'] |
| ... }) |
| >>> pa.Schema.from_pandas(df) |
| int: int64 |
| str: string |
| __index_level_0__: int64 |
| """ |
| from pyarrow.pandas_compat import dataframe_to_types |
| names, types, metadata = dataframe_to_types( |
| df, |
| preserve_index=preserve_index |
| ) |
| fields = [] |
| for name, type_ in zip(names, types): |
| fields.append(field(name, type_)) |
| return schema(fields, metadata) |
| |
| def field(self, i): |
| """ |
| Select a field by its column name or numeric index. |
| |
| Parameters |
| ---------- |
| i : int or string |
| |
| Returns |
| ------- |
| pyarrow.Field |
| """ |
| if isinstance(i, (bytes, str)): |
| field_index = self.get_field_index(i) |
| if field_index < 0: |
| raise KeyError("Column {} does not exist in schema".format(i)) |
| else: |
| return self._field(field_index) |
| elif isinstance(i, int): |
| return self._field(i) |
| else: |
| raise TypeError("Index must either be string or integer") |
| |
| def _field(self, int i): |
| """Select a field by its numeric index.""" |
| cdef int index = <int> _normalize_index(i, self.schema.num_fields()) |
| return pyarrow_wrap_field(self.schema.field(index)) |
| |
| def field_by_name(self, name): |
| """ |
| Access a field by its name rather than the column index. |
| |
| Parameters |
| ---------- |
| name: str |
| |
| Returns |
| ------- |
| field: pyarrow.Field |
| """ |
| cdef: |
| vector[shared_ptr[CField]] results |
| |
| warnings.warn( |
| "The 'field_by_name' method is deprecated, use 'field' instead", |
| FutureWarning, stacklevel=2) |
| |
| results = self.schema.GetAllFieldsByName(tobytes(name)) |
| if results.size() == 0: |
| return None |
| elif results.size() > 1: |
| warnings.warn("Schema field name corresponds to more " |
| "than one field", UserWarning) |
| return None |
| else: |
| return pyarrow_wrap_field(results[0]) |
| |
| def get_field_index(self, name): |
| """ |
| Return index of field with given unique name. Returns -1 if not found |
| or if duplicated |
| """ |
| return self.schema.GetFieldIndex(tobytes(name)) |
| |
| def get_all_field_indices(self, name): |
| """ |
| Return sorted list of indices for fields with the given name |
| """ |
| return self.schema.GetAllFieldIndices(tobytes(name)) |
| |
| def append(self, Field field): |
| """ |
| Append a field at the end of the schema. |
| |
| In contrast to Python's ``list.append()`` it does return a new |
| object, leaving the original Schema unmodified. |
| |
| Parameters |
| ---------- |
| field: Field |
| |
| Returns |
| ------- |
| schema: Schema |
| New object with appended field. |
| """ |
| return self.insert(self.schema.num_fields(), field) |
| |
| def insert(self, int i, Field field): |
| """ |
| Add a field at position i to the schema. |
| |
| Parameters |
| ---------- |
| i: int |
| field: Field |
| |
| Returns |
| ------- |
| schema: Schema |
| """ |
| cdef: |
| shared_ptr[CSchema] new_schema |
| shared_ptr[CField] c_field |
| |
| c_field = field.sp_field |
| |
| with nogil: |
| new_schema = GetResultValue(self.schema.AddField(i, c_field)) |
| |
| return pyarrow_wrap_schema(new_schema) |
| |
| def remove(self, int i): |
| """ |
| Remove the field at index i from the schema. |
| |
| Parameters |
| ---------- |
| i: int |
| |
| Returns |
| ------- |
| schema: Schema |
| """ |
| cdef shared_ptr[CSchema] new_schema |
| |
| with nogil: |
| new_schema = GetResultValue(self.schema.RemoveField(i)) |
| |
| return pyarrow_wrap_schema(new_schema) |
| |
| def set(self, int i, Field field): |
| """ |
| Replace a field at position i in the schema. |
| |
| Parameters |
| ---------- |
| i: int |
| field: Field |
| |
| Returns |
| ------- |
| schema: Schema |
| """ |
| cdef: |
| shared_ptr[CSchema] new_schema |
| shared_ptr[CField] c_field |
| |
| c_field = field.sp_field |
| |
| with nogil: |
| new_schema = GetResultValue(self.schema.SetField(i, c_field)) |
| |
| return pyarrow_wrap_schema(new_schema) |
| |
| def add_metadata(self, metadata): |
| warnings.warn("The 'add_metadata' method is deprecated, use " |
| "'with_metadata' instead", FutureWarning, stacklevel=2) |
| return self.with_metadata(metadata) |
| |
| def with_metadata(self, metadata): |
| """ |
| Add metadata as dict of string keys and values to Schema |
| |
| Parameters |
| ---------- |
| metadata : dict |
| Keys and values must be string-like / coercible to bytes |
| |
| Returns |
| ------- |
| schema : pyarrow.Schema |
| """ |
| cdef shared_ptr[CSchema] c_schema |
| |
| meta = ensure_metadata(metadata, allow_none=False) |
| with nogil: |
| c_schema = self.schema.WithMetadata(meta.unwrap()) |
| |
| return pyarrow_wrap_schema(c_schema) |
| |
| def serialize(self, DictionaryMemo dictionary_memo=None, memory_pool=None): |
| """ |
| Write Schema to Buffer as encapsulated IPC message |
| |
| Parameters |
| ---------- |
| memory_pool : MemoryPool, default None |
| Uses default memory pool if not specified |
| dictionary_memo : DictionaryMemo, optional |
| If schema contains dictionaries, must pass a |
| DictionaryMemo to be able to deserialize RecordBatch |
| objects |
| |
| Returns |
| ------- |
| serialized : Buffer |
| """ |
| cdef: |
| shared_ptr[CBuffer] buffer |
| CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool) |
| CDictionaryMemo temp_memo |
| CDictionaryMemo* arg_dict_memo |
| |
| if dictionary_memo is not None: |
| arg_dict_memo = dictionary_memo.memo |
| else: |
| arg_dict_memo = &temp_memo |
| |
| with nogil: |
| buffer = GetResultValue(SerializeSchema(deref(self.schema), |
| arg_dict_memo, pool)) |
| return pyarrow_wrap_buffer(buffer) |
| |
| def remove_metadata(self): |
| """ |
| Create new schema without metadata, if any |
| |
| Returns |
| ------- |
| schema : pyarrow.Schema |
| """ |
| cdef shared_ptr[CSchema] new_schema |
| with nogil: |
| new_schema = self.schema.RemoveMetadata() |
| return pyarrow_wrap_schema(new_schema) |
| |
| def to_string(self, truncate_metadata=True, show_field_metadata=True, |
| show_schema_metadata=True): |
| """ |
| Return human-readable representation of Schema |
| |
| Parameters |
| ---------- |
| truncate_metadata : boolean, default True |
| Limit metadata key/value display to a single line of ~80 characters |
| or less |
| show_field_metadata : boolean, default True |
| Display Field-level KeyValueMetadata |
| show_schema_metadata : boolean, default True |
| Display Schema-level KeyValueMetadata |
| |
| Returns |
| ------- |
| str : the formatted output |
| """ |
| cdef: |
| c_string result |
| PrettyPrintOptions options = PrettyPrintOptions.Defaults() |
| |
| options.indent = 0 |
| options.truncate_metadata = truncate_metadata |
| options.show_field_metadata = show_field_metadata |
| options.show_schema_metadata = show_schema_metadata |
| |
| with nogil: |
| check_status( |
| PrettyPrint( |
| deref(self.schema), |
| options, |
| &result |
| ) |
| ) |
| |
| return frombytes(result) |
| |
| def _export_to_c(self, uintptr_t out_ptr): |
| """ |
| Export to a C ArrowSchema struct, given its pointer. |
| |
| Be careful: if you don't pass the ArrowSchema struct to a consumer, |
| its memory will leak. This is a low-level function intended for |
| expert users. |
| """ |
| check_status(ExportSchema(deref(self.schema), <ArrowSchema*> out_ptr)) |
| |
| @staticmethod |
| def _import_from_c(uintptr_t in_ptr): |
| """ |
| Import Schema from a C ArrowSchema struct, given its pointer. |
| |
| This is a low-level function intended for expert users. |
| """ |
| with nogil: |
| result = GetResultValue(ImportSchema(<ArrowSchema*> in_ptr)) |
| return pyarrow_wrap_schema(result) |
| |
| def __str__(self): |
| return self.to_string() |
| |
| def __repr__(self): |
| return self.__str__() |
| |
| |
def unify_schemas(list schemas):
    """
    Merge several schemas into one, combining fields by name.

    The result holds the union of the fields of all inputs. Fields sharing
    a name are merged; merging fails when their types differ.

    - A merged field keeps the metadata of the schema in which it is first
      defined.
    - The first N fields of the result follow the order of the N fields of
      the first schema.

    Schema-level metadata is taken from the first input schema.

    Parameters
    ----------
    schemas : list of Schema
        Schemas to merge into a single one.

    Returns
    -------
    Schema

    Raises
    ------
    ArrowInvalid :
        If any input schema contains fields with duplicate names.
        If Fields of the same name are not mergeable.
    """
    cdef:
        Schema current
        vector[shared_ptr[CSchema]] c_inputs
        shared_ptr[CSchema] c_merged

    # Collect the underlying C++ schemas in input order.
    for current in schemas:
        c_inputs.push_back(pyarrow_unwrap_schema(current))

    c_merged = GetResultValue(UnifySchemas(c_inputs))
    return pyarrow_wrap_schema(c_merged)
| |
| |
# Cache of DataType wrappers created by primitive_type(), keyed by type id,
# so that repeated requests for the same type return the same object.
cdef dict _type_cache = {}
| |
| |
cdef DataType primitive_type(Type type):
    # Return the DataType wrapper for a parameter-free type id, creating it
    # on first use and serving it from ``_type_cache`` afterwards.
    cdef DataType wrapper = _type_cache.get(type)
    if wrapper is not None:
        return wrapper

    wrapper = DataType.__new__(DataType)
    wrapper.init(GetPrimitiveType(type))
    _type_cache[type] = wrapper
    return wrapper
| |
| |
| # ----------------------------------------------------------- |
| # Type factory functions |
| |
| |
def field(name, type, bint nullable=True, metadata=None):
    """
    Build a pyarrow.Field from a name and a data type.

    Parameters
    ----------
    name : str or bytes
        Name of the field.
    type : pyarrow.DataType
        Arrow datatype of the field.
    nullable : bool, default True
        Whether the field's values are nullable.
    metadata : dict, default None
        Optional field metadata, the keys and values must be coercible to
        bytes.

    Returns
    -------
    field : pyarrow.Field

    Raises
    ------
    ValueError
        If a null-typed field is requested with ``nullable=False``.
    """
    cdef:
        Field out = Field.__new__(Field)
        DataType data_type = ensure_type(type, allow_none=False)
        shared_ptr[const CKeyValueMetadata] c_metadata

    c_metadata = pyarrow_unwrap_metadata(
        ensure_metadata(metadata, allow_none=True))

    # A null type cannot be declared non-nullable.
    if not nullable and data_type.type.id() == _Type_NA:
        raise ValueError("A null type field may not be non-nullable")

    out.sp_field.reset(
        new CField(tobytes(name), data_type.sp_type, nullable, c_metadata)
    )
    out.field = out.sp_field.get()
    out.type = data_type
    return out
| |
| |
# Type ids that this module treats as primitive.
cdef set PRIMITIVE_TYPES = {
    _Type_NA,
    _Type_BOOL,
    # integers
    _Type_INT8, _Type_INT16, _Type_INT32, _Type_INT64,
    _Type_UINT8, _Type_UINT16, _Type_UINT32, _Type_UINT64,
    # temporal types
    _Type_DATE32, _Type_DATE64,
    _Type_TIME32, _Type_TIME64,
    _Type_TIMESTAMP,
    # floating point
    _Type_HALF_FLOAT, _Type_FLOAT, _Type_DOUBLE,
}
| |
| |
def null():
    """
    Return the null data type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_NA)
| |
| |
def bool_():
    """
    Return the boolean data type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_BOOL)
| |
| |
def uint8():
    """
    Return the unsigned 8-bit integer data type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_UINT8)
| |
| |
def int8():
    """
    Return the signed 8-bit integer data type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_INT8)
| |
| |
def uint16():
    """
    Return the unsigned 16-bit integer data type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_UINT16)
| |
| |
def int16():
    """
    Return the signed 16-bit integer data type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_INT16)
| |
| |
def uint32():
    """
    Return the unsigned 32-bit integer data type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_UINT32)
| |
| |
def int32():
    """
    Return the signed 32-bit integer data type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_INT32)
| |
| |
def uint64():
    """
    Return the unsigned 64-bit integer data type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_UINT64)
| |
| |
def int64():
    """
    Return the signed 64-bit integer data type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_INT64)
| |
| |
# Per-unit caches of parametric temporal types, keyed by TimeUnit code.
# timestamp() only caches its time-zone-naive types here.
cdef dict _timestamp_type_cache = {}
# Shared by time32() ('s', 'ms') and time64() ('us', 'ns'); the two functions
# accept disjoint units, so their keys do not collide.
cdef dict _time_type_cache = {}
# Filled by duration().
cdef dict _duration_type_cache = {}
| |
| |
cdef timeunit_to_string(TimeUnit unit):
    # Translate a TimeUnit code into its short string form
    # ('s', 'ms', 'us' or 'ns'); any other value yields None.
    if unit == TimeUnit_SECOND:
        return 's'
    if unit == TimeUnit_MILLI:
        return 'ms'
    if unit == TimeUnit_MICRO:
        return 'us'
    if unit == TimeUnit_NANO:
        return 'ns'
    return None
| |
| |
# Fixed UTC offset of the form +HH:MM or -HH:MM: group 1 is the sign, group 2
# the hours (00-23), group 3 the minutes (00-59). The pattern is anchored at
# the end with ``$``; string_to_tzinfo() applies it with ``.match()``, which
# anchors it at the start as well.
_FIXED_OFFSET_RE = re.compile(r'([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$')
| |
| |
def tzinfo_to_string(tz):
    """
    Turn a time zone object into the string naming that time zone.

    The result is one of:
    * A name as used in the Olson time zone database (the "tz database" or
      "tzdata"), such as "America/New_York"
    * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30

    Parameters
    ----------
      tz : datetime.tzinfo
        Time zone object

    Returns
    -------
      name : str
        Time zone name

    Raises
    ------
    ValueError
        If the tzinfo object is of an unsupported kind, or a fixed offset
        is not a whole number of minutes.
    TypeError
        If ``tz`` is not a ``datetime.tzinfo`` instance.
    """
    import pytz
    import datetime

    def fixed_offset_to_string(offset):
        # Render a fixed UTC offset as "+HH:MM" / "-HH:MM".
        total = int(offset.utcoffset(None).total_seconds())
        prefix = '-' if total < 0 else '+'
        whole_minutes, leftover = divmod(abs(total), 60)
        if leftover > 0:
            raise ValueError('Offset must represent whole number of minutes')
        hh, mm = divmod(whole_minutes, 60)
        return '{}{:02d}:{:02d}'.format(prefix, hh, mm)

    # Named pytz zones (the identity test for pytz.utc relates to ARROW-4055)
    if tz is pytz.utc or isinstance(tz, pytz.tzinfo.BaseTzInfo):
        return tz.zone

    # Fixed offsets, from either pytz or the standard library
    if isinstance(tz, (pytz._FixedOffset, datetime.timezone)):
        return fixed_offset_to_string(tz)

    if isinstance(tz, datetime.tzinfo):
        raise ValueError('Unable to convert timezone `{}` to string'
                         .format(tz))

    raise TypeError('Must be an instance of `datetime.tzinfo`')
| |
| |
def string_to_tzinfo(name):
    """
    Build a time zone object from a time zone name.

    Supported input strings are:
    * As used in the Olson time zone database (the "tz database" or
      "tzdata"), such as "America/New_York"
    * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30

    Parameters
    ----------
      name: str
        Time zone name.

    Returns
    -------
      tz : datetime.tzinfo
        Time zone object
    """
    import pytz

    parsed = _FIXED_OFFSET_RE.match(name)
    if parsed is None:
        # Not a fixed offset: treat the string as a tz database name.
        return pytz.timezone(name)

    sign_char, hour_text, minute_text = parsed.groups()
    offset_minutes = int(hour_text) * 60 + int(minute_text)
    if sign_char != '+':
        offset_minutes = -offset_minutes
    return pytz.FixedOffset(offset_minutes)
| |
| |
def timestamp(unit, tz=None):
    """
    Create instance of timestamp type with resolution and optional time zone.

    Parameters
    ----------
    unit : str
        one of 's' [second], 'ms' [millisecond], 'us' [microsecond], or 'ns'
        [nanosecond]
    tz : str or datetime.tzinfo, default None
        Time zone name. None indicates time zone naive. Anything that is
        not a str or bytes is converted with ``tzinfo_to_string``.

    Examples
    --------
    >>> import pyarrow as pa
    >>> pa.timestamp('us')
    TimestampType(timestamp[us])
    >>> pa.timestamp('s', tz='America/New_York')
    TimestampType(timestamp[s, tz=America/New_York])
    >>> pa.timestamp('s', tz='+07:30')
    TimestampType(timestamp[s, tz=+07:30])

    Returns
    -------
    timestamp_type : TimestampType

    Raises
    ------
    ValueError
        If ``unit`` is not one of the supported unit strings.
    """
    cdef:
        TimeUnit unit_code
        c_string c_timezone
        TimestampType out

    if unit == "s":
        unit_code = TimeUnit_SECOND
    elif unit == 'ms':
        unit_code = TimeUnit_MILLI
    elif unit == 'us':
        unit_code = TimeUnit_MICRO
    elif unit == 'ns':
        unit_code = TimeUnit_NANO
    else:
        raise ValueError('Invalid TimeUnit string')

    if tz is None:
        # Time-zone-naive types are shared per unit. Look in the cache
        # before allocating so a hit does not build a throwaway object.
        if unit_code in _timestamp_type_cache:
            return _timestamp_type_cache[unit_code]
        out = TimestampType.__new__(TimestampType)
        out.init(ctimestamp(unit_code))
        _timestamp_type_cache[unit_code] = out
        return out

    # Time-zone-aware types are not cached.
    if not isinstance(tz, (bytes, str)):
        tz = tzinfo_to_string(tz)

    c_timezone = tobytes(tz)
    out = TimestampType.__new__(TimestampType)
    out.init(ctimestamp(unit_code, c_timezone))
    return out
| |
| |
def time32(unit):
    """
    Create instance of 32-bit time (time of day) type with unit resolution.

    Parameters
    ----------
    unit : str
        one of 's' [second], or 'ms' [millisecond]

    Returns
    -------
    type : pyarrow.Time32Type

    Raises
    ------
    ValueError
        If ``unit`` is neither 's' nor 'ms'.

    Examples
    --------
    >>> import pyarrow as pa
    >>> pa.time32('s')
    Time32Type(time32[s])
    >>> pa.time32('ms')
    Time32Type(time32[ms])
    """
    cdef:
        TimeUnit unit_code
        Time32Type out

    if unit == 's':
        unit_code = TimeUnit_SECOND
    elif unit == 'ms':
        unit_code = TimeUnit_MILLI
    else:
        raise ValueError('Invalid TimeUnit for time32: {}'.format(unit))

    # One shared instance per unit.
    if unit_code in _time_type_cache:
        return _time_type_cache[unit_code]

    out = Time32Type.__new__(Time32Type)
    out.init(ctime32(unit_code))
    _time_type_cache[unit_code] = out

    return out
| |
| |
def time64(unit):
    """
    Create instance of 64-bit time (time of day) type with unit resolution.

    Parameters
    ----------
    unit : str
        One of 'us' [microsecond], or 'ns' [nanosecond].

    Returns
    -------
    type : pyarrow.Time64Type

    Raises
    ------
    ValueError
        If ``unit`` is neither 'us' nor 'ns'.

    Examples
    --------
    >>> import pyarrow as pa
    >>> pa.time64('us')
    Time64Type(time64[us])
    >>> pa.time64('ns')
    Time64Type(time64[ns])
    """
    cdef:
        TimeUnit unit_code
        Time64Type out

    if unit == 'us':
        unit_code = TimeUnit_MICRO
    elif unit == 'ns':
        unit_code = TimeUnit_NANO
    else:
        raise ValueError('Invalid TimeUnit for time64: {}'.format(unit))

    # One shared instance per unit.
    if unit_code in _time_type_cache:
        return _time_type_cache[unit_code]

    out = Time64Type.__new__(Time64Type)
    out.init(ctime64(unit_code))
    _time_type_cache[unit_code] = out

    return out
| |
| |
def duration(unit):
    """
    Return the duration type with the given unit resolution.

    Parameters
    ----------
    unit : str
        One of 's' [second], 'ms' [millisecond], 'us' [microsecond], or
        'ns' [nanosecond].

    Returns
    -------
    type : pyarrow.DurationType

    Raises
    ------
    ValueError
        If ``unit`` is not one of the supported unit strings.

    Examples
    --------
    >>> import pyarrow as pa
    >>> pa.duration('us')
    DurationType(duration[us])
    >>> pa.duration('s')
    DurationType(duration[s])
    """
    cdef:
        TimeUnit unit_code
        DurationType fresh

    if unit == 's':
        unit_code = TimeUnit_SECOND
    elif unit == 'ms':
        unit_code = TimeUnit_MILLI
    elif unit == 'us':
        unit_code = TimeUnit_MICRO
    elif unit == 'ns':
        unit_code = TimeUnit_NANO
    else:
        raise ValueError('Invalid TimeUnit string')

    # Build and remember the type the first time a unit is requested.
    if unit_code not in _duration_type_cache:
        fresh = DurationType.__new__(DurationType)
        fresh.init(cduration(unit_code))
        _duration_type_cache[unit_code] = fresh

    return _duration_type_cache[unit_code]
| |
| |
def date32():
    """
    Return the 32-bit date type (days since UNIX epoch 1970-01-01).

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_DATE32)
| |
| |
def date64():
    """
    Return the 64-bit date type (milliseconds since UNIX epoch 1970-01-01).

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_DATE64)
| |
| |
def float16():
    """
    Return the half-precision floating point type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_HALF_FLOAT)
| |
| |
def float32():
    """
    Return the single-precision floating point type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_FLOAT)
| |
| |
def float64():
    """
    Return the double-precision floating point type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_DOUBLE)
| |
| |
cpdef DataType decimal128(int precision, int scale=0):
    """
    Build a 128-bit wide decimal type with the given precision and scale.

    Parameters
    ----------
    precision : int
        Must lie between 1 and 38 inclusive.
    scale : int

    Returns
    -------
    decimal_type : Decimal128Type

    Raises
    ------
    ValueError
        If ``precision`` is outside the range 1..38.
    """
    cdef shared_ptr[CDataType] c_decimal
    if not (1 <= precision <= 38):
        raise ValueError("precision should be between 1 and 38")
    c_decimal.reset(new CDecimal128Type(precision, scale))
    return pyarrow_wrap_data_type(c_decimal)
| |
| |
def string():
    """
    Return the UTF8 variable-length string type.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_STRING)
| |
| |
def utf8():
    """
    Return the UTF8 variable-length string type; alias for string().
    """
    return string()
| |
| |
def binary(int length=-1):
    """
    Create variable-length or fixed-size binary type.

    Parameters
    ----------
    length : int, optional, default -1
        If length == -1 then return a variable length binary type. If length is
        greater than or equal to 0 then return a fixed size binary type of
        width `length`.

    Returns
    -------
    type : DataType

    Raises
    ------
    ValueError
        If ``length`` is negative and not -1.
    """
    cdef shared_ptr[CDataType] fixed_size_binary_type

    if length == -1:
        return primitive_type(_Type_BINARY)

    # -1 is the only negative value with a meaning; reject the others rather
    # than building a fixed-size type with a negative width.
    if length < 0:
        raise ValueError("length should be -1 or a non-negative integer")

    fixed_size_binary_type.reset(new CFixedSizeBinaryType(length))
    return pyarrow_wrap_data_type(fixed_size_binary_type)
| |
| |
def large_binary():
    """
    Return the large variable-length binary type.

    Not every Arrow implementation may support this data type. Prefer
    binary() unless the data to represent is larger than 2GB.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_LARGE_BINARY)
| |
| |
def large_string():
    """
    Return the large UTF8 variable-length string type.

    Not every Arrow implementation may support this data type. Prefer
    string() unless the data to represent is larger than 2GB.

    Returns
    -------
    type : DataType
    """
    return primitive_type(_Type_LARGE_STRING)
| |
| |
def large_utf8():
    """
    Return the large UTF8 variable-length string type; alias for
    large_string().
    """
    return large_string()
| |
| |
def list_(value_type, int list_size=-1):
    """
    Build a list type from a child data type or field.

    Parameters
    ----------
    value_type : DataType or Field
        A DataType is wrapped in a field named 'item'.
    list_size : int, optional, default -1
        If list_size == -1 then return a variable length list type. If
        list_size is greater than or equal to 0 then return a fixed size
        list type.

    Returns
    -------
    list_type : DataType

    Raises
    ------
    TypeError
        If ``value_type`` is neither a DataType nor a Field.
    ValueError
        If ``list_size`` is negative and not -1.
    """
    cdef:
        Field item_field
        shared_ptr[CDataType] c_list_type

    if isinstance(value_type, Field):
        item_field = value_type
    elif isinstance(value_type, DataType):
        item_field = field('item', value_type)
    else:
        raise TypeError('List requires DataType or Field')

    if list_size == -1:
        c_list_type.reset(new CListType(item_field.sp_field))
    elif list_size < 0:
        raise ValueError("list_size should be a positive integer")
    else:
        c_list_type.reset(
            new CFixedSizeListType(item_field.sp_field, list_size))

    return pyarrow_wrap_data_type(c_list_type)
| |
| |
cpdef LargeListType large_list(value_type):
    """
    Create LargeListType instance from child data type or field.

    This data type may not be supported by all Arrow implementations.
    Unless you need to represent data larger than 2**31 elements, you should
    prefer list_().

    Parameters
    ----------
    value_type : DataType or Field
        A DataType is wrapped in a field named 'item'.

    Returns
    -------
    list_type : DataType

    Raises
    ------
    TypeError
        If ``value_type`` is neither a DataType nor a Field.
    """
    cdef:
        Field _field
        shared_ptr[CDataType] list_type
        LargeListType out = LargeListType.__new__(LargeListType)

    if isinstance(value_type, DataType):
        _field = field('item', value_type)
    elif isinstance(value_type, Field):
        _field = value_type
    else:
        raise TypeError('List requires DataType or Field')

    list_type.reset(new CLargeListType(_field.sp_field))
    out.init(list_type)
    return out
| |
| |
cpdef MapType map_(key_type, item_type, keys_sorted=False):
    """
    Build a map type from a key data type and an item data type.

    Parameters
    ----------
    key_type : DataType
    item_type : DataType
    keys_sorted : bool

    Returns
    -------
    map_type : DataType
    """
    cdef:
        DataType key_dtype = ensure_type(key_type, allow_none=False)
        DataType item_dtype = ensure_type(item_type, allow_none=False)
        shared_ptr[CDataType] c_map_type
        MapType result = MapType.__new__(MapType)

    c_map_type.reset(
        new CMapType(key_dtype.sp_type, item_dtype.sp_type, keys_sorted)
    )
    result.init(c_map_type)
    return result
| |
| |
cpdef DictionaryType dictionary(index_type, value_type, bint ordered=False):
    """
    Build a dictionary (categorical, or simply encoded) type.

    Parameters
    ----------
    index_type : DataType
        Must be one of int8, int16, int32 or int64.
    value_type : DataType
    ordered : bool

    Returns
    -------
    type : DictionaryType

    Raises
    ------
    TypeError
        If ``index_type`` is not a signed integer type.
    """
    cdef:
        DataType index_dtype = ensure_type(index_type, allow_none=False)
        DataType value_dtype = ensure_type(value_type, allow_none=False)
        DictionaryType result = DictionaryType.__new__(DictionaryType)
        shared_ptr[CDataType] c_dict_type

    allowed_index_ids = (Type_INT8, Type_INT16, Type_INT32, Type_INT64)
    if index_dtype.id not in allowed_index_ids:
        raise TypeError("The dictionary index type should be signed integer.")

    c_dict_type.reset(
        new CDictionaryType(index_dtype.sp_type, value_dtype.sp_type,
                            ordered == 1)
    )
    result.init(c_dict_type)
    return result
| |
| |
def struct(fields):
    """
    Create StructType instance from fields.

    A struct is a nested type parameterized by an ordered sequence of types
    (which can all be distinct), called its fields.

    Parameters
    ----------
    fields : iterable of Fields or tuples, or mapping of strings to DataTypes
        Each field must have a UTF8-encoded name, and these field names are
        part of the type metadata. Tuples are expanded into the arguments
        of ``field()``.

    Examples
    --------
    >>> import pyarrow as pa
    >>> fields = [
    ...     ('f1', pa.int32()),
    ...     ('f2', pa.string()),
    ... ]
    >>> struct_type = pa.struct(fields)
    >>> struct_type
    StructType(struct<f1: int32, f2: string>)
    >>> fields = [
    ...     pa.field('f1', pa.int32()),
    ...     pa.field('f2', pa.string(), nullable=False),
    ... ]
    >>> pa.struct(fields)
    StructType(struct<f1: int32, f2: string not null>)

    Returns
    -------
    type : DataType

    Raises
    ------
    TypeError
        If an item is None, or is neither a tuple nor a Field.
    """
    cdef:
        Field py_field
        vector[shared_ptr[CField]] c_fields
        shared_ptr[CDataType] struct_type

    if isinstance(fields, Mapping):
        fields = fields.items()

    for item in fields:
        if isinstance(item, tuple):
            py_field = field(*item)
        else:
            py_field = item
        # A typed ``Field`` variable accepts None, and reading ``.sp_field``
        # from None is not safe; reject it explicitly, as schema() does.
        if py_field is None:
            raise TypeError("field or tuple expected, got None")
        c_fields.push_back(py_field.sp_field)

    struct_type.reset(new CStructType(c_fields))
    return pyarrow_wrap_data_type(struct_type)
| |
| |
def union(children_fields, mode, type_codes=None):
    """
    Create UnionType from children fields.

    A union is defined by an ordered sequence of types; each slot in the union
    can have a value chosen from these types.

    Parameters
    ----------
    children_fields : sequence of Field values
        Each field must have a UTF8-encoded name, and these field names are
        part of the type metadata.
    mode : str
        Either 'dense' or 'sparse'. The corresponding integer mode
        constants are accepted as well.
    type_codes : list of integers, default None
        One code per child field. When omitted, the codes
        0 .. len(children_fields) - 1 are used.

    Returns
    -------
    type : DataType

    Raises
    ------
    ValueError
        If ``mode`` is invalid, or ``type_codes`` does not have one entry
        per child field.
    TypeError
        If a child field is None or not a Field.
    """
    cdef:
        Field child_field
        vector[shared_ptr[CField]] c_fields
        vector[int8_t] c_type_codes
        shared_ptr[CDataType] union_type

    # Normalize ``mode`` to one of the two integer mode constants.
    if isinstance(mode, int):
        if mode not in (_UnionMode_SPARSE, _UnionMode_DENSE):
            raise ValueError("Invalid union mode {0!r}".format(mode))
    else:
        if mode == 'sparse':
            mode = _UnionMode_SPARSE
        elif mode == 'dense':
            mode = _UnionMode_DENSE
        else:
            raise ValueError("Invalid union mode {0!r}".format(mode))

    for child_field in children_fields:
        # A typed ``Field`` variable accepts None, and reading ``.sp_field``
        # from None is not safe; reject it explicitly.
        if child_field is None:
            raise TypeError("field expected, got None")
        c_fields.push_back(child_field.sp_field)

    if type_codes is not None:
        if len(type_codes) != <Py_ssize_t>(c_fields.size()):
            raise ValueError("type_codes should have the same length "
                             "as fields")
        for code in type_codes:
            c_type_codes.push_back(code)
    else:
        c_type_codes = range(c_fields.size())

    # Compare against the same constant that ``mode`` was normalized to.
    if mode == _UnionMode_SPARSE:
        union_type = CMakeSparseUnionType(c_fields, c_type_codes)
    else:
        union_type = CMakeDenseUnionType(c_fields, c_type_codes)

    return pyarrow_wrap_data_type(union_type)
| |
| |
# Lower-case string aliases for data types, consumed by type_for_alias().
# A value is either a type factory that is called without arguments, or
# (for the parametric temporal types) an already constructed DataType
# instance that is returned as-is.
cdef dict _type_aliases = {
    'null': null,
    'bool': bool_,
    'boolean': bool_,
    'i1': int8,
    'int8': int8,
    'i2': int16,
    'int16': int16,
    'i4': int32,
    'int32': int32,
    'i8': int64,
    'int64': int64,
    'u1': uint8,
    'uint8': uint8,
    'u2': uint16,
    'uint16': uint16,
    'u4': uint32,
    'uint32': uint32,
    'u8': uint64,
    'uint64': uint64,
    'f2': float16,
    'halffloat': float16,
    'float16': float16,
    'f4': float32,
    'float': float32,
    'float32': float32,
    'f8': float64,
    'double': float64,
    'float64': float64,
    'string': string,
    'str': string,
    'utf8': string,
    'binary': binary,
    'large_string': large_string,
    'large_str': large_string,
    'large_utf8': large_string,
    'large_binary': large_binary,
    'date32': date32,
    'date64': date64,
    'date32[day]': date32,
    'date64[ms]': date64,
    'time32[s]': time32('s'),
    'time32[ms]': time32('ms'),
    'time64[us]': time64('us'),
    'time64[ns]': time64('ns'),
    'timestamp[s]': timestamp('s'),
    'timestamp[ms]': timestamp('ms'),
    'timestamp[us]': timestamp('us'),
    'timestamp[ns]': timestamp('ns'),
    'duration[s]': duration('s'),
    'duration[ms]': duration('ms'),
    'duration[us]': duration('us'),
    'duration[ns]': duration('ns'),
}
| |
| |
def type_for_alias(name):
    """
    Look up the DataType registered for a string alias.

    The lookup is case-insensitive: the name is lower-cased first.

    Parameters
    ----------
    name : str

    Returns
    -------
    type : DataType

    Raises
    ------
    ValueError
        If no alias with this name exists.
    """
    key = name.lower()
    try:
        target = _type_aliases[key]
    except KeyError:
        raise ValueError('No type alias for {0}'.format(key))

    # Entries are either ready-made instances or factories still to be called.
    return target if isinstance(target, DataType) else target()
| |
| |
cpdef DataType ensure_type(object ty, bint allow_none=False):
    # Coerce ``ty`` to a DataType: DataType instances pass through, strings
    # are resolved with type_for_alias(), and None is returned unchanged
    # only when ``allow_none`` is set. Anything else raises TypeError.
    if ty is None and allow_none:
        return None
    if isinstance(ty, DataType):
        return ty
    if isinstance(ty, str):
        return type_for_alias(ty)
    raise TypeError('DataType expected, got {!r}'.format(type(ty)))
| |
| |
def schema(fields, metadata=None):
    """
    Build a pyarrow.Schema from a collection of fields.

    Parameters
    ----------
    fields : iterable of Fields or tuples, or mapping of strings to DataTypes
        Tuple entries are expanded into the arguments of ``field``; a
        mapping is consumed through its ``items()``.
    metadata : dict, default None
        Keys and values must be coercible to bytes.

    Returns
    -------
    schema : pyarrow.Schema

    Raises
    ------
    TypeError
        If one of the entries is None.

    Examples
    --------
    >>> import pyarrow as pa
    >>> pa.schema([
    ...     ('some_int', pa.int32()),
    ...     ('some_string', pa.string())
    ... ])
    some_int: int32
    some_string: string
    >>> pa.schema([
    ...     pa.field('some_int', pa.int32()),
    ...     pa.field('some_string', pa.string())
    ... ])
    some_int: int32
    some_string: string
    """
    cdef shared_ptr[const CKeyValueMetadata] c_meta
    cdef shared_ptr[CSchema] sp_schema
    cdef vector[shared_ptr[CField]] c_fields
    cdef Field entry
    cdef Schema out

    # A mapping contributes its (name, type) pairs.
    entries = fields.items() if isinstance(fields, Mapping) else fields

    for raw in entries:
        # Assigning to the typed ``entry`` variable enforces that non-tuple
        # items are Field instances (or None, which is rejected below).
        entry = field(*raw) if isinstance(raw, tuple) else raw
        if entry is None:
            raise TypeError("field or tuple expected, got None")
        c_fields.push_back(entry.sp_field)

    c_meta = pyarrow_unwrap_metadata(
        ensure_metadata(metadata, allow_none=True))

    sp_schema.reset(new CSchema(c_fields, c_meta))
    out = Schema.__new__(Schema)
    out.init_schema(sp_schema)
    return out
| |
| |
def from_numpy_dtype(object dtype):
    """
    Convert NumPy dtype to pyarrow.DataType.

    Parameters
    ----------
    dtype : object
        Passed through ``np.dtype()`` first, so anything that constructor
        accepts can be given.

    Returns
    -------
    type : DataType
        The wrapped result of ``NumPyDtypeToArrow``.
    """
    cdef shared_ptr[CDataType] c_type
    # Normalize the argument with np.dtype() before conversion.
    dtype = np.dtype(dtype)
    # The conversion call runs with the GIL released; its status is passed
    # to check_status (presumably raising on failure -- defined elsewhere).
    with nogil:
        check_status(NumPyDtypeToArrow(dtype, &c_type))

    return pyarrow_wrap_data_type(c_type)
| |
| |
def is_boolean_value(object obj):
    """
    Return the result of ``IsPyBool`` for *obj*.

    NOTE(review): which objects count as boolean is decided by the
    ``IsPyBool`` helper, which is defined outside this file section.
    """
    return IsPyBool(obj)
| |
| |
def is_integer_value(object obj):
    """
    Return the result of ``IsPyInt`` for *obj*.

    NOTE(review): which objects count as integers is decided by the
    ``IsPyInt`` helper, which is defined outside this file section.
    """
    return IsPyInt(obj)
| |
| |
def is_float_value(object obj):
    """
    Return the result of ``IsPyFloat`` for *obj*.

    NOTE(review): which objects count as floats is decided by the
    ``IsPyFloat`` helper, which is defined outside this file section.
    """
    return IsPyFloat(obj)
| |
| |
cdef class _ExtensionRegistryNanny:
    # Keep the registry alive until we have unregistered PyExtensionType
    #
    # The object holds a shared_ptr to the global extension type registry
    # from construction until release_registry() is called.
    cdef:
        shared_ptr[CExtensionTypeRegistry] registry

    def __cinit__(self):
        # Grab a shared reference to the global registry on construction.
        self.registry = CExtensionTypeRegistry.GetGlobalRegistry()

    def release_registry(self):
        # Drop the reference held by this object.
        self.registry.reset()
| |
| |
# Module-level instance created at import time; its release_registry() is
# called at the end of _unregister_py_extension_types().
_registry_nanny = _ExtensionRegistryNanny()
| |
| |
def _register_py_extension_type():
    """
    Register a placeholder extension type for PyExtensionType.

    A C++ extension type named "arrow.py_extension_type" is created from
    the PyExtensionType class with null storage and then passed to
    RegisterPyExtensionType.
    """
    cdef c_string ext_name = tobytes("arrow.py_extension_type")
    cdef DataType null_storage = null()
    cdef shared_ptr[CExtensionType] placeholder

    # Make a dummy C++ ExtensionType
    check_status(CPyExtensionType.FromClass(
        null_storage.sp_type, ext_name, PyExtensionType, &placeholder))
    check_status(RegisterPyExtensionType(
        <shared_ptr[CDataType]> placeholder))
| |
| |
def _unregister_py_extension_types():
    """
    Unregister the Python-backed extension types and release the registry.

    This function is passed to ``atexit.register`` at module level.
    """
    # This needs to be done explicitly before the Python interpreter is
    # finalized.  If the C++ type is destroyed later in the process
    # teardown stage, it will invoke CPython APIs such as Py_DECREF
    # with a destroyed interpreter.
    unregister_extension_type("arrow.py_extension_type")
    for ext_type in _python_extension_types_registry:
        try:
            unregister_extension_type(ext_type.extension_name)
        except KeyError:
            # Deliberately ignored -- presumably the type was already
            # unregistered; cleanup continues with the remaining types.
            pass
    # Only after all types are unregistered is the registry reference
    # held by the module-level nanny dropped.
    _registry_nanny.release_registry()
| |
| |
# Import-time side effects: register the placeholder extension type now and
# schedule the matching unregistration to run at interpreter exit.
_register_py_extension_type()
atexit.register(_unregister_py_extension_types)