| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import collections |
| |
| |
| cdef class Scalar(_Weakrefable): |
| """ |
| The base class for scalars. |
| """ |
| |
| def __init__(self): |
| raise TypeError("Do not call {}'s constructor directly, use " |
| "pa.scalar() instead.".format(self.__class__.__name__)) |
| |
| cdef void init(self, const shared_ptr[CScalar]& wrapped): |
| self.wrapped = wrapped |
| |
| @staticmethod |
| cdef wrap(const shared_ptr[CScalar]& wrapped): |
| cdef: |
| Scalar self |
| Type type_id = wrapped.get().type.get().id() |
| shared_ptr[CDataType] sp_data_type = wrapped.get().type |
| |
| if type_id == _Type_NA: |
| return _NULL |
| |
| if type_id not in _scalar_classes: |
| raise NotImplementedError( |
| "Wrapping scalar of type " + frombytes(sp_data_type.get().ToString())) |
| |
| typ = get_scalar_class_from_type(sp_data_type) |
| self = typ.__new__(typ) |
| self.init(wrapped) |
| |
| return self |
| |
| cdef inline shared_ptr[CScalar] unwrap(self) nogil: |
| return self.wrapped |
| |
| @property |
| def type(self): |
| """ |
| Data type of the Scalar object. |
| """ |
| return pyarrow_wrap_data_type(self.wrapped.get().type) |
| |
| @property |
| def is_valid(self): |
| """ |
| Holds a valid (non-null) value. |
| """ |
| return self.wrapped.get().is_valid |
| |
| def cast(self, object target_type): |
| """ |
| Attempt a safe cast to target data type. |
| |
| Parameters |
| ---------- |
| target_type : DataType or string coercible to DataType |
| The type to cast the scalar to. |
| |
| Returns |
| ------- |
| scalar : A Scalar of the given target data type. |
| """ |
| cdef: |
| DataType type = ensure_type(target_type) |
| shared_ptr[CScalar] result |
| |
| with nogil: |
| result = GetResultValue(self.wrapped.get().CastTo(type.sp_type)) |
| |
| return Scalar.wrap(result) |
| |
| def __repr__(self): |
| return '<pyarrow.{}: {!r}>'.format( |
| self.__class__.__name__, self.as_py() |
| ) |
| |
| def __str__(self): |
| return str(self.as_py()) |
| |
| def equals(self, Scalar other not None): |
| return self.wrapped.get().Equals(other.unwrap().get()[0]) |
| |
| def __eq__(self, other): |
| try: |
| return self.equals(other) |
| except TypeError: |
| return NotImplemented |
| |
| def __hash__(self): |
| cdef CScalarHash hasher |
| return hasher(self.wrapped) |
| |
| def __reduce__(self): |
| return scalar, (self.as_py(), self.type) |
| |
| def as_py(self): |
| raise NotImplementedError() |
| |
| |
| _NULL = NA = None |
| |
| |
| cdef class NullScalar(Scalar): |
| """ |
| Concrete class for null scalars. |
| """ |
| |
| def __cinit__(self): |
| global NA |
| if NA is not None: |
| raise RuntimeError('Cannot create multiple NullScalar instances') |
| self.init(shared_ptr[CScalar](new CNullScalar())) |
| |
| def __init__(self): |
| pass |
| |
| def as_py(self): |
| """ |
| Return this value as a Python None. |
| """ |
| return None |
| |
| |
| _NULL = NA = NullScalar() |
| |
| |
| cdef class BooleanScalar(Scalar): |
| """ |
| Concrete class for boolean scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python bool. |
| """ |
| cdef CBooleanScalar* sp = <CBooleanScalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class UInt8Scalar(Scalar): |
| """ |
| Concrete class for uint8 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python int. |
| """ |
| cdef CUInt8Scalar* sp = <CUInt8Scalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class Int8Scalar(Scalar): |
| """ |
| Concrete class for int8 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python int. |
| """ |
| cdef CInt8Scalar* sp = <CInt8Scalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class UInt16Scalar(Scalar): |
| """ |
| Concrete class for uint16 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python int. |
| """ |
| cdef CUInt16Scalar* sp = <CUInt16Scalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class Int16Scalar(Scalar): |
| """ |
| Concrete class for int16 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python int. |
| """ |
| cdef CInt16Scalar* sp = <CInt16Scalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class UInt32Scalar(Scalar): |
| """ |
| Concrete class for uint32 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python int. |
| """ |
| cdef CUInt32Scalar* sp = <CUInt32Scalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class Int32Scalar(Scalar): |
| """ |
| Concrete class for int32 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python int. |
| """ |
| cdef CInt32Scalar* sp = <CInt32Scalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class UInt64Scalar(Scalar): |
| """ |
| Concrete class for uint64 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python int. |
| """ |
| cdef CUInt64Scalar* sp = <CUInt64Scalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class Int64Scalar(Scalar): |
| """ |
| Concrete class for int64 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python int. |
| """ |
| cdef CInt64Scalar* sp = <CInt64Scalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class HalfFloatScalar(Scalar): |
| """ |
| Concrete class for float scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python float. |
| """ |
| cdef CHalfFloatScalar* sp = <CHalfFloatScalar*> self.wrapped.get() |
| return PyHalf_FromHalf(sp.value) if sp.is_valid else None |
| |
| |
| cdef class FloatScalar(Scalar): |
| """ |
| Concrete class for float scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python float. |
| """ |
| cdef CFloatScalar* sp = <CFloatScalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class DoubleScalar(Scalar): |
| """ |
| Concrete class for double scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python float. |
| """ |
| cdef CDoubleScalar* sp = <CDoubleScalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| |
| cdef class Decimal128Scalar(Scalar): |
| """ |
| Concrete class for decimal128 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python Decimal. |
| """ |
| cdef: |
| CDecimal128Scalar* sp = <CDecimal128Scalar*> self.wrapped.get() |
| CDecimal128Type* dtype = <CDecimal128Type*> sp.type.get() |
| if sp.is_valid: |
| return _pydecimal.Decimal( |
| frombytes(sp.value.ToString(dtype.scale())) |
| ) |
| else: |
| return None |
| |
| |
| cdef class Decimal256Scalar(Scalar): |
| """ |
| Concrete class for decimal256 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python Decimal. |
| """ |
| cdef: |
| CDecimal256Scalar* sp = <CDecimal256Scalar*> self.wrapped.get() |
| CDecimal256Type* dtype = <CDecimal256Type*> sp.type.get() |
| if sp.is_valid: |
| return _pydecimal.Decimal( |
| frombytes(sp.value.ToString(dtype.scale())) |
| ) |
| else: |
| return None |
| |
| |
| cdef class Date32Scalar(Scalar): |
| """ |
| Concrete class for date32 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python datetime.datetime instance. |
| """ |
| cdef CDate32Scalar* sp = <CDate32Scalar*> self.wrapped.get() |
| |
| if sp.is_valid: |
| # shift to seconds since epoch |
| return ( |
| datetime.date(1970, 1, 1) + datetime.timedelta(days=sp.value) |
| ) |
| else: |
| return None |
| |
| |
| cdef class Date64Scalar(Scalar): |
| """ |
| Concrete class for date64 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python datetime.datetime instance. |
| """ |
| cdef CDate64Scalar* sp = <CDate64Scalar*> self.wrapped.get() |
| |
| if sp.is_valid: |
| return ( |
| datetime.date(1970, 1, 1) + |
| datetime.timedelta(days=sp.value / 86400000) |
| ) |
| else: |
| return None |
| |
| |
| def _datetime_from_int(int64_t value, TimeUnit unit, tzinfo=None): |
| if unit == TimeUnit_SECOND: |
| delta = datetime.timedelta(seconds=value) |
| elif unit == TimeUnit_MILLI: |
| delta = datetime.timedelta(milliseconds=value) |
| elif unit == TimeUnit_MICRO: |
| delta = datetime.timedelta(microseconds=value) |
| else: |
| # TimeUnit_NANO: prefer pandas timestamps if available |
| if _pandas_api.have_pandas: |
| return _pandas_api.pd.Timestamp(value, tz=tzinfo, unit='ns') |
| # otherwise safely truncate to microsecond resolution datetime |
| if value % 1000 != 0: |
| raise ValueError( |
| "Nanosecond resolution temporal type {} is not safely " |
| "convertible to microseconds to convert to datetime.datetime. " |
| "Install pandas to return as Timestamp with nanosecond " |
| "support or access the .value attribute.".format(value) |
| ) |
| delta = datetime.timedelta(microseconds=value // 1000) |
| |
| dt = datetime.datetime(1970, 1, 1) + delta |
| # adjust timezone if set to the datatype |
| if tzinfo is not None: |
| dt = dt.replace(tzinfo=datetime.timezone.utc).astimezone(tzinfo) |
| |
| return dt |
| |
| |
| cdef class Time32Scalar(Scalar): |
| """ |
| Concrete class for time32 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python datetime.timedelta instance. |
| """ |
| cdef: |
| CTime32Scalar* sp = <CTime32Scalar*> self.wrapped.get() |
| CTime32Type* dtype = <CTime32Type*> sp.type.get() |
| |
| if sp.is_valid: |
| return _datetime_from_int(sp.value, unit=dtype.unit()).time() |
| else: |
| return None |
| |
| |
| cdef class Time64Scalar(Scalar): |
| """ |
| Concrete class for time64 scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python datetime.timedelta instance. |
| """ |
| cdef: |
| CTime64Scalar* sp = <CTime64Scalar*> self.wrapped.get() |
| CTime64Type* dtype = <CTime64Type*> sp.type.get() |
| |
| if sp.is_valid: |
| return _datetime_from_int(sp.value, unit=dtype.unit()).time() |
| else: |
| return None |
| |
| |
| cdef class TimestampScalar(Scalar): |
| """ |
| Concrete class for timestamp scalars. |
| """ |
| |
| @property |
| def value(self): |
| cdef CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| def as_py(self): |
| """ |
| Return this value as a Pandas Timestamp instance (if units are |
| nanoseconds and pandas is available), otherwise as a Python |
| datetime.datetime instance. |
| """ |
| cdef: |
| CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get() |
| CTimestampType* dtype = <CTimestampType*> sp.type.get() |
| |
| if not sp.is_valid: |
| return None |
| |
| if not dtype.timezone().empty(): |
| tzinfo = string_to_tzinfo(frombytes(dtype.timezone())) |
| else: |
| tzinfo = None |
| |
| return _datetime_from_int(sp.value, unit=dtype.unit(), tzinfo=tzinfo) |
| |
| |
| cdef class DurationScalar(Scalar): |
| """ |
| Concrete class for duration scalars. |
| """ |
| |
| @property |
| def value(self): |
| cdef CDurationScalar* sp = <CDurationScalar*> self.wrapped.get() |
| return sp.value if sp.is_valid else None |
| |
| def as_py(self): |
| """ |
| Return this value as a Pandas Timedelta instance (if units are |
| nanoseconds and pandas is available), otherwise as a Python |
| datetime.timedelta instance. |
| """ |
| cdef: |
| CDurationScalar* sp = <CDurationScalar*> self.wrapped.get() |
| CDurationType* dtype = <CDurationType*> sp.type.get() |
| TimeUnit unit = dtype.unit() |
| |
| if not sp.is_valid: |
| return None |
| |
| if unit == TimeUnit_SECOND: |
| return datetime.timedelta(seconds=sp.value) |
| elif unit == TimeUnit_MILLI: |
| return datetime.timedelta(milliseconds=sp.value) |
| elif unit == TimeUnit_MICRO: |
| return datetime.timedelta(microseconds=sp.value) |
| else: |
| # TimeUnit_NANO: prefer pandas timestamps if available |
| if _pandas_api.have_pandas: |
| return _pandas_api.pd.Timedelta(sp.value, unit='ns') |
| # otherwise safely truncate to microsecond resolution timedelta |
| if sp.value % 1000 != 0: |
| raise ValueError( |
| "Nanosecond duration {} is not safely convertible to " |
| "microseconds to convert to datetime.timedelta. Install " |
| "pandas to return as Timedelta with nanosecond support or " |
| "access the .value attribute.".format(sp.value) |
| ) |
| return datetime.timedelta(microseconds=sp.value // 1000) |
| |
| |
| cdef class MonthDayNanoIntervalScalar(Scalar): |
| """ |
| Concrete class for month, day, nanosecond interval scalars. |
| """ |
| |
| @property |
| def value(self): |
| """ |
| Same as self.as_py() |
| """ |
| return self.as_py() |
| |
| def as_py(self): |
| """ |
| Return this value as a pyarrow.MonthDayNano. |
| """ |
| cdef: |
| PyObject* val |
| CMonthDayNanoIntervalScalar* scalar |
| scalar = <CMonthDayNanoIntervalScalar*>self.wrapped.get() |
| val = GetResultValue(MonthDayNanoIntervalScalarToPyObject( |
| deref(scalar))) |
| return PyObject_to_object(val) |
| |
| |
| cdef class BinaryScalar(Scalar): |
| """ |
| Concrete class for binary-like scalars. |
| """ |
| |
| def as_buffer(self): |
| """ |
| Return a view over this value as a Buffer object. |
| """ |
| cdef CBaseBinaryScalar* sp = <CBaseBinaryScalar*> self.wrapped.get() |
| return pyarrow_wrap_buffer(sp.value) if sp.is_valid else None |
| |
| def as_py(self): |
| """ |
| Return this value as a Python bytes. |
| """ |
| buffer = self.as_buffer() |
| return None if buffer is None else buffer.to_pybytes() |
| |
| |
| cdef class LargeBinaryScalar(BinaryScalar): |
| pass |
| |
| |
| cdef class FixedSizeBinaryScalar(BinaryScalar): |
| pass |
| |
| |
| cdef class StringScalar(BinaryScalar): |
| """ |
| Concrete class for string-like (utf8) scalars. |
| """ |
| |
| def as_py(self): |
| """ |
| Return this value as a Python string. |
| """ |
| buffer = self.as_buffer() |
| return None if buffer is None else str(buffer, 'utf8') |
| |
| |
| cdef class LargeStringScalar(StringScalar): |
| pass |
| |
| |
| cdef class ListScalar(Scalar): |
| """ |
| Concrete class for list-like scalars. |
| """ |
| |
| @property |
| def values(self): |
| cdef CBaseListScalar* sp = <CBaseListScalar*> self.wrapped.get() |
| if sp.is_valid: |
| return pyarrow_wrap_array(sp.value) |
| else: |
| return None |
| |
| def __len__(self): |
| """ |
| Return the number of values. |
| """ |
| return len(self.values) |
| |
| def __getitem__(self, i): |
| """ |
| Return the value at the given index. |
| """ |
| return self.values[_normalize_index(i, len(self))] |
| |
| def __iter__(self): |
| """ |
| Iterate over this element's values. |
| """ |
| return iter(self.values) |
| |
| def as_py(self): |
| """ |
| Return this value as a Python list. |
| """ |
| arr = self.values |
| return None if arr is None else arr.to_pylist() |
| |
| |
| cdef class FixedSizeListScalar(ListScalar): |
| pass |
| |
| |
| cdef class LargeListScalar(ListScalar): |
| pass |
| |
| |
| cdef class StructScalar(Scalar, collections.abc.Mapping): |
| """ |
| Concrete class for struct scalars. |
| """ |
| |
| def __len__(self): |
| cdef CStructScalar* sp = <CStructScalar*> self.wrapped.get() |
| return sp.value.size() |
| |
| def __iter__(self): |
| cdef: |
| CStructScalar* sp = <CStructScalar*> self.wrapped.get() |
| CStructType* dtype = <CStructType*> sp.type.get() |
| vector[shared_ptr[CField]] fields = dtype.fields() |
| |
| for i in range(dtype.num_fields()): |
| yield frombytes(fields[i].get().name()) |
| |
| def items(self): |
| return ((key, self[i]) for i, key in enumerate(self)) |
| |
| def __contains__(self, key): |
| return key in list(self) |
| |
| def __getitem__(self, key): |
| """ |
| Return the child value for the given field. |
| |
| Parameters |
| ---------- |
| index : Union[int, str] |
| Index / position or name of the field. |
| |
| Returns |
| ------- |
| result : Scalar |
| """ |
| cdef: |
| CFieldRef ref |
| CStructScalar* sp = <CStructScalar*> self.wrapped.get() |
| |
| if isinstance(key, (bytes, str)): |
| ref = CFieldRef(<c_string> tobytes(key)) |
| elif isinstance(key, int): |
| ref = CFieldRef(<int> key) |
| else: |
| raise TypeError('Expected integer or string index') |
| |
| try: |
| return Scalar.wrap(GetResultValue(sp.field(ref))) |
| except ArrowInvalid as exc: |
| if isinstance(key, int): |
| raise IndexError(key) from exc |
| else: |
| raise KeyError(key) from exc |
| |
| def as_py(self): |
| """ |
| Return this value as a Python dict. |
| """ |
| if self.is_valid: |
| try: |
| return {k: self[k].as_py() for k in self.keys()} |
| except KeyError: |
| raise ValueError( |
| "Converting to Python dictionary is not supported when " |
| "duplicate field names are present") |
| else: |
| return None |
| |
| def _as_py_tuple(self): |
| # a version that returns a tuple instead of dict to support repr/str |
| # with the presence of duplicate field names |
| if self.is_valid: |
| return [(key, self[i].as_py()) for i, key in enumerate(self)] |
| else: |
| return None |
| |
| def __repr__(self): |
| return '<pyarrow.{}: {!r}>'.format( |
| self.__class__.__name__, self._as_py_tuple() |
| ) |
| |
| def __str__(self): |
| return str(self._as_py_tuple()) |
| |
| |
| cdef class MapScalar(ListScalar): |
| """ |
| Concrete class for map scalars. |
| """ |
| |
| def __getitem__(self, i): |
| """ |
| Return the value at the given index. |
| """ |
| arr = self.values |
| if arr is None: |
| raise IndexError(i) |
| dct = arr[_normalize_index(i, len(arr))] |
| return (dct['key'], dct['value']) |
| |
| def __iter__(self): |
| """ |
| Iterate over this element's values. |
| """ |
| arr = self.values |
| if array is None: |
| raise StopIteration |
| for k, v in zip(arr.field('key'), arr.field('value')): |
| yield (k.as_py(), v.as_py()) |
| |
| def as_py(self): |
| """ |
| Return this value as a Python list. |
| """ |
| cdef CStructScalar* sp = <CStructScalar*> self.wrapped.get() |
| return list(self) if sp.is_valid else None |
| |
| |
| cdef class DictionaryScalar(Scalar): |
| """ |
| Concrete class for dictionary-encoded scalars. |
| """ |
| |
| @classmethod |
| def _reconstruct(cls, type, is_valid, index, dictionary): |
| cdef: |
| CDictionaryScalarIndexAndDictionary value |
| shared_ptr[CDictionaryScalar] wrapped |
| DataType type_ |
| Scalar index_ |
| Array dictionary_ |
| |
| type_ = ensure_type(type, allow_none=False) |
| if not isinstance(type_, DictionaryType): |
| raise TypeError('Must pass a DictionaryType instance') |
| |
| if isinstance(index, Scalar): |
| if not index.type.equals(type.index_type): |
| raise TypeError("The Scalar value passed as index must have " |
| "identical type to the dictionary type's " |
| "index_type") |
| index_ = index |
| else: |
| index_ = scalar(index, type=type_.index_type) |
| |
| if isinstance(dictionary, Array): |
| if not dictionary.type.equals(type.value_type): |
| raise TypeError("The Array passed as dictionary must have " |
| "identical type to the dictionary type's " |
| "value_type") |
| dictionary_ = dictionary |
| else: |
| dictionary_ = array(dictionary, type=type_.value_type) |
| |
| value.index = pyarrow_unwrap_scalar(index_) |
| value.dictionary = pyarrow_unwrap_array(dictionary_) |
| |
| wrapped = make_shared[CDictionaryScalar]( |
| value, pyarrow_unwrap_data_type(type_), <c_bool>(is_valid) |
| ) |
| return Scalar.wrap(<shared_ptr[CScalar]> wrapped) |
| |
| def __reduce__(self): |
| return DictionaryScalar._reconstruct, ( |
| self.type, self.is_valid, self.index, self.dictionary |
| ) |
| |
| @property |
| def index(self): |
| """ |
| Return this value's underlying index as a scalar. |
| """ |
| cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get() |
| return Scalar.wrap(sp.value.index) |
| |
| @property |
| def value(self): |
| """ |
| Return the encoded value as a scalar. |
| """ |
| cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get() |
| return Scalar.wrap(GetResultValue(sp.GetEncodedValue())) |
| |
| @property |
| def dictionary(self): |
| cdef CDictionaryScalar* sp = <CDictionaryScalar*> self.wrapped.get() |
| return pyarrow_wrap_array(sp.value.dictionary) |
| |
| def as_py(self): |
| """ |
| Return this encoded value as a Python object. |
| """ |
| return self.value.as_py() if self.is_valid else None |
| |
| @property |
| def index_value(self): |
| warnings.warn("`index_value` property is deprecated as of 1.0.0" |
| "please use the `index` property instead", |
| FutureWarning) |
| return self.index |
| |
| @property |
| def dictionary_value(self): |
| warnings.warn("`dictionary_value` property is deprecated as of 1.0.0, " |
| "please use the `value` property instead", FutureWarning) |
| return self.value |
| |
| |
| cdef class UnionScalar(Scalar): |
| """ |
| Concrete class for Union scalars. |
| """ |
| |
| @property |
| def value(self): |
| """ |
| Return underlying value as a scalar. |
| """ |
| cdef CSparseUnionScalar* sp |
| cdef CDenseUnionScalar* dp |
| if self.type.id == _Type_SPARSE_UNION: |
| sp = <CSparseUnionScalar*> self.wrapped.get() |
| return Scalar.wrap(sp.value[sp.child_id]) if sp.is_valid else None |
| else: |
| dp = <CDenseUnionScalar*> self.wrapped.get() |
| return Scalar.wrap(dp.value) if dp.is_valid else None |
| |
| def as_py(self): |
| """ |
| Return underlying value as a Python object. |
| """ |
| value = self.value |
| return None if value is None else value.as_py() |
| |
| @property |
| def type_code(self): |
| """ |
| Return the union type code for this scalar. |
| """ |
| cdef CUnionScalar* sp = <CUnionScalar*> self.wrapped.get() |
| return sp.type_code |
| |
| |
| cdef class ExtensionScalar(Scalar): |
| """ |
| Concrete class for Extension scalars. |
| """ |
| |
| @property |
| def value(self): |
| """ |
| Return storage value as a scalar. |
| """ |
| cdef CExtensionScalar* sp = <CExtensionScalar*> self.wrapped.get() |
| return Scalar.wrap(sp.value) if sp.is_valid else None |
| |
| def as_py(self): |
| """ |
| Return this scalar as a Python object. |
| """ |
| return None if self.value is None else self.value.as_py() |
| |
| @staticmethod |
| def from_storage(BaseExtensionType typ, value): |
| """ |
| Construct ExtensionScalar from type and storage value. |
| |
| Parameters |
| ---------- |
| typ : DataType |
| The extension type for the result scalar. |
| value : object |
| The storage value for the result scalar. |
| |
| Returns |
| ------- |
| ext_scalar : ExtensionScalar |
| """ |
| cdef: |
| shared_ptr[CExtensionScalar] sp_scalar |
| shared_ptr[CScalar] sp_storage |
| CExtensionScalar* ext_scalar |
| |
| if value is None: |
| storage = None |
| elif isinstance(value, Scalar): |
| if value.type != typ.storage_type: |
| raise TypeError("Incompatible storage type {0} " |
| "for extension type {1}" |
| .format(value.type, typ)) |
| storage = value |
| else: |
| storage = scalar(value, typ.storage_type) |
| |
| cdef c_bool is_valid = storage is not None and storage.is_valid |
| if is_valid: |
| sp_storage = pyarrow_unwrap_scalar(storage) |
| else: |
| sp_storage = MakeNullScalar((<DataType> typ.storage_type).sp_type) |
| sp_scalar = make_shared[CExtensionScalar](sp_storage, typ.sp_type, |
| is_valid) |
| with nogil: |
| check_status(sp_scalar.get().Validate()) |
| return pyarrow_wrap_scalar(<shared_ptr[CScalar]> sp_scalar) |
| |
| |
| cdef dict _scalar_classes = { |
| _Type_BOOL: BooleanScalar, |
| _Type_UINT8: UInt8Scalar, |
| _Type_UINT16: UInt16Scalar, |
| _Type_UINT32: UInt32Scalar, |
| _Type_UINT64: UInt64Scalar, |
| _Type_INT8: Int8Scalar, |
| _Type_INT16: Int16Scalar, |
| _Type_INT32: Int32Scalar, |
| _Type_INT64: Int64Scalar, |
| _Type_HALF_FLOAT: HalfFloatScalar, |
| _Type_FLOAT: FloatScalar, |
| _Type_DOUBLE: DoubleScalar, |
| _Type_DECIMAL128: Decimal128Scalar, |
| _Type_DECIMAL256: Decimal256Scalar, |
| _Type_DATE32: Date32Scalar, |
| _Type_DATE64: Date64Scalar, |
| _Type_TIME32: Time32Scalar, |
| _Type_TIME64: Time64Scalar, |
| _Type_TIMESTAMP: TimestampScalar, |
| _Type_DURATION: DurationScalar, |
| _Type_BINARY: BinaryScalar, |
| _Type_LARGE_BINARY: LargeBinaryScalar, |
| _Type_FIXED_SIZE_BINARY: FixedSizeBinaryScalar, |
| _Type_STRING: StringScalar, |
| _Type_LARGE_STRING: LargeStringScalar, |
| _Type_LIST: ListScalar, |
| _Type_LARGE_LIST: LargeListScalar, |
| _Type_FIXED_SIZE_LIST: FixedSizeListScalar, |
| _Type_STRUCT: StructScalar, |
| _Type_MAP: MapScalar, |
| _Type_DICTIONARY: DictionaryScalar, |
| _Type_SPARSE_UNION: UnionScalar, |
| _Type_DENSE_UNION: UnionScalar, |
| _Type_INTERVAL_MONTH_DAY_NANO: MonthDayNanoIntervalScalar, |
| _Type_EXTENSION: ExtensionScalar, |
| } |
| |
| |
| cdef object get_scalar_class_from_type( |
| const shared_ptr[CDataType]& sp_data_type): |
| cdef CDataType* data_type = sp_data_type.get() |
| if data_type == NULL: |
| raise ValueError('Scalar data type was NULL') |
| |
| if data_type.id() == _Type_EXTENSION: |
| py_ext_data_type = pyarrow_wrap_data_type(sp_data_type) |
| return py_ext_data_type.__arrow_ext_scalar_class__() |
| else: |
| return _scalar_classes[data_type.id()] |
| |
| |
| def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None): |
| """ |
| Create a pyarrow.Scalar instance from a Python object. |
| |
| Parameters |
| ---------- |
| value : Any |
| Python object coercible to arrow's type system. |
| type : pyarrow.DataType |
| Explicit type to attempt to coerce to, otherwise will be inferred from |
| the value. |
| from_pandas : bool, default None |
| Use pandas's semantics for inferring nulls from values in |
| ndarray-like data. Defaults to False if not passed explicitly by user, |
| or True if a pandas object is passed in. |
| memory_pool : pyarrow.MemoryPool, optional |
| If not passed, will allocate memory from the currently-set default |
| memory pool. |
| |
| Returns |
| ------- |
| scalar : pyarrow.Scalar |
| |
| Examples |
| -------- |
| >>> import pyarrow as pa |
| |
| >>> pa.scalar(42) |
| <pyarrow.Int64Scalar: 42> |
| |
| >>> pa.scalar("string") |
| <pyarrow.StringScalar: 'string'> |
| |
| >>> pa.scalar([1, 2]) |
| <pyarrow.ListScalar: [1, 2]> |
| |
| >>> pa.scalar([1, 2], type=pa.list_(pa.int16())) |
| <pyarrow.ListScalar: [1, 2]> |
| """ |
| cdef: |
| DataType ty |
| PyConversionOptions options |
| shared_ptr[CScalar] scalar |
| shared_ptr[CArray] array |
| shared_ptr[CChunkedArray] chunked |
| bint is_pandas_object = False |
| CMemoryPool* pool |
| |
| type = ensure_type(type, allow_none=True) |
| pool = maybe_unbox_memory_pool(memory_pool) |
| |
| if _is_array_like(value): |
| value = get_values(value, &is_pandas_object) |
| |
| options.size = 1 |
| |
| if type is not None: |
| ty = ensure_type(type) |
| options.type = ty.sp_type |
| |
| if from_pandas is None: |
| options.from_pandas = is_pandas_object |
| else: |
| options.from_pandas = from_pandas |
| |
| value = [value] |
| with nogil: |
| chunked = GetResultValue(ConvertPySequence(value, None, options, pool)) |
| |
| # get the first chunk |
| assert chunked.get().num_chunks() == 1 |
| array = chunked.get().chunk(0) |
| |
| # retrieve the scalar from the first position |
| scalar = GetResultValue(array.get().GetScalar(0)) |
| return Scalar.wrap(scalar) |