| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| from __future__ import annotations |
| import enum |
| |
| import pyarrow as pa |
| |
| |
class DlpackDeviceType(enum.IntEnum):
    """
    Device type codes, as integers, matching the ones used by DLPack.

    Members are ``int`` subclasses, so they compare equal to the raw
    integer codes and can be looked up by value, e.g.
    ``DlpackDeviceType(1) is DlpackDeviceType.CPU``.
    """

    CPU = 1
    CUDA = 2
    CPU_PINNED = 3
    OPENCL = 4
    # This enum defines no members for codes 5 and 6; numbering resumes at 7.
    VULKAN = 7
    METAL = 8
    VPI = 9
    ROCM = 10
| |
| |
class _PyArrowBuffer:
    """
    Wrap a PyArrow buffer and expose its size, address and device.

    Data in the buffer is guaranteed to be contiguous in memory.

    Note that there is no dtype attribute present, a buffer can be thought of
    as simply a block of memory. However, if the column that the buffer is
    attached to has a dtype that's supported by DLPack and ``__dlpack__`` is
    implemented, then that dtype information will be contained in the return
    value from ``__dlpack__``.

    This distinction is useful to support both (a) data exchange via DLPack
    on a buffer and (b) dtypes like variable-length strings which do not have
    a fixed number of bytes per element.
    """

    def __init__(self, x: pa.Buffer, allow_copy: bool = True) -> None:
        """
        Handle PyArrow Buffers.

        Parameters
        ----------
        x : pa.Buffer
            The buffer to wrap. The reference is stored as-is; nothing is
            copied here.
        allow_copy : bool, default True
            Accepted as part of the constructor signature, but not read by
            this class.
        """
        self._x = x

    @property
    def bufsize(self) -> int:
        """
        Buffer size in bytes (the wrapped buffer's ``size``).
        """
        return self._x.size

    @property
    def ptr(self) -> int:
        """
        Pointer to start of the buffer as an integer (the wrapped buffer's
        ``address``).
        """
        return self._x.address

    def __dlpack__(self):
        """
        Produce DLPack capsule (see array API standard).

        DLPack export is not implemented for this class, so this method
        always raises.

        Raises
        ------
        NotImplementedError
            Always.

        Useful to have to connect to array libraries. Support optional because
        it's not completely trivial to implement for a Python-only library.
        """
        raise NotImplementedError("__dlpack__")

    def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]:
        """
        Device type and device ID for where the data in the buffer resides.

        Uses device type codes matching DLPack.
        Note: must be implemented even if ``__dlpack__`` is not.

        Returns
        -------
        tuple
            ``(DlpackDeviceType.CPU, None)`` when the wrapped buffer reports
            ``is_cpu``.

        Raises
        ------
        NotImplementedError
            If the wrapped buffer does not report ``is_cpu``.
        """
        if not self._x.is_cpu:
            raise NotImplementedError("__dlpack_device__")
        return (DlpackDeviceType.CPU, None)

    def __repr__(self) -> str:
        """
        Return a debug string with the buffer size, address and device name.

        ``repr()`` must not fail just because the device lookup is not
        implemented for this buffer, so the device is reported as
        ``'unknown'`` in that case instead of propagating the error.
        """
        try:
            device = self.__dlpack_device__()[0].name
        except NotImplementedError:
            device = "unknown"
        details = {
            "bufsize": self.bufsize,
            "ptr": self.ptr,
            "device": device,
        }
        # Formatting a dict in an f-string yields the same text as str(dict).
        return f"PyArrowBuffer({details})"