python/pyarrow/tests/test_cffi.py - arrow - Git at Google

 # -*- coding: utf-8 -*-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import contextlib
 import ctypes
 import gc

 import pyarrow as pa
 try:
     from pyarrow.cffi import ffi
 except ImportError:
     ffi = None

 import pytest

 try:
     import pandas as pd
     import pandas.testing as tm
 except ImportError:
     pd = tm = None


 needs_cffi = pytest.mark.skipif(ffi is None,
                                 reason="test needs cffi package installed")

 assert_schema_released = pytest.raises(
     ValueError, match="Cannot import released ArrowSchema")

 assert_array_released = pytest.raises(
     ValueError, match="Cannot import released ArrowArray")

 assert_stream_released = pytest.raises(
     ValueError, match="Cannot import released Arrow Stream")


 def PyCapsule_IsValid(capsule, name):
     return ctypes.pythonapi.PyCapsule_IsValid(ctypes.py_object(capsule), name) == 1


 @contextlib.contextmanager
 def registered_extension_type(ext_type):
     pa.register_extension_type(ext_type)
     try:
         yield
     finally:
         pa.unregister_extension_type(ext_type.extension_name)


 class ParamExtType(pa.ExtensionType):

     def __init__(self, width):
         self._width = width
         super().__init__(pa.binary(width),
                          "pyarrow.tests.test_cffi.ParamExtType")

     @property
     def width(self):
         return self._width

     def __arrow_ext_serialize__(self):
         return str(self.width).encode()

     @classmethod
     def __arrow_ext_deserialize__(cls, storage_type, serialized):
         width = int(serialized.decode())
         return cls(width)


 def make_schema():
     return pa.schema([('ints', pa.list_(pa.int32()))],
                      metadata={b'key1': b'value1'})


 def make_extension_schema():
     return pa.schema([('ext', ParamExtType(3))],
                      metadata={b'key1': b'value1'})


 def make_extension_storage_schema():
     # Should be kept in sync with make_extension_schema
     return pa.schema([('ext', ParamExtType(3).storage_type)],
                      metadata={b'key1': b'value1'})


 def make_batch():
     return pa.record_batch([[[1], [2, 42]]], make_schema())


 def make_extension_batch():
     schema = make_extension_schema()
     ext_col = schema[0].type.wrap_array(pa.array([b"foo", b"bar"],
                                                  type=pa.binary(3)))
     return pa.record_batch([ext_col], schema)


 def make_batches():
     schema = make_schema()
     return [
         pa.record_batch([[[1], [2, 42]]], schema),
         pa.record_batch([[None, [], [5, 6]]], schema),
     ]


 def make_serialized(schema, batches):
     with pa.BufferOutputStream() as sink:
         with pa.ipc.new_stream(sink, schema) as out:
             for batch in batches:
                 out.write(batch)
         return sink.getvalue()


 @needs_cffi
 def test_export_import_type():
     c_schema = ffi.new("struct ArrowSchema*")
     ptr_schema = int(ffi.cast("uintptr_t", c_schema))

     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     typ = pa.list_(pa.int32())
     typ._export_to_c(ptr_schema)
     assert pa.total_allocated_bytes() > old_allocated
     # Delete and recreate C++ object from exported pointer
     del typ
     assert pa.total_allocated_bytes() > old_allocated
     typ_new = pa.DataType._import_from_c(ptr_schema)
     assert typ_new == pa.list_(pa.int32())
     assert pa.total_allocated_bytes() == old_allocated
     # Now released
     with assert_schema_released:
         pa.DataType._import_from_c(ptr_schema)

     # Invalid format string
     pa.int32()._export_to_c(ptr_schema)
     bad_format = ffi.new("char[]", b"zzz")
     c_schema.format = bad_format
     with pytest.raises(ValueError,
                        match="Invalid or unsupported format string"):
         pa.DataType._import_from_c(ptr_schema)
     # Now released
     with assert_schema_released:
         pa.DataType._import_from_c(ptr_schema)


 @needs_cffi
 def test_export_import_field():
     c_schema = ffi.new("struct ArrowSchema*")
     ptr_schema = int(ffi.cast("uintptr_t", c_schema))

     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     field = pa.field("test", pa.list_(pa.int32()), nullable=True)
     field._export_to_c(ptr_schema)
     assert pa.total_allocated_bytes() > old_allocated
     # Delete and recreate C++ object from exported pointer
     del field
     assert pa.total_allocated_bytes() > old_allocated

     field_new = pa.Field._import_from_c(ptr_schema)
     assert field_new == pa.field("test", pa.list_(pa.int32()), nullable=True)
     assert pa.total_allocated_bytes() == old_allocated

     # Now released
     with assert_schema_released:
         pa.Field._import_from_c(ptr_schema)


 def check_export_import_array(array_type, exporter, importer):
     c_schema = ffi.new("struct ArrowSchema*")
     ptr_schema = int(ffi.cast("uintptr_t", c_schema))
     c_array = ffi.new(f"struct {array_type}*")
     ptr_array = int(ffi.cast("uintptr_t", c_array))

     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     # Type is known up front
     typ = pa.list_(pa.int32())
     arr = pa.array([[1], [2, 42]], type=typ)
     py_value = arr.to_pylist()
     exporter(arr, ptr_array)
     assert pa.total_allocated_bytes() > old_allocated
     # Delete recreate C++ object from exported pointer
     del arr
     arr_new = importer(ptr_array, typ)
     assert arr_new.to_pylist() == py_value
     assert arr_new.type == pa.list_(pa.int32())
     assert pa.total_allocated_bytes() > old_allocated
     del arr_new, typ
     assert pa.total_allocated_bytes() == old_allocated
     # Now released
     with assert_array_released:
         importer(ptr_array, pa.list_(pa.int32()))

     # Type is exported and imported at the same time
     arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32()))
     py_value = arr.to_pylist()
     exporter(arr, ptr_array, ptr_schema)
     # Delete and recreate C++ objects from exported pointers
     del arr
     arr_new = importer(ptr_array, ptr_schema)
     assert arr_new.to_pylist() == py_value
     assert arr_new.type == pa.list_(pa.int32())
     assert pa.total_allocated_bytes() > old_allocated
     del arr_new
     assert pa.total_allocated_bytes() == old_allocated
     # Now released
     with assert_schema_released:
         importer(ptr_array, ptr_schema)


 @needs_cffi
 def test_export_import_array():
     check_export_import_array(
         "ArrowArray",
         pa.Array._export_to_c,
         pa.Array._import_from_c,
     )


 @needs_cffi
 def test_export_import_device_array():
     check_export_import_array(
         "ArrowDeviceArray",
         pa.Array._export_to_c_device,
         pa.Array._import_from_c_device,
     )

     # verify exported struct
     c_array = ffi.new("struct ArrowDeviceArray*")
     ptr_array = int(ffi.cast("uintptr_t", c_array))
     arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32()))
     arr._export_to_c_device(ptr_array)

     assert c_array.device_type == 1  # ARROW_DEVICE_CPU 1
     assert c_array.device_id == -1
     assert c_array.array.length == 2


 def check_export_import_schema(schema_factory, expected_schema_factory=None):
     if expected_schema_factory is None:
         expected_schema_factory = schema_factory

     c_schema = ffi.new("struct ArrowSchema*")
     ptr_schema = int(ffi.cast("uintptr_t", c_schema))

     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     schema_factory()._export_to_c(ptr_schema)
     assert pa.total_allocated_bytes() > old_allocated
     # Delete and recreate C++ object from exported pointer
     schema_new = pa.Schema._import_from_c(ptr_schema)
     assert schema_new == expected_schema_factory()
     assert pa.total_allocated_bytes() == old_allocated
     del schema_new
     assert pa.total_allocated_bytes() == old_allocated
     # Now released
     with assert_schema_released:
         pa.Schema._import_from_c(ptr_schema)

     # Not a struct type
     pa.int32()._export_to_c(ptr_schema)
     with pytest.raises(ValueError,
                        match="ArrowSchema describes non-struct type"):
         pa.Schema._import_from_c(ptr_schema)
     # Now released
     with assert_schema_released:
         pa.Schema._import_from_c(ptr_schema)


 @needs_cffi
 def test_export_import_schema():
     check_export_import_schema(make_schema)


 @needs_cffi
 def test_export_import_schema_with_extension():
     # Extension type is unregistered => the storage type is imported
     check_export_import_schema(make_extension_schema,
                                make_extension_storage_schema)

     # Extension type is registered => the extension type is imported
     with registered_extension_type(ParamExtType(1)):
         check_export_import_schema(make_extension_schema)


 @needs_cffi
 def test_export_import_schema_float_pointer():
     # Previous versions of the R Arrow library used to pass pointer
     # values as a double.
     c_schema = ffi.new("struct ArrowSchema*")
     ptr_schema = int(ffi.cast("uintptr_t", c_schema))

     match = "Passing a pointer value as a float is unsafe"
     with pytest.warns(UserWarning, match=match):
         make_schema()._export_to_c(float(ptr_schema))
     with pytest.warns(UserWarning, match=match):
         schema_new = pa.Schema._import_from_c(float(ptr_schema))
     assert schema_new == make_schema()


 def check_export_import_batch(array_type, exporter, importer, batch_factory):
     c_schema = ffi.new("struct ArrowSchema*")
     ptr_schema = int(ffi.cast("uintptr_t", c_schema))
     c_array = ffi.new(f"struct {array_type}*")
     ptr_array = int(ffi.cast("uintptr_t", c_array))

     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     # Schema is known up front
     batch = batch_factory()
     schema = batch.schema
     py_value = batch.to_pydict()
     exporter(batch, ptr_array)
     assert pa.total_allocated_bytes() > old_allocated
     # Delete and recreate C++ object from exported pointer
     del batch
     batch_new = importer(ptr_array, schema)
     assert batch_new.to_pydict() == py_value
     assert batch_new.schema == schema
     assert pa.total_allocated_bytes() > old_allocated
     del batch_new, schema
     assert pa.total_allocated_bytes() == old_allocated
     # Now released
     with assert_array_released:
         importer(ptr_array, make_schema())

     # Type is exported and imported at the same time
     batch = batch_factory()
     py_value = batch.to_pydict()
     batch._export_to_c(ptr_array, ptr_schema)
     # Delete and recreate C++ objects from exported pointers
     del batch
     batch_new = importer(ptr_array, ptr_schema)
     assert batch_new.to_pydict() == py_value
     assert batch_new.schema == batch_factory().schema
     assert pa.total_allocated_bytes() > old_allocated
     del batch_new
     assert pa.total_allocated_bytes() == old_allocated
     # Now released
     with assert_schema_released:
         importer(ptr_array, ptr_schema)

     # Not a struct type
     pa.int32()._export_to_c(ptr_schema)
     batch_factory()._export_to_c(ptr_array)
     with pytest.raises(ValueError,
                        match="ArrowSchema describes non-struct type"):
         importer(ptr_array, ptr_schema)
     # Now released
     with assert_schema_released:
         importer(ptr_array, ptr_schema)


 @needs_cffi
 def test_export_import_batch():
     check_export_import_batch(
         "ArrowArray",
         pa.RecordBatch._export_to_c,
         pa.RecordBatch._import_from_c,
         make_batch,
     )


 @needs_cffi
 def test_export_import_batch_with_extension():
     with registered_extension_type(ParamExtType(1)):
         check_export_import_batch(
             "ArrowArray",
             pa.RecordBatch._export_to_c,
             pa.RecordBatch._import_from_c,
             make_extension_batch,
         )


 @needs_cffi
 def test_export_import_device_batch():
     check_export_import_batch(
         "ArrowDeviceArray",
         pa.RecordBatch._export_to_c_device,
         pa.RecordBatch._import_from_c_device,
         make_batch,
     )

     # verify exported struct
     c_array = ffi.new("struct ArrowDeviceArray*")
     ptr_array = int(ffi.cast("uintptr_t", c_array))
     batch = make_batch()
     batch._export_to_c_device(ptr_array)
     assert c_array.device_type == 1  # ARROW_DEVICE_CPU 1
     assert c_array.device_id == -1
     assert c_array.array.length == 2


 def _export_import_batch_reader(ptr_stream, reader_factory):
     # Prepare input
     batches = make_batches()
     schema = batches[0].schema

     reader = reader_factory(schema, batches)
     reader._export_to_c(ptr_stream)
     # Delete and recreate C++ object from exported pointer
     del reader, batches

     reader_new = pa.RecordBatchReader._import_from_c(ptr_stream)
     assert reader_new.schema == schema
     got_batches = list(reader_new)
     del reader_new
     assert got_batches == make_batches()

     # Test read_pandas()
     if pd is not None:
         batches = make_batches()
         schema = batches[0].schema
         expected_df = pa.Table.from_batches(batches).to_pandas()

         reader = reader_factory(schema, batches)
         reader._export_to_c(ptr_stream)
         del reader, batches

         reader_new = pa.RecordBatchReader._import_from_c(ptr_stream)
         got_df = reader_new.read_pandas()
         del reader_new
         tm.assert_frame_equal(expected_df, got_df)


 def make_ipc_stream_reader(schema, batches):
     return pa.ipc.open_stream(make_serialized(schema, batches))


 def make_py_record_batch_reader(schema, batches):
     return pa.RecordBatchReader.from_batches(schema, batches)


 @needs_cffi
 @pytest.mark.parametrize('reader_factory',
                          [make_ipc_stream_reader,
                           make_py_record_batch_reader])
 def test_export_import_batch_reader(reader_factory):
     c_stream = ffi.new("struct ArrowArrayStream*")
     ptr_stream = int(ffi.cast("uintptr_t", c_stream))

     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     _export_import_batch_reader(ptr_stream, reader_factory)

     assert pa.total_allocated_bytes() == old_allocated

     # Now released
     with assert_stream_released:
         pa.RecordBatchReader._import_from_c(ptr_stream)


 @needs_cffi
 def test_export_import_exception_reader():
     # See: https://github.com/apache/arrow/issues/37164
     c_stream = ffi.new("struct ArrowArrayStream*")
     ptr_stream = int(ffi.cast("uintptr_t", c_stream))

     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     def gen():
         if True:
             try:
                 raise ValueError('foo')
             except ValueError as e:
                 raise NotImplementedError('bar') from e
         else:
             yield from make_batches()

     original = pa.RecordBatchReader.from_batches(make_schema(), gen())
     original._export_to_c(ptr_stream)

     reader = pa.RecordBatchReader._import_from_c(ptr_stream)
     with pytest.raises(NotImplementedError) as exc_info:
         reader.read_next_batch()

     # inner *and* outer exception should be present
     assert 'ValueError: foo' in str(exc_info.value)
     assert 'NotImplementedError: bar' in str(exc_info.value)
     # Stacktrace containing line of the raise statement
     assert 'raise ValueError(\'foo\')' in str(exc_info.value)

     assert pa.total_allocated_bytes() == old_allocated


 @needs_cffi
 def test_imported_batch_reader_error():
     c_stream = ffi.new("struct ArrowArrayStream*")
     ptr_stream = int(ffi.cast("uintptr_t", c_stream))

     schema = pa.schema([('foo', pa.int32())])
     batches = [pa.record_batch([[1, 2, 3]], schema=schema),
                pa.record_batch([[4, 5, 6]], schema=schema)]
     buf = make_serialized(schema, batches)

     # Open a corrupt/incomplete stream and export it
     reader = pa.ipc.open_stream(buf[:-16])
     reader._export_to_c(ptr_stream)
     del reader

     reader_new = pa.RecordBatchReader._import_from_c(ptr_stream)
     batch = reader_new.read_next_batch()
     assert batch == batches[0]
     with pytest.raises(OSError,
                        match="Expected to be able to read 16 bytes "
                              "for message body, got 8"):
         reader_new.read_next_batch()

     # Again, but call read_all()
     reader = pa.ipc.open_stream(buf[:-16])
     reader._export_to_c(ptr_stream)
     del reader

     reader_new = pa.RecordBatchReader._import_from_c(ptr_stream)
     with pytest.raises(OSError,
                        match="Expected to be able to read 16 bytes "
                              "for message body, got 8"):
         reader_new.read_all()


 @pytest.mark.parametrize('obj', [pa.int32(), pa.field('foo', pa.int32()),
                                  pa.schema({'foo': pa.int32()})],
                          ids=['type', 'field', 'schema'])
 def test_roundtrip_schema_capsule(obj):
     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     capsule = obj.__arrow_c_schema__()
     assert PyCapsule_IsValid(capsule, b"arrow_schema") == 1
     assert pa.total_allocated_bytes() > old_allocated
     obj_out = type(obj)._import_from_c_capsule(capsule)
     assert obj_out == obj

     assert pa.total_allocated_bytes() == old_allocated

     capsule = obj.__arrow_c_schema__()

     assert pa.total_allocated_bytes() > old_allocated
     del capsule
     assert pa.total_allocated_bytes() == old_allocated


 @pytest.mark.parametrize('arr,schema_accessor,bad_type,good_type', [
     (pa.array(['a', 'b', 'c']), lambda x: x.type, pa.int32(), pa.string()),
     (
         pa.record_batch([pa.array(['a', 'b', 'c'])], names=['x']),
         lambda x: x.schema,
         pa.schema({'x': pa.int32()}),
         pa.schema({'x': pa.string()})
     ),
 ], ids=['array', 'record_batch'])
 def test_roundtrip_array_capsule(arr, schema_accessor, bad_type, good_type):
     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     import_array = type(arr)._import_from_c_capsule

     schema_capsule, capsule = arr.__arrow_c_array__()
     assert PyCapsule_IsValid(schema_capsule, b"arrow_schema") == 1
     assert PyCapsule_IsValid(capsule, b"arrow_array") == 1
     arr_out = import_array(schema_capsule, capsule)
     assert arr_out.equals(arr)

     assert pa.total_allocated_bytes() > old_allocated
     del arr_out

     assert pa.total_allocated_bytes() == old_allocated

     capsule = arr.__arrow_c_array__()

     assert pa.total_allocated_bytes() > old_allocated
     del capsule
     assert pa.total_allocated_bytes() == old_allocated

     with pytest.raises(ValueError,
                        match=r"Could not cast.* string to requested .* int32"):
         arr.__arrow_c_array__(bad_type.__arrow_c_schema__())

     schema_capsule, array_capsule = arr.__arrow_c_array__(
         good_type.__arrow_c_schema__())
     arr_out = import_array(schema_capsule, array_capsule)
     assert schema_accessor(arr_out) == good_type


 @pytest.mark.parametrize('arr,schema_accessor,bad_type,good_type', [
     (pa.array(['a', 'b', 'c']), lambda x: x.type, pa.int32(), pa.string()),
     (
         pa.record_batch([pa.array(['a', 'b', 'c'])], names=['x']),
         lambda x: x.schema,
         pa.schema({'x': pa.int32()}),
         pa.schema({'x': pa.string()})
     ),
 ], ids=['array', 'record_batch'])
 def test_roundtrip_device_array_capsule(arr, schema_accessor, bad_type, good_type):
     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     import_array = type(arr)._import_from_c_device_capsule

     schema_capsule, capsule = arr.__arrow_c_device_array__()
     assert PyCapsule_IsValid(schema_capsule, b"arrow_schema") == 1
     assert PyCapsule_IsValid(capsule, b"arrow_device_array") == 1
     arr_out = import_array(schema_capsule, capsule)
     assert arr_out.equals(arr)

     assert pa.total_allocated_bytes() > old_allocated
     del arr_out

     assert pa.total_allocated_bytes() == old_allocated

     capsule = arr.__arrow_c_array__()

     assert pa.total_allocated_bytes() > old_allocated
     del capsule
     assert pa.total_allocated_bytes() == old_allocated

     with pytest.raises(ValueError,
                        match=r"Could not cast.* string to requested .* int32"):
         arr.__arrow_c_device_array__(bad_type.__arrow_c_schema__())

     schema_capsule, array_capsule = arr.__arrow_c_device_array__(
         good_type.__arrow_c_schema__())
     arr_out = import_array(schema_capsule, array_capsule)
     assert schema_accessor(arr_out) == good_type


 # TODO: implement requested_schema for stream
 @pytest.mark.parametrize('constructor', [
     pa.RecordBatchReader.from_batches,
     # Use a lambda because we need to re-order the parameters
     lambda schema, batches: pa.Table.from_batches(batches, schema),
 ], ids=['recordbatchreader', 'table'])
 def test_roundtrip_reader_capsule(constructor):
     batches = make_batches()
     schema = batches[0].schema

     gc.collect()  # Make sure no Arrow data dangles in a ref cycle
     old_allocated = pa.total_allocated_bytes()

     obj = constructor(schema, batches)

     capsule = obj.__arrow_c_stream__()
     assert PyCapsule_IsValid(capsule, b"arrow_array_stream") == 1
     imported_reader = pa.RecordBatchReader._import_from_c_capsule(capsule)
     assert imported_reader.schema == schema
     imported_batches = list(imported_reader)
     assert len(imported_batches) == len(batches)
     for batch, expected in zip(imported_batches, batches):
         assert batch.equals(expected)

     del obj, imported_reader, batch, expected, imported_batches

     assert pa.total_allocated_bytes() == old_allocated

     obj = constructor(schema, batches)

     bad_schema = pa.schema({'ints': pa.int32()})
     with pytest.raises(pa.lib.ArrowTypeError, match="Field 0 cannot be cast"):
         obj.__arrow_c_stream__(bad_schema.__arrow_c_schema__())

     # Can work with matching schema
     matching_schema = pa.schema({'ints': pa.list_(pa.int32())})
     capsule = obj.__arrow_c_stream__(matching_schema.__arrow_c_schema__())
     imported_reader = pa.RecordBatchReader._import_from_c_capsule(capsule)
     assert imported_reader.schema == matching_schema
     for batch, expected in zip(imported_reader, batches):
         assert batch.equals(expected)


 def test_roundtrip_batch_reader_capsule_requested_schema():
     batch = make_batch()
     requested_schema = pa.schema([('ints', pa.list_(pa.int64()))])
     requested_capsule = requested_schema.__arrow_c_schema__()
     batch_as_requested = batch.cast(requested_schema)

     capsule = batch.__arrow_c_stream__(requested_capsule)
     assert PyCapsule_IsValid(capsule, b"arrow_array_stream") == 1
     imported_reader = pa.RecordBatchReader._import_from_c_capsule(capsule)
     assert imported_reader.schema == requested_schema
     assert imported_reader.read_next_batch().equals(batch_as_requested)
     with pytest.raises(StopIteration):
         imported_reader.read_next_batch()


 def test_roundtrip_batch_reader_capsule():
     batch = make_batch()

     capsule = batch.__arrow_c_stream__()
     assert PyCapsule_IsValid(capsule, b"arrow_array_stream") == 1
     imported_reader = pa.RecordBatchReader._import_from_c_capsule(capsule)
     assert imported_reader.schema == batch.schema
     assert imported_reader.read_next_batch().equals(batch)
     with pytest.raises(StopIteration):
         imported_reader.read_next_batch()


 def test_roundtrip_chunked_array_capsule():
     chunked = pa.chunked_array([pa.array(["a", "b", "c"])])

     capsule = chunked.__arrow_c_stream__()
     assert PyCapsule_IsValid(capsule, b"arrow_array_stream") == 1
     imported_chunked = pa.ChunkedArray._import_from_c_capsule(capsule)
     assert imported_chunked.type == chunked.type
     assert imported_chunked == chunked


 def test_roundtrip_chunked_array_capsule_requested_schema():
     chunked = pa.chunked_array([pa.array(["a", "b", "c"])])

     # Requesting the same type should work
     requested_capsule = chunked.type.__arrow_c_schema__()
     capsule = chunked.__arrow_c_stream__(requested_capsule)
     imported_chunked = pa.ChunkedArray._import_from_c_capsule(capsule)
     assert imported_chunked == chunked

     # Casting to something else should error if not possible
     requested_type = pa.binary()
     requested_capsule = requested_type.__arrow_c_schema__()
     capsule = chunked.__arrow_c_stream__(requested_capsule)
     imported_chunked = pa.ChunkedArray._import_from_c_capsule(capsule)
     assert imported_chunked == chunked.cast(pa.binary())

     requested_type = pa.int64()
     requested_capsule = requested_type.__arrow_c_schema__()
     with pytest.raises(
         ValueError, match="Could not cast string to requested type int64"
     ):
         chunked.__arrow_c_stream__(requested_capsule)


 @needs_cffi
 def test_import_device_no_cuda():
     try:
         import pyarrow.cuda  # noqa
     except ImportError:
         pass
     else:
         pytest.skip("pyarrow.cuda is available")

     c_array = ffi.new("struct ArrowDeviceArray*")
     ptr_array = int(ffi.cast("uintptr_t", c_array))
     arr = pa.array([1, 2, 3], type=pa.int64())
     arr._export_to_c_device(ptr_array)

     # patch the device type of the struct, this results in an invalid ArrowDeviceArray
     # but this is just to test we raise am error before actually importing buffers
     c_array.device_type = 2  # ARROW_DEVICE_CUDA

     with pytest.raises(ImportError, match="Trying to import data on a CUDA device"):
         pa.Array._import_from_c_device(ptr_array, arr.type)