Arrow: Support Large Binary when using `to_arrow` (#409)
* Arrow: Support Large Binary
* Merge the large binary check into the existing binary branch
---------
Co-authored-by: Fokko Driesprong <fokko@apache.org>
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 8657144..57f09ba 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -533,7 +533,7 @@
return pa.binary(16)
def visit_binary(self, _: BinaryType) -> pa.DataType:
- return pa.binary()
+ return pa.large_binary()
def _convert_scalar(value: Any, iceberg_type: IcebergType) -> pa.scalar:
@@ -882,7 +882,7 @@
return TimestamptzType()
elif primitive.tz is None:
return TimestampType()
- elif pa.types.is_binary(primitive):
+ elif pa.types.is_binary(primitive) or pa.types.is_large_binary(primitive):
return BinaryType()
elif pa.types.is_fixed_size_binary(primitive):
primitive = cast(pa.FixedSizeBinaryType, primitive)
diff --git a/tests/integration/test_writes.py b/tests/integration/test_writes.py
index c08916b..58ab830 100644
--- a/tests/integration/test_writes.py
+++ b/tests/integration/test_writes.py
@@ -140,7 +140,7 @@
# ("time", pa.time64("us")),
# Not natively supported by Arrow
# ("uuid", pa.fixed(16)),
- ("binary", pa.binary()),
+ ("binary", pa.large_binary()),
("fixed", pa.binary(16)),
])
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index 745de1a..a3dd56d 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -467,7 +467,7 @@
def test_binary_type_to_pyarrow() -> None:
iceberg_type = BinaryType()
- assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.binary()
+ assert visit(iceberg_type, _ConvertToArrowSchema()) == pa.large_binary()
def test_struct_type_to_pyarrow(table_schema_simple: Schema) -> None:
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index c6ba18c..7d35cae 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -215,7 +215,7 @@
def test_pyarrow_variable_binary_to_iceberg() -> None:
- pyarrow_type = pa.binary()
+ pyarrow_type = pa.large_binary()
converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
assert converted_iceberg_type == BinaryType()
assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type