Python: Fix reading UUIDs (#6486)
diff --git a/python/pyiceberg/avro/decoder.py b/python/pyiceberg/avro/decoder.py
index f2690c4..f2e67d0 100644
--- a/python/pyiceberg/avro/decoder.py
+++ b/python/pyiceberg/avro/decoder.py
@@ -18,6 +18,7 @@
import struct
from datetime import datetime, time
from io import SEEK_CUR
+from uuid import UUID
from pyiceberg.io import InputStream
from pyiceberg.utils.datetime import micros_to_time, micros_to_timestamp, micros_to_timestamptz
@@ -154,6 +155,10 @@
"""
return micros_to_timestamptz(self.read_int())
+ def read_uuid_from_fixed(self) -> UUID:
+ """Reads a UUID as a fixed[16]"""
+ return UUID(bytes=self.read(16))  # UUID(bytes=...) requires exactly 16 bytes, the width of the fixed
+
def skip_boolean(self) -> None:
self.skip(1)
diff --git a/python/pyiceberg/avro/reader.py b/python/pyiceberg/avro/reader.py
index 8264085..bbb3f82 100644
--- a/python/pyiceberg/avro/reader.py
+++ b/python/pyiceberg/avro/reader.py
@@ -64,6 +64,7 @@
TimestampType,
TimestamptzType,
TimeType,
+ UUIDType,
)
from pyiceberg.utils.singleton import Singleton
@@ -209,10 +210,10 @@
class UUIDReader(Reader):
def read(self, decoder: BinaryDecoder) -> UUID:
- return UUID(decoder.read_utf8())
+ return decoder.read_uuid_from_fixed()
def skip(self, decoder: BinaryDecoder) -> None:
- decoder.skip_utf8()
+ decoder.skip(16)  # must match the 16 bytes consumed by read_uuid_from_fixed()
@dataclass(frozen=True)
@@ -431,3 +432,8 @@
@primitive_reader.register
def _(_: BinaryType) -> Reader:
return BinaryReader()
+
+
+@primitive_reader.register
+def _(_: UUIDType) -> Reader:
+ return UUIDReader()
diff --git a/python/pyiceberg/io/pyarrow.py b/python/pyiceberg/io/pyarrow.py
index ce4e9b8..e19f96f 100644
--- a/python/pyiceberg/io/pyarrow.py
+++ b/python/pyiceberg/io/pyarrow.py
@@ -76,6 +76,7 @@
TimestampType,
TimestamptzType,
TimeType,
+ UUIDType,
)
@@ -383,6 +384,11 @@
@_iceberg_to_pyarrow_type.register
+def _(_: UUIDType) -> pa.DataType:
+ return pa.binary(16)  # a length argument makes this fixed-size binary: 16 bytes per UUID
+
+
+@_iceberg_to_pyarrow_type.register
def _(_: BinaryType) -> pa.DataType:
# Variable length by default
return pa.binary()
diff --git a/python/pyiceberg/utils/schema_conversion.py b/python/pyiceberg/utils/schema_conversion.py
index c2bb5c9..2f9c321 100644
--- a/python/pyiceberg/utils/schema_conversion.py
+++ b/python/pyiceberg/utils/schema_conversion.py
@@ -68,7 +68,7 @@
("timestamp-millis", "long"): TimestampType(),
("time-micros", "int"): TimeType(),
("timestamp-micros", "long"): TimestampType(),
- ("uuid", "string"): UUIDType(),
+ ("uuid", "fixed"): UUIDType(),
}
diff --git a/python/tests/avro/test_decoder.py b/python/tests/avro/test_decoder.py
index d48ebd0..8a4ecf0 100644
--- a/python/tests/avro/test_decoder.py
+++ b/python/tests/avro/test_decoder.py
@@ -17,6 +17,7 @@
from datetime import datetime, timezone
from decimal import Decimal
from io import SEEK_SET
+from uuid import UUID
import pytest
@@ -215,3 +216,9 @@
reader = promote(FloatType(), DoubleType())
assert reader.read(decoder) == 19.25
+
+
+def test_read_uuid_from_fixed() -> None:
+ mis = MemoryInputStream(b"\x12\x34\x56\x78" * 4)  # 4 bytes x 4 = the 16 bytes of one UUID
+ decoder = BinaryDecoder(mis)
+ assert decoder.read_uuid_from_fixed() == UUID("{12345678-1234-5678-1234-567812345678}")
diff --git a/python/tests/avro/test_reader.py b/python/tests/avro/test_reader.py
index ac0d001..5b94060 100644
--- a/python/tests/avro/test_reader.py
+++ b/python/tests/avro/test_reader.py
@@ -34,6 +34,7 @@
TimeReader,
TimestampReader,
TimestamptzReader,
+ UUIDReader,
primitive_reader,
)
from pyiceberg.manifest import _convert_pos_to_dict
@@ -57,6 +58,7 @@
TimestampType,
TimestamptzType,
TimeType,
+ UUIDType,
)
from tests.io.test_io import LocalInputFile
@@ -501,3 +503,7 @@
primitive_reader(UnknownType())
assert "Unknown type:" in str(exc_info.value)
+
+
+def test_uuid_reader() -> None:
+ assert primitive_reader(UUIDType()) == UUIDReader()