blob: 2c453b6e03b1ac3e7c822edb19f21e3fdeedb9ff [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=redefined-outer-name
import io
import json
from copy import copy
from typing import Any, Dict
from unittest.mock import MagicMock, patch
from uuid import UUID
import pytest
from pyiceberg.exceptions import ValidationError
from pyiceberg.partitioning import PartitionField, PartitionSpec
from pyiceberg.schema import Schema
from pyiceberg.serializers import FromByteStream
from pyiceberg.table import SortOrder
from pyiceberg.table.metadata import (
TableMetadataUtil,
TableMetadataV1,
TableMetadataV2,
new_table_metadata,
)
from pyiceberg.table.refs import SnapshotRef, SnapshotRefType
from pyiceberg.table.sorting import NullOrder, SortDirection, SortField
from pyiceberg.transforms import IdentityTransform
from pyiceberg.typedef import UTF8
from pyiceberg.types import (
BooleanType,
FloatType,
IntegerType,
ListType,
LongType,
MapType,
NestedField,
StringType,
StructType,
)
def test_from_dict_v1(example_table_metadata_v1: Dict[str, Any]) -> None:
"""Test initialization of a TableMetadata instance from a dictionary"""
TableMetadataUtil.parse_obj(example_table_metadata_v1)
def test_from_dict_v1_parse_raw(example_table_metadata_v1: Dict[str, Any]) -> None:
"""Test initialization of a TableMetadata instance from a str"""
TableMetadataUtil.parse_raw(json.dumps(example_table_metadata_v1))
def test_from_dict_v2(example_table_metadata_v2: Dict[str, Any]) -> None:
"""Test initialization of a TableMetadata instance from a dictionary"""
TableMetadataUtil.parse_obj(example_table_metadata_v2)
def test_from_dict_v2_parse_raw(example_table_metadata_v2: Dict[str, Any]) -> None:
"""Test initialization of a TableMetadata instance from a str"""
TableMetadataUtil.parse_raw(json.dumps(example_table_metadata_v2))
def test_from_byte_stream(example_table_metadata_v2: Dict[str, Any]) -> None:
"""Test generating a TableMetadata instance from a file-like byte stream"""
data = bytes(json.dumps(example_table_metadata_v2), encoding=UTF8)
byte_stream = io.BytesIO(data)
FromByteStream.table_metadata(byte_stream=byte_stream)
def test_v2_metadata_parsing(example_table_metadata_v2: Dict[str, Any]) -> None:
"""Test retrieving values from a TableMetadata instance of version 2"""
table_metadata = TableMetadataUtil.parse_obj(example_table_metadata_v2)
assert table_metadata.format_version == 2
assert table_metadata.table_uuid == UUID("9c12d441-03fe-4693-9a96-a0705ddf69c1")
assert table_metadata.location == "s3://bucket/test/location"
assert table_metadata.last_sequence_number == 34
assert table_metadata.last_updated_ms == 1602638573590
assert table_metadata.last_column_id == 3
assert table_metadata.schemas[0].schema_id == 0
assert table_metadata.current_schema_id == 1
assert table_metadata.partition_specs[0].spec_id == 0
assert table_metadata.default_spec_id == 0
assert table_metadata.last_partition_id == 1000
assert table_metadata.properties["read.split.target.size"] == "134217728"
assert table_metadata.current_snapshot_id == 3055729675574597004
assert table_metadata.snapshots[0].snapshot_id == 3051729675574597004
assert table_metadata.snapshot_log[0].timestamp_ms == 1515100955770
assert table_metadata.sort_orders[0].order_id == 3
assert table_metadata.default_sort_order_id == 3
def test_v1_metadata_parsing_directly(example_table_metadata_v1: Dict[str, Any]) -> None:
"""Test retrieving values from a TableMetadata instance of version 1"""
table_metadata = TableMetadataV1(**example_table_metadata_v1)
assert isinstance(table_metadata, TableMetadataV1)
# The version 1 will automatically be bumped to version 2
assert table_metadata.format_version == 1
assert table_metadata.table_uuid == UUID("d20125c8-7284-442c-9aea-15fee620737c")
assert table_metadata.location == "s3://bucket/test/location"
assert table_metadata.last_updated_ms == 1602638573874
assert table_metadata.last_column_id == 3
assert table_metadata.schemas == [
Schema(
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
schema_id=0,
identifier_field_ids=[],
)
]
assert table_metadata.schemas[0].schema_id == 0
assert table_metadata.current_schema_id == 0
assert table_metadata.partition_specs == [
PartitionSpec(PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="x"))
]
assert table_metadata.default_spec_id == 0
assert table_metadata.last_partition_id == 1000
assert table_metadata.current_snapshot_id is None
assert table_metadata.default_sort_order_id == 0
def test_parsing_correct_types(example_table_metadata_v2: Dict[str, Any]) -> None:
table_metadata = TableMetadataV2(**example_table_metadata_v2)
assert isinstance(table_metadata.schemas[0], Schema)
assert isinstance(table_metadata.schemas[0].fields[0], NestedField)
assert isinstance(table_metadata.schemas[0].fields[0].field_type, LongType)
def test_updating_metadata(example_table_metadata_v2: Dict[str, Any]) -> None:
"""Test creating a new TableMetadata instance that's an updated version of
an existing TableMetadata instance"""
table_metadata = TableMetadataV2(**example_table_metadata_v2)
new_schema = {
"type": "struct",
"schema-id": 1,
"fields": [
{"id": 1, "name": "foo", "required": True, "type": "string"},
{"id": 2, "name": "bar", "required": True, "type": "string"},
{"id": 3, "name": "baz", "required": True, "type": "string"},
],
}
mutable_table_metadata = table_metadata.model_dump()
mutable_table_metadata["schemas"].append(new_schema)
mutable_table_metadata["current-schema-id"] = 1
table_metadata = TableMetadataV2(**mutable_table_metadata)
assert table_metadata.current_schema_id == 1
assert table_metadata.schemas[-1] == Schema(**new_schema)
def test_serialize_v1(example_table_metadata_v1: Dict[str, Any]) -> None:
table_metadata = TableMetadataV1(**example_table_metadata_v1)
table_metadata_json = table_metadata.model_dump_json()
expected = """{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":1,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},"partition-spec":[{"name":"x","transform":"identity","source-id":1,"field-id":1000}]}"""
assert table_metadata_json == expected
def test_serialize_v2(example_table_metadata_v2: Dict[str, Any]) -> None:
table_metadata = TableMetadataV2(**example_table_metadata_v2).model_dump_json()
expected = """{"location":"s3://bucket/test/location","table-uuid":"9c12d441-03fe-4693-9a96-a0705ddf69c1","last-updated-ms":1602638573590,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":1,"identifier-field-ids":[1,2]}],"current-schema-id":1,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{"read.split.target.size":"134217728"},"current-snapshot-id":3055729675574597004,"snapshots":[{"snapshot-id":3051729675574597004,"sequence-number":0,"timestamp-ms":1515100955770,"manifest-list":"s3://a/b/1.avro","summary":{"operation":"append"}},{"snapshot-id":3055729675574597004,"parent-snapshot-id":3051729675574597004,"sequence-number":1,"timestamp-ms":1555100955770,"manifest-list":"s3://a/b/2.avro","summary":{"operation":"append"},"schema-id":1}],"snapshot-log":[{"snapshot-id":3051729675574597004,"timestamp-ms":1515100955770},{"snapshot-id":3055729675574597004,"timestamp-ms":1555100955770}],"metadata-log":[{"metadata-file":"s3://bucket/.../v1.json","timestamp-ms":1515100}],"sort-orders":[{"order-id":3,"fields":[{"source-id":2,"transform":"identity","direction":"asc","null-order":"nulls-first"},{"source-id":3,"transform":"bucket[4]","direction":"desc","null-order":"nulls-last"}]}],"default-sort-order-id":3,"refs":{"test":{"snapshot-id":3051729675574597004,"type":"tag","max-ref-age-ms":10000000},"main":{"snapshot-id":3055729675574597004,"type":"branch"}},"format-version":2,"last-sequence-number":34}"""
assert table_metadata == expected
def test_migrate_v1_schemas(example_table_metadata_v1: Dict[str, Any]) -> None:
table_metadata = TableMetadataV1(**example_table_metadata_v1)
assert isinstance(table_metadata, TableMetadataV1)
assert len(table_metadata.schemas) == 1
assert table_metadata.schemas[0] == table_metadata.schema_
def test_migrate_v1_partition_specs(example_table_metadata_v1: Dict[str, Any]) -> None:
# Copy the example, and add a spec
table_metadata = TableMetadataV1(**example_table_metadata_v1)
assert isinstance(table_metadata, TableMetadataV1)
assert len(table_metadata.partition_specs) == 1
# Spec ID gets added automatically
assert table_metadata.partition_specs == [
PartitionSpec(PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="x")),
]
def test_invalid_format_version(example_table_metadata_v1: Dict[str, Any]) -> None:
"""Test the exception when trying to load an unknown version"""
example_table_metadata_v22 = copy(example_table_metadata_v1)
example_table_metadata_v22["format-version"] = -1
with pytest.raises(ValidationError) as exc_info:
TableMetadataUtil.parse_raw(json.dumps(example_table_metadata_v22))
assert "Input tag '-1' found using 'format_version'" in str(exc_info.value)
def test_current_schema_not_found() -> None:
"""Test that we raise an exception when the schema can't be found"""
table_metadata_schema_not_found = {
"format-version": 2,
"table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
"location": "s3://bucket/test/location",
"last-updated-ms": 1602638573874,
"last-column-id": 3,
"schemas": [
{"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]},
{
"type": "struct",
"schema-id": 1,
"identifier-field-ids": [1, 2],
"fields": [
{"id": 1, "name": "x", "required": True, "type": "long"},
{"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"},
{"id": 3, "name": "z", "required": True, "type": "long"},
],
},
],
"current-schema-id": 2,
"default-spec-id": 0,
"partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}],
"last-partition-id": 1000,
"default-sort-order-id": 0,
"properties": {},
"current-snapshot-id": -1,
"snapshots": [],
}
with pytest.raises(ValidationError) as exc_info:
TableMetadataUtil.parse_raw(json.dumps(table_metadata_schema_not_found))
assert "current-schema-id 2 can't be found in the schemas" in str(exc_info.value)
def test_sort_order_not_found() -> None:
"""Test that we raise an exception when the schema can't be found"""
table_metadata_schema_not_found = {
"format-version": 2,
"table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
"location": "s3://bucket/test/location",
"last-updated-ms": 1602638573874,
"last-column-id": 3,
"schemas": [
{
"type": "struct",
"schema-id": 0,
"identifier-field-ids": [1, 2],
"fields": [
{"id": 1, "name": "x", "required": True, "type": "long"},
{"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"},
{"id": 3, "name": "z", "required": True, "type": "long"},
],
},
],
"default-sort-order-id": 4,
"sort-orders": [
{
"order-id": 3,
"fields": [
{"transform": "identity", "source-id": 2, "direction": "asc", "null-order": "nulls-first"},
{"transform": "bucket[4]", "source-id": 3, "direction": "desc", "null-order": "nulls-last"},
],
}
],
"current-schema-id": 0,
"default-spec-id": 0,
"partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}],
"last-partition-id": 1000,
"properties": {},
"current-snapshot-id": -1,
"snapshots": [],
}
with pytest.raises(ValidationError) as exc_info:
TableMetadataUtil.parse_raw(json.dumps(table_metadata_schema_not_found))
assert "default-sort-order-id 4 can't be found" in str(exc_info.value)
def test_sort_order_unsorted() -> None:
"""Test that we raise an exception when the schema can't be found"""
table_metadata_schema_not_found = {
"format-version": 2,
"table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
"location": "s3://bucket/test/location",
"last-updated-ms": 1602638573874,
"last-column-id": 3,
"schemas": [
{
"type": "struct",
"schema-id": 0,
"identifier-field-ids": [1, 2],
"fields": [
{"id": 1, "name": "x", "required": True, "type": "long"},
{"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"},
{"id": 3, "name": "z", "required": True, "type": "long"},
],
},
],
"default-sort-order-id": 0,
"sort-orders": [],
"current-schema-id": 0,
"default-spec-id": 0,
"partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}],
"last-partition-id": 1000,
"properties": {},
"current-snapshot-id": -1,
"snapshots": [],
}
table_metadata = TableMetadataUtil.parse_raw(json.dumps(table_metadata_schema_not_found))
# Most important here is that we correctly handle sort-order-id 0
assert len(table_metadata.sort_orders) == 0
def test_invalid_partition_spec() -> None:
table_metadata_spec_not_found = {
"format-version": 2,
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
"location": "s3://bucket/test/location",
"last-sequence-number": 34,
"last-updated-ms": 1602638573590,
"last-column-id": 3,
"current-schema-id": 1,
"schemas": [
{"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]},
{
"type": "struct",
"schema-id": 1,
"identifier-field-ids": [1, 2],
"fields": [
{"id": 1, "name": "x", "required": True, "type": "long"},
{"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"},
{"id": 3, "name": "z", "required": True, "type": "long"},
],
},
],
"sort-orders": [],
"default-sort-order-id": 0,
"default-spec-id": 1,
"partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}],
"last-partition-id": 1000,
}
with pytest.raises(ValidationError) as exc_info:
TableMetadataUtil.parse_raw(json.dumps(table_metadata_spec_not_found))
assert "default-spec-id 1 can't be found" in str(exc_info.value)
def test_v1_writing_metadata(example_table_metadata_v1: Dict[str, Any]) -> None:
"""
https://iceberg.apache.org/spec/#version-2
Writing v1 metadata:
- Table metadata field last-sequence-number should not be written
"""
table_metadata = TableMetadataV1(**example_table_metadata_v1)
metadata_v1_json = table_metadata.model_dump_json()
metadata_v1 = json.loads(metadata_v1_json)
assert "last-sequence-number" not in metadata_v1
def test_v1_metadata_for_v2(example_table_metadata_v1: Dict[str, Any]) -> None:
"""
https://iceberg.apache.org/spec/#version-2
Reading v1 metadata for v2:
- Table metadata field last-sequence-number must default to 0
"""
table_metadata = TableMetadataV1(**example_table_metadata_v1).to_v2()
assert table_metadata.last_sequence_number == 0
def test_v1_write_metadata_for_v2() -> None:
"""
https://iceberg.apache.org/spec/#version-2
Table metadata JSON:
- last-sequence-number was added and is required; default to 0 when reading v1 metadata
- table-uuid is now required
- current-schema-id is now required
- schemas is now required
- partition-specs is now required
- default-spec-id is now required
- last-partition-id is now required
- sort-orders is now required
- default-sort-order-id is now required
- schema is no longer required and should be omitted; use schemas and current-schema-id instead
- partition-spec is no longer required and should be omitted; use partition-specs and default-spec-id instead
"""
minimal_example_v1 = {
"format-version": 1,
"location": "s3://bucket/test/location",
"last-updated-ms": 1602638573874,
"last-column-id": 3,
"schema": {
"type": "struct",
"fields": [
{"id": 1, "name": "x", "required": True, "type": "long"},
{"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"},
{"id": 3, "name": "z", "required": True, "type": "long"},
],
},
"partition-spec": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}],
"properties": {},
"current-snapshot-id": -1,
"snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}],
}
table_metadata = TableMetadataV1(**minimal_example_v1).to_v2()
metadata_v2_json = table_metadata.model_dump_json()
metadata_v2 = json.loads(metadata_v2_json)
assert metadata_v2["last-sequence-number"] == 0
assert UUID(metadata_v2["table-uuid"]) is not None
assert metadata_v2["current-schema-id"] == 0
assert metadata_v2["schemas"] == [
{
"fields": [
{"id": 1, "name": "x", "required": True, "type": "long"},
{"doc": "comment", "id": 2, "name": "y", "required": True, "type": "long"},
{"id": 3, "name": "z", "required": True, "type": "long"},
],
"identifier-field-ids": [],
"schema-id": 0,
"type": "struct",
}
]
assert metadata_v2["partition-specs"] == [
{
"spec-id": 0,
"fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}],
}
]
assert metadata_v2["default-spec-id"] == 0
assert metadata_v2["last-partition-id"] == 1000
assert metadata_v2["sort-orders"] == [{"order-id": 0, "fields": []}]
assert metadata_v2["default-sort-order-id"] == 0
# Deprecated fields
assert "schema" not in metadata_v2
assert "partition-spec" not in metadata_v2
def test_v2_ref_creation(example_table_metadata_v2: Dict[str, Any]) -> None:
table_metadata = TableMetadataV2(**example_table_metadata_v2)
assert table_metadata.refs == {
"main": SnapshotRef(
snapshot_id=3055729675574597004,
snapshot_ref_type=SnapshotRefType.BRANCH,
min_snapshots_to_keep=None,
max_snapshot_age_ms=None,
max_ref_age_ms=None,
),
"test": SnapshotRef(
snapshot_id=3051729675574597004,
snapshot_ref_type=SnapshotRefType.TAG,
min_snapshots_to_keep=None,
max_snapshot_age_ms=None,
max_ref_age_ms=10000000,
),
}
def test_metadata_v1() -> None:
valid_v1 = {
"format-version": 1,
"table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
"location": "s3://tabular-wh-us-west-2-dev/8bcb0838-50fc-472d-9ddb-8feb89ef5f1e/bf289591-dcc0-4234-ad4f-5c3eed811a29",
"last-updated-ms": 1657810967051,
"last-column-id": 3,
"schema": {
"type": "struct",
"schema-id": 0,
"identifier-field-ids": [2],
"fields": [
{"id": 1, "name": "foo", "required": False, "type": "string"},
{"id": 2, "name": "bar", "required": True, "type": "int"},
{"id": 3, "name": "baz", "required": False, "type": "boolean"},
],
},
"current-schema-id": 0,
"schemas": [
{
"type": "struct",
"schema-id": 0,
"identifier-field-ids": [2],
"fields": [
{"id": 1, "name": "foo", "required": False, "type": "string"},
{"id": 2, "name": "bar", "required": True, "type": "int"},
{"id": 3, "name": "baz", "required": False, "type": "boolean"},
],
}
],
"partition-spec": [],
"default-spec-id": 0,
"partition-specs": [{"spec-id": 0, "fields": []}],
"last-partition-id": 999,
"default-sort-order-id": 0,
"sort-orders": [{"order-id": 0, "fields": []}],
"properties": {
"write.delete.parquet.compression-codec": "zstd",
"write.metadata.compression-codec": "gzip",
"write.summary.partition-limit": "100",
"write.parquet.compression-codec": "zstd",
},
"current-snapshot-id": -1,
"refs": {},
"snapshots": [],
"snapshot-log": [],
"metadata-log": [],
}
TableMetadataV1(**valid_v1)
@patch("time.time", MagicMock(return_value=12345))
def test_make_metadata_fresh() -> None:
schema = Schema(
NestedField(field_id=10, name="foo", field_type=StringType(), required=False),
NestedField(field_id=22, name="bar", field_type=IntegerType(), required=True),
NestedField(field_id=33, name="baz", field_type=BooleanType(), required=False),
NestedField(
field_id=41,
name="qux",
field_type=ListType(element_id=56, element_type=StringType(), element_required=True),
required=True,
),
NestedField(
field_id=6,
name="quux",
field_type=MapType(
key_id=77,
key_type=StringType(),
value_id=88,
value_type=MapType(key_id=91, key_type=StringType(), value_id=102, value_type=IntegerType(), value_required=True),
value_required=True,
),
required=True,
),
NestedField(
field_id=113,
name="location",
field_type=ListType(
element_id=124,
element_type=StructType(
NestedField(field_id=132, name="latitude", field_type=FloatType(), required=False),
NestedField(field_id=143, name="longitude", field_type=FloatType(), required=False),
),
element_required=True,
),
required=True,
),
NestedField(
field_id=155,
name="person",
field_type=StructType(
NestedField(field_id=169, name="name", field_type=StringType(), required=False),
NestedField(field_id=178, name="age", field_type=IntegerType(), required=True),
),
required=False,
),
schema_id=10,
identifier_field_ids=[22],
)
partition_spec = PartitionSpec(
PartitionField(source_id=22, field_id=1022, transform=IdentityTransform(), name="bar"), spec_id=10
)
sort_order = SortOrder(
SortField(source_id=10, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST),
order_id=10,
)
actual = new_table_metadata(
schema=schema, partition_spec=partition_spec, sort_order=sort_order, location="s3://", properties={}
)
expected = TableMetadataV2(
location="s3://",
table_uuid=actual.table_uuid,
last_updated_ms=actual.last_updated_ms,
last_column_id=17,
schemas=[
Schema(
NestedField(field_id=1, name="foo", field_type=StringType(), required=False),
NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True),
NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False),
NestedField(
field_id=4,
name="qux",
field_type=ListType(type="list", element_id=8, element_type=StringType(), element_required=True),
required=True,
),
NestedField(
field_id=5,
name="quux",
field_type=MapType(
type="map",
key_id=9,
key_type=StringType(),
value_id=10,
value_type=MapType(
type="map",
key_id=11,
key_type=StringType(),
value_id=12,
value_type=IntegerType(),
value_required=True,
),
value_required=True,
),
required=True,
),
NestedField(
field_id=6,
name="location",
field_type=ListType(
type="list",
element_id=13,
element_type=StructType(
NestedField(field_id=14, name="latitude", field_type=FloatType(), required=False),
NestedField(field_id=15, name="longitude", field_type=FloatType(), required=False),
),
element_required=True,
),
required=True,
),
NestedField(
field_id=7,
name="person",
field_type=StructType(
NestedField(field_id=16, name="name", field_type=StringType(), required=False),
NestedField(field_id=17, name="age", field_type=IntegerType(), required=True),
),
required=False,
),
schema_id=0,
identifier_field_ids=[2],
)
],
current_schema_id=0,
partition_specs=[PartitionSpec(PartitionField(source_id=2, field_id=1000, transform=IdentityTransform(), name="bar"))],
default_spec_id=0,
last_partition_id=1000,
properties={},
current_snapshot_id=None,
snapshots=[],
snapshot_log=[],
metadata_log=[],
sort_orders=[
SortOrder(
SortField(
source_id=1, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_LAST
),
order_id=1,
)
],
default_sort_order_id=1,
refs={},
format_version=2,
last_sequence_number=0,
)
assert actual.model_dump() == expected.model_dump()