# blob: d74aa3234cffcf467bef50b10493555a74d24b93 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest
from pyiceberg.schema import Schema
from pyiceberg.table.name_mapping import MappedField, NameMapping, create_mapping_from_schema, parse_mapping_from_json
@pytest.fixture(scope="session")
def table_name_mapping_nested() -> NameMapping:
    """Provide a session-scoped NameMapping with seven top-level fields (field IDs 1-17 overall).

    Three of the top-level fields are leaves; the other four carry one or two
    levels of child mappings.
    """
    # Top-level fields without children.
    foo = MappedField(field_id=1, names=['foo'])
    bar = MappedField(field_id=2, names=['bar'])
    baz = MappedField(field_id=3, names=['baz'])

    # 'qux' has a single child named 'element'.
    qux_element = MappedField(field_id=5, names=['element'])
    qux = MappedField(field_id=4, names=['qux'], fields=[qux_element])

    # 'quux' has 'key'/'value' children; its 'value' child has 'key'/'value' children of its own.
    quux_value = MappedField(
        field_id=8,
        names=['value'],
        fields=[
            MappedField(field_id=9, names=['key']),
            MappedField(field_id=10, names=['value']),
        ],
    )
    quux = MappedField(
        field_id=6,
        names=['quux'],
        fields=[MappedField(field_id=7, names=['key']), quux_value],
    )

    # 'location' wraps an 'element' child that holds the coordinate pair.
    location_element = MappedField(
        field_id=12,
        names=['element'],
        fields=[
            MappedField(field_id=13, names=['latitude']),
            MappedField(field_id=14, names=['longitude']),
        ],
    )
    location = MappedField(field_id=11, names=['location'], fields=[location_element])

    # 'person' has two direct leaf children.
    person = MappedField(
        field_id=15,
        names=['person'],
        fields=[
            MappedField(field_id=16, names=['name']),
            MappedField(field_id=17, names=['age']),
        ],
    )

    return NameMapping([foo, bar, baz, qux, quux, location, person])
def test_json_mapped_field_deserialization() -> None:
    """A MappedField parses from JSON whether "fields" is absent or explicitly null."""
    expected = MappedField(field_id=1, names=['id', 'record_id'])

    # Payload that omits the "fields" key entirely.
    payload_without_fields = """{
"field-id": 1,
"names": ["id", "record_id"]
}
"""
    assert expected == MappedField.model_validate_json(payload_without_fields)

    # Payload that carries an explicit null for "fields"; it must compare equal to the same field.
    payload_with_null_fields = """{
"field-id": 1,
"names": ["id", "record_id"],
"fields": null
}
"""
    assert expected == MappedField.model_validate_json(payload_with_null_fields)
def test_json_name_mapping_deserialization() -> None:
    """parse_mapping_from_json turns a JSON array (with one nested entry) into the equal NameMapping."""
    raw_json = """
[
{
"field-id": 1,
"names": [
"id",
"record_id"
]
},
{
"field-id": 2,
"names": [
"data"
]
},
{
"field-id": 3,
"names": [
"location"
],
"fields": [
{
"field-id": 4,
"names": [
"latitude",
"lat"
]
},
{
"field-id": 5,
"names": [
"longitude",
"long"
]
}
]
}
]
"""
    parsed = parse_mapping_from_json(raw_json)

    # The only nested entry: 'location' with its two aliased children.
    location = MappedField(
        field_id=3,
        names=['location'],
        fields=[
            MappedField(field_id=4, names=['latitude', 'lat']),
            MappedField(field_id=5, names=['longitude', 'long']),
        ],
    )
    expected = NameMapping([
        MappedField(field_id=1, names=['id', 'record_id']),
        MappedField(field_id=2, names=['data']),
        location,
    ])

    assert parsed == expected
def test_json_serialization(table_name_mapping_nested: NameMapping) -> None:
    """model_dump_json on the nested fixture yields the exact compact JSON array below."""
    actual_json = table_name_mapping_nested.model_dump_json()

    # Adjacent literals are concatenated at compile time; one literal per top-level entry.
    expected_json = (
        '['
        '{"field-id":1,"names":["foo"]},'
        '{"field-id":2,"names":["bar"]},'
        '{"field-id":3,"names":["baz"]},'
        '{"field-id":4,"names":["qux"],"fields":[{"field-id":5,"names":["element"]}]},'
        '{"field-id":6,"names":["quux"],"fields":[{"field-id":7,"names":["key"]},{"field-id":8,"names":["value"],"fields":[{"field-id":9,"names":["key"]},{"field-id":10,"names":["value"]}]}]},'
        '{"field-id":11,"names":["location"],"fields":[{"field-id":12,"names":["element"],"fields":[{"field-id":13,"names":["latitude"]},{"field-id":14,"names":["longitude"]}]}]},'
        '{"field-id":15,"names":["person"],"fields":[{"field-id":16,"names":["name"]},{"field-id":17,"names":["age"]}]}'
        ']'
    )

    assert actual_json == expected_json
def test_name_mapping_to_string() -> None:
    """str() of a NameMapping renders one "([names] -> id)" line per top-level field inside brackets."""
    location = MappedField(
        field_id=3,
        names=['location'],
        fields=[
            MappedField(field_id=4, names=['lat', 'latitude']),
            MappedField(field_id=5, names=['long', 'longitude']),
        ],
    )
    mapping = NameMapping([
        MappedField(field_id=1, names=['id', 'record_id']),
        MappedField(field_id=2, names=['data']),
        location,
    ])
    rendered = str(mapping)

    # NOTE(review): whitespace inside this literal is part of the expectation. It is
    # reproduced verbatim here; confirm its leading indentation against NameMapping.__str__.
    expected_text = """[
([id, record_id] -> 1)
([data] -> 2)
([location] -> 3 ([lat, latitude] -> 4), ([long, longitude] -> 5))
]"""

    assert rendered == expected_text
def test_mapping_from_schema(table_schema_nested: Schema, table_name_mapping_nested: NameMapping) -> None:
    """The mapping created from `table_schema_nested` equals the hand-written nested mapping fixture."""
    assert create_mapping_from_schema(table_schema_nested) == table_name_mapping_nested
def test_mapping_by_name(table_name_mapping_nested: NameMapping) -> None:
    """`_field_by_name` holds every field of the fixture, at every nesting level, keyed by its dotted path."""
    actual_index = table_name_mapping_nested._field_by_name

    # Leaves first, then the parents that embed them, so each field is spelled out once.
    foo = MappedField(field_id=1, names=['foo'])
    bar = MappedField(field_id=2, names=['bar'])
    baz = MappedField(field_id=3, names=['baz'])

    qux_element = MappedField(field_id=5, names=['element'])
    qux = MappedField(field_id=4, names=['qux'], fields=[qux_element])

    quux_key = MappedField(field_id=7, names=['key'])
    quux_value_key = MappedField(field_id=9, names=['key'])
    quux_value_value = MappedField(field_id=10, names=['value'])
    quux_value = MappedField(field_id=8, names=['value'], fields=[quux_value_key, quux_value_value])
    quux = MappedField(field_id=6, names=['quux'], fields=[quux_key, quux_value])

    latitude = MappedField(field_id=13, names=['latitude'])
    longitude = MappedField(field_id=14, names=['longitude'])
    location_element = MappedField(field_id=12, names=['element'], fields=[latitude, longitude])
    location = MappedField(field_id=11, names=['location'], fields=[location_element])

    person_name = MappedField(field_id=16, names=['name'])
    person_age = MappedField(field_id=17, names=['age'])
    person = MappedField(field_id=15, names=['person'], fields=[person_name, person_age])

    expected_index = {
        'foo': foo,
        'bar': bar,
        'baz': baz,
        'qux': qux,
        'qux.element': qux_element,
        'quux': quux,
        'quux.key': quux_key,
        'quux.value': quux_value,
        'quux.value.key': quux_value_key,
        'quux.value.value': quux_value_value,
        'location': location,
        'location.element': location_element,
        'location.element.latitude': latitude,
        'location.element.longitude': longitude,
        'person': person,
        'person.name': person_name,
        'person.age': person_age,
    }

    assert actual_index == expected_index
def test_mapping_lookup_by_name(table_name_mapping_nested: NameMapping) -> None:
    """`find` resolves a dotted path or separate name parts, and raises ValueError for an unknown name."""
    find = table_name_mapping_nested.find
    latitude = MappedField(field_id=13, names=['latitude'])

    # A top-level name.
    assert find("foo") == MappedField(field_id=1, names=['foo'])

    # The same nested field, addressed three ways: dotted string, positional parts, unpacked list.
    assert find("location.element.latitude") == latitude
    assert find("location", "element", "latitude") == latitude
    path_parts = ["location", "element", "latitude"]
    assert find(*path_parts) == latitude

    # A name that is not in the mapping.
    with pytest.raises(ValueError, match="Could not find field with name: boom"):
        find("boom")