blob: 9dab35fa7b2f996172e554c438120d8a08089b4d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "iceberg/schema.h"
#include <memory>
#include <thread>
#include <utility>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "iceberg/result.h"
#include "iceberg/schema_field.h"
#include "iceberg/test/matchers.h"
#include "iceberg/util/formatter.h" // IWYU pragma: keep
// Builds a shared StructType out of the given schema fields.
//
// Arguments are perfectly forwarded: rvalues are moved from, while lvalues are
// copied, so a named field handed in by the caller is never left in a
// moved-from state (the previous std::move on a forwarding reference did that).
template <typename... Args>
std::shared_ptr<iceberg::StructType> MakeStructType(Args&&... args) {
  return std::make_shared<iceberg::StructType>(
      std::vector<iceberg::SchemaField>{std::forward<Args>(args)...});
}
// Builds a heap-allocated Schema whose top-level columns are the given fields.
//
// Arguments are perfectly forwarded: rvalues are moved from, while lvalues are
// copied, so a named field handed in by the caller is never left in a
// moved-from state (the previous std::move on a forwarding reference did that).
template <typename... Args>
std::unique_ptr<iceberg::Schema> MakeSchema(Args&&... args) {
  return std::make_unique<iceberg::Schema>(
      std::vector<iceberg::SchemaField>{std::forward<Args>(args)...});
}
// Covers the basic accessors of a flat two-column schema: the schema id, the
// field list, lookups by id / index / name, the error returned for an
// out-of-range index, and the (empty) identifier-field set.
TEST(SchemaTest, Basics) {
  iceberg::SchemaField field1(5, "foo", iceberg::int32(), true);
  iceberg::SchemaField field2(7, "bar", iceberg::string(), true);
  iceberg::Schema schema({field1, field2}, 100);
  ASSERT_EQ(schema, schema);
  ASSERT_EQ(100, schema.schema_id());

  std::span<const iceberg::SchemaField> fields = schema.fields();
  ASSERT_EQ(2, fields.size());
  ASSERT_EQ(field1, fields[0]);
  ASSERT_EQ(field2, fields[1]);

  // Successful lookups by id, position and name.
  ASSERT_THAT(schema.GetFieldById(5), ::testing::Optional(field1));
  ASSERT_THAT(schema.GetFieldById(7), ::testing::Optional(field2));
  ASSERT_THAT(schema.GetFieldByIndex(0), ::testing::Optional(field1));
  ASSERT_THAT(schema.GetFieldByIndex(1), ::testing::Optional(field2));
  ASSERT_THAT(schema.GetFieldByName("foo"), ::testing::Optional(field1));
  ASSERT_THAT(schema.GetFieldByName("bar"), ::testing::Optional(field2));

  // An unknown id yields an empty optional rather than an error.
  ASSERT_EQ(std::nullopt, schema.GetFieldById(0));

  // Out-of-range indices (past the end and negative) are reported as errors.
  // The matcher is namespace-qualified like everywhere else in this file
  // instead of relying on argument-dependent lookup.
  auto result = schema.GetFieldByIndex(2);
  ASSERT_THAT(result, iceberg::IsError(iceberg::ErrorKind::kInvalidArgument));
  ASSERT_THAT(result,
              iceberg::HasErrorMessage("Invalid index 2 to get field from struct"));
  result = schema.GetFieldByIndex(-1);
  ASSERT_THAT(result, iceberg::IsError(iceberg::ErrorKind::kInvalidArgument));
  ASSERT_THAT(result,
              iceberg::HasErrorMessage("Invalid index -1 to get field from struct"));

  // An unknown name yields an empty optional.
  ASSERT_EQ(std::nullopt, schema.GetFieldByName("element"));

  // No identifier fields were supplied, so both views are empty.
  ASSERT_EQ(0, schema.IdentifierFieldIds().size());
  auto identifier_field_names = schema.IdentifierFieldNames();
  ASSERT_THAT(identifier_field_names, iceberg::IsOk());
  ASSERT_THAT(identifier_field_names.value(), ::testing::IsEmpty());
}
// Schema equality must be symmetric and must take the schema id, the number of
// fields and the field definitions into account.
TEST(SchemaTest, Equality) {
  using iceberg::Schema;
  using iceberg::SchemaField;

  SchemaField foo(5, "foo", iceberg::int32(), true);
  SchemaField bar(7, "bar", iceberg::string(), true);
  SchemaField foobar(5, "foobar", iceberg::int32(), true);  // same id as foo, other name

  Schema reference({foo, bar}, 100);
  Schema other_id({foo, bar}, 101);
  Schema fewer_fields({foo}, 101);
  Schema renamed_field({foobar, bar}, 101);
  Schema identical({foo, bar}, 100);

  // Reflexive.
  ASSERT_EQ(reference, reference);

  // Each differing schema is unequal in both directions.
  ASSERT_NE(reference, other_id);
  ASSERT_NE(other_id, reference);
  ASSERT_NE(reference, fewer_fields);
  ASSERT_NE(fewer_fields, reference);
  ASSERT_NE(reference, renamed_field);
  ASSERT_NE(renamed_field, reference);

  // Same fields and same id compare equal in both directions.
  ASSERT_EQ(reference, identical);
  ASSERT_EQ(identical, reference);
}
// Validation rules applied by Schema::Make to identifier fields, plus their
// effect on equality and on IdentifierFieldNames().
TEST(SchemaTest, IdentifierFields) {
  using iceberg::ErrorKind;
  using iceberg::Schema;
  using iceberg::SchemaField;

  SchemaField id_field(1, "id", iceberg::int32(), false);
  SchemaField name_field(2, "name", iceberg::string(), true);
  SchemaField age_field(3, "age", iceberg::float32(), false);

  // Baseline schema without identifier fields.
  Schema plain_schema({id_field, name_field}, 100);

  // Schema with a valid identifier field.
  ICEBERG_UNWRAP_OR_FAIL(auto keyed_schema,
                         Schema::Make({id_field, name_field}, 100, std::vector<int32_t>{1}));
  ASSERT_THAT(keyed_schema->IdentifierFieldIds(), ::testing::ElementsAre(1));
  auto keyed_names = keyed_schema->IdentifierFieldNames();
  ASSERT_THAT(keyed_names, iceberg::IsOk());
  EXPECT_THAT(keyed_names.value(), ::testing::ElementsAre("id"));

  // Equality check: identifier fields distinguish otherwise identical schemas.
  EXPECT_NE(plain_schema, *keyed_schema);
  auto keyed_schema_again =
      Schema::Make({id_field, name_field}, 100, std::vector<int32_t>{1});
  ASSERT_THAT(keyed_schema_again, iceberg::IsOk())
      << keyed_schema_again.error().message;
  EXPECT_EQ(*keyed_schema, *keyed_schema_again.value());

  // Invalid: identifier fields cannot be optional.
  auto made = Schema::Make({id_field, name_field}, 100, std::vector<int32_t>{2});
  EXPECT_THAT(made, iceberg::IsError(ErrorKind::kInvalidArgument));
  EXPECT_THAT(made, iceberg::HasErrorMessage("not a required field"));

  // Invalid: identifier fields cannot be float or double.
  made = Schema::Make({id_field, name_field, age_field}, 100, std::vector<int32_t>{3});
  EXPECT_THAT(made, iceberg::IsError(ErrorKind::kInvalidArgument));
  EXPECT_THAT(made, iceberg::HasErrorMessage("must not be float or double field"));

  // Invalid: identifier fields must be primitive, not a struct.
  SchemaField required_struct(
      4, "struct",
      std::make_shared<iceberg::StructType>(std::vector<SchemaField>{id_field, name_field}),
      false);
  made = Schema::Make({required_struct}, 100, std::vector<int32_t>{4});
  EXPECT_THAT(made, iceberg::IsError(ErrorKind::kInvalidArgument));
  EXPECT_THAT(made, iceberg::HasErrorMessage("not a primitive type field"));

  // Invalid: identifier fields cannot be nested in an optional field.
  SchemaField optional_struct(
      4, "struct",
      std::make_shared<iceberg::StructType>(std::vector<SchemaField>{id_field, name_field}),
      true);
  made = Schema::Make({optional_struct}, 100, std::vector<int32_t>{1});
  EXPECT_THAT(made, iceberg::IsError(ErrorKind::kInvalidArgument));
  EXPECT_THAT(made, iceberg::HasErrorMessage("must not be nested in optional field"));

  // Invalid: identifier fields cannot be nested in a repeated field.
  SchemaField list_element(5, "element", iceberg::int32(), false);
  SchemaField list_column(4, "list", std::make_shared<iceberg::ListType>(list_element),
                          true);
  made = Schema::Make({list_column}, 100, std::vector<int32_t>{5});
  EXPECT_THAT(made, iceberg::IsError(ErrorKind::kInvalidArgument));
  EXPECT_THAT(made, iceberg::HasErrorMessage("must not be nested in list"));

  // Valid: identifier field nested in a required struct.
  SchemaField nested_struct(
      4, "struct",
      std::make_shared<iceberg::StructType>(std::vector<SchemaField>{id_field, name_field}),
      false);
  made = Schema::Make({nested_struct}, 100, std::vector<int32_t>{1});
  ASSERT_THAT(made, iceberg::IsOk());
  EXPECT_THAT(made.value()->IdentifierFieldIds(), ::testing::ElementsAre(1));
  ICEBERG_UNWRAP_OR_FAIL(auto nested_names, made.value()->IdentifierFieldNames());
  EXPECT_THAT(nested_names, ::testing::ElementsAre("struct.id"));

  // Invalid: identifier field name that does not exist in the schema.
  made = Schema::Make({id_field, name_field}, 100, std::vector<std::string>{"not_exist"});
  EXPECT_THAT(made, iceberg::IsError(ErrorKind::kInvalidSchema));
  EXPECT_THAT(made, iceberg::HasErrorMessage("Cannot find identifier field"));
}
class BasicShortNameTest : public ::testing::Test {
protected:
void SetUp() override {
field1_ = std::make_unique<iceberg::SchemaField>(1, "Foo", iceberg::int32(), true);
field2_ = std::make_unique<iceberg::SchemaField>(2, "Bar", iceberg::string(), true);
field3_ = std::make_unique<iceberg::SchemaField>(3, "Foobar", iceberg::int32(), true);
auto structtype = std::make_shared<iceberg::StructType>(
std::vector<iceberg::SchemaField>{*field1_, *field2_, *field3_});
field4_ = std::make_unique<iceberg::SchemaField>(4, "element", structtype, false);
auto listype = std::make_shared<iceberg::ListType>(*field4_);
field5_ = std::make_unique<iceberg::SchemaField>(5, "key", iceberg::int32(), false);
field6_ = std::make_unique<iceberg::SchemaField>(6, "value", listype, false);
auto maptype = std::make_shared<iceberg::MapType>(*field5_, *field6_);
field7_ = std::make_unique<iceberg::SchemaField>(7, "Value", maptype, false);
schema_ =
std::make_unique<iceberg::Schema>(std::vector<iceberg::SchemaField>{*field7_}, 1);
}
std::unique_ptr<iceberg::Schema> schema_;
std::unique_ptr<iceberg::SchemaField> field1_;
std::unique_ptr<iceberg::SchemaField> field2_;
std::unique_ptr<iceberg::SchemaField> field3_;
std::unique_ptr<iceberg::SchemaField> field4_;
std::unique_ptr<iceberg::SchemaField> field5_;
std::unique_ptr<iceberg::SchemaField> field6_;
std::unique_ptr<iceberg::SchemaField> field7_;
};
// Every field id in the fixture resolves to its field, at any nesting depth;
// an id that is not present resolves to an empty optional.
TEST_F(BasicShortNameTest, TestFindById) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldById(7), ::testing::Optional(*field7_));
  ASSERT_THAT(schema.FindFieldById(6), ::testing::Optional(*field6_));
  ASSERT_THAT(schema.FindFieldById(5), ::testing::Optional(*field5_));
  ASSERT_THAT(schema.FindFieldById(4), ::testing::Optional(*field4_));
  ASSERT_THAT(schema.FindFieldById(3), ::testing::Optional(*field3_));
  ASSERT_THAT(schema.FindFieldById(2), ::testing::Optional(*field2_));
  ASSERT_THAT(schema.FindFieldById(1), ::testing::Optional(*field1_));

  ASSERT_THAT(schema.FindFieldById(10), ::testing::Optional(std::nullopt));
}
// Case-sensitive lookup by fully qualified dotted path.
TEST_F(BasicShortNameTest, TestFindByName) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldByName("Value"), ::testing::Optional(*field7_));
  ASSERT_THAT(schema.FindFieldByName("Value.value"), ::testing::Optional(*field6_));
  ASSERT_THAT(schema.FindFieldByName("Value.key"), ::testing::Optional(*field5_));
  ASSERT_THAT(schema.FindFieldByName("Value.value.element"),
              ::testing::Optional(*field4_));
  ASSERT_THAT(schema.FindFieldByName("Value.value.element.Foobar"),
              ::testing::Optional(*field3_));
  ASSERT_THAT(schema.FindFieldByName("Value.value.element.Bar"),
              ::testing::Optional(*field2_));
  ASSERT_THAT(schema.FindFieldByName("Value.value.element.Foo"),
              ::testing::Optional(*field1_));

  // Wrong letter case in the last segment: no match.
  ASSERT_THAT(schema.FindFieldByName("Value.value.element.FoO"),
              ::testing::Optional(std::nullopt));
}
// Lookup by fully qualified dotted path with case sensitivity switched off
// (second argument `false`).
TEST_F(BasicShortNameTest, TestFindByNameCaseInsensitive) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldByName("vALue", false), ::testing::Optional(*field7_));
  ASSERT_THAT(schema.FindFieldByName("vALue.VALUE", false),
              ::testing::Optional(*field6_));
  ASSERT_THAT(schema.FindFieldByName("valUe.kEy", false),
              ::testing::Optional(*field5_));
  ASSERT_THAT(schema.FindFieldByName("vaLue.vAlue.elEment", false),
              ::testing::Optional(*field4_));
  ASSERT_THAT(schema.FindFieldByName("vaLue.vAlue.eLement.fOObar", false),
              ::testing::Optional(*field3_));
  ASSERT_THAT(schema.FindFieldByName("valUe.vaLUe.elemEnt.Bar", false),
              ::testing::Optional(*field2_));
  ASSERT_THAT(schema.FindFieldByName("valUe.valUe.ELEMENT.FOO", false),
              ::testing::Optional(*field1_));

  // A truncated last segment is still not a match.
  ASSERT_THAT(schema.FindFieldByName("valUe.valUe.ELEMENT.FO", false),
              ::testing::Optional(std::nullopt));
}
// Case-insensitive lookup using paths that leave out the "element" segment of
// the list and address the struct members directly under "Value.value".
TEST_F(BasicShortNameTest, TestFindByShortNameCaseInsensitive) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldByName("vaLue.value.FOO", false),
              ::testing::Optional(*field1_));
  ASSERT_THAT(schema.FindFieldByName("Value.value.Bar", false),
              ::testing::Optional(*field2_));
  ASSERT_THAT(schema.FindFieldByName("Value.value.FooBAR", false),
              ::testing::Optional(*field3_));

  // Nothing lives below a leaf column.
  ASSERT_THAT(schema.FindFieldByName("Value.value.FooBAR.a", false),
              ::testing::Optional(std::nullopt));
}
class ComplexShortNameTest : public ::testing::Test {
protected:
void SetUp() override {
field1_ = std::make_unique<iceberg::SchemaField>(1, "Foo", iceberg::int32(), true);
field2_ = std::make_unique<iceberg::SchemaField>(2, "Bar", iceberg::string(), true);
field3_ = std::make_unique<iceberg::SchemaField>(3, "Foobar", iceberg::int32(), true);
auto structtype = std::make_shared<iceberg::StructType>(
std::vector<iceberg::SchemaField>{*field1_, *field2_, *field3_});
field4_ = std::make_unique<iceberg::SchemaField>(4, "element", structtype, false);
auto listype = std::make_shared<iceberg::ListType>(*field4_);
field5_ =
std::make_unique<iceberg::SchemaField>(5, "First_child", iceberg::int32(), false);
field6_ = std::make_unique<iceberg::SchemaField>(6, "Second_child", listype, false);
auto structtype2 = std::make_shared<iceberg::StructType>(
std::vector<iceberg::SchemaField>{*field5_, *field6_});
field7_ = std::make_unique<iceberg::SchemaField>(7, "key", iceberg::int32(), false);
field8_ = std::make_unique<iceberg::SchemaField>(8, "value", structtype2, false);
auto maptype = std::make_shared<iceberg::MapType>(*field7_, *field8_);
field9_ = std::make_unique<iceberg::SchemaField>(9, "Map", maptype, false);
schema_ =
std::make_unique<iceberg::Schema>(std::vector<iceberg::SchemaField>{*field9_}, 1);
}
std::unique_ptr<iceberg::Schema> schema_;
std::unique_ptr<iceberg::SchemaField> field1_;
std::unique_ptr<iceberg::SchemaField> field2_;
std::unique_ptr<iceberg::SchemaField> field3_;
std::unique_ptr<iceberg::SchemaField> field4_;
std::unique_ptr<iceberg::SchemaField> field5_;
std::unique_ptr<iceberg::SchemaField> field6_;
std::unique_ptr<iceberg::SchemaField> field7_;
std::unique_ptr<iceberg::SchemaField> field8_;
std::unique_ptr<iceberg::SchemaField> field9_;
};
// Every field id in the fixture resolves to its field, at any nesting depth;
// an id that is not present resolves to an empty optional.
TEST_F(ComplexShortNameTest, TestFindById) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldById(9), ::testing::Optional(*field9_));
  ASSERT_THAT(schema.FindFieldById(8), ::testing::Optional(*field8_));
  ASSERT_THAT(schema.FindFieldById(7), ::testing::Optional(*field7_));
  ASSERT_THAT(schema.FindFieldById(6), ::testing::Optional(*field6_));
  ASSERT_THAT(schema.FindFieldById(5), ::testing::Optional(*field5_));
  ASSERT_THAT(schema.FindFieldById(4), ::testing::Optional(*field4_));
  ASSERT_THAT(schema.FindFieldById(3), ::testing::Optional(*field3_));
  ASSERT_THAT(schema.FindFieldById(2), ::testing::Optional(*field2_));
  ASSERT_THAT(schema.FindFieldById(1), ::testing::Optional(*field1_));

  ASSERT_THAT(schema.FindFieldById(0), ::testing::Optional(std::nullopt));
}
// FindColumnNameById maps each field id to its fully qualified dotted path and
// yields an empty optional for an id that is not present.
TEST_F(ComplexShortNameTest, TestFindColumnNameById) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindColumnNameById(0), ::testing::Optional(std::nullopt));

  ASSERT_THAT(schema.FindColumnNameById(1),
              ::testing::Optional(std::string("Map.value.Second_child.element.Foo")));
  ASSERT_THAT(schema.FindColumnNameById(2),
              ::testing::Optional(std::string("Map.value.Second_child.element.Bar")));
  ASSERT_THAT(schema.FindColumnNameById(3),
              ::testing::Optional(std::string("Map.value.Second_child.element.Foobar")));
  ASSERT_THAT(schema.FindColumnNameById(4),
              ::testing::Optional(std::string("Map.value.Second_child.element")));
  ASSERT_THAT(schema.FindColumnNameById(5),
              ::testing::Optional(std::string("Map.value.First_child")));
  ASSERT_THAT(schema.FindColumnNameById(6),
              ::testing::Optional(std::string("Map.value.Second_child")));
  ASSERT_THAT(schema.FindColumnNameById(7),
              ::testing::Optional(std::string("Map.key")));
  ASSERT_THAT(schema.FindColumnNameById(8),
              ::testing::Optional(std::string("Map.value")));
  ASSERT_THAT(schema.FindColumnNameById(9), ::testing::Optional(std::string("Map")));
}
// Case-sensitive lookup by fully qualified dotted path.
TEST_F(ComplexShortNameTest, TestFindByName) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldByName("Map"), ::testing::Optional(*field9_));
  ASSERT_THAT(schema.FindFieldByName("Map.value"), ::testing::Optional(*field8_));
  ASSERT_THAT(schema.FindFieldByName("Map.key"), ::testing::Optional(*field7_));
  ASSERT_THAT(schema.FindFieldByName("Map.value.Second_child"),
              ::testing::Optional(*field6_));
  ASSERT_THAT(schema.FindFieldByName("Map.value.First_child"),
              ::testing::Optional(*field5_));
  ASSERT_THAT(schema.FindFieldByName("Map.value.Second_child.element"),
              ::testing::Optional(*field4_));
  ASSERT_THAT(schema.FindFieldByName("Map.value.Second_child.element.Foobar"),
              ::testing::Optional(*field3_));
  ASSERT_THAT(schema.FindFieldByName("Map.value.Second_child.element.Bar"),
              ::testing::Optional(*field2_));
  ASSERT_THAT(schema.FindFieldByName("Map.value.Second_child.element.Foo"),
              ::testing::Optional(*field1_));

  // Misspelled leaf name: no match.
  ASSERT_THAT(schema.FindFieldByName("Map.value.Second_child.element.Fooo"),
              ::testing::Optional(std::nullopt));
}
// Lookup by fully qualified dotted path with case sensitivity switched off
// (second argument `false`).
TEST_F(ComplexShortNameTest, TestFindByNameCaseInsensitive) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldByName("map", false), ::testing::Optional(*field9_));
  ASSERT_THAT(schema.FindFieldByName("map.vALUE", false),
              ::testing::Optional(*field8_));
  ASSERT_THAT(schema.FindFieldByName("map.Key", false), ::testing::Optional(*field7_));
  ASSERT_THAT(schema.FindFieldByName("map.Value.second_Child", false),
              ::testing::Optional(*field6_));
  ASSERT_THAT(schema.FindFieldByName("map.Value.first_chIld", false),
              ::testing::Optional(*field5_));
  ASSERT_THAT(schema.FindFieldByName("map.Value.second_child.Element", false),
              ::testing::Optional(*field4_));
  ASSERT_THAT(schema.FindFieldByName("map.Value.second_child.Element.foobar", false),
              ::testing::Optional(*field3_));
  ASSERT_THAT(schema.FindFieldByName("map.VaLue.second_child.Element.bar", false),
              ::testing::Optional(*field2_));
  ASSERT_THAT(schema.FindFieldByName("map.value.Second_child.Element.foo", false),
              ::testing::Optional(*field1_));

  // Misspelled leaf name: no match even when case is ignored.
  ASSERT_THAT(schema.FindFieldByName("map.value.Second_child.Element.fooo", false),
              ::testing::Optional(std::nullopt));
}
// Case-sensitive lookup using shortened paths that leave out the map "value"
// segment and the list "element" segment.
TEST_F(ComplexShortNameTest, TestFindByShortName) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldByName("Map.Second_child"),
              ::testing::Optional(*field6_));
  ASSERT_THAT(schema.FindFieldByName("Map.First_child"), ::testing::Optional(*field5_));
  ASSERT_THAT(schema.FindFieldByName("Map.Second_child.Foobar"),
              ::testing::Optional(*field3_));
  ASSERT_THAT(schema.FindFieldByName("Map.Second_child.Bar"),
              ::testing::Optional(*field2_));
  ASSERT_THAT(schema.FindFieldByName("Map.Second_child.Foo"),
              ::testing::Optional(*field1_));

  // Unknown member under a valid shortened prefix: no match.
  ASSERT_THAT(schema.FindFieldByName("Map.Second_child.aaa"),
              ::testing::Optional(std::nullopt));
}
// Same shortened paths as the case-sensitive variant, looked up with case
// sensitivity switched off (second argument `false`).
TEST_F(ComplexShortNameTest, TestFindByShortNameCaseInsensitive) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldByName("map.second_child", false),
              ::testing::Optional(*field6_));
  ASSERT_THAT(schema.FindFieldByName("map.first_child", false),
              ::testing::Optional(*field5_));
  ASSERT_THAT(schema.FindFieldByName("map.second_child.foobar", false),
              ::testing::Optional(*field3_));
  ASSERT_THAT(schema.FindFieldByName("map.second_child.bar", false),
              ::testing::Optional(*field2_));
  ASSERT_THAT(schema.FindFieldByName("map.second_child.foo", false),
              ::testing::Optional(*field1_));

  // Unknown member under a valid shortened prefix: no match.
  ASSERT_THAT(schema.FindFieldByName("Map.Second_child.aaa", false),
              ::testing::Optional(std::nullopt));
}
class ComplexMapStructShortNameTest : public ::testing::Test {
protected:
void SetUp() override {
exp_inner_key_key_ =
std::make_unique<iceberg::SchemaField>(10, "inner_key", iceberg::int32(), false);
exp_inner_key_value_ = std::make_unique<iceberg::SchemaField>(
11, "inner_value", iceberg::int32(), false);
auto inner_struct_type_key_ = std::make_shared<iceberg::StructType>(
std::vector<iceberg::SchemaField>{*exp_inner_key_key_, *exp_inner_key_value_});
exp_inner_value_k_ =
std::make_unique<iceberg::SchemaField>(12, "inner_k", iceberg::int32(), false);
exp_inner_value_v_ =
std::make_unique<iceberg::SchemaField>(13, "inner_v", iceberg::int32(), false);
auto inner_struct_type_value_ = std::make_shared<iceberg::StructType>(
std::vector<iceberg::SchemaField>{*exp_inner_value_k_, *exp_inner_value_v_});
exp_key_struct_key_ =
std::make_unique<iceberg::SchemaField>(14, "key", iceberg::int32(), false);
exp_key_struct_value_ = std::make_unique<iceberg::SchemaField>(
15, "value", inner_struct_type_key_, false);
auto key_struct_type_ = std::make_shared<iceberg::StructType>(
std::vector<iceberg::SchemaField>{*exp_key_struct_key_, *exp_key_struct_value_});
exp_value_struct_key_ =
std::make_unique<iceberg::SchemaField>(16, "key", iceberg::int32(), false);
exp_value_struct_value_ = std::make_unique<iceberg::SchemaField>(
17, "value", inner_struct_type_value_, false);
auto value_struct_type_ =
std::make_shared<iceberg::StructType>(std::vector<iceberg::SchemaField>{
*exp_value_struct_key_, *exp_value_struct_value_});
exp_map_key_ =
std::make_unique<iceberg::SchemaField>(18, "key", key_struct_type_, false);
exp_map_value_ =
std::make_unique<iceberg::SchemaField>(19, "value", value_struct_type_, false);
auto map_type_ = std::make_shared<iceberg::MapType>(*exp_map_key_, *exp_map_value_);
exp_field_a_ = std::make_unique<iceberg::SchemaField>(20, "a", map_type_, false);
schema_ = std::make_unique<iceberg::Schema>(
std::vector<iceberg::SchemaField>{*exp_field_a_}, 1);
}
std::unique_ptr<iceberg::Schema> schema_;
std::unique_ptr<iceberg::SchemaField> exp_inner_key_key_;
std::unique_ptr<iceberg::SchemaField> exp_inner_key_value_;
std::unique_ptr<iceberg::SchemaField> exp_inner_value_k_;
std::unique_ptr<iceberg::SchemaField> exp_inner_value_v_;
std::unique_ptr<iceberg::SchemaField> exp_key_struct_key_;
std::unique_ptr<iceberg::SchemaField> exp_key_struct_value_;
std::unique_ptr<iceberg::SchemaField> exp_value_struct_key_;
std::unique_ptr<iceberg::SchemaField> exp_value_struct_value_;
std::unique_ptr<iceberg::SchemaField> exp_map_key_;
std::unique_ptr<iceberg::SchemaField> exp_map_value_;
std::unique_ptr<iceberg::SchemaField> exp_field_a_;
};
// Every field id in the fixture (10 through 20) resolves to its field.
TEST_F(ComplexMapStructShortNameTest, TestFindById) {
  auto& schema = *schema_;

  // Top-level map and its key / value.
  ASSERT_THAT(schema.FindFieldById(20), ::testing::Optional(*exp_field_a_));
  ASSERT_THAT(schema.FindFieldById(19), ::testing::Optional(*exp_map_value_));
  ASSERT_THAT(schema.FindFieldById(18), ::testing::Optional(*exp_map_key_));

  // Members of the key / value structs.
  ASSERT_THAT(schema.FindFieldById(17), ::testing::Optional(*exp_value_struct_value_));
  ASSERT_THAT(schema.FindFieldById(16), ::testing::Optional(*exp_value_struct_key_));
  ASSERT_THAT(schema.FindFieldById(15), ::testing::Optional(*exp_key_struct_value_));
  ASSERT_THAT(schema.FindFieldById(14), ::testing::Optional(*exp_key_struct_key_));

  // Innermost leaves.
  ASSERT_THAT(schema.FindFieldById(13), ::testing::Optional(*exp_inner_value_v_));
  ASSERT_THAT(schema.FindFieldById(12), ::testing::Optional(*exp_inner_value_k_));
  ASSERT_THAT(schema.FindFieldById(11), ::testing::Optional(*exp_inner_key_value_));
  ASSERT_THAT(schema.FindFieldById(10), ::testing::Optional(*exp_inner_key_key_));
}
// Case-sensitive lookup by dotted path where struct members named "key" and
// "value" sit underneath the map's own "key" and "value" segments.
TEST_F(ComplexMapStructShortNameTest, TestFindByName) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldByName("a"), ::testing::Optional(*exp_field_a_));
  ASSERT_THAT(schema.FindFieldByName("a.key"), ::testing::Optional(*exp_map_key_));
  ASSERT_THAT(schema.FindFieldByName("a.value"), ::testing::Optional(*exp_map_value_));

  // Key side.
  ASSERT_THAT(schema.FindFieldByName("a.key.key"),
              ::testing::Optional(*exp_key_struct_key_));
  ASSERT_THAT(schema.FindFieldByName("a.key.value"),
              ::testing::Optional(*exp_key_struct_value_));
  ASSERT_THAT(schema.FindFieldByName("a.key.value.inner_key"),
              ::testing::Optional(*exp_inner_key_key_));
  ASSERT_THAT(schema.FindFieldByName("a.key.value.inner_value"),
              ::testing::Optional(*exp_inner_key_value_));

  // Value side.
  ASSERT_THAT(schema.FindFieldByName("a.value.key"),
              ::testing::Optional(*exp_value_struct_key_));
  ASSERT_THAT(schema.FindFieldByName("a.value.value"),
              ::testing::Optional(*exp_value_struct_value_));
  ASSERT_THAT(schema.FindFieldByName("a.value.value.inner_k"),
              ::testing::Optional(*exp_inner_value_k_));
  ASSERT_THAT(schema.FindFieldByName("a.value.value.inner_v"),
              ::testing::Optional(*exp_inner_value_v_));
}
// Same paths as the case-sensitive test, written in upper case and looked up
// with case sensitivity switched off (second argument `false`).
TEST_F(ComplexMapStructShortNameTest, TestFindByNameCaseInsensitive) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldByName("A", false), ::testing::Optional(*exp_field_a_));
  ASSERT_THAT(schema.FindFieldByName("A.KEY", false),
              ::testing::Optional(*exp_map_key_));
  ASSERT_THAT(schema.FindFieldByName("A.VALUE", false),
              ::testing::Optional(*exp_map_value_));

  // Key side.
  ASSERT_THAT(schema.FindFieldByName("A.KEY.KEY", false),
              ::testing::Optional(*exp_key_struct_key_));
  ASSERT_THAT(schema.FindFieldByName("A.KEY.VALUE", false),
              ::testing::Optional(*exp_key_struct_value_));
  ASSERT_THAT(schema.FindFieldByName("A.KEY.VALUE.INNER_KEY", false),
              ::testing::Optional(*exp_inner_key_key_));
  ASSERT_THAT(schema.FindFieldByName("A.KEY.VALUE.INNER_VALUE", false),
              ::testing::Optional(*exp_inner_key_value_));

  // Value side.
  ASSERT_THAT(schema.FindFieldByName("A.VALUE.KEY", false),
              ::testing::Optional(*exp_value_struct_key_));
  ASSERT_THAT(schema.FindFieldByName("A.VALUE.VALUE", false),
              ::testing::Optional(*exp_value_struct_value_));
  ASSERT_THAT(schema.FindFieldByName("A.VALUE.VALUE.INNER_K", false),
              ::testing::Optional(*exp_inner_value_k_));
  ASSERT_THAT(schema.FindFieldByName("A.VALUE.VALUE.INNER_V", false),
              ::testing::Optional(*exp_inner_value_v_));
}
// Paths whose last segment does not name any field resolve to an empty
// optional, in both case-sensitive and case-insensitive mode.
TEST_F(ComplexMapStructShortNameTest, TestInvalidPaths) {
  auto& schema = *schema_;

  ASSERT_THAT(schema.FindFieldByName("a.invalid"), ::testing::Optional(std::nullopt));
  ASSERT_THAT(schema.FindFieldByName("a.key.invalid"),
              ::testing::Optional(std::nullopt));
  ASSERT_THAT(schema.FindFieldByName("a.value.invalid"),
              ::testing::Optional(std::nullopt));
  ASSERT_THAT(schema.FindFieldByName("A.KEY.VALUE.INVALID", false),
              ::testing::Optional(std::nullopt));
}
// A nested column reachable as "a.b" and a top-level column literally named
// "a.b" produce the same path; a case-sensitive name lookup must report it.
TEST(SchemaTest, DuplicatePathErrorCaseSensitive) {
  using iceberg::SchemaField;

  // struct column "a" with a single member "b" -> path "a.b" (id 2).
  SchemaField nested_b(2, "b", iceberg::int32(), false);
  auto nested_struct =
      std::make_shared<iceberg::StructType>(std::vector<SchemaField>{nested_b});
  SchemaField a(1, "a", nested_struct, false);

  // Top-level column whose own name is "a.b" (id 3).
  SchemaField duplicate_ab(3, "a.b", iceberg::int32(), false);

  iceberg::Schema schema(std::vector<SchemaField>{a, duplicate_ab}, 1);

  auto lookup = schema.FindFieldByName("a.b", /*case_sensitive=*/true);
  ASSERT_FALSE(lookup.has_value());
  EXPECT_EQ(lookup.error().kind, iceberg::ErrorKind::kInvalidSchema);
  EXPECT_THAT(lookup.error().message,
              ::testing::HasSubstr("Duplicate path found: a.b, prev id: 2, curr id: 3"));
}
// Nested path "A.B" and a top-level column named "a.b" differ only by letter
// case; a case-insensitive name lookup must report the duplicate.
TEST(SchemaTest, DuplicatePathErrorCaseInsensitive) {
  using iceberg::SchemaField;

  // struct column "A" with a single member "B" -> path "A.B" (id 2).
  SchemaField nested_b(2, "B", iceberg::int32(), false);
  auto nested_struct =
      std::make_shared<iceberg::StructType>(std::vector<SchemaField>{nested_b});
  SchemaField a(1, "A", nested_struct, false);

  // Top-level column whose own name is "a.b" (id 3).
  SchemaField duplicate_ab(3, "a.b", iceberg::int32(), false);

  iceberg::Schema schema(std::vector<SchemaField>{a, duplicate_ab}, 1);

  auto lookup = schema.FindFieldByName("A.B", /*case_sensitive=*/false);
  ASSERT_FALSE(lookup.has_value());
  EXPECT_EQ(lookup.error().kind, iceberg::ErrorKind::kInvalidSchema);
  EXPECT_THAT(lookup.error().message,
              ::testing::HasSubstr("Duplicate path found: a.b, prev id: 2, curr id: 3"));
}
// A field id reused at a different nesting level is reported when a lookup by
// id is attempted.
TEST(SchemaTest, NestedDuplicateFieldIdError) {
  using iceberg::SchemaField;

  // Top-level column with field id 1.
  SchemaField outer_field(1, "outer", iceberg::int32(), true);

  // Struct member that reuses field id 1.
  SchemaField inner_field(1, "inner", iceberg::string(), true);
  auto inner_struct =
      std::make_shared<iceberg::StructType>(std::vector<SchemaField>{inner_field});

  // Top-level struct column (id 2) holding the conflicting member.
  SchemaField nested_field(2, "nested", inner_struct, true);

  iceberg::Schema schema(std::vector<SchemaField>{outer_field, nested_field}, 1);

  // The lookup is expected to surface the duplicate id as an error.
  auto lookup = schema.FindFieldById(1);
  ASSERT_FALSE(lookup.has_value());
  EXPECT_EQ(lookup.error().kind, iceberg::ErrorKind::kInvalidSchema);
  EXPECT_THAT(lookup.error().message,
              ::testing::HasSubstr("Duplicate field id found: 1"));
}
namespace {
iceberg::SchemaField Id() { return {1, "id", iceberg::int32(), true}; }
iceberg::SchemaField Name() { return {2, "name", iceberg::string(), false}; }
iceberg::SchemaField Age() { return {3, "age", iceberg::int32(), true}; }
iceberg::SchemaField Email() { return {4, "email", iceberg::string(), true}; }
iceberg::SchemaField Street() { return {11, "street", iceberg::string(), true}; }
iceberg::SchemaField City() { return {12, "city", iceberg::string(), true}; }
iceberg::SchemaField Zip() { return {13, "zip", iceberg::int32(), true}; }
iceberg::SchemaField Theme() { return {24, "theme", iceberg::string(), true}; }
iceberg::SchemaField Key() { return {31, "key", iceberg::int32(), false}; }
iceberg::SchemaField Value() { return {32, "value", iceberg::string(), false}; }
iceberg::SchemaField Element() { return {41, "element", iceberg::string(), false}; }
static std::unique_ptr<iceberg::Schema> BasicSchema() {
return MakeSchema(Id(), Name(), Age(), Email());
}
static std::unique_ptr<iceberg::Schema> AddressSchema() {
auto address_type = MakeStructType(Street(), City(), Zip());
auto address_field = iceberg::SchemaField{14, "address", std::move(address_type), true};
return MakeSchema(Id(), Name(), std::move(address_field));
}
static std::unique_ptr<iceberg::Schema> NestedUserSchema() {
auto address_type = MakeStructType(Street(), City());
auto address_field = iceberg::SchemaField{16, "address", std::move(address_type), true};
auto user_type = MakeStructType(Name(), address_field);
auto user_field = iceberg::SchemaField{17, "user", std::move(user_type), true};
return MakeSchema(Id(), user_field);
}
static std::unique_ptr<iceberg::Schema> MultiLevelSchema() {
auto profile_type = MakeStructType(Name(), Age());
auto profile_field = iceberg::SchemaField{23, "profile", std::move(profile_type), true};
auto settings_type = MakeStructType(Theme());
auto settings_field =
iceberg::SchemaField{25, "settings", std::move(settings_type), true};
auto user_type = MakeStructType(profile_field, settings_field);
auto user_field = iceberg::SchemaField{26, "user", std::move(user_type), true};
return MakeSchema(Id(), user_field);
}
static std::unique_ptr<iceberg::Schema> ListSchema() {
auto list_type = std::make_shared<iceberg::ListType>(Element());
auto tags_field = iceberg::SchemaField{42, "tags", std::move(list_type), true};
auto user_type = MakeStructType(Name(), Age());
auto user_field = iceberg::SchemaField{45, "user", std::move(user_type), true};
return MakeSchema(Id(), tags_field, user_field);
}
static std::unique_ptr<iceberg::Schema> MapSchema() {
auto map_type = std::make_shared<iceberg::MapType>(Key(), Value());
auto map_field = iceberg::SchemaField{33, "map_field", std::move(map_type), true};
return MakeSchema(map_field);
}
static std::unique_ptr<iceberg::Schema> ListWithStructElementSchema() {
auto struct_type = MakeStructType(Name(), Age());
auto element_field = iceberg::SchemaField{53, "element", std::move(struct_type), false};
auto list_type = std::make_shared<iceberg::ListType>(element_field);
auto list_field = iceberg::SchemaField{54, "list_field", std::move(list_type), true};
return MakeSchema(list_field);
}
static std::unique_ptr<iceberg::Schema> ListOfMapSchema() {
auto map_value_struct = MakeStructType(Name(), Age());
auto map_value_field =
iceberg::SchemaField{64, "value", std::move(map_value_struct), false};
auto map_type = std::make_shared<iceberg::MapType>(Key(), map_value_field);
auto list_element = iceberg::SchemaField{65, "element", std::move(map_type), false};
auto list_type = std::make_shared<iceberg::ListType>(list_element);
auto list_field = iceberg::SchemaField{66, "list_field", std::move(list_type), true};
return MakeSchema(list_field);
}
static std::unique_ptr<iceberg::Schema> ComplexMapSchema() {
auto key_id_field = iceberg::SchemaField{71, "id", iceberg::int32(), false};
auto key_name_field = iceberg::SchemaField{72, "name", iceberg::string(), false};
auto key_struct = MakeStructType(key_id_field, key_name_field);
auto key_field = iceberg::SchemaField{73, "key", std::move(key_struct), false};
auto value_id_field = iceberg::SchemaField{74, "id", iceberg::int32(), false};
auto value_name_field = iceberg::SchemaField{75, "name", iceberg::string(), false};
auto value_struct = MakeStructType(value_id_field, value_name_field);
auto value_field = iceberg::SchemaField{76, "value", std::move(value_struct), false};
auto map_type = std::make_shared<iceberg::MapType>(key_field, value_field);
auto map_field = iceberg::SchemaField{77, "map_field", std::move(map_type), true};
return MakeSchema(map_field);
}
} // namespace
// One scenario for the parameterized Schema::Select test.
struct SelectTestParam {
// Label identifying the scenario.
std::string test_name;
// Factory for the schema that Select() is called on.
std::function<std::unique_ptr<iceberg::Schema>()> create_schema;
// Names passed to Select().
std::vector<std::string> select_fields;
// Factory for the schema the result is compared against; only invoked when
// should_succeed is true.
std::function<std::unique_ptr<iceberg::Schema>()> expected_schema;
// Whether Select() is expected to return a value rather than an error.
bool should_succeed;
// Message checked with HasErrorMessage when should_succeed is false.
std::string expected_error_message;
// Forwarded as the second argument of Select().
bool case_sensitive = true;
};
// Value-parameterized fixture for Schema::Select; adds no state of its own.
class SelectParamTest : public ::testing::TestWithParam<SelectTestParam> {};
// Runs Schema::Select for one SelectTestParam. A scenario expected to fail must
// produce a kInvalidArgument error matching the expected message; otherwise the
// selected schema must compare equal to the scenario's expected schema.
TEST_P(SelectParamTest, SelectFields) {
  const SelectTestParam& scenario = GetParam();
  auto source = scenario.create_schema();
  auto selected = source->Select(scenario.select_fields, scenario.case_sensitive);

  if (!scenario.should_succeed) {
    ASSERT_FALSE(selected.has_value());
    ASSERT_THAT(selected, iceberg::IsError(iceberg::ErrorKind::kInvalidArgument));
    ASSERT_THAT(selected, iceberg::HasErrorMessage(scenario.expected_error_message));
    return;
  }

  ASSERT_TRUE(selected.has_value());
  ASSERT_EQ(*selected.value(), *scenario.expected_schema());
}
// Top-level Select() scenarios against BasicSchema(): the "*" wildcard, single
// and multiple names, an unknown name (expects an empty schema), and
// case-sensitive versus case-insensitive matching of "Name".
//
// The trailing name generator reports each instantiated test under its
// `test_name` instead of gtest's default numeric index, so a failure identifies
// the scenario directly.
INSTANTIATE_TEST_SUITE_P(
    SelectTestCases, SelectParamTest,
    ::testing::Values(
        SelectTestParam{.test_name = "SelectAllColumns",
                        .create_schema = []() { return BasicSchema(); },
                        .select_fields = {"*"},
                        .expected_schema = []() { return BasicSchema(); },
                        .should_succeed = true},
        SelectTestParam{.test_name = "SelectSingleField",
                        .create_schema = []() { return BasicSchema(); },
                        .select_fields = {"name"},
                        .expected_schema = []() { return MakeSchema(Name()); },
                        .should_succeed = true},
        SelectTestParam{
            .test_name = "SelectMultipleFields",
            .create_schema = []() { return BasicSchema(); },
            .select_fields = {"id", "name", "age"},
            .expected_schema = []() { return MakeSchema(Id(), Name(), Age()); },
            .should_succeed = true},
        SelectTestParam{.test_name = "SelectNonExistentField",
                        .create_schema = []() { return BasicSchema(); },
                        .select_fields = {"nonexistent"},
                        .expected_schema = []() { return MakeSchema(); },
                        .should_succeed = true},
        SelectTestParam{.test_name = "SelectCaseSensitive",
                        .create_schema = []() { return BasicSchema(); },
                        .select_fields = {"Name"},  // case-sensitive
                        .expected_schema = []() { return MakeSchema(); },
                        .should_succeed = true},
        SelectTestParam{.test_name = "SelectCaseInsensitive",
                        .create_schema = []() { return BasicSchema(); },
                        .select_fields = {"Name"},  // case-insensitive
                        .expected_schema = []() { return MakeSchema(Name()); },
                        .should_succeed = true,
                        .case_sensitive = false}),
    [](const ::testing::TestParamInfo<SelectTestParam>& info) {
      return info.param.test_name;
    });
// Select() scenarios against AddressSchema(): picking only top-level columns,
// and picking "address.street", which is expected to keep the enclosing
// "address" struct (field id 14) pruned down to Street().
//
// The trailing name generator reports each instantiated test under its
// `test_name` instead of gtest's default numeric index.
INSTANTIATE_TEST_SUITE_P(
    SelectNestedTestCases, SelectParamTest,
    ::testing::Values(
        SelectTestParam{.test_name = "SelectTopLevelFields",
                        .create_schema = []() { return AddressSchema(); },
                        .select_fields = {"id", "name"},
                        .expected_schema = []() { return MakeSchema(Id(), Name()); },
                        .should_succeed = true},
        SelectTestParam{.test_name = "SelectNestedField",
                        .create_schema = []() { return AddressSchema(); },
                        .select_fields = {"address.street"},
                        .expected_schema =
                            []() {
                              auto address_type = MakeStructType(Street());
                              auto address_field = iceberg::SchemaField{
                                  14, "address", std::move(address_type), true};
                              return MakeSchema(address_field);
                            },
                        .should_succeed = true}),
    [](const ::testing::TestParamInfo<SelectTestParam>& info) {
      return info.param.test_name;
    });
// Select() scenarios that mix nesting depths: a top-level column combined with
// dotted paths at several depths (NestedUserSchema), sibling sub-structs under
// one parent (MultiLevelSchema), and a struct member in a schema that also
// contains a list (ListSchema). Each expected schema keeps the enclosing
// structs of the selected leaves with their original field ids.
//
// The trailing name generator reports each instantiated test under its
// `test_name` instead of gtest's default numeric index.
INSTANTIATE_TEST_SUITE_P(
    SelectMultiLevelTestCases, SelectParamTest,
    ::testing::Values(
        SelectTestParam{.test_name = "SelectTopLevelAndNestedFields",
                        .create_schema = []() { return NestedUserSchema(); },
                        .select_fields = {"id", "user.name", "user.address.street"},
                        .expected_schema =
                            []() {
                              auto address_type = MakeStructType(Street());
                              auto address_field = iceberg::SchemaField{
                                  16, "address", std::move(address_type), true};
                              auto user_type = MakeStructType(Name(), address_field);
                              auto user_field = iceberg::SchemaField{
                                  17, "user", std::move(user_type), true};
                              return MakeSchema(Id(), user_field);
                            },
                        .should_succeed = true},
        SelectTestParam{.test_name = "SelectNestedFieldsAtDifferentLevels",
                        .create_schema = []() { return MultiLevelSchema(); },
                        .select_fields = {"user.profile.name", "user.settings.theme"},
                        .expected_schema =
                            []() {
                              auto profile_type = MakeStructType(Name());
                              auto profile_field = iceberg::SchemaField{
                                  23, "profile", std::move(profile_type), true};
                              auto settings_type = MakeStructType(Theme());
                              auto settings_field = iceberg::SchemaField{
                                  25, "settings", std::move(settings_type), true};
                              auto user_type =
                                  MakeStructType(profile_field, settings_field);
                              auto user_field = iceberg::SchemaField{
                                  26, "user", std::move(user_type), true};
                              return MakeSchema(user_field);
                            },
                        .should_succeed = true},
        SelectTestParam{.test_name = "SelectListAndNestedFields",
                        .create_schema = []() { return ListSchema(); },
                        .select_fields = {"id", "user.name"},
                        .expected_schema =
                            []() {
                              auto user_type = MakeStructType(Name());
                              auto user_field = iceberg::SchemaField{
                                  45, "user", std::move(user_type), true};
                              return MakeSchema(Id(), user_field);
                            },
                        .should_succeed = true}),
    [](const ::testing::TestParamInfo<SelectTestParam>& info) {
      return info.param.test_name;
    });
// One scenario for the parameterized Schema::Project test.
struct ProjectTestParam {
// Label identifying the scenario.
std::string test_name;
// Factory for the schema that Project() is called on.
std::function<std::unique_ptr<iceberg::Schema>()> create_schema;
// Field ids passed to Project().
std::unordered_set<int32_t> selected_ids;
// Factory for the schema the result is compared against; only invoked when
// should_succeed is true.
std::function<std::unique_ptr<iceberg::Schema>()> expected_schema;
// Whether Project() is expected to return a value rather than an error.
bool should_succeed;
// Message checked with HasErrorMessage when should_succeed is false.
std::string expected_error_message;
};
// Value-parameterized fixture for Schema::Project; adds no state of its own.
class ProjectParamTest : public ::testing::TestWithParam<ProjectTestParam> {};
// Runs Schema::Project for one ProjectTestParam. A scenario expected to fail
// must produce a kInvalidArgument error matching the expected message;
// otherwise the projected schema must compare equal to the scenario's expected
// schema.
TEST_P(ProjectParamTest, ProjectFields) {
  const ProjectTestParam& scenario = GetParam();
  auto source = scenario.create_schema();
  auto projected = source->Project(scenario.selected_ids);

  if (!scenario.should_succeed) {
    ASSERT_FALSE(projected.has_value());
    ASSERT_THAT(projected, iceberg::IsError(iceberg::ErrorKind::kInvalidArgument));
    ASSERT_THAT(projected, iceberg::HasErrorMessage(scenario.expected_error_message));
    return;
  }

  ASSERT_TRUE(projected.has_value());
  ASSERT_EQ(*projected.value(), *scenario.expected_schema());
}
// Top-level Project() scenarios against BasicSchema(): all ids, a single id, an
// unknown id, and an empty id set (the last two expect an empty schema).
//
// The trailing name generator reports each instantiated test under its
// `test_name` instead of gtest's default numeric index, so a failure identifies
// the scenario directly.
INSTANTIATE_TEST_SUITE_P(
    ProjectTestCases, ProjectParamTest,
    ::testing::Values(
        ProjectTestParam{.test_name = "ProjectAllFields",
                         .create_schema = []() { return BasicSchema(); },
                         .selected_ids = {1, 2, 3, 4},
                         .expected_schema = []() { return BasicSchema(); },
                         .should_succeed = true},
        ProjectTestParam{.test_name = "ProjectSingleField",
                         .create_schema = []() { return BasicSchema(); },
                         .selected_ids = {2},
                         .expected_schema = []() { return MakeSchema(Name()); },
                         .should_succeed = true},
        ProjectTestParam{.test_name = "ProjectNonExistentFieldId",
                         .create_schema = []() { return BasicSchema(); },
                         .selected_ids = {999},
                         .expected_schema = []() { return MakeSchema(); },
                         .should_succeed = true},
        ProjectTestParam{.test_name = "ProjectEmptySelection",
                         .create_schema = []() { return BasicSchema(); },
                         .selected_ids = {},
                         .expected_schema = []() { return MakeSchema(); },
                         .should_succeed = true}),
    [](const ::testing::TestParamInfo<ProjectTestParam>& info) {
      return info.param.test_name;
    });
// Project() scenario against AddressSchema(): selecting id 11 is expected to
// keep the enclosing "address" struct (field id 14) pruned down to Street().
//
// The trailing name generator reports the instantiated test under its
// `test_name` instead of gtest's default numeric index.
INSTANTIATE_TEST_SUITE_P(
    ProjectNestedTestCases, ProjectParamTest,
    ::testing::Values(ProjectTestParam{
        .test_name = "ProjectNestedStructField",
        .create_schema = []() { return AddressSchema(); },
        .selected_ids = {11},
        .expected_schema =
            []() {
              auto address_type = MakeStructType(Street());
              auto address_field =
                  iceberg::SchemaField{14, "address", std::move(address_type), true};
              return MakeSchema(address_field);
            },
        .should_succeed = true}),
    [](const ::testing::TestParamInfo<ProjectTestParam>& info) {
      return info.param.test_name;
    });
// Project() scenarios that select leaf ids at several nesting depths
// (NestedUserSchema, MultiLevelSchema, ListSchema). Each expected schema keeps
// the enclosing structs of the selected leaves with their original field ids.
//
// The trailing name generator reports each instantiated test under its
// `test_name` instead of gtest's default numeric index.
INSTANTIATE_TEST_SUITE_P(
    ProjectMultiLevelTestCases, ProjectParamTest,
    ::testing::Values(
        ProjectTestParam{.test_name = "ProjectTopLevelAndNestedFields",
                         .create_schema = []() { return NestedUserSchema(); },
                         .selected_ids = {1, 2, 11},
                         .expected_schema =
                             []() {
                               auto address_type = MakeStructType(Street());
                               auto address_field = iceberg::SchemaField{
                                   16, "address", std::move(address_type), true};
                               auto user_type = MakeStructType(Name(), address_field);
                               auto user_field = iceberg::SchemaField{
                                   17, "user", std::move(user_type), true};
                               return MakeSchema(Id(), user_field);
                             },
                         .should_succeed = true},
        ProjectTestParam{.test_name = "ProjectNestedFieldsAtDifferentLevels",
                         .create_schema = []() { return MultiLevelSchema(); },
                         .selected_ids = {2, 24},
                         .expected_schema =
                             []() {
                               auto profile_type = MakeStructType(Name());
                               auto profile_field = iceberg::SchemaField{
                                   23, "profile", std::move(profile_type), true};
                               auto settings_type = MakeStructType(Theme());
                               auto settings_field = iceberg::SchemaField{
                                   25, "settings", std::move(settings_type), true};
                               auto user_type =
                                   MakeStructType(profile_field, settings_field);
                               auto user_field = iceberg::SchemaField{
                                   26, "user", std::move(user_type), true};
                               return MakeSchema(user_field);
                             },
                         .should_succeed = true},
        ProjectTestParam{.test_name = "ProjectListAndNestedFields",
                         .create_schema = []() { return ListSchema(); },
                         .selected_ids = {1, 2},
                         .expected_schema =
                             []() {
                               auto user_type = MakeStructType(Name());
                               auto user_field = iceberg::SchemaField{
                                   45, "user", std::move(user_type), true};
                               return MakeSchema(Id(), user_field);
                             },
                         .should_succeed = true}),
    [](const ::testing::TestParamInfo<ProjectTestParam>& info) {
      return info.param.test_name;
    });
// Project() failure scenario against MapSchema(): selecting only id 31 (the
// map key, per the inline note) is expected to be rejected with the message
// below. `expected_schema` is never invoked on the failure path, so it simply
// returns nullptr.
//
// The trailing name generator reports the instantiated test under its
// `test_name` instead of gtest's default numeric index.
INSTANTIATE_TEST_SUITE_P(
    ProjectMapErrorTestCases, ProjectParamTest,
    ::testing::Values(ProjectTestParam{
        .test_name = "ProjectMapWithOnlyKey",
        .create_schema = []() { return MapSchema(); },
        .selected_ids = {31},  // Only select key field, not value field
        .expected_schema = []() { return nullptr; },
        .should_succeed = false,
        .expected_error_message = "Cannot project Map without value field"}),
    [](const ::testing::TestParamInfo<ProjectTestParam>& info) {
      return info.param.test_name;
    });
// Project() scenarios through list and map containers: pruning a list's struct
// element, projecting struct members that sit inside a list-of-map value, and
// pruning both the key struct and the value struct of a map. The last scenario
// projects an unknown id from BasicSchema() and expects an empty schema.
//
// The trailing name generator reports each instantiated test under its
// `test_name` instead of gtest's default numeric index.
INSTANTIATE_TEST_SUITE_P(
    ProjectListAndMapTestCases, ProjectParamTest,
    ::testing::Values(
        ProjectTestParam{.test_name = "ProjectListElement",
                         .create_schema = []() { return ListWithStructElementSchema(); },
                         .selected_ids = {2},  // Only select name field from list element
                         .expected_schema =
                             []() {
                               auto struct_type = MakeStructType(Name());
                               auto element_field = iceberg::SchemaField{
                                   53, "element", std::move(struct_type), false};
                               auto list_type =
                                   std::make_shared<iceberg::ListType>(element_field);
                               auto list_field = iceberg::SchemaField{
                                   54, "list_field", std::move(list_type), true};
                               return MakeSchema(list_field);
                             },
                         .should_succeed = true},
        ProjectTestParam{.test_name = "ProjectListOfMap",
                         .create_schema = []() { return ListOfMapSchema(); },
                         .selected_ids = {2, 3},
                         .expected_schema =
                             []() {
                               auto map_value_struct = MakeStructType(Name(), Age());
                               auto map_value_field = iceberg::SchemaField{
                                   64, "value", std::move(map_value_struct), false};
                               auto map_type = std::make_shared<iceberg::MapType>(
                                   Key(), map_value_field);
                               auto list_element = iceberg::SchemaField{
                                   65, "element", std::move(map_type), false};
                               auto list_type =
                                   std::make_shared<iceberg::ListType>(list_element);
                               auto list_field = iceberg::SchemaField{
                                   66, "list_field", std::move(list_type), true};
                               return MakeSchema(list_field);
                             },
                         .should_succeed = true},
        ProjectTestParam{
            .test_name = "ProjectMapKeyAndValue",
            .create_schema = []() { return ComplexMapSchema(); },
            .selected_ids = {71, 74},
            .expected_schema =
                []() {
                  auto key_id_field =
                      iceberg::SchemaField{71, "id", iceberg::int32(), false};
                  auto key_struct = MakeStructType(key_id_field);
                  auto key_field =
                      iceberg::SchemaField{73, "key", std::move(key_struct), false};
                  auto value_id_field =
                      iceberg::SchemaField{74, "id", iceberg::int32(), false};
                  auto value_struct = MakeStructType(value_id_field);
                  auto value_field =
                      iceberg::SchemaField{76, "value", std::move(value_struct), false};
                  auto map_type =
                      std::make_shared<iceberg::MapType>(key_field, value_field);
                  auto map_field =
                      iceberg::SchemaField{77, "map_field", std::move(map_type), true};
                  return MakeSchema(map_field);
                },
            .should_succeed = true},
        ProjectTestParam{.test_name = "ProjectEmptyResult",
                         .create_schema = []() { return BasicSchema(); },
                         .selected_ids = {999},  // Select non-existent field
                         .expected_schema = []() { return MakeSchema(); },
                         .should_succeed = true}),
    [](const ::testing::TestParamInfo<ProjectTestParam>& info) {
      return info.param.test_name;
    });
// Fixture for the concurrent-lookup tests. It owns a three-column schema
// (id/name/age, schema id 100) together with the individual fields, which the
// tests compare lookup results against.
class SchemaThreadSafetyTest : public ::testing::Test {
 protected:
  // Recreates the fields and the schema before every test.
  void SetUp() override {
    field1_ = std::make_unique<iceberg::SchemaField>(1, "id", iceberg::int32(), true);
    field2_ = std::make_unique<iceberg::SchemaField>(2, "name", iceberg::string(), true);
    field3_ = std::make_unique<iceberg::SchemaField>(3, "age", iceberg::int32(), true);

    // The schema receives copies of the fields; the originals stay available
    // as expected values.
    std::vector<iceberg::SchemaField> columns{*field1_, *field2_, *field3_};
    schema_ = std::make_unique<iceberg::Schema>(std::move(columns), 100);
  }

  std::unique_ptr<iceberg::Schema> schema_;
  std::unique_ptr<iceberg::SchemaField> field1_;
  std::unique_ptr<iceberg::SchemaField> field2_;
  std::unique_ptr<iceberg::SchemaField> field3_;
};
// Runs FindFieldById from 10 threads at once against the shared schema. Every
// thread repeats the same two lookups 100 times: id 1 must match field1_, and
// id 999 must yield a result whose contained optional is std::nullopt.
TEST_F(SchemaThreadSafetyTest, ConcurrentFindFieldById) {
  const int worker_count = 10;
  const int rounds_per_worker = 100;

  // Identical workload for every thread; a failed ASSERT only leaves this lambda.
  const auto lookup_loop = [this, rounds_per_worker]() {
    for (int round = 0; round < rounds_per_worker; ++round) {
      ASSERT_THAT(schema_->FindFieldById(1), ::testing::Optional(*field1_));
      ASSERT_THAT(schema_->FindFieldById(999), ::testing::Optional(std::nullopt));
    }
  };

  std::vector<std::thread> workers;
  for (int started = 0; started < worker_count; ++started) {
    workers.emplace_back(lookup_loop);
  }

  for (std::thread& worker : workers) {
    worker.join();
  }
}
// Runs a mix of id-based and name-based lookups from 8 threads at once. Each
// thread is assigned one of four workloads by its index modulo 4 and repeats
// it 50 times against the shared schema.
TEST_F(SchemaThreadSafetyTest, MixedConcurrentOperations) {
  const int worker_count = 8;
  const int rounds_per_worker = 50;

  std::vector<std::thread> workers;
  for (int index = 0; index < worker_count; ++index) {
    const int role = index % 4;  // index is never negative, so role is in [0, 3]
    workers.emplace_back([this, rounds_per_worker, role]() {
      for (int round = 0; round < rounds_per_worker; ++round) {
        switch (role) {
          case 0: {
            // Lookup by id only.
            ASSERT_THAT(schema_->FindFieldById(1), ::testing::Optional(*field1_));
            break;
          }
          case 1: {
            // Lookup by name with the second argument set to true.
            ASSERT_THAT(schema_->FindFieldByName("name", true),
                        ::testing::Optional(*field2_));
            break;
          }
          case 2: {
            // Upper-case name with the second argument set to false.
            ASSERT_THAT(schema_->FindFieldByName("AGE", false),
                        ::testing::Optional(*field3_));
            break;
          }
          default: {
            // Combination of the id-based and both name-based lookups.
            ASSERT_THAT(schema_->FindFieldById(2), ::testing::Optional(*field2_));
            ASSERT_THAT(schema_->FindFieldByName("id", true),
                        ::testing::Optional(*field1_));
            ASSERT_THAT(schema_->FindFieldByName("age", false),
                        ::testing::Optional(*field3_));
            break;
          }
        }
      }
    });
  }

  for (std::thread& worker : workers) {
    worker.join();
  }
}