blob: 4146cb01f961fc04a55b09ab68d1c53b65f864e5 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <memory>
#include <string>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "iceberg/partition_spec.h"
#include "iceberg/result.h"
#include "iceberg/schema.h"
#include "iceberg/snapshot.h"
#include "iceberg/sort_field.h"
#include "iceberg/sort_order.h"
#include "iceberg/table_metadata.h"
#include "iceberg/table_properties.h"
#include "iceberg/table_update.h"
#include "iceberg/test/matchers.h"
#include "iceberg/transform.h"
#include "iceberg/type.h"
namespace iceberg {
namespace {
// Builds the three-column schema (id: int32, data: string, ts: timestamp)
// with schema ID 0 that the builder tests use as their starting schema.
std::shared_ptr<Schema> CreateTestSchema() {
  return std::make_shared<Schema>(
      std::vector<SchemaField>{
          SchemaField::MakeRequired(1, "id", int32()),
          SchemaField::MakeRequired(2, "data", string()),
          SchemaField::MakeRequired(3, "ts", timestamp()),
      },
      0);
}
// Builds a schema (schema ID 1) whose field IDs are non-contiguous (2, 5, 8)
// and whose only identifier field is the column with ID 2.
Result<std::unique_ptr<Schema>> CreateDisorderedSchema() {
  return Schema::Make(
      std::vector<SchemaField>{
          SchemaField::MakeRequired(2, "id", int32()),
          SchemaField::MakeRequired(5, "part_col", string()),
          SchemaField::MakeRequired(8, "sort_col", timestamp()),
      },
      /*schema_id=*/1,
      /*identifier_field_ids=*/std::vector<int32_t>{2});
}
// Helper function to create base metadata for tests
std::unique_ptr<TableMetadata> CreateBaseMetadata(
std::shared_ptr<PartitionSpec> spec = nullptr) {
auto metadata = std::make_unique<TableMetadata>();
metadata->format_version = 2;
metadata->table_uuid = "test-uuid-1234";
metadata->location = "s3://bucket/test";
metadata->last_sequence_number = 0;
metadata->last_updated_ms = TimePointMs{std::chrono::milliseconds(1000)};
metadata->last_column_id = 3;
metadata->current_schema_id = 0;
metadata->schemas.push_back(CreateTestSchema());
if (spec == nullptr) {
metadata->partition_specs.push_back(PartitionSpec::Unpartitioned());
metadata->default_spec_id = PartitionSpec::kInitialSpecId;
} else {
metadata->default_spec_id = spec->spec_id();
metadata->partition_specs.push_back(std::move(spec));
}
metadata->last_partition_id = 0;
metadata->current_snapshot_id = kInvalidSnapshotId;
metadata->default_sort_order_id = SortOrder::kUnsortedOrderId;
metadata->sort_orders.push_back(SortOrder::Unsorted());
metadata->next_row_id = TableMetadata::kInitialRowId;
return metadata;
}
} // namespace
// test for TableMetadata
// Checks the metadata produced by TableMetadata::Make from a schema with
// non-contiguous field IDs (2, 5, 8). The assertions expect the resulting
// schema to carry field IDs 1, 2, 3 and the partition/sort source IDs to be
// remapped accordingly (5 -> 2, 8 -> 3).
TEST(TableMetadataTest, Make) {
  // Named `schema` (not `Schema`) so the local does not shadow the Schema type.
  ICEBERG_UNWRAP_OR_FAIL(auto schema, CreateDisorderedSchema());
  ICEBERG_UNWRAP_OR_FAIL(
      auto spec, PartitionSpec::Make(1, std::vector<PartitionField>{PartitionField(
                                            5, 1, "part_name", Transform::Identity())}));
  ICEBERG_UNWRAP_OR_FAIL(
      auto order, SortOrder::Make(1, std::vector<SortField>{SortField(
                                         8, Transform::Identity(),
                                         SortDirection::kAscending, NullOrder::kLast)}));
  ICEBERG_UNWRAP_OR_FAIL(
      auto metadata, TableMetadata::Make(*schema, *spec, *order, "s3://bucket/test", {}));

  // Check schema fields
  ASSERT_EQ(1, metadata->schemas.size());
  auto fields = metadata->schemas[0]->fields() | std::ranges::to<std::vector>();
  ASSERT_EQ(3, fields.size());
  EXPECT_EQ(1, fields[0].field_id());
  EXPECT_EQ("id", fields[0].name());
  EXPECT_EQ(2, fields[1].field_id());
  EXPECT_EQ("part_col", fields[1].name());
  EXPECT_EQ(3, fields[2].field_id());
  EXPECT_EQ("sort_col", fields[2].name());
  // The identifier field was declared as ID 2; it is expected to follow the
  // "id" column to its new ID 1.
  const auto& identifier_field_ids = metadata->schemas[0]->IdentifierFieldIds();
  ASSERT_EQ(1, identifier_field_ids.size());
  EXPECT_EQ(1, identifier_field_ids[0]);

  // Check partition spec
  ASSERT_EQ(1, metadata->partition_specs.size());
  EXPECT_EQ(PartitionSpec::kInitialSpecId, metadata->partition_specs[0]->spec_id());
  auto spec_fields =
      metadata->partition_specs[0]->fields() | std::ranges::to<std::vector>();
  ASSERT_EQ(1, spec_fields.size());
  EXPECT_EQ(PartitionSpec::kInvalidPartitionFieldId + 1, spec_fields[0].field_id());
  EXPECT_EQ(2, spec_fields[0].source_id());
  EXPECT_EQ("part_name", spec_fields[0].name());

  // Check sort order
  ASSERT_EQ(1, metadata->sort_orders.size());
  EXPECT_EQ(SortOrder::kInitialSortOrderId, metadata->sort_orders[0]->order_id());
  auto order_fields = metadata->sort_orders[0]->fields() | std::ranges::to<std::vector>();
  ASSERT_EQ(1, order_fields.size());
  EXPECT_EQ(3, order_fields[0].source_id());
  EXPECT_EQ(SortDirection::kAscending, order_fields[0].direction());
  EXPECT_EQ(NullOrder::kLast, order_fields[0].null_order());
}
// Expects TableMetadata::Make to fail with kInvalidSchema when the partition
// field's source ID (6) is not one of the schema's field IDs (2, 5, 8).
TEST(TableMetadataTest, MakeWithInvalidPartitionSpec) {
  ICEBERG_UNWRAP_OR_FAIL(auto disordered_schema, CreateDisorderedSchema());
  ICEBERG_UNWRAP_OR_FAIL(
      auto bad_spec,
      PartitionSpec::Make(1, std::vector<PartitionField>{PartitionField(
                                 6, 1, "part_name", Transform::Identity())}));
  ICEBERG_UNWRAP_OR_FAIL(
      auto sort_order,
      SortOrder::Make(1, std::vector<SortField>{
                             SortField(8, Transform::Identity(),
                                       SortDirection::kAscending, NullOrder::kLast)}));

  auto result = TableMetadata::Make(*disordered_schema, *bad_spec, *sort_order,
                                    "s3://bucket/test", {});

  EXPECT_THAT(result, IsError(ErrorKind::kInvalidSchema));
  EXPECT_THAT(result, HasErrorMessage("Cannot find source partition field"));
}
// Expects TableMetadata::Make to fail with kInvalidSchema when the sort
// field's source ID (9) is not one of the schema's field IDs (2, 5, 8).
TEST(TableMetadataTest, MakeWithInvalidSortOrder) {
  ICEBERG_UNWRAP_OR_FAIL(auto disordered_schema, CreateDisorderedSchema());
  ICEBERG_UNWRAP_OR_FAIL(
      auto partition_spec,
      PartitionSpec::Make(1, std::vector<PartitionField>{PartitionField(
                                 5, 1, "part_name", Transform::Identity())}));
  ICEBERG_UNWRAP_OR_FAIL(
      auto bad_order,
      SortOrder::Make(1, std::vector<SortField>{
                             SortField(9, Transform::Identity(),
                                       SortDirection::kAscending, NullOrder::kLast)}));

  auto result = TableMetadata::Make(*disordered_schema, *partition_spec, *bad_order,
                                    "s3://bucket/test", {});

  EXPECT_THAT(result, IsError(ErrorKind::kInvalidSchema));
  EXPECT_THAT(result, HasErrorMessage("Cannot find source sort field"));
}
// Expects TableMetadata::Make to reject table properties that fail
// validation, using an unpartitioned spec and an unsorted order throughout.
TEST(TableMetadataTest, InvalidProperties) {
  auto unpartitioned = PartitionSpec::Unpartitioned();
  auto unsorted = SortOrder::Unsorted();

  {
    // Metrics mode configured for a column name that the schema does not have.
    ICEBERG_UNWRAP_OR_FAIL(auto schema, CreateDisorderedSchema());
    std::unordered_map<std::string, std::string> invalid_metrics_config = {
        {std::string(TableProperties::kMetricModeColumnConfPrefix) + "unknown_col",
         "value"}};
    auto result = TableMetadata::Make(*schema, *unpartitioned, *unsorted,
                                      "s3://bucket/test", invalid_metrics_config);
    EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed));
    EXPECT_THAT(result, HasErrorMessage("Invalid metrics config"));
  }

  {
    // Negative value for the commit retry count.
    ICEBERG_UNWRAP_OR_FAIL(auto schema, CreateDisorderedSchema());
    std::unordered_map<std::string, std::string> invalid_commit_properties = {
        {TableProperties::kCommitNumRetries.key(), "-1"}};
    auto result = TableMetadata::Make(*schema, *unpartitioned, *unsorted,
                                      "s3://bucket/test", invalid_commit_properties);
    EXPECT_THAT(result, IsError(ErrorKind::kValidationFailed));
    EXPECT_THAT(result,
                HasErrorMessage(std::format(
                    "Table property {} must have non negative integer value, but got {}",
                    TableProperties::kCommitNumRetries.key(), -1)));
  }
}
// test construction of TableMetadataBuilder
// Builds metadata from an empty format-version-2 builder and checks the
// initial values of the resulting metadata.
TEST(TableMetadataBuilderTest, BuildFromEmpty) {
  auto metadata_builder = TableMetadataBuilder::BuildFromEmpty(2);
  ASSERT_NE(metadata_builder, nullptr);

  // Supply the pieces this test needs for a successful build.
  auto test_schema = CreateTestSchema();
  metadata_builder->SetCurrentSchema(test_schema, test_schema->HighestFieldId().value());
  metadata_builder->SetDefaultSortOrder(SortOrder::Unsorted());
  metadata_builder->SetDefaultPartitionSpec(PartitionSpec::Unpartitioned());
  metadata_builder->AssignUUID("new-uuid-5678");

  ICEBERG_UNWRAP_OR_FAIL(auto built, metadata_builder->Build());
  ASSERT_NE(built, nullptr);

  EXPECT_EQ(built->format_version, 2);
  EXPECT_EQ(built->last_sequence_number, TableMetadata::kInitialSequenceNumber);
  EXPECT_EQ(built->default_spec_id, PartitionSpec::kInitialSpecId);
  EXPECT_EQ(built->default_sort_order_id, SortOrder::kUnsortedOrderId);
  EXPECT_EQ(built->current_snapshot_id, kInvalidSnapshotId);
  EXPECT_TRUE(built->metadata_log.empty());
}
// Builds from existing metadata and checks that the base fields are carried
// over and that the previous metadata location is recorded as the single
// metadata-log entry, stamped with the base's last-updated time.
TEST(TableMetadataBuilderTest, BuildFromExisting) {
  auto base = CreateBaseMetadata();
  std::string base_metadata_location = "s3://bucket/test/00010-xxx.metadata.json";

  auto builder = TableMetadataBuilder::BuildFrom(base.get());
  // Verify the builder before it is dereferenced for the first time.
  ASSERT_NE(builder, nullptr);
  builder->SetPreviousMetadataLocation(base_metadata_location);

  ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build());
  ASSERT_NE(metadata, nullptr);

  EXPECT_EQ(metadata->format_version, 2);
  EXPECT_EQ(metadata->table_uuid, "test-uuid-1234");
  EXPECT_EQ(metadata->location, "s3://bucket/test");

  ASSERT_EQ(1, metadata->metadata_log.size());
  EXPECT_EQ(base_metadata_location, metadata->metadata_log[0].metadata_file);
  EXPECT_EQ(base->last_updated_ms, metadata->metadata_log[0].timestamp_ms);
}
// Checks how the metadata log grows when a previous metadata location is
// recorded, relative to the kMetadataPreviousVersionsMax table property.
// The base starts with two log entries.
TEST(TableMetadataBuilderTest, BuildupMetadataLog) {
  auto base = CreateBaseMetadata();
  std::string base_metadata_location = "s3://bucket/test/00010-xxx.metadata.json";
  base->metadata_log = {
      {.timestamp_ms = TimePointMs{std::chrono::milliseconds(100)},
       .metadata_file = "s3://bucket/test/00000-aaa.metadata.json"},
      {.timestamp_ms = TimePointMs{std::chrono::milliseconds(200)},
       .metadata_file = "s3://bucket/test/00001-bbb.metadata.json"},
  };

  {
    // Max is 3: the two existing entries plus the new one all fit.
    base->properties.Set(TableProperties::kMetadataPreviousVersionsMax, 3);
    auto builder = TableMetadataBuilder::BuildFrom(base.get());
    // Verify the builder before it is dereferenced for the first time.
    ASSERT_NE(builder, nullptr);
    builder->SetPreviousMetadataLocation(base_metadata_location);
    ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build());
    EXPECT_EQ(3, metadata->metadata_log.size());
    EXPECT_EQ(base->metadata_log[0].metadata_file,
              metadata->metadata_log[0].metadata_file);
    EXPECT_EQ(base->metadata_log[1].metadata_file,
              metadata->metadata_log[1].metadata_file);
    EXPECT_EQ(base->last_updated_ms, metadata->metadata_log[2].timestamp_ms);
    EXPECT_EQ(base_metadata_location, metadata->metadata_log[2].metadata_file);
  }

  {
    // Max is 2: appending the new entry would exceed it, so the oldest base
    // entry is expected to be gone from the result.
    base->properties.Set(TableProperties::kMetadataPreviousVersionsMax, 2);
    auto builder = TableMetadataBuilder::BuildFrom(base.get());
    // Verify the builder before it is dereferenced for the first time.
    ASSERT_NE(builder, nullptr);
    builder->SetPreviousMetadataLocation(base_metadata_location);
    ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build());
    EXPECT_EQ(2, metadata->metadata_log.size());
    EXPECT_EQ(base->metadata_log[1].metadata_file,
              metadata->metadata_log[0].metadata_file);
    EXPECT_EQ(base->last_updated_ms, metadata->metadata_log[1].timestamp_ms);
    EXPECT_EQ(base_metadata_location, metadata->metadata_log[1].metadata_file);
  }
}
// Test AssignUUID
// Exercises AssignUUID for new and existing tables: explicit, empty,
// unchanged, auto-generated and case-differing UUIDs.
TEST(TableMetadataBuilderTest, AssignUUID) {
  // Explicit UUID on a brand-new table.
  auto uuid_builder = TableMetadataBuilder::BuildFromEmpty(2);
  auto test_schema = CreateTestSchema();
  uuid_builder->SetCurrentSchema(test_schema, test_schema->HighestFieldId().value());
  uuid_builder->SetDefaultSortOrder(SortOrder::Unsorted());
  uuid_builder->SetDefaultPartitionSpec(PartitionSpec::Unpartitioned());
  uuid_builder->AssignUUID("new-uuid-5678");
  ICEBERG_UNWRAP_OR_FAIL(auto built, uuid_builder->Build());
  EXPECT_EQ(built->table_uuid, "new-uuid-5678");

  // Replacing the UUID of an existing table.
  auto existing = CreateBaseMetadata();
  uuid_builder = TableMetadataBuilder::BuildFrom(existing.get());
  uuid_builder->AssignUUID("updated-uuid-9999");
  ICEBERG_UNWRAP_OR_FAIL(built, uuid_builder->Build());
  EXPECT_EQ(built->table_uuid, "updated-uuid-9999");

  // An empty UUID is expected to surface as a build error.
  uuid_builder = TableMetadataBuilder::BuildFromEmpty(2);
  uuid_builder->AssignUUID("");
  ASSERT_THAT(uuid_builder->Build(), HasErrorMessage("Cannot assign empty UUID"));

  // Re-assigning the UUID the table already has leaves it unchanged.
  existing = CreateBaseMetadata();
  uuid_builder = TableMetadataBuilder::BuildFrom(existing.get());
  uuid_builder->AssignUUID("test-uuid-1234");
  ICEBERG_UNWRAP_OR_FAIL(built, uuid_builder->Build());
  EXPECT_EQ(built->table_uuid, "test-uuid-1234");

  // No argument: some non-empty UUID is expected to be generated.
  uuid_builder = TableMetadataBuilder::BuildFromEmpty(2);
  uuid_builder->SetCurrentSchema(test_schema, test_schema->HighestFieldId().value());
  uuid_builder->SetDefaultSortOrder(SortOrder::Unsorted());
  uuid_builder->SetDefaultPartitionSpec(PartitionSpec::Unpartitioned());
  uuid_builder->AssignUUID();
  ICEBERG_UNWRAP_OR_FAIL(built, uuid_builder->Build());
  EXPECT_FALSE(built->table_uuid.empty());

  // A UUID differing only in letter case is expected to be treated as equal,
  // so the original spelling is kept.
  existing = CreateBaseMetadata();
  existing->table_uuid = "TEST-UUID-ABCD";
  uuid_builder = TableMetadataBuilder::BuildFrom(existing.get());
  uuid_builder->AssignUUID("test-uuid-abcd");
  ICEBERG_UNWRAP_OR_FAIL(built, uuid_builder->Build());
  EXPECT_EQ(built->table_uuid, "TEST-UUID-ABCD");
}
// Checks that SetProperties adds new keys and overwrites existing ones.
TEST(TableMetadataBuilderTest, SetProperties) {
  auto base_metadata = CreateBaseMetadata();

  // Two fresh properties on a table that has none.
  auto props_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  props_builder->SetProperties({{"key1", "value1"}, {"key2", "value2"}});
  ICEBERG_UNWRAP_OR_FAIL(auto result, props_builder->Build());
  EXPECT_EQ(result->properties.configs().size(), 2);
  EXPECT_EQ(result->properties.configs().at("key1"), "value1");
  EXPECT_EQ(result->properties.configs().at("key2"), "value2");

  // A second call on a fresh builder overwrites key1 and introduces key3.
  props_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  props_builder->SetProperties({{"key1", "value1"}});
  props_builder->SetProperties({{"key1", "new_value1"}, {"key3", "value3"}});
  ICEBERG_UNWRAP_OR_FAIL(result, props_builder->Build());
  EXPECT_EQ(result->properties.configs().size(), 2);
  EXPECT_EQ(result->properties.configs().at("key1"), "new_value1");
  EXPECT_EQ(result->properties.configs().at("key3"), "value3");
}
// Checks that RemoveProperties drops the named keys and that naming a key
// that was never set ("key4") does not cause the build to fail.
TEST(TableMetadataBuilderTest, RemoveProperties) {
  auto base_metadata = CreateBaseMetadata();
  auto props_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  props_builder->SetProperties(
      {{"key1", "value1"}, {"key2", "value2"}, {"key3", "value3"}});
  props_builder->RemoveProperties({"key2", "key4"});

  ICEBERG_UNWRAP_OR_FAIL(auto result, props_builder->Build());
  EXPECT_EQ(result->properties.configs().size(), 2);
  EXPECT_EQ(result->properties.configs().at("key1"), "value1");
  EXPECT_EQ(result->properties.configs().at("key3"), "value3");
}
// Checks a valid upgrade (1 -> 2) and two rejected requests: an upgrade to
// version 4 and a downgrade from 2 to 1.
TEST(TableMetadataBuilderTest, UpgradeFormatVersion) {
  // Valid upgrade from version 1 to version 2.
  auto version_builder = TableMetadataBuilder::BuildFromEmpty(1);
  auto test_schema = CreateTestSchema();
  version_builder->SetCurrentSchema(test_schema, test_schema->HighestFieldId().value());
  version_builder->SetDefaultSortOrder(SortOrder::Unsorted());
  version_builder->SetDefaultPartitionSpec(PartitionSpec::Unpartitioned());
  version_builder->UpgradeFormatVersion(2);
  ICEBERG_UNWRAP_OR_FAIL(auto upgraded, version_builder->Build());
  EXPECT_EQ(upgraded->format_version, 2);

  // Version 4 is expected to be reported as unsupported.
  version_builder = TableMetadataBuilder::BuildFromEmpty(3);
  version_builder->UpgradeFormatVersion(4);
  EXPECT_THAT(version_builder->Build(),
              HasErrorMessage("Cannot upgrade table to unsupported format version"));

  // Going from version 2 back to version 1 is expected to fail.
  version_builder = TableMetadataBuilder::BuildFromEmpty(2);
  version_builder->UpgradeFormatVersion(1);
  EXPECT_THAT(version_builder->Build(), HasErrorMessage("Cannot downgrade"));
}
// Test AddSortOrder
// Covers the successful AddSortOrder paths: adding the unsorted order, a new
// order, a duplicate order, and several orders with caller-chosen IDs.
TEST(TableMetadataBuilderTest, AddSortOrderBasic) {
  auto base_metadata = CreateBaseMetadata();
  auto order_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  auto test_schema = CreateTestSchema();

  // Case 1: the unsorted order is already in the base, so the count stays 1.
  order_builder->AddSortOrder(SortOrder::Unsorted());
  ICEBERG_UNWRAP_OR_FAIL(auto result, order_builder->Build());
  ASSERT_EQ(result->sort_orders.size(), 1);
  EXPECT_TRUE(result->sort_orders[0]->is_unsorted());

  // Case 2: one new order sorted ascending on column 1.
  order_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  SortField id_ascending(1, Transform::Identity(), SortDirection::kAscending,
                         NullOrder::kFirst);
  ICEBERG_UNWRAP_OR_FAIL(
      auto single_order,
      SortOrder::Make(*test_schema, 1, std::vector<SortField>{id_ascending}));
  order_builder->AddSortOrder(std::move(single_order));
  ICEBERG_UNWRAP_OR_FAIL(result, order_builder->Build());
  ASSERT_EQ(result->sort_orders.size(), 2);
  EXPECT_EQ(result->sort_orders[1]->order_id(), 1);

  // Case 3: adding the same order twice is expected to register it once.
  order_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  ICEBERG_UNWRAP_OR_FAIL(
      auto first_copy,
      SortOrder::Make(*test_schema, 1, std::vector<SortField>{id_ascending}));
  ICEBERG_UNWRAP_OR_FAIL(
      auto second_copy,
      SortOrder::Make(*test_schema, 1, std::vector<SortField>{id_ascending}));
  order_builder->AddSortOrder(std::move(first_copy));
  order_builder->AddSortOrder(std::move(second_copy));
  ICEBERG_UNWRAP_OR_FAIL(result, order_builder->Build());
  ASSERT_EQ(result->sort_orders.size(), 2);

  // Case 4: two distinct orders; the first is created with ID 99 and is
  // expected to come out of the builder with ID 1.
  order_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  SortField data_descending(2, Transform::Identity(), SortDirection::kDescending,
                            NullOrder::kLast);
  ICEBERG_UNWRAP_OR_FAIL(
      auto order_with_id_99,
      SortOrder::Make(*test_schema, 99, std::vector<SortField>{id_ascending}));
  ICEBERG_UNWRAP_OR_FAIL(
      auto two_field_order,
      SortOrder::Make(*test_schema, 2,
                      std::vector<SortField>{id_ascending, data_descending}));
  order_builder->AddSortOrder(std::move(order_with_id_99));
  order_builder->AddSortOrder(std::move(two_field_order));
  ICEBERG_UNWRAP_OR_FAIL(result, order_builder->Build());
  ASSERT_EQ(result->sort_orders.size(), 3);
  EXPECT_EQ(result->sort_orders[1]->order_id(), 1);
  EXPECT_EQ(result->sort_orders[2]->order_id(), 2);
}
// Covers AddSortOrder failures. Each case captures the Build() result once so
// that the error kind and the error message are checked against the same
// result instead of two separate Build() invocations.
TEST(TableMetadataBuilderTest, AddSortOrderInvalid) {
  auto base = CreateBaseMetadata();
  auto schema = CreateTestSchema();

  // 1. Invalid field ID: source column 999 does not exist in the base schema.
  auto builder = TableMetadataBuilder::BuildFrom(base.get());
  SortField invalid_field(999, Transform::Identity(), SortDirection::kAscending,
                          NullOrder::kFirst);
  ICEBERG_UNWRAP_OR_FAIL(auto order1,
                         SortOrder::Make(1, std::vector<SortField>{invalid_field}));
  builder->AddSortOrder(std::move(order1));
  auto missing_column_result = builder->Build();
  ASSERT_THAT(missing_column_result, IsError(ErrorKind::kValidationFailed));
  ASSERT_THAT(missing_column_result, HasErrorMessage("Cannot find source column"));

  // 2. Invalid transform (Day transform on string type)
  builder = TableMetadataBuilder::BuildFrom(base.get());
  SortField invalid_transform(2, Transform::Day(), SortDirection::kAscending,
                              NullOrder::kFirst);
  ICEBERG_UNWRAP_OR_FAIL(auto order2,
                         SortOrder::Make(1, std::vector<SortField>{invalid_transform}));
  builder->AddSortOrder(std::move(order2));
  auto bad_transform_result = builder->Build();
  ASSERT_THAT(bad_transform_result, IsError(ErrorKind::kValidationFailed));
  ASSERT_THAT(bad_transform_result, HasErrorMessage("Invalid source type"));

  // 3. Without schema: the builder starts empty and no schema is ever set.
  builder = TableMetadataBuilder::BuildFromEmpty(2);
  builder->AssignUUID("test-uuid");
  SortField field(1, Transform::Identity(), SortDirection::kAscending, NullOrder::kFirst);
  ICEBERG_UNWRAP_OR_FAIL(auto order3,
                         SortOrder::Make(*schema, 1, std::vector<SortField>{field}));
  builder->AddSortOrder(std::move(order3));
  auto no_schema_result = builder->Build();
  ASSERT_THAT(no_schema_result, IsError(ErrorKind::kValidationFailed));
  ASSERT_THAT(no_schema_result, HasErrorMessage("Schema with ID"));
}
// Test SetDefaultSortOrder
// Covers the successful SetDefaultSortOrder paths: by SortOrder object, by
// explicit ID, by -1 ("last added"), and re-selecting the current default.
TEST(TableMetadataBuilderTest, SetDefaultSortOrderBasic) {
  auto base_metadata = CreateBaseMetadata();
  auto test_schema = CreateTestSchema();

  // Case 1: pass the SortOrder object itself.
  auto order_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  SortField id_ascending(1, Transform::Identity(), SortDirection::kAscending,
                         NullOrder::kFirst);
  ICEBERG_UNWRAP_OR_FAIL(
      auto object_order_owned,
      SortOrder::Make(*test_schema, 1, std::vector<SortField>{id_ascending}));
  auto object_order = std::shared_ptr<SortOrder>(std::move(object_order_owned));
  order_builder->SetDefaultSortOrder(object_order);
  ICEBERG_UNWRAP_OR_FAIL(auto result, order_builder->Build());
  ASSERT_EQ(result->sort_orders.size(), 2);
  EXPECT_EQ(result->default_sort_order_id, 1);
  EXPECT_EQ(result->sort_orders[1]->order_id(), 1);

  // Case 2: add the order first, then select it by its ID.
  order_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  SortField id_ascending_again(1, Transform::Identity(), SortDirection::kAscending,
                               NullOrder::kFirst);
  ICEBERG_UNWRAP_OR_FAIL(
      auto by_id_order_owned,
      SortOrder::Make(*test_schema, 1, std::vector<SortField>{id_ascending_again}));
  auto by_id_order = std::shared_ptr<SortOrder>(std::move(by_id_order_owned));
  order_builder->AddSortOrder(by_id_order);
  order_builder->SetDefaultSortOrder(1);
  ICEBERG_UNWRAP_OR_FAIL(result, order_builder->Build());
  EXPECT_EQ(result->default_sort_order_id, 1);

  // Case 3: -1 selects whichever order was added most recently.
  order_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  SortField data_descending(2, Transform::Identity(), SortDirection::kDescending,
                            NullOrder::kLast);
  ICEBERG_UNWRAP_OR_FAIL(
      auto last_added_owned,
      SortOrder::Make(*test_schema, 1, std::vector<SortField>{data_descending}));
  auto last_added = std::shared_ptr<SortOrder>(std::move(last_added_owned));
  order_builder->AddSortOrder(last_added);
  order_builder->SetDefaultSortOrder(-1);
  ICEBERG_UNWRAP_OR_FAIL(result, order_builder->Build());
  EXPECT_EQ(result->default_sort_order_id, 1);

  // Case 4: selecting order 0, already the base default, leaves it at 0.
  order_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  order_builder->SetDefaultSortOrder(0);
  ICEBERG_UNWRAP_OR_FAIL(result, order_builder->Build());
  EXPECT_EQ(result->default_sort_order_id, 0);
}
// Expects SetDefaultSortOrder(-1) ("last added") to fail the build when no
// sort order was added through this builder.
TEST(TableMetadataBuilderTest, SetDefaultSortOrderInvalid) {
  auto base = CreateBaseMetadata();

  // Try to use -1 (last added) when no order has been added
  auto builder = TableMetadataBuilder::BuildFrom(base.get());
  builder->SetDefaultSortOrder(-1);

  // Capture the result once so both assertions inspect the same Build() call.
  auto result = builder->Build();
  ASSERT_THAT(result, IsError(ErrorKind::kValidationFailed));
  ASSERT_THAT(result, HasErrorMessage("no sort order has been added"));
}
// Test AddSchema
// Covers the basic AddSchema paths: one new schema, a duplicate schema, and
// two distinct schemas, checking schema IDs and last_column_id each time.
TEST(TableMetadataBuilderTest, AddSchemaBasic) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Case 1: a single new schema with columns 4 and 5.
  auto long_col = SchemaField::MakeRequired(4, "new_field1", int64());
  auto double_col = SchemaField::MakeRequired(5, "new_field2", float64());
  auto added_schema =
      std::make_shared<Schema>(std::vector<SchemaField>{long_col, double_col}, 1);
  schema_builder->AddSchema(added_schema);
  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  ASSERT_EQ(result->schemas.size(), 2);
  EXPECT_EQ(result->schemas[1]->schema_id(), 1);
  EXPECT_EQ(result->last_column_id, 5);

  // Case 2: two schemas with identical fields but different IDs are expected
  // to result in a single added schema.
  schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  auto same_fields_id1 =
      std::make_shared<Schema>(std::vector<SchemaField>{long_col, double_col}, 1);
  auto same_fields_id2 =
      std::make_shared<Schema>(std::vector<SchemaField>{long_col, double_col}, 2);
  schema_builder->AddSchema(same_fields_id1);
  schema_builder->AddSchema(same_fields_id2);
  ICEBERG_UNWRAP_OR_FAIL(result, schema_builder->Build());
  ASSERT_EQ(result->schemas.size(), 2);

  // Case 3: two schemas with different fields are both kept.
  schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  auto string_col = SchemaField::MakeRequired(6, "field3", string());
  auto first_distinct =
      std::make_shared<Schema>(std::vector<SchemaField>{long_col, double_col}, 1);
  auto second_distinct =
      std::make_shared<Schema>(std::vector<SchemaField>{long_col, string_col}, 2);
  schema_builder->AddSchema(first_distinct);
  schema_builder->AddSchema(second_distinct);
  ICEBERG_UNWRAP_OR_FAIL(result, schema_builder->Build());
  ASSERT_EQ(result->schemas.size(), 3);
  EXPECT_EQ(result->schemas[1]->schema_id(), 1);
  EXPECT_EQ(result->schemas[2]->schema_id(), 2);
  EXPECT_EQ(result->last_column_id, 6);
}
// Adds a schema that reuses column ID 2 (already used by the base schema) and
// expects the build to succeed with last_column_id still at the base's 3.
TEST(TableMetadataBuilderTest, AddSchemaWithDuplicateColumnIds) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  auto reused_id_col = SchemaField::MakeRequired(2, "duplicate_id", int64());
  auto reused_id_schema =
      std::make_shared<Schema>(std::vector<SchemaField>{reused_id_col}, 1);
  schema_builder->AddSchema(reused_id_schema);

  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  ASSERT_EQ(result->schemas.size(), 2);
  // The new schema's highest ID (2) is below the existing 3, so 3 is kept.
  EXPECT_EQ(result->last_column_id, 3);
}
// Adds a schema whose column IDs (10, 15) exceed the base's last_column_id
// and expects last_column_id to rise to the highest of them.
TEST(TableMetadataBuilderTest, AddSchemaWithHigherColumnIds) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  auto col_10 = SchemaField::MakeRequired(10, "high_id1", int64());
  auto col_15 = SchemaField::MakeRequired(15, "high_id2", string());
  auto high_id_schema =
      std::make_shared<Schema>(std::vector<SchemaField>{col_10, col_15}, 1);
  schema_builder->AddSchema(high_id_schema);

  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  ASSERT_EQ(result->schemas.size(), 2);
  EXPECT_EQ(result->last_column_id, 15);
}
// Adds a schema with no fields and expects it to be accepted without
// changing last_column_id.
TEST(TableMetadataBuilderTest, AddSchemaEmptyFields) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  auto fieldless_schema = std::make_shared<Schema>(std::vector<SchemaField>{}, 1);
  schema_builder->AddSchema(fieldless_schema);

  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  ASSERT_EQ(result->schemas.size(), 2);
  EXPECT_EQ(result->last_column_id, 3);
}
// Adds two schemas that have the same fields but different schema IDs and
// expects only one of them to be stored, under ID 1.
TEST(TableMetadataBuilderTest, AddSchemaIdempotent) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  auto long_col = SchemaField::MakeRequired(4, "field1", int64());
  auto string_col = SchemaField::MakeRequired(5, "field2", string());
  auto original =
      std::make_shared<Schema>(std::vector<SchemaField>{long_col, string_col}, 1);
  auto same_fields_other_id =
      std::make_shared<Schema>(std::vector<SchemaField>{long_col, string_col}, 2);

  schema_builder->AddSchema(original);
  schema_builder->AddSchema(same_fields_other_id);

  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  ASSERT_EQ(result->schemas.size(), 2);
  EXPECT_EQ(result->schemas[1]->schema_id(), 1);
}
// Adds three single-column schemas with different columns and expects all of
// them to be stored alongside the base schema.
TEST(TableMetadataBuilderTest, AddSchemaMultipleDifferent) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  auto long_col = SchemaField::MakeRequired(4, "field1", int64());
  auto string_col = SchemaField::MakeRequired(5, "field2", string());
  auto double_col = SchemaField::MakeRequired(6, "field3", float64());
  auto long_schema = std::make_shared<Schema>(std::vector<SchemaField>{long_col}, 1);
  auto string_schema = std::make_shared<Schema>(std::vector<SchemaField>{string_col}, 2);
  auto double_schema = std::make_shared<Schema>(std::vector<SchemaField>{double_col}, 3);

  schema_builder->AddSchema(long_schema);
  schema_builder->AddSchema(string_schema);
  schema_builder->AddSchema(double_schema);

  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  // One base schema plus the three added here.
  ASSERT_EQ(result->schemas.size(), 4);
  EXPECT_EQ(result->schemas[1]->schema_id(), 1);
  EXPECT_EQ(result->schemas[2]->schema_id(), 2);
  EXPECT_EQ(result->schemas[3]->schema_id(), 3);
  EXPECT_EQ(result->last_column_id, 6);
}
// Test SetCurrentSchema
// Covers the successful SetCurrentSchema paths: by Schema object with an
// explicit last column ID, by schema ID, by -1 ("last added"), and
// re-selecting the schema that is already current.
TEST(TableMetadataBuilderTest, SetCurrentSchemaBasic) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Case 1: pass the Schema object together with last_column_id = 4.
  auto long_col = SchemaField::MakeRequired(4, "new_field", int64());
  auto object_schema = std::make_shared<Schema>(std::vector<SchemaField>{long_col}, 1);
  schema_builder->SetCurrentSchema(object_schema, 4);
  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  ASSERT_EQ(result->schemas.size(), 2);
  EXPECT_EQ(result->current_schema_id, 1);
  EXPECT_EQ(result->schemas[1]->schema_id(), 1);
  EXPECT_EQ(result->last_column_id, 4);

  // Case 2: add the schema first, then select it by its ID.
  schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  auto by_id_schema = std::make_shared<Schema>(std::vector<SchemaField>{long_col}, 1);
  schema_builder->AddSchema(by_id_schema);
  schema_builder->SetCurrentSchema(1);
  ICEBERG_UNWRAP_OR_FAIL(result, schema_builder->Build());
  EXPECT_EQ(result->current_schema_id, 1);

  // Case 3: -1 selects the most recently added schema. It was constructed
  // with ID 2, yet the resulting current schema ID is expected to be 1.
  schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  auto double_col = SchemaField::MakeRequired(5, "another_field", float64());
  auto last_added_schema =
      std::make_shared<Schema>(std::vector<SchemaField>{double_col}, 2);
  schema_builder->AddSchema(last_added_schema);
  schema_builder->SetCurrentSchema(-1);
  ICEBERG_UNWRAP_OR_FAIL(result, schema_builder->Build());
  EXPECT_EQ(result->current_schema_id, 1);

  // Case 4: selecting schema 0, already current in the base, leaves it at 0.
  schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  schema_builder->SetCurrentSchema(0);
  ICEBERG_UNWRAP_OR_FAIL(result, schema_builder->Build());
  EXPECT_EQ(result->current_schema_id, 0);
}
// Expects SetCurrentSchema to fail the build when the supplied last column ID
// (2) is lower than the base metadata's last_column_id (3).
TEST(TableMetadataBuilderTest, SetCurrentSchemaWithInvalidLastColumnId) {
  auto base = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base.get());

  // Try to set current schema with last_column_id lower than existing
  auto field1 = SchemaField::MakeRequired(4, "new_field", int64());
  auto new_schema = std::make_shared<Schema>(std::vector<SchemaField>{field1}, 1);
  builder->SetCurrentSchema(new_schema, 2);  // 2 < 3 (existing last_column_id)

  // Capture the result once so both assertions inspect the same Build() call.
  auto result = builder->Build();
  ASSERT_THAT(result, IsError(ErrorKind::kValidationFailed));
  ASSERT_THAT(result, HasErrorMessage("Invalid last column ID"));
}
// Sets the current schema with a last column ID (10) above every field ID in
// the schema (4, 8) and expects that value to be stored as given.
TEST(TableMetadataBuilderTest, SetCurrentSchemaUpdatesLastColumnId) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  auto long_col = SchemaField::MakeRequired(4, "new_field1", int64());
  auto string_col = SchemaField::MakeRequired(8, "new_field2", string());
  auto replacement =
      std::make_shared<Schema>(std::vector<SchemaField>{long_col, string_col}, 1);
  schema_builder->SetCurrentSchema(replacement, 10);

  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  EXPECT_EQ(result->current_schema_id, 1);
  EXPECT_EQ(result->last_column_id, 10);
}
// Additional comprehensive tests for AddSchema
// Adds a schema whose only column is a struct with inner field IDs 10 and 11
// and expects last_column_id to reflect the highest nested ID.
TEST(TableMetadataBuilderTest, AddSchemaWithNestedTypes) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Struct column (ID 4) wrapping two required inner fields.
  auto nested_id = SchemaField::MakeRequired(10, "inner_id", int64());
  auto nested_name = SchemaField::MakeRequired(11, "inner_name", string());
  auto nested_type =
      std::make_shared<StructType>(std::vector<SchemaField>{nested_id, nested_name});
  auto struct_col = SchemaField::MakeRequired(4, "nested_struct", nested_type);
  auto nested_schema = std::make_shared<Schema>(std::vector<SchemaField>{struct_col}, 1);
  schema_builder->AddSchema(nested_schema);

  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  ASSERT_EQ(result->schemas.size(), 2);
  EXPECT_EQ(result->schemas[1]->schema_id(), 1);
  EXPECT_EQ(result->last_column_id, 11);
}
// Adds a schema containing a list column and a map column and expects
// last_column_id to reflect the highest ID among the nested fields (12).
TEST(TableMetadataBuilderTest, AddSchemaWithListAndMapTypes) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // List column (ID 4) built from ListType(10, int32(), true).
  auto int_list_type = std::make_shared<ListType>(10, int32(), true);
  auto list_col = SchemaField::MakeRequired(4, "int_list", int_list_type);

  // Map column (ID 5) with key field ID 11 and value field ID 12.
  auto map_key = SchemaField::MakeRequired(11, "key", string());
  auto map_value = SchemaField::MakeRequired(12, "value", int64());
  auto string_to_long_type = std::make_shared<MapType>(map_key, map_value);
  auto map_col = SchemaField::MakeRequired(5, "string_int_map", string_to_long_type);

  auto container_schema =
      std::make_shared<Schema>(std::vector<SchemaField>{list_col, map_col}, 1);
  schema_builder->AddSchema(container_schema);

  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  ASSERT_EQ(result->schemas.size(), 2);
  EXPECT_EQ(result->schemas[1]->schema_id(), 1);
  EXPECT_EQ(result->last_column_id, 12);
}
// Adds three distinct schemas and expects the stored schemas to carry the
// IDs 0 (base), 1, 2 and 3 in order.
TEST(TableMetadataBuilderTest, AddSchemaSequentialIds) {
  auto base_metadata = CreateBaseMetadata();
  auto schema_builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  auto long_col = SchemaField::MakeRequired(4, "field1", int64());
  auto long_schema = std::make_shared<Schema>(std::vector<SchemaField>{long_col}, 1);
  auto string_col = SchemaField::MakeRequired(5, "field2", string());
  auto string_schema = std::make_shared<Schema>(std::vector<SchemaField>{string_col}, 2);
  auto double_col = SchemaField::MakeRequired(6, "field3", float64());
  auto double_schema = std::make_shared<Schema>(std::vector<SchemaField>{double_col}, 3);

  schema_builder->AddSchema(long_schema);
  schema_builder->AddSchema(string_schema);
  schema_builder->AddSchema(double_schema);

  ICEBERG_UNWRAP_OR_FAIL(auto result, schema_builder->Build());
  // One base schema plus the three added here.
  ASSERT_EQ(result->schemas.size(), 4);

  // Index 0 is the base schema; the rest follow in insertion order.
  EXPECT_EQ(result->schemas[0]->schema_id(), 0);
  EXPECT_EQ(result->schemas[1]->schema_id(), 1);
  EXPECT_EQ(result->schemas[2]->schema_id(), 2);
  EXPECT_EQ(result->schemas[3]->schema_id(), 3);
}
TEST(TableMetadataBuilderTest, AddSchemaWithOptionalFields) {
  // Confirms that the required/optional flag of each field survives
  // AddSchema() followed by Build().
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // One required and one optional column.
  std::vector<SchemaField> columns{
      SchemaField::MakeRequired(4, "required_field", int64()),
      SchemaField::MakeOptional(5, "optional_field", string())};
  auto mixed_schema = std::make_shared<Schema>(columns, 1);
  builder->AddSchema(mixed_schema);

  ICEBERG_UNWRAP_OR_FAIL(auto built, builder->Build());
  ASSERT_EQ(built->schemas.size(), 2);
  EXPECT_EQ(built->schemas[1]->schema_id(), 1);

  // Inspect the fields of the schema that was just added.
  const auto& added_fields = built->schemas[1]->fields();
  ASSERT_EQ(added_fields.size(), 2);
  EXPECT_FALSE(added_fields[0].optional());  // "required_field"
  EXPECT_TRUE(added_fields[1].optional());   // "optional_field"
}
// Additional comprehensive tests for SetCurrentSchema
TEST(TableMetadataBuilderTest, SetCurrentSchemaInvalidId) {
  // Both scenarios below must surface a validation failure from Build().
  auto base_metadata = CreateBaseMetadata();

  {
    // Scenario 1: -1 ("last added") is requested although nothing was added.
    auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
    builder->SetCurrentSchema(-1);
    ASSERT_THAT(builder->Build(), IsError(ErrorKind::kValidationFailed));
    ASSERT_THAT(builder->Build(), HasErrorMessage("no schema has been added"));
  }

  {
    // Scenario 2: the requested schema ID is not known to the metadata.
    auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
    builder->SetCurrentSchema(999);
    ASSERT_THAT(builder->Build(), IsError(ErrorKind::kValidationFailed));
    ASSERT_THAT(builder->Build(), HasErrorMessage("unknown schema: 999"));
  }
}
TEST(TableMetadataBuilderTest, SetCurrentSchemaWithPartitionSpecRebuild) {
  // Checks that SetCurrentSchema(schema, last_column_id) installs the new
  // schema as current and that the partition specs already present in the
  // metadata are still there after Build().
  auto base = CreateBaseMetadata();

  // Append a partition spec with a single bucket(16) field named "id_bucket"
  // to the base metadata.
  ICEBERG_UNWRAP_OR_FAIL(
      auto spec,
      PartitionSpec::Make(PartitionSpec::kInitialSpecId,
                          {PartitionField(1, 1000, "id_bucket", Transform::Bucket(16))},
                          1000));
  base->partition_specs.push_back(std::move(spec));

  auto builder = TableMetadataBuilder::BuildFrom(base.get());

  // Add and set a new schema - should rebuild partition specs
  auto field1 = SchemaField::MakeRequired(4, "new_id", int64());
  auto field2 = SchemaField::MakeRequired(5, "new_data", string());
  auto new_schema = std::make_shared<Schema>(std::vector<SchemaField>{field1, field2}, 1);
  builder->SetCurrentSchema(new_schema, 5);

  ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build());

  // Verify schema was set
  EXPECT_EQ(metadata->current_schema_id, 1);
  EXPECT_EQ(metadata->last_column_id, 5);

  // Verify partition specs were rebuilt (they should still exist)
  ASSERT_EQ(metadata->partition_specs.size(), 2);
}
TEST(TableMetadataBuilderTest, SetCurrentSchemaMultipleOperations) {
  // Switches the current schema 0 -> 1 -> 2 -> 0 across successive builds and
  // checks current_schema_id after every step.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Register two extra schemas (IDs 1 and 2).
  auto first_added = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(4, "field1", int64())}, 1);
  auto second_added = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(5, "field2", string())}, 2);
  builder->AddSchema(first_added);
  builder->AddSchema(second_added);

  // Step 1: make the first added schema current.
  builder->SetCurrentSchema(1);
  ICEBERG_UNWRAP_OR_FAIL(auto current, builder->Build());
  EXPECT_EQ(current->current_schema_id, 1);

  // Step 2: move on to the second added schema.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->SetCurrentSchema(2);
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  EXPECT_EQ(current->current_schema_id, 2);

  // Step 3: return to the schema the table started with.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->SetCurrentSchema(0);
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  EXPECT_EQ(current->current_schema_id, 0);
}
TEST(TableMetadataBuilderTest, SetCurrentSchemaLastAddedTracking) {
  // After two AddSchema() calls, SetCurrentSchema(-1) must resolve to the
  // schema that was added most recently.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  auto earlier_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(4, "field1", int64())}, 1);
  auto latest_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(5, "field2", string())}, 2);
  builder->AddSchema(earlier_schema);
  builder->AddSchema(latest_schema);  // the "last added" schema from here on

  // -1 selects the last added schema.
  builder->SetCurrentSchema(-1);

  ICEBERG_UNWRAP_OR_FAIL(auto built, builder->Build());
  EXPECT_EQ(built->current_schema_id, 2);  // ID of latest_schema
}
TEST(TableMetadataBuilderTest, AddSchemaAndSetCurrentCombined) {
  // Adds one schema and promotes it to current within the same builder
  // session, using -1 to refer to the schema that was just added.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  auto added_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(4, "new_field", int64())}, 1);
  builder->AddSchema(added_schema);
  builder->SetCurrentSchema(-1);  // -1 == last added

  ICEBERG_UNWRAP_OR_FAIL(auto built, builder->Build());
  ASSERT_EQ(built->schemas.size(), 2);
  EXPECT_EQ(built->current_schema_id, 1);
  EXPECT_EQ(built->schemas[1]->schema_id(), 1);
  EXPECT_EQ(built->last_column_id, 4);
}
TEST(TableMetadataBuilderTest, SetCurrentSchemaInvalid) {
  // NOTE(review): this exercises the same two scenarios as
  // SetCurrentSchemaInvalidId; consider merging the two tests.
  auto base_metadata = CreateBaseMetadata();

  {
    // Requesting the "last added" schema (-1) before any schema was added.
    auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
    builder->SetCurrentSchema(-1);
    ASSERT_THAT(builder->Build(), IsError(ErrorKind::kValidationFailed));
    ASSERT_THAT(builder->Build(), HasErrorMessage("no schema has been added"));
  }

  {
    // Requesting a schema ID that does not exist in the metadata.
    auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
    builder->SetCurrentSchema(999);
    ASSERT_THAT(builder->Build(), IsError(ErrorKind::kValidationFailed));
    ASSERT_THAT(builder->Build(), HasErrorMessage("unknown schema: 999"));
  }
}
// Test schema evolution: SetCurrentSchema should rebuild partition specs and sort orders
TEST(TableMetadataBuilderTest, SetCurrentSchemaRebuildsSpecsAndOrders) {
  // Seeds the base metadata with an extra partition spec and an extra sort
  // order, evolves the schema, and checks that both collections keep their
  // size in the built metadata.
  auto base_metadata = CreateBaseMetadata();
  auto original_schema = CreateTestSchema();

  // Extra partition spec: a single bucket(16) field named "id_bucket".
  ICEBERG_UNWRAP_OR_FAIL(
      auto bucket_spec,
      PartitionSpec::Make(PartitionSpec::kInitialSpecId,
                          {PartitionField(1, 1000, "id_bucket", Transform::Bucket(16))},
                          1000));
  base_metadata->partition_specs.push_back(std::move(bucket_spec));

  // Extra sort order (ID 1): ascending identity sort with nulls first. It is
  // also made the default sort order.
  SortField ascending_field(1, Transform::Identity(), SortDirection::kAscending,
                            NullOrder::kFirst);
  ICEBERG_UNWRAP_OR_FAIL(
      auto ascending_order,
      SortOrder::Make(*original_schema, 1, std::vector<SortField>{ascending_field}));
  base_metadata->sort_orders.push_back(std::move(ascending_order));
  base_metadata->default_sort_order_id = 1;

  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Evolved schema: every original column plus two new ones (IDs 4 and 5).
  std::vector<SchemaField> evolved_fields{original_schema->fields().begin(),
                                          original_schema->fields().end()};
  evolved_fields.push_back(SchemaField::MakeRequired(4, "new_id", int64()));
  evolved_fields.push_back(SchemaField::MakeRequired(5, "new_data", string()));
  auto evolved_schema = std::make_shared<Schema>(std::move(evolved_fields), 1);
  builder->SetCurrentSchema(evolved_schema, 5);

  ICEBERG_UNWRAP_OR_FAIL(auto built, builder->Build());

  // The evolved schema is now current.
  EXPECT_EQ(built->current_schema_id, 1);
  // Partition specs are still present after the schema change.
  ASSERT_EQ(built->partition_specs.size(), 2);
  // Sort orders are still present after the schema change.
  ASSERT_EQ(built->sort_orders.size(), 2);
}
// Test RemoveSchemas
TEST(TableMetadataBuilderTest, RemoveSchemasBasic) {
  // Adds schemas 1..3, then removes them in two rounds and checks which
  // schema IDs remain after each round.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  builder->AddSchema(std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(4, "field1", int64())}, 1));
  builder->AddSchema(std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(5, "field2", float64())}, 2));
  builder->AddSchema(std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(6, "field3", string())}, 3));

  ICEBERG_UNWRAP_OR_FAIL(auto current, builder->Build());
  ASSERT_EQ(current->schemas.size(), 4);  // base schema + the three added

  // Round 1: drop schema 1 only.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemoveSchemas({1});
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->schemas.size(), 3);
  EXPECT_EQ(current->schemas[0]->schema_id(), 0);
  EXPECT_EQ(current->schemas[1]->schema_id(), 2);
  EXPECT_EQ(current->schemas[2]->schema_id(), 3);

  // Round 2: drop schemas 2 and 3 together; only the initial schema is left.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemoveSchemas({2, 3});
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->schemas.size(), 1);
  EXPECT_EQ(current->schemas[0]->schema_id(), Schema::kInitialSchemaId);
}
TEST(TableMetadataBuilderTest, RemoveSchemasCannotRemoveCurrent) {
  // Removing the current schema must be rejected, whether it is requested on
  // its own or together with other schema IDs.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Add schema 1; schema 0 stays current because SetCurrentSchema is not called.
  builder->AddSchema(std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(4, "field1", int64())}, 1));
  ICEBERG_UNWRAP_OR_FAIL(auto two_schemas, builder->Build());
  ASSERT_EQ(two_schemas->schemas.size(), 2);
  EXPECT_EQ(two_schemas->current_schema_id, 0);

  {
    // Only the current schema (ID 0) is requested for removal.
    auto remover = TableMetadataBuilder::BuildFrom(two_schemas.get());
    remover->RemoveSchemas({0});
    ASSERT_THAT(remover->Build(), IsError(ErrorKind::kValidationFailed));
    ASSERT_THAT(remover->Build(), HasErrorMessage("Cannot remove current schema: 0"));
  }

  {
    // The current schema is requested together with a removable one.
    auto remover = TableMetadataBuilder::BuildFrom(two_schemas.get());
    remover->RemoveSchemas({0, 1});
    ASSERT_THAT(remover->Build(), IsError(ErrorKind::kValidationFailed));
    ASSERT_THAT(remover->Build(), HasErrorMessage("Cannot remove current schema: 0"));
  }
}
TEST(TableMetadataBuilderTest, RemoveSchemasNonExistent) {
  // Unknown schema IDs passed to RemoveSchemas() are ignored: they neither
  // fail the build nor prevent removal of the known IDs listed with them.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Start from two schemas: the base one and schema 1.
  builder->AddSchema(std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(4, "field1", int64())}, 1));
  ICEBERG_UNWRAP_OR_FAIL(auto current, builder->Build());
  ASSERT_EQ(current->schemas.size(), 2);

  // Only an unknown ID: the schema list is unchanged.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemoveSchemas({999});
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->schemas.size(), 2);

  // A known ID mixed with unknown ones: only schema 1 disappears.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemoveSchemas({1, 999, 888});
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->schemas.size(), 1);
  EXPECT_EQ(current->schemas[0]->schema_id(), Schema::kInitialSchemaId);
}
TEST(TableMetadataBuilderTest, RemoveSchemasEmptySet) {
  // RemoveSchemas() with no IDs must leave the schema list untouched.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Start from two schemas: the base one and schema 1.
  builder->AddSchema(std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(4, "field1", int64())}, 1));
  ICEBERG_UNWRAP_OR_FAIL(auto current, builder->Build());
  ASSERT_EQ(current->schemas.size(), 2);

  // Nothing is requested for removal, so nothing changes.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemoveSchemas({});
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->schemas.size(), 2);
}
TEST(TableMetadataBuilderTest, RemoveSchemasAfterSchemaChange) {
  // Once the current schema has moved from 0 to 1, schema 0 becomes removable
  // while schema 1 is protected.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Add schemas 1 and 2, and make schema 1 the current one.
  builder->AddSchema(std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(4, "field1", int64())}, 1));
  builder->AddSchema(std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(5, "field2", float64())}, 2));
  builder->SetCurrentSchema(1);
  ICEBERG_UNWRAP_OR_FAIL(auto current, builder->Build());
  ASSERT_EQ(current->schemas.size(), 3);
  EXPECT_EQ(current->current_schema_id, 1);

  // Schema 0 is no longer current, so it can be dropped.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemoveSchemas({0});
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->schemas.size(), 2);
  EXPECT_EQ(current->current_schema_id, 1);
  EXPECT_EQ(current->schemas[0]->schema_id(), 1);
  EXPECT_EQ(current->schemas[1]->schema_id(), 2);

  // Schema 1 is current now, so removing it is rejected.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemoveSchemas({1});
  ASSERT_THAT(builder->Build(), IsError(ErrorKind::kValidationFailed));
  ASSERT_THAT(builder->Build(), HasErrorMessage("Cannot remove current schema: 1"));
}
TEST(TableMetadataBuilderTest, RemoveSnapshotRef) {
  // Creates two branch refs, removes one of them, and checks that only the
  // other is left in the built metadata.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Two snapshots (IDs 1 and 2) for the refs to point at.
  builder->AddSnapshot(std::make_shared<Snapshot>(Snapshot{.snapshot_id = 1}));
  builder->AddSnapshot(std::make_shared<Snapshot>(Snapshot{.snapshot_id = 2}));

  // One branch per snapshot.
  ICEBERG_UNWRAP_OR_FAIL(auto branch_on_1, SnapshotRef::MakeBranch(1));
  ICEBERG_UNWRAP_OR_FAIL(auto branch_on_2, SnapshotRef::MakeBranch(2));
  builder->SetRef("ref1", std::move(branch_on_1));
  builder->SetRef("ref2", std::move(branch_on_2));

  ICEBERG_UNWRAP_OR_FAIL(auto current, builder->Build());
  ASSERT_EQ(current->refs.size(), 2);

  // Drop "ref2"; "ref1" must be the only ref remaining.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemoveRef("ref2");
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->refs.size(), 1);
  EXPECT_TRUE(current->refs.contains("ref1"));
}
TEST(TableMetadataBuilderTest, RemoveSnapshot) {
  // Removes one of two snapshots and checks that it can no longer be looked
  // up by ID.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Start with snapshots 1 and 2.
  builder->AddSnapshot(std::make_shared<Snapshot>(Snapshot{.snapshot_id = 1}));
  builder->AddSnapshot(std::make_shared<Snapshot>(Snapshot{.snapshot_id = 2}));
  ICEBERG_UNWRAP_OR_FAIL(auto current, builder->Build());
  ASSERT_EQ(current->snapshots.size(), 2);

  // Drop snapshot 2.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemoveSnapshots(std::vector<int64_t>{2});
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());

  ASSERT_EQ(current->snapshots.size(), 1);
  ASSERT_THAT(current->SnapshotById(2), IsError(ErrorKind::kNotFound));
}
TEST(TableMetadataBuilderTest, RemoveSnapshotNotExist) {
  // Removing an unknown snapshot ID changes nothing; removing every known ID
  // afterwards empties the snapshot list.
  auto base_metadata = CreateBaseMetadata();
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());

  // Start with snapshots 1 and 2.
  builder->AddSnapshot(std::make_shared<Snapshot>(Snapshot{.snapshot_id = 1}));
  builder->AddSnapshot(std::make_shared<Snapshot>(Snapshot{.snapshot_id = 2}));
  ICEBERG_UNWRAP_OR_FAIL(auto current, builder->Build());
  ASSERT_EQ(current->snapshots.size(), 2);

  // Snapshot 3 was never added, so both snapshots are still there.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  std::vector<int64_t> unknown_ids{3};
  builder->RemoveSnapshots(unknown_ids);
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->snapshots.size(), 2);

  // Remove both existing snapshots.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  std::vector<int64_t> all_ids{1, 2};
  builder->RemoveSnapshots(all_ids);
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->snapshots.size(), 0);
}
TEST(TableMetadataBuilderTest, RemovePartitionSpec) {
  // Builds metadata holding two partition specs, removes spec 2, and checks
  // that it can no longer be looked up by ID.
  const PartitionField first_field(2, 4, "field1", Transform::Identity());
  const PartitionField second_field(3, 5, "field2", Transform::Identity());

  // Spec 1 is handed to the base metadata; spec 2 is added through the builder.
  ICEBERG_UNWRAP_OR_FAIL(auto single_field_spec, PartitionSpec::Make(1, {first_field}));
  auto base_metadata = CreateBaseMetadata(std::move(single_field_spec));
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  ICEBERG_UNWRAP_OR_FAIL(auto two_field_spec,
                         PartitionSpec::Make(2, {first_field, second_field}));
  builder->AddPartitionSpec(std::move(two_field_spec));

  ICEBERG_UNWRAP_OR_FAIL(auto current, builder->Build());
  ASSERT_EQ(current->partition_specs.size(), 2);

  // Drop spec 2.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemovePartitionSpecs({2});
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());

  ASSERT_EQ(current->partition_specs.size(), 1);
  ASSERT_THAT(current->PartitionSpecById(2), IsError(ErrorKind::kNotFound));
}
TEST(TableMetadataBuilderTest, RemovePartitionSpecNotExist) {
  // Unknown spec IDs passed to RemovePartitionSpecs() are ignored, both alone
  // and when mixed with a known ID.
  const PartitionField first_field(2, 4, "field1", Transform::Identity());
  const PartitionField second_field(3, 5, "field2", Transform::Identity());

  // Spec 1 is handed to the base metadata; spec 2 is added through the builder.
  ICEBERG_UNWRAP_OR_FAIL(auto single_field_spec, PartitionSpec::Make(1, {first_field}));
  auto base_metadata = CreateBaseMetadata(std::move(single_field_spec));
  auto builder = TableMetadataBuilder::BuildFrom(base_metadata.get());
  ICEBERG_UNWRAP_OR_FAIL(auto two_field_spec,
                         PartitionSpec::Make(2, {first_field, second_field}));
  builder->AddPartitionSpec(std::move(two_field_spec));

  ICEBERG_UNWRAP_OR_FAIL(auto current, builder->Build());
  ASSERT_EQ(current->partition_specs.size(), 2);

  // Spec 3 does not exist, so both specs are still present.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemovePartitionSpecs({3});
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->partition_specs.size(), 2);

  // Spec 2 is removed and the unknown ID 3 is ignored; one spec remains.
  builder = TableMetadataBuilder::BuildFrom(current.get());
  builder->RemovePartitionSpecs({2, 3});
  ICEBERG_UNWRAP_OR_FAIL(current, builder->Build());
  ASSERT_EQ(current->partition_specs.size(), 1);
}
} // namespace iceberg