// blob: 47a1e87e6f54200f2ebf6198fab1de8c621bff1a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "iceberg/transform.h"
#include <format>
#include <memory>
#include <string>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "iceberg/expression/expressions.h"
#include "iceberg/expression/literal.h"
#include "iceberg/expression/predicate.h"
#include "iceberg/schema.h"
#include "iceberg/schema_field.h"
#include "iceberg/test/matchers.h"
#include "iceberg/test/temporal_test_helper.h"
#include "iceberg/type.h"
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/formatter.h" // IWYU pragma: keep
namespace iceberg {
// Verifies the identity transform: its reported type, its ToString() and
// std::format renderings, and that it can be bound to a string source type.
TEST(TransformTest, Transform) {
  const auto identity = Transform::Identity();

  EXPECT_EQ(TransformType::kIdentity, identity->transform_type());
  EXPECT_EQ("identity", identity->ToString());
  EXPECT_EQ("identity", std::format("{}", *identity));

  const auto string_type = iceberg::string();
  const auto bound = identity->Bind(string_type);
  ASSERT_TRUE(bound);
}
// Verifies that a bucket transform renders as "bucket[N]" and that binding it
// to a string source yields a function reporting TransformType::kBucket.
TEST(TransformFunctionTest, CreateBucketTransform) {
  constexpr int32_t kNumBuckets = 8;
  const auto bucket = Transform::Bucket(kNumBuckets);

  EXPECT_EQ("bucket[8]", bucket->ToString());
  EXPECT_EQ("bucket[8]", std::format("{}", *bucket));

  const auto bound = bucket->Bind(iceberg::string());
  ASSERT_TRUE(bound);
  EXPECT_EQ(bound.value()->transform_type(), TransformType::kBucket);
}
// Verifies that a truncate transform renders as "truncate[N]" and that binding
// it to a string source yields a function reporting TransformType::kTruncate.
TEST(TransformFunctionTest, CreateTruncateTransform) {
  constexpr int32_t width = 16;
  auto transform = Transform::Truncate(width);
  EXPECT_EQ("truncate[16]", transform->ToString());
  EXPECT_EQ("truncate[16]", std::format("{}", *transform));

  auto transformPtr = transform->Bind(iceberg::string());
  // Stop here if binding failed: calling value() on an error result below
  // would abort the test binary instead of reporting a clean failure.
  ASSERT_TRUE(transformPtr);
  EXPECT_EQ(transformPtr.value()->transform_type(), TransformType::kTruncate);
}
// Parses every supported transform string and checks that the parsed transform
// reports the expected type and renders back to the expected text.
TEST(TransformFromStringTest, PositiveCases) {
  struct Case {
    std::string str;
    TransformType type;
    std::optional<int32_t> param;
  };

  // Each row: textual form, expected transform type, optional numeric argument.
  const std::vector<Case> cases = {
      {"identity", TransformType::kIdentity, std::nullopt},
      {"year", TransformType::kYear, std::nullopt},
      {"month", TransformType::kMonth, std::nullopt},
      {"day", TransformType::kDay, std::nullopt},
      {"hour", TransformType::kHour, std::nullopt},
      {"void", TransformType::kVoid, std::nullopt},
      {"bucket[16]", TransformType::kBucket, 16},
      {"truncate[32]", TransformType::kTruncate, 32},
  };

  for (const auto& test_case : cases) {
    auto parsed = TransformFromString(test_case.str);
    ASSERT_TRUE(parsed.has_value()) << "Failed to parse: " << test_case.str;

    const auto& parsed_transform = parsed.value();
    EXPECT_EQ(parsed_transform->transform_type(), test_case.type);

    if (!test_case.param.has_value()) {
      // Parameterless transforms render as just the type name.
      EXPECT_EQ(parsed_transform->ToString(), TransformTypeToString(test_case.type));
    } else {
      // Parameterized transforms render as "<type>[<param>]".
      EXPECT_EQ(parsed_transform->ToString(),
                std::format("{}[{}]", TransformTypeToString(test_case.type),
                            *test_case.param));
    }
  }
}
// Feeds malformed transform strings to TransformFromString and expects each
// one to be rejected with ErrorKind::kInvalidArgument.
TEST(TransformFromStringTest, NegativeCases) {
  constexpr std::array<std::string_view, 6> malformed_inputs = {
      "bucket",            // no bracketed argument
      "bucket[]",          // brackets with nothing inside
      "bucket[abc]",       // argument is not a number
      "unknown",           // not a known transform name
      "bucket[16",         // closing bracket is missing
      "truncate[1]extra"   // trailing characters after the bracket
  };

  for (const std::string_view input : malformed_inputs) {
    auto parsed = TransformFromString(input);
    EXPECT_FALSE(parsed.has_value()) << "Unexpected success for: " << input;
    EXPECT_THAT(parsed, IsError(ErrorKind::kInvalidArgument));
  }
}
// Binds each transform to a compatible source type and checks the type id of
// the bound function's result type.
TEST(TransformResultTypeTest, PositiveCases) {
  struct Case {
    std::string str;
    std::shared_ptr<Type> source_type;
    std::shared_ptr<Type> expected_result_type;
  };

  // Each row: transform string, source type to bind, expected result type.
  const std::vector<Case> cases = {
      {"identity", iceberg::string(), iceberg::string()},
      {"year", iceberg::timestamp(), iceberg::int32()},
      {"month", iceberg::timestamp(), iceberg::int32()},
      {"day", iceberg::timestamp(), iceberg::date()},
      {"hour", iceberg::timestamp(), iceberg::int32()},
      {"void", iceberg::string(), iceberg::string()},
      {"bucket[16]", iceberg::string(), iceberg::int32()},
      {"truncate[32]", iceberg::string(), iceberg::string()},
  };

  for (const auto& test_case : cases) {
    auto parsed = TransformFromString(test_case.str);
    ASSERT_TRUE(parsed.has_value()) << "Failed to parse: " << test_case.str;

    const auto& parsed_transform = parsed.value();
    const auto bound = parsed_transform->Bind(test_case.source_type);
    ASSERT_TRUE(bound.has_value()) << "Failed to bind: " << test_case.str;

    // Only the type ids are compared, not full type equality.
    auto actual_type = bound.value()->ResultType();
    EXPECT_EQ(actual_type->type_id(), test_case.expected_result_type->type_id())
        << "Unexpected result type for: " << test_case.str;
  }
}
// Binds each transform to a null or incompatible source type and expects the
// bind to fail with ErrorKind::kNotSupported.
TEST(TransformResultTypeTest, NegativeCases) {
  struct Case {
    std::string str;
    std::shared_ptr<Type> source_type;
  };

  // Each row: transform string, source type that the bind must reject.
  const std::vector<Case> cases = {
      {"identity", nullptr},
      {"year", iceberg::string()},
      {"month", iceberg::string()},
      {"day", iceberg::string()},
      {"hour", iceberg::string()},
      {"void", nullptr},
      {"bucket[16]", iceberg::float32()},
      {"truncate[32]", iceberg::float64()}};

  for (const auto& test_case : cases) {
    auto parsed = TransformFromString(test_case.str);
    ASSERT_TRUE(parsed.has_value()) << "Failed to parse: " << test_case.str;

    const auto& parsed_transform = parsed.value();
    auto bound = parsed_transform->Bind(test_case.source_type);
    ASSERT_THAT(bound, IsError(ErrorKind::kNotSupported));
  }
}
// Parameterized tests for transform functions
// One row of input for the value-parameterized transform tests in this file.
struct TransformParam {
// Label for the case. Most suites return it from their test-name generator;
// NullLiteralTransformTest instead parses it with TransformFromString.
std::string str;
// Integer argument for the transform. In the tests visible here only
// TruncateTransformTest reads it (as the truncate width); other suites leave
// it unset in their designated initializers.
int32_t param;
// Type the transform is bound to before it is applied.
std::shared_ptr<Type> source_type;
// Literal handed to the bound transform function.
Literal source;
// Literal the transform result is compared against.
Literal expected;
};
// Fixture for the value-parameterized identity-transform cases.
class TransformLiteralTest : public ::testing::TestWithParam<TransformParam> {};

// Binds the identity transform to the case's source type, applies it to the
// source literal, and compares the result with the expected literal.
TEST_P(TransformLiteralTest, IdentityTransform) {
  const auto& test_case = GetParam();
  auto identity = Transform::Identity();

  auto bound = identity->Bind(test_case.source_type);
  ASSERT_TRUE(bound.has_value()) << "Failed to bind identity transform";

  auto actual = bound.value()->Transform(test_case.source);
  ASSERT_TRUE(actual.has_value())
      << "Failed to transform literal: " << test_case.source.ToString();
  EXPECT_EQ(actual.value(), test_case.expected)
      << "Unexpected result for source: " << test_case.source.ToString();
}
// Cases for TransformLiteralTest.IdentityTransform. Every case expects the
// transform result to equal the source literal, so `expected` mirrors
// `source`. `str` becomes the generated test name.
INSTANTIATE_TEST_SUITE_P(
    IdentityTransformTests, TransformLiteralTest,
    ::testing::Values(
        TransformParam{.str = "BooleanTrue",
                       .source_type = iceberg::boolean(),
                       .source = Literal::Boolean(true),
                       .expected = Literal::Boolean(true)},
        TransformParam{.str = "BooleanFalse",
                       .source_type = iceberg::boolean(),
                       .source = Literal::Boolean(false),
                       .expected = Literal::Boolean(false)},
        TransformParam{.str = "Int32",
                       .source_type = iceberg::int32(),
                       .source = Literal::Int(42),
                       .expected = Literal::Int(42)},
        // Bind against date() so the source type agrees with the Date literal.
        TransformParam{.str = "Date",
                       .source_type = iceberg::date(),
                       .source = Literal::Date(30000),
                       .expected = Literal::Date(30000)},
        TransformParam{.str = "Int64",
                       .source_type = iceberg::int64(),
                       .source = Literal::Long(1234567890),
                       .expected = Literal::Long(1234567890)},
        TransformParam{.str = "Timestamp",
                       .source_type = iceberg::timestamp(),
                       .source = Literal::Timestamp(1622547800000000),
                       .expected = Literal::Timestamp(1622547800000000)},
        TransformParam{.str = "TimestampTz",
                       .source_type = iceberg::timestamp_tz(),
                       .source = Literal::TimestampTz(1622547800000000),
                       .expected = Literal::TimestampTz(1622547800000000)},
        TransformParam{.str = "Float",
                       .source_type = iceberg::float32(),
                       .source = Literal::Float(3.14),
                       .expected = Literal::Float(3.14)},
        TransformParam{.str = "Double",
                       .source_type = iceberg::float64(),
                       .source = Literal::Double(1.23e-5),
                       .expected = Literal::Double(1.23e-5)},
        TransformParam{.str = "Decimal",
                       .source_type = iceberg::decimal(10, 2),
                       .source = Literal::Decimal(123456, 10, 2),
                       .expected = Literal::Decimal(123456, 10, 2)},
        TransformParam{.str = "String",
                       .source_type = iceberg::string(),
                       .source = Literal::String("Hello, World!"),
                       .expected = Literal::String("Hello, World!")},
        TransformParam{
            .str = "Uuid",
            .source_type = iceberg::uuid(),
            .source = Literal::UUID(
                Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value()),
            .expected = Literal::UUID(
                Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value())},
        TransformParam{.str = "Binary",
                       .source_type = iceberg::binary(),
                       .source = Literal::Binary({0x01, 0x02, 0x03}),
                       .expected = Literal::Binary({0x01, 0x02, 0x03})},
        TransformParam{.str = "Fixed",
                       .source_type = iceberg::fixed(3),
                       .source = Literal::Fixed({0x01, 0x02, 0x03}),
                       .expected = Literal::Fixed({0x01, 0x02, 0x03})}),
    [](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
// Fixture for the value-parameterized bucket-transform cases.
class BucketTransformTest : public ::testing::TestWithParam<TransformParam> {};

// Binds a 4-bucket transform to the case's source type, applies it to the
// source literal, and compares the result with the expected literal.
TEST_P(BucketTransformTest, BucketTransform) {
  constexpr int32_t kBucketCount = 4;
  const auto& test_case = GetParam();
  auto bucket = Transform::Bucket(kBucketCount);

  auto bound = bucket->Bind(test_case.source_type);
  ASSERT_TRUE(bound.has_value()) << "Failed to bind bucket transform";

  auto actual = bound.value()->Transform(test_case.source);
  ASSERT_TRUE(actual.has_value())
      << "Failed to transform literal: " << test_case.source.ToString();
  EXPECT_EQ(actual.value(), test_case.expected)
      << "Unexpected result for source: " << test_case.source.ToString();
}
// Cases for BucketTransformTest. The test body binds Transform::Bucket with a
// bucket count of 4, so every `expected` is a Literal::Int in the range 0..3.
// `str` becomes the generated test name.
INSTANTIATE_TEST_SUITE_P(
BucketTransformTests, BucketTransformTest,
::testing::Values(
TransformParam{.str = "Int32",
.source_type = iceberg::int32(),
.source = Literal::Int(34),
.expected = Literal::Int(3)},
// Same numeric value as the Int32 case, supplied as a long literal; the
// expected bucket is the same.
TransformParam{.str = "Int64",
.source_type = iceberg::int64(),
.source = Literal::Long(34),
.expected = Literal::Int(3)},
TransformParam{.str = "Decimal",
// 14.20
.source_type = iceberg::decimal(4, 2),
.source = Literal::Decimal(1420, 4, 2),
.expected = Literal::Int(3)},
TransformParam{.str = "Date",
.source_type = iceberg::date(),
.source = Literal::Date(TemporalTestHelper::CreateDate(
{.year = 2017, .month = 11, .day = 16})),
.expected = Literal::Int(2)},
TransformParam{.str = "Time",
.source_type = iceberg::time(),
.source = Literal::Time(TemporalTestHelper::CreateTime(
{.hour = 22, .minute = 31, .second = 8})),
.expected = Literal::Int(3)},
TransformParam{.str = "Timestamp",
// 2017-11-16T22:31:08 in microseconds
.source_type = iceberg::timestamp(),
.source = Literal::Timestamp(
TemporalTestHelper::CreateTimestamp({.year = 2017,
.month = 11,
.day = 16,
.hour = 22,
.minute = 31,
.second = 8})),
.expected = Literal::Int(3)},
TransformParam{
.str = "TimestampTz",
// 2017-11-16T14:31:08.000001-08:00 in microseconds
.source_type = iceberg::timestamp_tz(),
.source = Literal::TimestampTz(
TemporalTestHelper::CreateTimestampTz({.year = 2017,
.month = 11,
.day = 16,
.hour = 14,
.minute = 31,
.second = 8,
.microsecond = 1,
.tz_offset_minutes = -480})),
.expected = Literal::Int(2)},
TransformParam{.str = "String",
.source_type = iceberg::string(),
.source = Literal::String("iceberg"),
.expected = Literal::Int(1)},
TransformParam{
.str = "Uuid",
.source_type = iceberg::uuid(),
.source = Literal::UUID(
Uuid::FromString("f79c3e09-677c-4bbd-a479-3f349cb785e7").value()),
.expected = Literal::Int(0)},
// Fixed and Binary carry the same four bytes and expect the same bucket.
TransformParam{.str = "Fixed",
.source_type = iceberg::fixed(4),
.source = Literal::Fixed({0, 1, 2, 3}),
.expected = Literal::Int(1)},
TransformParam{.str = "Binary",
.source_type = iceberg::binary(),
.source = Literal::Binary({0, 1, 2, 3}),
.expected = Literal::Int(1)}),
[](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
// Fixture for the value-parameterized truncate-transform cases.
class TruncateTransformTest : public ::testing::TestWithParam<TransformParam> {};

// Builds a truncate transform whose width comes from the case's `param`, binds
// it to the source type, applies it, and compares with the expected literal.
TEST_P(TruncateTransformTest, TruncateTransform) {
  const auto& test_case = GetParam();
  auto truncate = Transform::Truncate(test_case.param);

  auto bound = truncate->Bind(test_case.source_type);
  ASSERT_TRUE(bound.has_value()) << "Failed to bind truncate transform";

  auto actual = bound.value()->Transform(test_case.source);
  ASSERT_TRUE(actual.has_value())
      << "Failed to transform literal: " << test_case.source.ToString();
  EXPECT_EQ(actual.value(), test_case.expected)
      << "Unexpected result for source: " << test_case.source.ToString();
}
// Cases for TruncateTransformTest. `param` is the width handed to
// Transform::Truncate by the test body; `expected` is the truncated form of
// `source`. `str` becomes the generated test name.
INSTANTIATE_TEST_SUITE_P(
TruncateTransformTests, TruncateTransformTest,
::testing::Values(
// 123456 with width 5 is expected to become 123455.
TransformParam{.str = "Int32",
.param = 5,
.source_type = iceberg::int32(),
.source = Literal::Int(123456),
.expected = Literal::Int(123455)},
// A negative input is expected to move down to -10, not up to 0.
TransformParam{.str = "Int64",
.param = 10,
.source_type = iceberg::int64(),
.source = Literal::Long(-1),
.expected = Literal::Long(-10)},
// Unscaled value 12345 with width 50 is expected to become 12300.
TransformParam{.str = "Decimal",
.param = 50,
.source_type = iceberg::decimal(5, 2),
.source = Literal::Decimal(12345, 5, 2),
.expected = Literal::Decimal(12300, 5, 2)},
TransformParam{.str = "StringShort",
.param = 5,
.source_type = iceberg::string(),
.source = Literal::String("Hello, World!"),
.expected = Literal::String("Hello")},
// With width 5 the expected string keeps the first five emoji, each of
// which is a multi-byte character.
TransformParam{.str = "StringEmoji",
.param = 5,
.source_type = iceberg::string(),
.source = Literal::String("😜🧐🤔🤪🥳😵‍💫😂"),
.expected = Literal::String("😜🧐🤔🤪🥳")},
// With width 8 the expected string keeps four letter/emoji pairs.
TransformParam{.str = "StringMixed",
.param = 8,
.source_type = iceberg::string(),
.source = Literal::String("a😜b🧐c🤔d🤪e🥳"),
.expected = Literal::String("a😜b🧐c🤔d🤪")},
// Six input bytes with width 5 are expected to keep the first five.
TransformParam{.str = "Binary",
.param = 5,
.source_type = iceberg::binary(),
.source = Literal::Binary({0x01, 0x02, 0x03, 0x04, 0x05, 0x06}),
.expected = Literal::Binary({0x01, 0x02, 0x03, 0x04, 0x05})}),
[](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
// Fixture for the value-parameterized year-transform cases.
class YearTransformTest : public ::testing::TestWithParam<TransformParam> {};

// Binds the year transform to the case's source type, applies it to the source
// literal, and compares the result with the expected literal.
TEST_P(YearTransformTest, YearTransform) {
  const auto& test_case = GetParam();
  auto year = Transform::Year();

  auto bound = year->Bind(test_case.source_type);
  ASSERT_TRUE(bound.has_value()) << "Failed to bind year transform";

  auto actual = bound.value()->Transform(test_case.source);
  ASSERT_TRUE(actual.has_value())
      << "Failed to transform literal: " << test_case.source.ToString();
  EXPECT_EQ(actual.value(), test_case.expected)
      << "Unexpected result for source: " << test_case.source.ToString();
}
// Cases for YearTransformTest. Each `expected` is written as
// `<calendar year> - 1970`, i.e. a year offset from 1970. `str` becomes the
// generated test name.
INSTANTIATE_TEST_SUITE_P(
YearTransformTests, YearTransformTest,
::testing::Values(
TransformParam{.str = "Timestamp",
// 2021-06-01T11:43:20Z
.source_type = iceberg::timestamp(),
.source = Literal::Timestamp(
TemporalTestHelper::CreateTimestamp({.year = 2021,
.month = 6,
.day = 1,
.hour = 11,
.minute = 43,
.second = 20})),
.expected = Literal::Int(2021 - 1970)},
// The local date is in 2021 but the UTC instant falls in 2020, and 2020 is
// the year the case expects.
TransformParam{
.str = "TimestampTz",
// 2021-01-01T07:43:20+08:00, which is 2020-12-31T23:43:20Z
.source_type = iceberg::timestamp_tz(),
.source = Literal::TimestampTz(
TemporalTestHelper::CreateTimestampTz({.year = 2021,
.month = 1,
.day = 1,
.hour = 7,
.minute = 43,
.second = 20,
.tz_offset_minutes = 480})),
.expected = Literal::Int(2020 - 1970)},
TransformParam{.str = "Date",
.source_type = iceberg::date(),
.source = Literal::Date(TemporalTestHelper::CreateDate(
{.year = 2052, .month = 2, .day = 20})),
.expected = Literal::Int(2052 - 1970)}),
[](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
// Fixture for the value-parameterized month-transform cases.
class MonthTransformTest : public ::testing::TestWithParam<TransformParam> {};

// Binds the month transform to the case's source type, applies it to the
// source literal, and compares the result with the expected literal.
TEST_P(MonthTransformTest, MonthTransform) {
  const auto& test_case = GetParam();
  auto month = Transform::Month();

  auto bound = month->Bind(test_case.source_type);
  ASSERT_TRUE(bound.has_value()) << "Failed to bind month transform";

  auto actual = bound.value()->Transform(test_case.source);
  ASSERT_TRUE(actual.has_value())
      << "Failed to transform literal: " << test_case.source.ToString();
  EXPECT_EQ(actual.value(), test_case.expected)
      << "Unexpected result for source: " << test_case.source.ToString();
}
// Cases for MonthTransformTest. `str` becomes the generated test name.
// NOTE(review): 1622547800000000 read as microseconds since the epoch is
// 2021-06-01T11:43:20Z, and 617 == (2021 - 1970) * 12 + 5; day 30000 since the
// epoch is 2052-02-20, and 985 == (2052 - 1970) * 12 + 1. So `expected` looks
// like a month count from 1970-01 -- confirm against the transform's contract.
INSTANTIATE_TEST_SUITE_P(
MonthTransformTests, MonthTransformTest,
::testing::Values(TransformParam{.str = "Timestamp",
.source_type = iceberg::timestamp(),
.source = Literal::Timestamp(1622547800000000),
.expected = Literal::Int(617)},
TransformParam{.str = "TimestampTz",
.source_type = iceberg::timestamp_tz(),
.source = Literal::TimestampTz(1622547800000000),
.expected = Literal::Int(617)},
TransformParam{.str = "Date",
.source_type = iceberg::date(),
.source = Literal::Date(30000),
.expected = Literal::Int(985)}),
[](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
// Fixture for the value-parameterized day-transform cases.
class DayTransformTest : public ::testing::TestWithParam<TransformParam> {};

// Binds the day transform to the case's source type, applies it to the source
// literal, and compares the result with the expected literal.
TEST_P(DayTransformTest, DayTransform) {
  const auto& test_case = GetParam();
  auto day = Transform::Day();

  auto bound = day->Bind(test_case.source_type);
  ASSERT_TRUE(bound.has_value()) << "Failed to bind day transform";

  auto actual = bound.value()->Transform(test_case.source);
  ASSERT_TRUE(actual.has_value())
      << "Failed to transform literal: " << test_case.source.ToString();
  EXPECT_EQ(actual.value(), test_case.expected)
      << "Unexpected result for source: " << test_case.source.ToString();
}
// Cases for DayTransformTest. Each `expected` is a Literal::Int wrapping the
// value TemporalTestHelper::CreateDate returns for the expected calendar day
// (or the raw day number for the Date case). `str` becomes the generated test
// name.
INSTANTIATE_TEST_SUITE_P(
DayTransformTests, DayTransformTest,
::testing::Values(
TransformParam{.str = "Timestamp",
.source_type = iceberg::timestamp(),
.source = Literal::Timestamp(
TemporalTestHelper::CreateTimestamp({.year = 2021,
.month = 6,
.day = 1,
.hour = 11,
.minute = 43,
.second = 20})),
.expected = Literal::Int(TemporalTestHelper::CreateDate(
{.year = 2021, .month = 6, .day = 1}))},
// Built as 2021-01-01 07:43:20 with a +480 minute offset; the expected day is
// 2020-12-31.
TransformParam{
.str = "TimestampTz",
.source_type = iceberg::timestamp_tz(),
.source = Literal::TimestampTz(
TemporalTestHelper::CreateTimestampTz({.year = 2021,
.month = 1,
.day = 1,
.hour = 7,
.minute = 43,
.second = 20,
.tz_offset_minutes = 480})),
.expected = Literal::Int(
TemporalTestHelper::CreateDate({.year = 2020, .month = 12, .day = 31}))},
// A date input is expected to map to the same number.
TransformParam{.str = "Date",
.source_type = iceberg::date(),
.source = Literal::Date(30000),
.expected = Literal::Int(30000)}),
[](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
// Fixture for the value-parameterized hour-transform cases.
class HourTransformTest : public ::testing::TestWithParam<TransformParam> {};

// Binds the hour transform to the case's source type, applies it to the source
// literal, and compares the result with the expected literal.
TEST_P(HourTransformTest, HourTransform) {
  const auto& test_case = GetParam();
  auto hour = Transform::Hour();

  auto bound = hour->Bind(test_case.source_type);
  ASSERT_TRUE(bound.has_value()) << "Failed to bind hour transform";

  auto actual = bound.value()->Transform(test_case.source);
  ASSERT_TRUE(actual.has_value())
      << "Failed to transform literal: " << test_case.source.ToString();
  EXPECT_EQ(actual.value(), test_case.expected)
      << "Unexpected result for source: " << test_case.source.ToString();
}
// Cases for HourTransformTest. `str` becomes the generated test name.
// NOTE(review): 1622547800000000 read as microseconds is 1622547800 seconds,
// and 1622547800 / 3600 rounds down to 450707, so `expected` looks like whole
// hours since the epoch -- confirm against the transform's contract.
INSTANTIATE_TEST_SUITE_P(
HourTransformTests, HourTransformTest,
::testing::Values(TransformParam{.str = "Timestamp",
.source_type = iceberg::timestamp(),
.source = Literal::Timestamp(1622547800000000),
.expected = Literal::Int(450707)},
TransformParam{.str = "TimestampTz",
.source_type = iceberg::timestamp_tz(),
.source = Literal::TimestampTz(1622547800000000),
.expected = Literal::Int(450707)}),
[](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
// Fixture for the value-parameterized void-transform cases.
class VoidTransformTest : public ::testing::TestWithParam<TransformParam> {};

// Binds the void transform to the case's source type, applies it to the source
// literal, and checks that the result is a null literal carrying the source
// type's id and rendering the same as the expected literal.
TEST_P(VoidTransformTest, VoidTransform) {
  auto transform = Transform::Void();
  const auto& param = GetParam();
  auto transformPtr = transform->Bind(param.source_type);
  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind void transform";

  auto result = transformPtr.value()->Transform(param.source);
  // Stop here on failure: the checks below dereference `result`, which is
  // only valid when the transform call produced a value.
  ASSERT_TRUE(result.has_value())
      << "Failed to transform literal: " << param.source.ToString();

  EXPECT_TRUE(result->IsNull())
      << "Expected void transform to return null type for source: "
      << param.source.ToString();
  EXPECT_EQ(result->type()->type_id(), param.source_type->type_id())
      << "Expected void transform to return same type as source for: "
      << param.source.ToString();
  // Compared via ToString() rather than operator==.
  EXPECT_EQ(result->ToString(), param.expected.ToString())
      << "Unexpected result for source: " << param.source.ToString();
}
// Cases for VoidTransformTest. Every `expected` is Literal::Null of the same
// type as `source_type`, regardless of the source value. `str` becomes the
// generated test name.
INSTANTIATE_TEST_SUITE_P(
VoidTransformTests, VoidTransformTest,
::testing::Values(
TransformParam{.str = "Boolean",
.source_type = iceberg::boolean(),
.source = Literal::Boolean(true),
.expected = Literal::Null(iceberg::boolean())},
TransformParam{.str = "Int32",
.source_type = iceberg::int32(),
.source = Literal::Int(42),
.expected = Literal::Null(iceberg::int32())},
TransformParam{.str = "Date",
.source_type = iceberg::date(),
.source = Literal::Date(30000),
.expected = Literal::Null(iceberg::date())},
TransformParam{.str = "Int64",
.source_type = iceberg::int64(),
.source = Literal::Long(1234567890),
.expected = Literal::Null(iceberg::int64())},
TransformParam{.str = "Timestamp",
.source_type = iceberg::timestamp(),
.source = Literal::Timestamp(1622547800000000),
.expected = Literal::Null(iceberg::timestamp())},
TransformParam{.str = "TimestampTz",
.source_type = iceberg::timestamp_tz(),
.source = Literal::TimestampTz(1622547800000000),
.expected = Literal::Null(iceberg::timestamp_tz())},
TransformParam{.str = "Float",
.source_type = iceberg::float32(),
.source = Literal::Float(3.14),
.expected = Literal::Null(iceberg::float32())},
TransformParam{.str = "Double",
.source_type = iceberg::float64(),
.source = Literal::Double(1.23e-5),
.expected = Literal::Null(iceberg::float64())},
TransformParam{.str = "Decimal",
.source_type = iceberg::decimal(10, 2),
.source = Literal::Decimal(123456, 10, 2),
.expected = Literal::Null(iceberg::decimal(10, 2))},
TransformParam{.str = "String",
.source_type = iceberg::string(),
.source = Literal::String("Hello, World!"),
.expected = Literal::Null(iceberg::string())},
TransformParam{
.str = "Uuid",
.source_type = iceberg::uuid(),
.source = Literal::UUID(
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value()),
.expected = Literal::Null(iceberg::uuid())},
TransformParam{.str = "Binary",
.source_type = iceberg::binary(),
.source = Literal::Binary({0x01, 0x02, 0x03}),
.expected = Literal::Null(iceberg::binary())},
TransformParam{.str = "Fixed",
.source_type = iceberg::fixed(3),
.source = Literal::Fixed({0x01, 0x02, 0x03}),
.expected = Literal::Null(iceberg::fixed(3))}),
[](const ::testing::TestParamInfo<TransformParam>& info) { return info.param.str; });
// Fixture for the cases that feed a null literal through each transform.
class NullLiteralTransformTest : public ::testing::TestWithParam<TransformParam> {};

// Parses the transform named by the case's `str`, binds it to the source type,
// applies it to the (null) source literal, and checks that the result is null
// and renders the same as the expected literal.
TEST_P(NullLiteralTransformTest, NullLiteralTransform) {
  const auto& param = GetParam();
  auto result = TransformFromString(param.str);
  ASSERT_TRUE(result.has_value()) << "Failed to parse: " << param.str;

  const auto& transform = result.value();
  const auto transformPtr = transform->Bind(param.source_type);
  ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind: " << param.str;

  auto transform_result = transformPtr.value()->Transform(param.source);
  // Stop here on failure: the checks below dereference `transform_result`,
  // which is only valid when the transform call produced a value.
  ASSERT_TRUE(transform_result.has_value())
      << "Failed to transform literal: " << param.source.ToString();

  EXPECT_TRUE(transform_result->IsNull())
      << "Expected transform to return null type for source: " << param.source.ToString();
  // Compared via ToString() rather than operator==.
  EXPECT_EQ(transform_result->ToString(), param.expected.ToString())
      << "Unexpected result for source: " << param.source.ToString();
}
// Cases for NullLiteralTransformTest. Here `str` is the transform string the
// test body parses with TransformFromString, not a test label, and no name
// generator is passed to the macro. Every `source` is a null literal of
// `source_type`; `expected` is a null literal whose rendering the result must
// match.
INSTANTIATE_TEST_SUITE_P(
NullLiteralTransformTests, NullLiteralTransformTest,
::testing::Values(TransformParam{.str = "identity",
.source_type = iceberg::string(),
.source = Literal::Null(iceberg::string()),
.expected = Literal::Null(iceberg::string())},
TransformParam{.str = "year",
.source_type = iceberg::timestamp(),
.source = Literal::Null(iceberg::timestamp()),
.expected = Literal::Null(iceberg::int32())},
TransformParam{.str = "month",
.source_type = iceberg::timestamp(),
.source = Literal::Null(iceberg::timestamp()),
.expected = Literal::Null(iceberg::int32())},
TransformParam{.str = "day",
.source_type = iceberg::timestamp(),
.source = Literal::Null(iceberg::timestamp()),
.expected = Literal::Null(iceberg::int32())},
TransformParam{.str = "hour",
.source_type = iceberg::timestamp(),
.source = Literal::Null(iceberg::timestamp()),
.expected = Literal::Null(iceberg::int32())},
TransformParam{.str = "void",
.source_type = iceberg::string(),
.source = Literal::Null(iceberg::string()),
.expected = Literal::Null(iceberg::string())},
TransformParam{.str = "bucket[16]",
.source_type = iceberg::string(),
.source = Literal::Null(iceberg::string()),
.expected = Literal::Null(iceberg::int32())},
TransformParam{.str = "truncate[32]",
.source_type = iceberg::string(),
.source = Literal::Null(iceberg::string()),
.expected = Literal::Null(iceberg::string())}));
// Checks the value reported by PreservesOrder() for every transform kind.
TEST(TransformPreservesOrderTest, PreservesOrder) {
  struct Case {
    std::string transform_str;
    bool expected;
  };

  // Each row: transform string, expected PreservesOrder() result.
  const std::vector<Case> cases = {
      {"identity", true},
      {"year", true},
      {"month", true},
      {"day", true},
      {"hour", true},
      {"void", false},
      {"bucket[16]", false},
      {"truncate[32]", true},
  };

  for (const auto& test_case : cases) {
    auto parsed = TransformFromString(test_case.transform_str);
    ASSERT_TRUE(parsed.has_value()) << "Failed to parse: " << test_case.transform_str;

    const bool preserves_order = parsed.value()->PreservesOrder();
    EXPECT_EQ(preserves_order, test_case.expected)
        << "Unexpected result for transform: " << test_case.transform_str;
  }
}
// Checks SatisfiesOrderOf() for pairs of transforms: whether ordering by the
// first transform is reported as satisfying the ordering of the second.
TEST(TransformSatisfiesOrderOfTest, SatisfiesOrderOf) {
  struct Case {
    std::string transform_str;
    std::string other_transform_str;
    bool expected;
  };

  // Each row: transform, other transform, expected SatisfiesOrderOf() result.
  const std::vector<Case> cases = {
      // identity: satisfies every order-preserving transform, but not bucket.
      {"identity", "identity", true},
      {"identity", "year", true},
      {"identity", "month", true},
      {"identity", "day", true},
      {"identity", "hour", true},
      {"identity", "truncate[32]", true},
      {"identity", "bucket[16]", false},
      // truncate: satisfies truncate of equal or smaller width only.
      {"truncate[32]", "truncate[16]", true},
      {"truncate[16]", "truncate[16]", true},
      {"truncate[16]", "truncate[32]", false},
      {"truncate[16]", "bucket[32]", false},
      // hour: satisfies hour and every coarser temporal transform.
      {"hour", "hour", true},
      {"hour", "day", true},
      {"hour", "month", true},
      {"hour", "year", true},
      {"hour", "identity", false},
      {"hour", "bucket[16]", false},
      // day: satisfies day, month and year.
      {"day", "day", true},
      {"day", "month", true},
      {"day", "year", true},
      {"day", "hour", false},
      {"day", "identity", false},
      // month: satisfies month and year.
      {"month", "month", true},
      {"month", "year", true},
      {"month", "day", false},
      {"month", "hour", false},
      // year: satisfies year only.
      {"year", "year", true},
      {"year", "month", false},
      {"year", "day", false},
      {"year", "hour", false},
      // void: satisfies none of the order-preserving transforms.
      {"void", "identity", false},
      {"void", "year", false},
      {"void", "month", false},
      {"void", "day", false},
      {"void", "hour", false},
      // bucket: satisfies only an identical bucket transform.
      {"bucket[16]", "bucket[16]", true},
      {"bucket[16]", "bucket[32]", false},
      {"bucket[16]", "identity", false},
  };

  for (const auto& test_case : cases) {
    auto lhs = TransformFromString(test_case.transform_str);
    auto rhs = TransformFromString(test_case.other_transform_str);
    ASSERT_TRUE(lhs.has_value()) << "Failed to parse: " << test_case.transform_str;
    ASSERT_TRUE(rhs.has_value())
        << "Failed to parse: " << test_case.other_transform_str;

    const bool satisfies = lhs.value()->SatisfiesOrderOf(*rhs.value());
    EXPECT_EQ(satisfies, test_case.expected)
        << "Unexpected result for transform: " << test_case.transform_str
        << " and other transform: " << test_case.other_transform_str;
  }
}
// Checks CanTransform() for each transform kind against accepted and rejected
// source types.
TEST(TransformCanTransformTest, CanTransform) {
  struct Case {
    std::string transform_str;
    std::shared_ptr<Type> source_type;
    bool expected;
  };

  // Each row: transform string, candidate source type, expected result.
  const std::vector<Case> cases = {
      // identity: primitives are accepted, a list is not.
      {"identity", int32(), true},
      {"identity", string(), true},
      {"identity", boolean(), true},
      {"identity", list(SchemaField(123, "element", int32(), false)), false},
      // void: accepts a primitive and a map alike.
      {"void", iceberg::int32(), true},
      {"void",
       iceberg::map(SchemaField(123, "key", iceberg::string(), false),
                    SchemaField(124, "value", iceberg::int32(), true)),
       true},
      // bucket: int32 and string are accepted, float32 is not.
      {"bucket[16]", iceberg::int32(), true},
      {"bucket[16]", iceberg::string(), true},
      {"bucket[16]", iceberg::float32(), false},
      // truncate: int32 and string are accepted, boolean is not.
      {"truncate[32]", iceberg::int32(), true},
      {"truncate[32]", iceberg::string(), true},
      {"truncate[32]", iceberg::boolean(), false},
      // year: date and timestamp are accepted, string is not.
      {"year", iceberg::date(), true},
      {"year", iceberg::timestamp(), true},
      {"year", iceberg::string(), false},
      // month: date and timestamp are accepted, binary is not.
      {"month", iceberg::date(), true},
      {"month", iceberg::timestamp(), true},
      {"month", iceberg::binary(), false},
      // day: date and timestamp are accepted, uuid is not.
      {"day", iceberg::date(), true},
      {"day", iceberg::timestamp(), true},
      {"day", iceberg::uuid(), false},
      // hour: both timestamp flavours are accepted, int32 is not.
      {"hour", iceberg::timestamp(), true},
      {"hour", iceberg::timestamp_tz(), true},
      {"hour", iceberg::int32(), false},
  };

  for (const auto& test_case : cases) {
    auto parsed = TransformFromString(test_case.transform_str);
    ASSERT_TRUE(parsed.has_value()) << "Failed to parse: " << test_case.transform_str;

    const bool can_transform = parsed.value()->CanTransform(*test_case.source_type);
    EXPECT_EQ(can_transform, test_case.expected)
        << "Unexpected result for transform: " << test_case.transform_str
        << " and source type: " << test_case.source_type->ToString();
  }
}
// Fixture for the Transform::Project tests. SetUp() builds one schema per source
// type; every schema exposes a field named "value".
class TransformProjectTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Builds a schema with a single required field "value" (id 1) of the given type.
    auto single_value_schema = [](std::shared_ptr<Type> type) {
      return std::make_shared<Schema>(
          std::vector<SchemaField>{
              SchemaField::MakeRequired(1, "value", std::move(type))},
          /*schema_id=*/0);
    };

    // The int schema differs from the rest: a required "key" column (id 1) plus an
    // optional "value" column (id 2).
    int_schema_ = std::make_shared<Schema>(
        std::vector<SchemaField>{SchemaField::MakeRequired(1, "key", int32()),
                                 SchemaField::MakeOptional(2, "value", int32())},
        /*schema_id=*/0);
    long_schema_ = single_value_schema(int64());
    string_schema_ = single_value_schema(string());
    date_schema_ = single_value_schema(date());
    timestamp_schema_ = single_value_schema(timestamp());
  }

  std::shared_ptr<Schema> int_schema_;
  std::shared_ptr<Schema> long_schema_;
  std::shared_ptr<Schema> string_schema_;
  std::shared_ptr<Schema> date_schema_;
  std::shared_ptr<Schema> timestamp_schema_;
};
// Identity projection of `value = 100`: expects an equality predicate carrying the
// single int literal 100.
TEST_F(TransformProjectTest, IdentityProjectEquality) {
  auto transform = Transform::Identity();

  // Bind `value = 100` against the int schema.
  auto unbound = Expressions::Equal("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->Project("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kEq);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  ASSERT_NE(unbound_projected, nullptr);
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 100);
}
// Identity projection of comparison predicates: the projected operation is expected
// to equal the source operation (checked for `<` and `>=`).
TEST_F(TransformProjectTest, IdentityProjectComparison) {
  auto identity = Transform::Identity();

  {
    // value < 50
    auto less_than = Expressions::LessThan("value", Literal::Int(50));
    ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                           less_than->Bind(*int_schema_, /*case_sensitive=*/true));
    auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
    ASSERT_NE(predicate, nullptr);

    ICEBERG_UNWRAP_OR_FAIL(auto result, identity->Project("part", predicate));
    ASSERT_NE(result, nullptr);
    EXPECT_EQ(result->op(), Expression::Operation::kLt);
  }

  {
    // value >= 100
    auto at_least = Expressions::GreaterThanOrEqual("value", Literal::Int(100));
    ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                           at_least->Bind(*int_schema_, /*case_sensitive=*/true));
    auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
    ASSERT_NE(predicate, nullptr);

    ICEBERG_UNWRAP_OR_FAIL(auto result, identity->Project("part", predicate));
    ASSERT_NE(result, nullptr);
    EXPECT_EQ(result->op(), Expression::Operation::kGtEq);
  }
}
// Identity projection of a unary predicate: `value IS NULL` is expected to project
// to a predicate whose operation is still kIsNull.
TEST_F(TransformProjectTest, IdentityProjectUnary) {
  auto identity = Transform::Identity();

  auto is_null = Expressions::IsNull("value");
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         is_null->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, identity->Project("part", predicate));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(result->op(), Expression::Operation::kIsNull);
}
// Identity projection of `value IN (1, 2, 3)`: expects an IN predicate that carries
// the same three int literals (order is not checked).
TEST_F(TransformProjectTest, IdentityProjectSet) {
  auto identity = Transform::Identity();

  auto in_set =
      Expressions::In("value", {Literal::Int(1), Literal::Int(2), Literal::Int(3)});
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         in_set->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, identity->Project("part", predicate));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(result->op(), Expression::Operation::kIn);

  auto projected_pred =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(result));
  ASSERT_NE(projected_pred, nullptr);
  EXPECT_EQ(projected_pred->op(), Expression::Operation::kIn);
  EXPECT_EQ(projected_pred->literals().size(), 3);

  // Unpack the literal payloads so they can be compared as plain ints.
  std::vector<int32_t> projected_ints;
  for (const auto& literal : projected_pred->literals()) {
    projected_ints.push_back(std::get<int32_t>(literal.value()));
  }
  EXPECT_THAT(projected_ints, testing::UnorderedElementsAre(1, 2, 3));
}
// Bucket[4] projection of `value = 34`: expects an equality predicate whose single
// int literal is 3 (the bucket the test expects for 34).
TEST_F(TransformProjectTest, BucketProjectEquality) {
  auto transform = Transform::Bucket(4);

  // Bind `value = 34` against the int schema.
  auto unbound = Expressions::Equal("value", Literal::Int(34));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->Project("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kEq);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  ASSERT_NE(unbound_projected, nullptr);
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 3);
}
// Projects a predicate whose term is already a bucket transform,
// `bucket(value, 16) = 5`, through a Bucket[16] partition transform. Expects an
// equality predicate that keeps the literal 5 unchanged.
TEST_F(TransformProjectTest, BucketProjectWithMatchingTransformedChild) {
  auto partition_transform = Transform::Bucket(16);

  // Build and bind: bucket(value, 16) = 5
  auto bucket_term = Expressions::Bucket("value", 16);
  auto unbound = Expressions::Equal<BoundTransform>(bucket_term, Literal::Int(5));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  // The bound predicate's term should be a transform, not a plain reference.
  EXPECT_EQ(bound_pred->term()->kind(), Term::Kind::kTransform);

  // When the term's transform matches the partition transform, Project is expected
  // to strip the transform and return the predicate on the partition field.
  ICEBERG_UNWRAP_OR_FAIL(auto projected,
                         partition_transform->Project("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kEq);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  ASSERT_NE(unbound_projected, nullptr);
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 5);
}
// Bucket[16] projection of a range predicate (`value < 50`): Project is expected to
// succeed but hand back a null predicate, i.e. no projection.
TEST_F(TransformProjectTest, BucketProjectComparisonReturnsNull) {
  auto bucket = Transform::Bucket(16);

  auto less_than = Expressions::LessThan("value", Literal::Int(50));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         less_than->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, bucket->Project("part", predicate));
  EXPECT_EQ(result, nullptr);
}
// Bucket[16] projection of `value IN (1, 2, 3)`: expects a non-null predicate whose
// operation is kIn.
TEST_F(TransformProjectTest, BucketProjectInSet) {
  auto bucket = Transform::Bucket(16);

  auto in_set =
      Expressions::In("value", {Literal::Int(1), Literal::Int(2), Literal::Int(3)});
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         in_set->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, bucket->Project("part", predicate));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(result->op(), Expression::Operation::kIn);
}
// Bucket[16] projection of `value NOT IN (1, 2, 3)`: Project is expected to succeed
// but hand back a null predicate, i.e. no projection.
TEST_F(TransformProjectTest, BucketProjectNotInReturnsNull) {
  auto bucket = Transform::Bucket(16);

  auto not_in_set =
      Expressions::NotIn("value", {Literal::Int(1), Literal::Int(2), Literal::Int(3)});
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         not_in_set->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, bucket->Project("part", predicate));
  EXPECT_EQ(result, nullptr);
}
// Truncate[10] projection of `value = 123`: expects an equality predicate whose
// single int literal is 120.
TEST_F(TransformProjectTest, TruncateProjectIntEquality) {
  auto transform = Transform::Truncate(10);

  // Bind `value = 123` against the int schema.
  auto unbound = Expressions::Equal("value", Literal::Int(123));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->Project("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kEq);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  ASSERT_NE(unbound_projected, nullptr);
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 120);
}
// Truncate[10] projection of `value < 25`: the strict `<` is expected to be relaxed
// to `<=` on the partition field. Only the operation is checked here.
TEST_F(TransformProjectTest, TruncateProjectIntLessThan) {
  auto truncate = Transform::Truncate(10);

  auto less_than = Expressions::LessThan("value", Literal::Int(25));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         less_than->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, truncate->Project("part", predicate));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(result->op(), Expression::Operation::kLtEq);
}
// Truncate[10] projection of `value > 25`: the strict `>` is expected to be relaxed
// to `>=`, with the single int literal 20.
TEST_F(TransformProjectTest, TruncateProjectIntGreaterThan) {
  auto transform = Transform::Truncate(10);

  // Bind `value > 25` against the int schema.
  auto unbound = Expressions::GreaterThan("value", Literal::Int(25));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->Project("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kGtEq);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  ASSERT_NE(unbound_projected, nullptr);
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kGtEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 20);
}
// Truncate[5] projection of `value = "Hello, World!"`: expects an equality
// predicate whose single string literal is the 5-character prefix "Hello".
TEST_F(TransformProjectTest, TruncateProjectStringEquality) {
  auto transform = Transform::Truncate(5);

  auto unbound = Expressions::Equal("value", Literal::String("Hello, World!"));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->Project("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kEq);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  ASSERT_NE(unbound_projected, nullptr);
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<std::string>(unbound_projected->literals().front().value()),
            "Hello");
}
// Truncate[5] projection of StartsWith on ASCII prefixes:
//  - a prefix shorter than the width ("Hi") is expected to stay StartsWith;
//  - a prefix exactly as long as the width ("Hello") is expected to become Eq.
TEST_F(TransformProjectTest, TruncateProjectStringStartsWith) {
  auto transform = Transform::Truncate(5);

  // Case 1: prefix shorter than the truncate width.
  auto unbound_short = Expressions::StartsWith("value", "Hi");
  ICEBERG_UNWRAP_OR_FAIL(auto bound_short,
                         unbound_short->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_short = std::dynamic_pointer_cast<BoundPredicate>(bound_short);
  ASSERT_NE(bound_pred_short, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_short,
                         transform->Project("part", bound_pred_short));
  ASSERT_NE(projected_short, nullptr);
  EXPECT_EQ(projected_short->op(), Expression::Operation::kStartsWith);

  auto unbound_projected_short =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_short));
  ASSERT_NE(unbound_projected_short, nullptr);
  EXPECT_EQ(unbound_projected_short->op(), Expression::Operation::kStartsWith);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_short->literals().size(), 1);
  EXPECT_EQ(std::get<std::string>(unbound_projected_short->literals().front().value()),
            "Hi");

  // Case 2: prefix length equal to the truncate width.
  auto unbound_equal = Expressions::StartsWith("value", "Hello");
  ICEBERG_UNWRAP_OR_FAIL(auto bound_equal,
                         unbound_equal->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_equal = std::dynamic_pointer_cast<BoundPredicate>(bound_equal);
  ASSERT_NE(bound_pred_equal, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_equal,
                         transform->Project("part", bound_pred_equal));
  ASSERT_NE(projected_equal, nullptr);
  EXPECT_EQ(projected_equal->op(), Expression::Operation::kEq);

  auto unbound_projected_equal =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_equal));
  ASSERT_NE(unbound_projected_equal, nullptr);
  EXPECT_EQ(unbound_projected_equal->op(), Expression::Operation::kEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_equal->literals().size(), 1);
  EXPECT_EQ(std::get<std::string>(unbound_projected_equal->literals().front().value()),
            "Hello");
}
// Truncate[5] projection of StartsWith with a multi-byte UTF-8 prefix that has fewer
// code points than the width: "😜🧐" (2 code points) is expected to stay StartsWith
// with the literal unchanged.
TEST_F(TransformProjectTest, TruncateProjectStringStartsWithCodePointCountLessThanWidth) {
  auto transform = Transform::Truncate(5);

  auto unbound_emoji_short = Expressions::StartsWith("value", "😜🧐");
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_emoji_short,
      unbound_emoji_short->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_emoji_short =
      std::dynamic_pointer_cast<BoundPredicate>(bound_emoji_short);
  ASSERT_NE(bound_pred_emoji_short, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_emoji_short,
                         transform->Project("part", bound_pred_emoji_short));
  ASSERT_NE(projected_emoji_short, nullptr);
  EXPECT_EQ(projected_emoji_short->op(), Expression::Operation::kStartsWith);

  auto unbound_projected_emoji_short =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_emoji_short));
  ASSERT_NE(unbound_projected_emoji_short, nullptr);
  EXPECT_EQ(unbound_projected_emoji_short->op(), Expression::Operation::kStartsWith);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_emoji_short->literals().size(), 1);
  EXPECT_EQ(
      std::get<std::string>(unbound_projected_emoji_short->literals().front().value()),
      "😜🧐");
}
// Truncate[5] projection of StartsWith with a multi-byte UTF-8 prefix of exactly
// width code points: "😜🧐🤔🤪🥳" (5 code points) is expected to become Eq with
// the literal unchanged.
TEST_F(TransformProjectTest, TruncateProjectStringStartsWithCodePointCountEqualToWidth) {
  auto transform = Transform::Truncate(5);

  auto unbound_emoji_equal = Expressions::StartsWith("value", "😜🧐🤔🤪🥳");
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_emoji_equal,
      unbound_emoji_equal->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_emoji_equal =
      std::dynamic_pointer_cast<BoundPredicate>(bound_emoji_equal);
  ASSERT_NE(bound_pred_emoji_equal, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_emoji_equal,
                         transform->Project("part", bound_pred_emoji_equal));
  ASSERT_NE(projected_emoji_equal, nullptr);
  EXPECT_EQ(projected_emoji_equal->op(), Expression::Operation::kEq);

  auto unbound_projected_emoji_equal =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_emoji_equal));
  ASSERT_NE(unbound_projected_emoji_equal, nullptr);
  EXPECT_EQ(unbound_projected_emoji_equal->op(), Expression::Operation::kEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_emoji_equal->literals().size(), 1);
  EXPECT_EQ(
      std::get<std::string>(unbound_projected_emoji_equal->literals().front().value()),
      "😜🧐🤔🤪🥳");
}
// Truncate[5] projection of StartsWith with a multi-byte UTF-8 prefix longer than
// the width: the operation is expected to stay StartsWith and the literal to be cut
// to its first 5 code points, "😜🧐🤔🤪🥳".
// NOTE(review): the prefix shows 7 emoji, but one of them appears to be a
// multi-code-point (ZWJ) sequence, so the code point count is likely above 7; the
// test only relies on it being greater than 5.
TEST_F(TransformProjectTest,
       TruncateProjectStringStartsWithCodePointCountGreaterThanWidth) {
  auto transform = Transform::Truncate(5);

  auto unbound_emoji_long =
      Expressions::StartsWith("value", "😜🧐🤔🤪🥳😵‍💫😂");
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_emoji_long,
      unbound_emoji_long->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_emoji_long =
      std::dynamic_pointer_cast<BoundPredicate>(bound_emoji_long);
  ASSERT_NE(bound_pred_emoji_long, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_emoji_long,
                         transform->Project("part", bound_pred_emoji_long));
  ASSERT_NE(projected_emoji_long, nullptr);
  EXPECT_EQ(projected_emoji_long->op(), Expression::Operation::kStartsWith);

  auto unbound_projected_emoji_long =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_emoji_long));
  ASSERT_NE(unbound_projected_emoji_long, nullptr);
  EXPECT_EQ(unbound_projected_emoji_long->op(), Expression::Operation::kStartsWith);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_emoji_long->literals().size(), 1);
  EXPECT_EQ(
      std::get<std::string>(unbound_projected_emoji_long->literals().front().value()),
      "😜🧐🤔🤪🥳");
}
// Truncate[5] projection of StartsWith with a prefix mixing ASCII and multi-byte
// characters: "a😜b🧐c" (3 ASCII + 2 emoji = 5 code points) matches the width, so
// the projection is expected to become Eq with the literal unchanged.
TEST_F(TransformProjectTest, TruncateProjectStringStartsWithMixedAsciiAndMultiByte) {
  auto transform = Transform::Truncate(5);

  auto unbound_mixed_equal = Expressions::StartsWith("value", "a😜b🧐c");
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_mixed_equal,
      unbound_mixed_equal->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_mixed_equal =
      std::dynamic_pointer_cast<BoundPredicate>(bound_mixed_equal);
  ASSERT_NE(bound_pred_mixed_equal, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_mixed_equal,
                         transform->Project("part", bound_pred_mixed_equal));
  ASSERT_NE(projected_mixed_equal, nullptr);
  EXPECT_EQ(projected_mixed_equal->op(), Expression::Operation::kEq);

  auto unbound_projected_mixed_equal =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_mixed_equal));
  ASSERT_NE(unbound_projected_mixed_equal, nullptr);
  EXPECT_EQ(unbound_projected_mixed_equal->op(), Expression::Operation::kEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_mixed_equal->literals().size(), 1);
  EXPECT_EQ(
      std::get<std::string>(unbound_projected_mixed_equal->literals().front().value()),
      "a😜b🧐c");
}
// Truncate[5] projection of StartsWith with a Chinese prefix shorter than the
// width: "你好世界" (4 code points) is expected to stay StartsWith with the literal
// unchanged.
TEST_F(TransformProjectTest, TruncateProjectStringStartsWithChineseCharactersShort) {
  auto transform = Transform::Truncate(5);

  auto unbound_chinese_short = Expressions::StartsWith("value", "你好世界");
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_chinese_short,
      unbound_chinese_short->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_chinese_short =
      std::dynamic_pointer_cast<BoundPredicate>(bound_chinese_short);
  ASSERT_NE(bound_pred_chinese_short, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_chinese_short,
                         transform->Project("part", bound_pred_chinese_short));
  ASSERT_NE(projected_chinese_short, nullptr);
  EXPECT_EQ(projected_chinese_short->op(), Expression::Operation::kStartsWith);

  auto unbound_projected_chinese_short =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_chinese_short));
  ASSERT_NE(unbound_projected_chinese_short, nullptr);
  EXPECT_EQ(unbound_projected_chinese_short->op(), Expression::Operation::kStartsWith);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_chinese_short->literals().size(), 1);
  EXPECT_EQ(
      std::get<std::string>(unbound_projected_chinese_short->literals().front().value()),
      "你好世界");
}
// Truncate[5] projection of StartsWith with a Chinese prefix of exactly width code
// points: "你好世界好" (5 code points) is expected to become Eq with the literal
// unchanged.
TEST_F(TransformProjectTest, TruncateProjectStringStartsWithChineseCharactersEqualWidth) {
  auto transform = Transform::Truncate(5);

  auto unbound_chinese_equal = Expressions::StartsWith("value", "你好世界好");
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_chinese_equal,
      unbound_chinese_equal->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_chinese_equal =
      std::dynamic_pointer_cast<BoundPredicate>(bound_chinese_equal);
  ASSERT_NE(bound_pred_chinese_equal, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_chinese_equal,
                         transform->Project("part", bound_pred_chinese_equal));
  ASSERT_NE(projected_chinese_equal, nullptr);
  EXPECT_EQ(projected_chinese_equal->op(), Expression::Operation::kEq);

  auto unbound_projected_chinese_equal =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_chinese_equal));
  ASSERT_NE(unbound_projected_chinese_equal, nullptr);
  EXPECT_EQ(unbound_projected_chinese_equal->op(), Expression::Operation::kEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_chinese_equal->literals().size(), 1);
  EXPECT_EQ(
      std::get<std::string>(unbound_projected_chinese_equal->literals().front().value()),
      "你好世界好");
}
// Truncate[5] projection of NotStartsWith with a prefix of exactly width code
// points: "😜🧐🤔🤪🥳" is expected to become NotEq with the literal unchanged.
TEST_F(TransformProjectTest,
       TruncateProjectStringNotStartsWithCodePointCountEqualToWidth) {
  auto transform = Transform::Truncate(5);

  auto unbound_not_starts_equal = Expressions::NotStartsWith("value", "😜🧐🤔🤪🥳");
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_not_starts_equal,
      unbound_not_starts_equal->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_not_starts_equal =
      std::dynamic_pointer_cast<BoundPredicate>(bound_not_starts_equal);
  ASSERT_NE(bound_pred_not_starts_equal, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_not_starts_equal,
                         transform->Project("part", bound_pred_not_starts_equal));
  ASSERT_NE(projected_not_starts_equal, nullptr);
  EXPECT_EQ(projected_not_starts_equal->op(), Expression::Operation::kNotEq);

  auto unbound_projected_not_starts_equal =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_not_starts_equal));
  ASSERT_NE(unbound_projected_not_starts_equal, nullptr);
  EXPECT_EQ(unbound_projected_not_starts_equal->op(), Expression::Operation::kNotEq);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_not_starts_equal->literals().size(), 1);
  EXPECT_EQ(std::get<std::string>(
                unbound_projected_not_starts_equal->literals().front().value()),
            "😜🧐🤔🤪🥳");
}
// Truncate[5] projection of NotStartsWith with a prefix shorter than the width:
// "😜🧐" (2 code points) is expected to stay NotStartsWith with the literal
// unchanged.
TEST_F(TransformProjectTest,
       TruncateProjectStringNotStartsWithCodePointCountLessThanWidth) {
  auto transform = Transform::Truncate(5);

  auto unbound_not_starts_short = Expressions::NotStartsWith("value", "😜🧐");
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_not_starts_short,
      unbound_not_starts_short->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_not_starts_short =
      std::dynamic_pointer_cast<BoundPredicate>(bound_not_starts_short);
  ASSERT_NE(bound_pred_not_starts_short, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_not_starts_short,
                         transform->Project("part", bound_pred_not_starts_short));
  ASSERT_NE(projected_not_starts_short, nullptr);
  EXPECT_EQ(projected_not_starts_short->op(), Expression::Operation::kNotStartsWith);

  auto unbound_projected_not_starts_short =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_not_starts_short));
  ASSERT_NE(unbound_projected_not_starts_short, nullptr);
  EXPECT_EQ(unbound_projected_not_starts_short->op(),
            Expression::Operation::kNotStartsWith);
  // Fatal on mismatch: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_not_starts_short->literals().size(), 1);
  EXPECT_EQ(std::get<std::string>(
                unbound_projected_not_starts_short->literals().front().value()),
            "😜🧐");
}
// Truncate[5] projection of NotStartsWith with a prefix longer than the width:
// Project is expected to succeed but hand back a null predicate, i.e. no projection.
TEST_F(TransformProjectTest,
       TruncateProjectStringNotStartsWithCodePointCountGreaterThanWidth) {
  auto truncate = Transform::Truncate(5);

  auto not_starts_with =
      Expressions::NotStartsWith("value", "😜🧐🤔🤪🥳😵‍💫😂");
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_expr,
      not_starts_with->Bind(*string_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, truncate->Project("part", predicate));
  EXPECT_EQ(result, nullptr);
}
// Year projection of `value = 2021-06-01` on a date column: expects a non-null
// predicate whose operation is kEq.
TEST_F(TransformProjectTest, YearProjectEquality) {
  auto year = Transform::Year();

  // 2021-06-01 as produced by the temporal test helper.
  int32_t june_first_2021 =
      TemporalTestHelper::CreateDate({.year = 2021, .month = 6, .day = 1});
  auto equals = Expressions::Equal("value", Literal::Date(june_first_2021));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         equals->Bind(*date_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, year->Project("part", predicate));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(result->op(), Expression::Operation::kEq);
}
// Year projection of strict comparisons against 2021-06-01 on a date column:
// `<` is expected to project to `<=`, and `>` to `>=`.
TEST_F(TransformProjectTest, YearProjectComparison) {
  auto year = Transform::Year();
  int32_t june_first_2021 =
      TemporalTestHelper::CreateDate({.year = 2021, .month = 6, .day = 1});

  {
    // value < 2021-06-01  ->  kLtEq
    auto less_than = Expressions::LessThan("value", Literal::Date(june_first_2021));
    ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                           less_than->Bind(*date_schema_, /*case_sensitive=*/true));
    auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
    ASSERT_NE(predicate, nullptr);

    ICEBERG_UNWRAP_OR_FAIL(auto result, year->Project("part", predicate));
    ASSERT_NE(result, nullptr);
    EXPECT_EQ(result->op(), Expression::Operation::kLtEq);
  }

  {
    // value > 2021-06-01  ->  kGtEq
    auto greater_than =
        Expressions::GreaterThan("value", Literal::Date(june_first_2021));
    ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                           greater_than->Bind(*date_schema_, /*case_sensitive=*/true));
    auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
    ASSERT_NE(predicate, nullptr);

    ICEBERG_UNWRAP_OR_FAIL(auto result, year->Project("part", predicate));
    ASSERT_NE(result, nullptr);
    EXPECT_EQ(result->op(), Expression::Operation::kGtEq);
  }
}
// Month projection of an equality on a timestamp column (2021-06-01): expects a
// non-null predicate whose operation is kEq.
TEST_F(TransformProjectTest, MonthProjectEquality) {
  auto month = Transform::Month();

  int64_t june_first_2021 =
      TemporalTestHelper::CreateTimestamp({.year = 2021, .month = 6, .day = 1});
  auto equals = Expressions::Equal("value", Literal::Timestamp(june_first_2021));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         equals->Bind(*timestamp_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, month->Project("part", predicate));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(result->op(), Expression::Operation::kEq);
}
// Day projection of `value = 2021-06-15` on a date column: expects a non-null
// predicate whose operation is kEq.
TEST_F(TransformProjectTest, DayProjectEquality) {
  auto day = Transform::Day();

  int32_t mid_june_2021 =
      TemporalTestHelper::CreateDate({.year = 2021, .month = 6, .day = 15});
  auto equals = Expressions::Equal("value", Literal::Date(mid_june_2021));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         equals->Bind(*date_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, day->Project("part", predicate));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(result->op(), Expression::Operation::kEq);
}
// Hour projection of an equality on a timestamp column (2021-06-01 14:30): expects a
// non-null predicate whose operation is kEq.
TEST_F(TransformProjectTest, HourProjectEquality) {
  auto hour = Transform::Hour();

  int64_t afternoon_ts = TemporalTestHelper::CreateTimestamp(
      {.year = 2021, .month = 6, .day = 1, .hour = 14, .minute = 30});
  auto equals = Expressions::Equal("value", Literal::Timestamp(afternoon_ts));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         equals->Bind(*timestamp_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, hour->Project("part", predicate));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(result->op(), Expression::Operation::kEq);
}
// Void projection of `value = 100`: Project is expected to succeed but hand back a
// null predicate, i.e. no projection.
TEST_F(TransformProjectTest, VoidProjectReturnsNull) {
  auto void_transform = Transform::Void();

  auto equals = Expressions::Equal("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         equals->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, void_transform->Project("part", predicate));
  EXPECT_EQ(result, nullptr);
}
// Year projection of an IN predicate over three dates from different years: expects
// a non-null predicate whose operation is kIn.
TEST_F(TransformProjectTest, TemporalProjectInSet) {
  auto year = Transform::Year();

  int32_t first_day_2020 =
      TemporalTestHelper::CreateDate({.year = 2020, .month = 1, .day = 1});
  int32_t mid_june_2021 =
      TemporalTestHelper::CreateDate({.year = 2021, .month = 6, .day = 15});
  int32_t last_day_2022 =
      TemporalTestHelper::CreateDate({.year = 2022, .month = 12, .day = 31});

  auto in_set = Expressions::In("value", {Literal::Date(first_day_2020),
                                          Literal::Date(mid_june_2021),
                                          Literal::Date(last_day_2022)});
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         in_set->Bind(*date_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, year->Project("part", predicate));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(result->op(), Expression::Operation::kIn);
}
// Day projection of `value < 1970-01-01 00:00:00` (timestamp 0).
//
// The predicate implies value <= -1 microsecond, and day(-1 micros) is day -1
// (1969-12-31). Without the adjustment the projection would be `day <= -1`; with
// the adjustment (made for buggy writers) it is `day <= 0`, which is what this test
// expects.
TEST_F(TransformProjectTest, DayTimestampProjectionFix) {
  auto day = Transform::Day();

  auto before_epoch = Expressions::LessThan("value", Literal::Timestamp(0));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         before_epoch->Bind(*timestamp_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto result, day->Project("part", predicate));
  ASSERT_NE(result, nullptr);

  auto projected_pred =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(result));
  EXPECT_EQ(projected_pred->op(), Expression::Operation::kLtEq);
  // Fatal on mismatch so that front() is only reached with exactly one literal.
  ASSERT_EQ(projected_pred->literals().size(), 1);

  int32_t projected_day = std::get<int32_t>(projected_pred->literals().front().value());
  EXPECT_EQ(projected_day, 0)
      << "Expected projected value to be 0 (fix applied), but got " << projected_day;
}
// Fixture for the Transform::ProjectStrict tests. SetUp() builds one schema per
// source type, each holding a single required field "value" with id 1.
class TransformProjectStrictTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Builds a schema with a single required field "value" (id 1) of the given type.
    auto single_value_schema = [](std::shared_ptr<Type> type) {
      return std::make_shared<Schema>(
          std::vector<SchemaField>{
              SchemaField::MakeRequired(1, "value", std::move(type))},
          /*schema_id=*/0);
    };

    int_schema_ = single_value_schema(int32());
    long_schema_ = single_value_schema(int64());
    string_schema_ = single_value_schema(string());
    date_schema_ = single_value_schema(date());
    timestamp_schema_ = single_value_schema(timestamp());
    decimal_schema_ = single_value_schema(decimal(9, 2));
  }

  std::shared_ptr<Schema> int_schema_;
  std::shared_ptr<Schema> long_schema_;
  std::shared_ptr<Schema> string_schema_;
  std::shared_ptr<Schema> date_schema_;
  std::shared_ptr<Schema> timestamp_schema_;
  std::shared_ptr<Schema> decimal_schema_;
};
// Strict projection through the identity transform: `value == 100` on an int
// column is expected to come back as an equality predicate whose single
// literal is still 100.
TEST_F(TransformProjectStrictTest, IdentityStrictProjection) {
  auto transform = Transform::Identity();

  // Bind `value == 100` against the int schema to obtain a BoundPredicate.
  auto unbound = Expressions::Equal("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kEq);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kEq);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 100);
}
// Strict projection of an equality predicate through bucket[10].
// Despite the "ReturnsFalse" in the test name, what is asserted here is that
// ProjectStrict succeeds and hands back a null predicate (no strict projection).
TEST_F(TransformProjectStrictTest, BucketStrictEqualityReturnsFalse) {
  // Source predicate: value == 100 on the int schema.
  auto equals_100 = Expressions::Equal("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         equals_100->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto bucket_transform = Transform::Bucket(10);
  ICEBERG_UNWRAP_OR_FAIL(auto strict,
                         bucket_transform->ProjectStrict("part", predicate));
  EXPECT_EQ(strict, nullptr);
}
// Strict projection of `value != 100` through bucket[10]: the result is
// expected to be a not-equal predicate on the bucket ordinal of 100.
TEST_F(TransformProjectStrictTest, BucketStrictNotEqual) {
  auto transform = Transform::Bucket(10);

  // Bind `value != 100` against the int schema.
  auto unbound = Expressions::NotEqual("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kNotEq);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kNotEq);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  // bucket(100, 10) = 6
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 6);
}
// Strict projection of a comparison predicate (`value < 100`) through
// bucket[10] is expected to succeed with a null predicate.
TEST_F(TransformProjectStrictTest, BucketStrictComparisonReturnsNull) {
  // Source predicate: value < 100 on the int schema.
  auto less_than_100 = Expressions::LessThan("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         less_than_100->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto bucket_transform = Transform::Bucket(10);
  ICEBERG_UNWRAP_OR_FAIL(auto strict,
                         bucket_transform->ProjectStrict("part", predicate));
  EXPECT_EQ(strict, nullptr);
}
// Strict projection of `value NOT IN (99, 100, 101)` through bucket[10] is
// expected to produce a non-null NOT_IN predicate.
TEST_F(TransformProjectStrictTest, BucketStrictNotIn) {
  // Source predicate: value NOT IN (99, 100, 101) on the int schema.
  auto excluded_values = Expressions::NotIn(
      "value", {Literal::Int(99), Literal::Int(100), Literal::Int(101)});
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_expr, excluded_values->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto bucket_transform = Transform::Bucket(10);
  ICEBERG_UNWRAP_OR_FAIL(auto strict,
                         bucket_transform->ProjectStrict("part", predicate));
  ASSERT_NE(strict, nullptr);
  EXPECT_EQ(strict->op(), Expression::Operation::kNotIn);
}
// Strict projection of `value IN (99, 100, 101)` through bucket[10] is
// expected to succeed with a null predicate (membership cannot be guaranteed).
TEST_F(TransformProjectStrictTest, BucketStrictInReturnsNull) {
  // Source predicate: value IN (99, 100, 101) on the int schema.
  auto included_values =
      Expressions::In("value", {Literal::Int(99), Literal::Int(100), Literal::Int(101)});
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_expr, included_values->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto bucket_transform = Transform::Bucket(10);
  ICEBERG_UNWRAP_OR_FAIL(auto strict,
                         bucket_transform->ProjectStrict("part", predicate));
  EXPECT_EQ(strict, nullptr);
}
// Strict projection through bucket[10] on a string column:
// `value != "abcdefg"` is expected to project to a non-null not-equal predicate.
TEST_F(TransformProjectStrictTest, BucketStrictString) {
  // Source predicate: value != "abcdefg" on the string schema.
  auto not_abcdefg = Expressions::NotEqual("value", Literal::String("abcdefg"));
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_expr, not_abcdefg->Bind(*string_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto bucket_transform = Transform::Bucket(10);
  ICEBERG_UNWRAP_OR_FAIL(auto strict,
                         bucket_transform->ProjectStrict("part", predicate));
  ASSERT_NE(strict, nullptr);
  EXPECT_EQ(strict->op(), Expression::Operation::kNotEq);
}
// Strict projection of `value == 123` through truncate[10] is expected to
// succeed with a null predicate (equality cannot be guaranteed).
TEST_F(TransformProjectStrictTest, TruncateStrictIntEqualityReturnsNull) {
  // Source predicate: value == 123 on the int schema.
  auto equals_123 = Expressions::Equal("value", Literal::Int(123));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         equals_123->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto truncate_transform = Transform::Truncate(10);
  ICEBERG_UNWRAP_OR_FAIL(auto strict,
                         truncate_transform->ProjectStrict("part", predicate));
  EXPECT_EQ(strict, nullptr);
}
// Strict projection of `value < 100` through truncate[10]: expected to stay a
// less-than predicate with literal 100.
TEST_F(TransformProjectStrictTest, TruncateStrictIntLessThan) {
  auto transform = Transform::Truncate(10);

  // Bind `value < 100` against the int schema.
  auto unbound = Expressions::LessThan("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kLt);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kLt);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 100);
}
// Strict projection of `value <= 100` through truncate[10]: expected to become
// a strict less-than predicate with literal 100.
TEST_F(TransformProjectStrictTest, TruncateStrictIntLessThanOrEqual) {
  auto transform = Transform::Truncate(10);

  // Bind `value <= 100` against the int schema.
  auto unbound = Expressions::LessThanOrEqual("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kLt);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kLt);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 100);
}
// Strict projection of `value > 100` through truncate[10]: expected to stay a
// greater-than predicate with literal 100.
TEST_F(TransformProjectStrictTest, TruncateStrictIntGreaterThan) {
  auto transform = Transform::Truncate(10);

  // Bind `value > 100` against the int schema.
  auto unbound = Expressions::GreaterThan("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kGt);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kGt);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 100);
}
// Strict projection of `value >= 100` through truncate[10], where 100 sits
// exactly on a truncation boundary: expected to become a strict greater-than
// predicate with literal 90.
TEST_F(TransformProjectStrictTest, TruncateStrictIntGreaterThanOrEqualLowerBound) {
  auto transform = Transform::Truncate(10);

  // Bind `value >= 100` against the int schema.
  auto unbound = Expressions::GreaterThanOrEqual("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kGt);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kGt);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  // With value 100 and width 10 the expected GT bound is 90, one truncation
  // step below 100 (presumably truncate(100 - 1) -- confirm in the projection
  // implementation).
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 90);
}
// Strict projection of `value >= 99` through truncate[10], where 99 is the
// last value before the next truncation boundary: expected to become a strict
// greater-than predicate with literal 90.
TEST_F(TransformProjectStrictTest, TruncateStrictIntGreaterThanOrEqualUpperBound) {
  auto transform = Transform::Truncate(10);

  // Bind `value >= 99` against the int schema.
  auto unbound = Expressions::GreaterThanOrEqual("value", Literal::Int(99));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kGt);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kGt);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  // With value 99 and width 10 the expected GT bound is 90.
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 90);
}
// Strict projection of `value != 100` through truncate[10]: expected to stay a
// not-equal predicate with literal 100.
TEST_F(TransformProjectStrictTest, TruncateStrictIntNotEqual) {
  auto transform = Transform::Truncate(10);

  // Bind `value != 100` against the int schema.
  auto unbound = Expressions::NotEqual("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*int_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kNotEq);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kNotEq);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()), 100);
}
// Strict projection of `value NOT IN (99, 100, 101)` through truncate[10] is
// expected to produce a non-null NOT_IN predicate.
TEST_F(TransformProjectStrictTest, TruncateStrictIntNotIn) {
  // Source predicate: value NOT IN (99, 100, 101) on the int schema.
  auto excluded_values = Expressions::NotIn(
      "value", {Literal::Int(99), Literal::Int(100), Literal::Int(101)});
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_expr, excluded_values->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto truncate_transform = Transform::Truncate(10);
  ICEBERG_UNWRAP_OR_FAIL(auto strict,
                         truncate_transform->ProjectStrict("part", predicate));
  ASSERT_NE(strict, nullptr);
  EXPECT_EQ(strict->op(), Expression::Operation::kNotIn);
}
// Strict projection of `value < "abcdefg"` through truncate[5] on a string
// column: expected to stay a less-than predicate whose literal is the
// 5-character prefix "abcde".
TEST_F(TransformProjectStrictTest, TruncateStrictString) {
  auto transform = Transform::Truncate(5);

  // Bind `value < "abcdefg"` against the string schema.
  auto unbound_lt = Expressions::LessThan("value", Literal::String("abcdefg"));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt,
                         unbound_lt->Bind(*string_schema_, /*case_sensitive=*/true));
  auto bound_pred_lt = std::dynamic_pointer_cast<BoundPredicate>(bound_lt);
  ASSERT_NE(bound_pred_lt, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected_lt,
                         transform->ProjectStrict("part", bound_pred_lt));
  ASSERT_NE(projected_lt, nullptr);
  EXPECT_EQ(projected_lt->op(), Expression::Operation::kLt);

  auto unbound_projected_lt =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected_lt));
  EXPECT_EQ(unbound_projected_lt->op(), Expression::Operation::kLt);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected_lt->literals().size(), 1);
  EXPECT_EQ(std::get<std::string>(unbound_projected_lt->literals().front().value()),
            "abcde");
}
// Strict projection of `value == 2021-06-01` through the year transform is
// expected to succeed with a null predicate (equality cannot be guaranteed).
TEST_F(TransformProjectStrictTest, YearStrictEqualityReturnsNull) {
  // Source predicate: value == 2021-06-01 on the date schema.
  int32_t june_first_2021 =
      TemporalTestHelper::CreateDate({.year = 2021, .month = 6, .day = 1});
  auto equals_date = Expressions::Equal("value", Literal::Date(june_first_2021));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         equals_date->Bind(*date_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto year_transform = Transform::Year();
  ICEBERG_UNWRAP_OR_FAIL(auto strict, year_transform->ProjectStrict("part", predicate));
  EXPECT_EQ(strict, nullptr);
}
// Strict projection of `value < 2021-01-01` through the year transform:
// expected to stay a less-than predicate on year ordinal 2021 - 1970.
TEST_F(TransformProjectStrictTest, YearStrictLessThan) {
  auto transform = Transform::Year();

  // Bind `value < 2021-01-01` against the date schema.
  int32_t date_value =
      TemporalTestHelper::CreateDate({.year = 2021, .month = 1, .day = 1});
  auto unbound = Expressions::LessThan("value", Literal::Date(date_value));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*date_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kLt);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kLt);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()),
            2021 - 1970);
}
// Strict projection of `value >= 2021-01-01` (first day of the year) through
// the year transform: expected to become a strict greater-than predicate on
// the previous year's ordinal, 2020 - 1970.
TEST_F(TransformProjectStrictTest, YearStrictGreaterThanOrEqual) {
  auto transform = Transform::Year();

  // Bind `value >= 2021-01-01` against the date schema.
  int32_t date_value =
      TemporalTestHelper::CreateDate({.year = 2021, .month = 1, .day = 1});
  auto unbound = Expressions::GreaterThanOrEqual("value", Literal::Date(date_value));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*date_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kGt);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kGt);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()),
            2020 - 1970);
}
// Strict projection of `value != 2021-01-01` through the year transform:
// expected to stay a not-equal predicate on year ordinal 2021 - 1970.
TEST_F(TransformProjectStrictTest, YearStrictNotEqual) {
  auto transform = Transform::Year();

  // Bind `value != 2021-01-01` against the date schema.
  int32_t date_value =
      TemporalTestHelper::CreateDate({.year = 2021, .month = 1, .day = 1});
  auto unbound = Expressions::NotEqual("value", Literal::Date(date_value));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*date_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kNotEq);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kNotEq);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()),
            2021 - 1970);
}
// Strict projection of `value < 2017-12-01` (timestamp) through the month
// transform is expected to produce a non-null less-than predicate.
TEST_F(TransformProjectStrictTest, MonthStrictLessThan) {
  // Source predicate: value < 2017-12-01 on the timestamp schema.
  int64_t december_2017 =
      TemporalTestHelper::CreateTimestamp({.year = 2017, .month = 12, .day = 1});
  auto before_cutoff = Expressions::LessThan("value", Literal::Timestamp(december_2017));
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_expr, before_cutoff->Bind(*timestamp_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto month_transform = Transform::Month();
  ICEBERG_UNWRAP_OR_FAIL(auto strict, month_transform->ProjectStrict("part", predicate));
  ASSERT_NE(strict, nullptr);
  EXPECT_EQ(strict->op(), Expression::Operation::kLt);
}
// Strict projection of `value < 2017-12-01` (timestamp) through the day
// transform is expected to produce a non-null less-than predicate.
TEST_F(TransformProjectStrictTest, DayStrictLessThan) {
  // Source predicate: value < 2017-12-01 on the timestamp schema.
  int64_t december_2017 =
      TemporalTestHelper::CreateTimestamp({.year = 2017, .month = 12, .day = 1});
  auto before_cutoff = Expressions::LessThan("value", Literal::Timestamp(december_2017));
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_expr, before_cutoff->Bind(*timestamp_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto day_transform = Transform::Day();
  ICEBERG_UNWRAP_OR_FAIL(auto strict, day_transform->ProjectStrict("part", predicate));
  ASSERT_NE(strict, nullptr);
  EXPECT_EQ(strict->op(), Expression::Operation::kLt);
}
// Strict projection of `value < 2017-12-01 10:00` (timestamp) through the hour
// transform is expected to produce a non-null less-than predicate.
TEST_F(TransformProjectStrictTest, HourStrictLessThan) {
  // Source predicate: value < 2017-12-01 10:00 on the timestamp schema.
  int64_t ten_oclock = TemporalTestHelper::CreateTimestamp(
      {.year = 2017, .month = 12, .day = 1, .hour = 10, .minute = 0});
  auto before_cutoff = Expressions::LessThan("value", Literal::Timestamp(ten_oclock));
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_expr, before_cutoff->Bind(*timestamp_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto hour_transform = Transform::Hour();
  ICEBERG_UNWRAP_OR_FAIL(auto strict, hour_transform->ProjectStrict("part", predicate));
  ASSERT_NE(strict, nullptr);
  EXPECT_EQ(strict->op(), Expression::Operation::kLt);
}
// Strict projection of `value < 0` (timestamp literal at the epoch) through
// the day transform is expected to produce a non-null less-than predicate.
TEST_F(TransformProjectStrictTest, DayStrictEpoch) {
  // Source predicate: value < timestamp 0 on the timestamp schema.
  auto before_epoch = Expressions::LessThan("value", Literal::Timestamp(0));
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_expr, before_epoch->Bind(*timestamp_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto day_transform = Transform::Day();
  ICEBERG_UNWRAP_OR_FAIL(auto strict, day_transform->ProjectStrict("part", predicate));
  ASSERT_NE(strict, nullptr);
  EXPECT_EQ(strict->op(), Expression::Operation::kLt);
}
// Strict projection of `value != 1969-01-01` (a pre-epoch timestamp) through
// the month transform. The test accepts either a not-equal or a NOT_IN result,
// since for negative dates NOT_EQ may be converted to NOT_IN.
TEST_F(TransformProjectStrictTest, MonthStrictNotEqualNegative) {
  // Source predicate: value != 1969-01-01 on the timestamp schema.
  int64_t january_1969 =
      TemporalTestHelper::CreateTimestamp({.year = 1969, .month = 1, .day = 1});
  auto not_that_instant = Expressions::NotEqual("value", Literal::Timestamp(january_1969));
  ICEBERG_UNWRAP_OR_FAIL(
      auto bound_expr,
      not_that_instant->Bind(*timestamp_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto month_transform = Transform::Month();
  ICEBERG_UNWRAP_OR_FAIL(auto strict, month_transform->ProjectStrict("part", predicate));
  ASSERT_NE(strict, nullptr);

  // Either operation is acceptable for a negative month ordinal.
  const auto projected_op = strict->op();
  EXPECT_TRUE(projected_op == Expression::Operation::kNotEq ||
              projected_op == Expression::Operation::kNotIn);
}
// Strict projection of `value <= 2017-12-31` (last day of the year) through
// the year transform: expected to become a strict less-than predicate on the
// following year's ordinal, 2018 - 1970.
TEST_F(TransformProjectStrictTest, YearStrictUpperBound) {
  auto transform = Transform::Year();

  // Bind `value <= 2017-12-31` against the date schema.
  int32_t date_value =
      TemporalTestHelper::CreateDate({.year = 2017, .month = 12, .day = 31});
  auto unbound = Expressions::LessThanOrEqual("value", Literal::Date(date_value));
  ICEBERG_UNWRAP_OR_FAIL(auto bound,
                         unbound->Bind(*date_schema_, /*case_sensitive=*/true));
  auto bound_pred = std::dynamic_pointer_cast<BoundPredicate>(bound);
  ASSERT_NE(bound_pred, nullptr);

  ICEBERG_UNWRAP_OR_FAIL(auto projected, transform->ProjectStrict("part", bound_pred));
  ASSERT_NE(projected, nullptr);
  EXPECT_EQ(projected->op(), Expression::Operation::kLt);

  auto unbound_projected =
      internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
          std::move(projected));
  EXPECT_EQ(unbound_projected->op(), Expression::Operation::kLt);
  // Fatal check: front() below must never run on an empty literal list.
  ASSERT_EQ(unbound_projected->literals().size(), 1);
  EXPECT_EQ(std::get<int32_t>(unbound_projected->literals().front().value()),
            2018 - 1970);
}
// Strict projection through the void transform is expected to succeed with a
// null predicate; exercised here with `value == 100` on an int column.
TEST_F(TransformProjectStrictTest, VoidStrictReturnsNull) {
  // Source predicate: value == 100 on the int schema.
  auto equals_100 = Expressions::Equal("value", Literal::Int(100));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_expr,
                         equals_100->Bind(*int_schema_, /*case_sensitive=*/true));
  auto predicate = std::dynamic_pointer_cast<BoundPredicate>(bound_expr);
  ASSERT_NE(predicate, nullptr);

  auto void_transform = Transform::Void();
  ICEBERG_UNWRAP_OR_FAIL(auto strict, void_transform->ProjectStrict("part", predicate));
  EXPECT_EQ(strict, nullptr);
}
} // namespace iceberg