blob: 475a18da6a93dd48932afe38affbebee1cbe0ac0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "iceberg/expression/projections.h"

#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
#include <variant>
#include <vector>

#include <gtest/gtest.h>

#include "iceberg/expression/expressions.h"
#include "iceberg/expression/predicate.h"
#include "iceberg/partition_field.h"
#include "iceberg/partition_spec.h"
#include "iceberg/schema.h"
#include "iceberg/schema_field.h"
#include "iceberg/test/matchers.h"
#include "iceberg/test/temporal_test_helper.h"
#include "iceberg/transform.h"
#include "iceberg/type.h"
#include "iceberg/util/checked_cast.h"
namespace iceberg {
// Fixture for the identity-projection tests.
//
// SetUp() builds a schema (schema id 0) holding exactly one column: an
// optional int64 field named "id" with field id 16.
class ProjectionsTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // The only column the identity tests need.
    const SchemaField id_field = SchemaField::MakeOptional(16, "id", int64());
    schema_ = std::make_shared<Schema>(std::vector<SchemaField>{id_field},
                                       /*schema_id=*/0);
  }

  // Schema shared by every test using this fixture; rebuilt before each test.
  std::shared_ptr<Schema> schema_;
};
// Returns `expr` viewed as an UnboundPredicate.
//
// Yields nullptr when `expr` does not report itself as an unbound predicate
// (or when the dynamic cast fails). `expr` itself must not be null: it is
// dereferenced unconditionally.
std::shared_ptr<UnboundPredicate> ExtractUnboundPredicate(
    const std::shared_ptr<Expression>& expr) {
  if (!expr->is_unbound_predicate()) {
    return nullptr;
  }
  return std::dynamic_pointer_cast<UnboundPredicate>(expr);
}
// Returns `expr` viewed as a BoundPredicate.
//
// Yields nullptr when `expr` does not report itself as a bound predicate (or
// when the dynamic cast fails). `expr` itself must not be null: it is
// dereferenced unconditionally.
std::shared_ptr<BoundPredicate> ExtractBoundPredicate(
    const std::shared_ptr<Expression>& expr) {
  if (!expr->is_bound_predicate()) {
    return nullptr;
  }
  return std::dynamic_pointer_cast<BoundPredicate>(expr);
}
// Helper function to assert projection operation
void AssertProjectionOperation(const std::shared_ptr<Expression>& projection,
Expression::Operation expected_op) {
ASSERT_NE(projection, nullptr);
EXPECT_EQ(projection->op(), expected_op);
}
// Helper function to assert projection value for True/False
void AssertProjectionValue(const std::shared_ptr<Expression>& projection,
Expression::Operation expected_op) {
ASSERT_NE(projection, nullptr);
EXPECT_EQ(projection->op(), expected_op);
}
// Inclusive projection through an identity partition on "id": each projected
// predicate must keep the bound predicate's operation, reference "id", and
// (for literal predicates) carry the same literal.
TEST_F(ProjectionsTest, IdentityProjectionInclusive) {
  auto transform = Transform::Identity();
  PartitionField id_partition(16, 1000, "id", transform);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {id_partition}));

  std::vector<std::shared_ptr<UnboundPredicate>> cases = {
      Expressions::NotNull("id"),
      Expressions::IsNull("id"),
      Expressions::LessThan("id", Literal::Long(100)),
      Expressions::LessThanOrEqual("id", Literal::Long(101)),
      Expressions::GreaterThan("id", Literal::Long(102)),
      Expressions::GreaterThanOrEqual("id", Literal::Long(103)),
      Expressions::Equal("id", Literal::Long(104)),
      Expressions::NotEqual("id", Literal::Long(105)),
  };

  for (const auto& unbound : cases) {
    // Bind against the fixture schema (case sensitive).
    ICEBERG_UNWRAP_OR_FAIL(auto bound_expr, unbound->Bind(*schema_, true));
    auto bound_predicate = ExtractBoundPredicate(bound_expr);
    ASSERT_NE(bound_predicate, nullptr);

    // Run the inclusive projection on the bound expression.
    auto projector = Projections::Inclusive(*partition_spec, *schema_, true);
    ICEBERG_UNWRAP_OR_FAIL(auto projected_expr, projector->Project(bound_expr));

    // The result has to be an (unbound) predicate again.
    auto projected_predicate = ExtractUnboundPredicate(projected_expr);
    ASSERT_NE(projected_predicate, nullptr);

    // Same operation, same column.
    EXPECT_EQ(projected_predicate->op(), bound_predicate->op());
    EXPECT_EQ(projected_predicate->reference()->name(), "id");

    // Only literal predicates have a literal to compare.
    if (bound_predicate->kind() != BoundPredicate::Kind::kLiteral) {
      continue;
    }
    const auto projected_literal =
        internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
            projected_predicate);
    const auto bound_literal =
        internal::checked_pointer_cast<BoundLiteralPredicate>(bound_predicate);
    EXPECT_EQ(projected_literal->literals().front(), bound_literal->literal());
  }
}
// Strict projection through an identity partition on "id": each projected
// predicate must keep the bound predicate's operation, reference "id", and
// (for literal predicates) carry the same literal.
TEST_F(ProjectionsTest, IdentityProjectionStrict) {
  auto transform = Transform::Identity();
  PartitionField id_partition(16, 1000, "id", transform);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {id_partition}));

  std::vector<std::shared_ptr<UnboundPredicate>> cases = {
      Expressions::NotNull("id"),
      Expressions::IsNull("id"),
      Expressions::LessThan("id", Literal::Long(100)),
      Expressions::LessThanOrEqual("id", Literal::Long(101)),
      Expressions::GreaterThan("id", Literal::Long(102)),
      Expressions::GreaterThanOrEqual("id", Literal::Long(103)),
      Expressions::Equal("id", Literal::Long(104)),
      Expressions::NotEqual("id", Literal::Long(105)),
  };

  for (const auto& unbound : cases) {
    // Bind against the fixture schema (case sensitive).
    ICEBERG_UNWRAP_OR_FAIL(auto bound_expr, unbound->Bind(*schema_, true));
    auto bound_predicate = ExtractBoundPredicate(bound_expr);
    ASSERT_NE(bound_predicate, nullptr);

    // Run the strict projection on the bound expression.
    auto projector = Projections::Strict(*partition_spec, *schema_, true);
    ICEBERG_UNWRAP_OR_FAIL(auto projected_expr, projector->Project(bound_expr));

    // The result has to be an (unbound) predicate again.
    auto projected_predicate = ExtractUnboundPredicate(projected_expr);
    ASSERT_NE(projected_predicate, nullptr);

    // Same operation, same column.
    EXPECT_EQ(projected_predicate->op(), bound_predicate->op());
    EXPECT_EQ(projected_predicate->reference()->name(), "id");

    // Only literal predicates have a literal to compare.
    if (bound_predicate->kind() != BoundPredicate::Kind::kLiteral) {
      continue;
    }
    const auto projected_literal =
        internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
            projected_predicate);
    const auto bound_literal =
        internal::checked_pointer_cast<BoundLiteralPredicate>(bound_predicate);
    EXPECT_EQ(projected_literal->literals().front(), bound_literal->literal());
  }
}
// Predicates written against "ID" are bound and projected case-insensitively;
// the projected predicate must reference the schema's spelling, "id", and
// otherwise mirror the bound predicate (operation and literal).
TEST_F(ProjectionsTest, CaseInsensitiveIdentityProjection) {
  auto transform = Transform::Identity();
  PartitionField id_partition(16, 1000, "id", transform);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {id_partition}));

  std::vector<std::shared_ptr<UnboundPredicate>> cases = {
      Expressions::NotNull("ID"),
      Expressions::IsNull("ID"),
      Expressions::LessThan("ID", Literal::Long(100)),
      Expressions::LessThanOrEqual("ID", Literal::Long(101)),
      Expressions::GreaterThan("ID", Literal::Long(102)),
      Expressions::GreaterThanOrEqual("ID", Literal::Long(103)),
      Expressions::Equal("ID", Literal::Long(104)),
      Expressions::NotEqual("ID", Literal::Long(105)),
  };

  for (const auto& unbound : cases) {
    // Case-insensitive bind: "ID" has to resolve to the "id" column.
    ICEBERG_UNWRAP_OR_FAIL(auto bound_expr, unbound->Bind(*schema_, false));
    auto bound_predicate = ExtractBoundPredicate(bound_expr);
    ASSERT_NE(bound_predicate, nullptr);

    // Case-insensitive inclusive projection.
    auto projector = Projections::Inclusive(*partition_spec, *schema_, false);
    ICEBERG_UNWRAP_OR_FAIL(auto projected_expr, projector->Project(bound_expr));

    // The result has to be an (unbound) predicate again.
    auto projected_predicate = ExtractUnboundPredicate(projected_expr);
    ASSERT_NE(projected_predicate, nullptr);

    // Same operation; the reference uses the lower-case name.
    EXPECT_EQ(projected_predicate->op(), bound_predicate->op());
    EXPECT_EQ(projected_predicate->reference()->name(), "id");

    // Only literal predicates have a literal to compare.
    if (bound_predicate->kind() != BoundPredicate::Kind::kLiteral) {
      continue;
    }
    const auto projected_literal =
        internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
            projected_predicate);
    const auto bound_literal =
        internal::checked_pointer_cast<BoundLiteralPredicate>(bound_predicate);
    EXPECT_EQ(projected_literal->literals().front(), bound_literal->literal());
  }
}
// With case-sensitive binding, "ID" must not resolve against the "id" column:
// Bind() is expected to report ErrorKind::kInvalidExpression.
TEST_F(ProjectionsTest, CaseSensitiveIdentityProjectionFailure) {
  // The spec is built for parity with the other identity tests; it is not
  // consulted below because binding already fails before any projection.
  auto transform = Transform::Identity();
  PartitionField id_partition(16, 1000, "id", transform);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {id_partition}));

  auto upper_case_predicate = Expressions::NotNull("ID");

  // Case-sensitive bind of the upper-case name.
  auto bind_outcome = upper_case_predicate->Bind(*schema_, true);
  EXPECT_THAT(bind_outcome, IsError(ErrorKind::kInvalidExpression));
}
// Bucketing projection tests
class BucketingProjectionTest : public ::testing::Test {
protected:
void AssertProjectionStrict(const PartitionSpec& spec, const Schema& schema,
const std::shared_ptr<Expression>& filter,
Expression::Operation expected_op,
const std::string& expected_literal) {
auto evaluator = Projections::Strict(spec, schema, true);
ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));
AssertProjectionOperation(projection, expected_op);
if (expected_op != Expression::Operation::kFalse) {
auto predicate = ExtractUnboundPredicate(projection);
ASSERT_NE(predicate, nullptr);
if (predicate->op() == Expression::Operation::kNotIn) {
// For NOT_IN, check literals
const auto& literal_predicate =
internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
predicate);
auto literals = literal_predicate->literals();
std::vector<std::string> values;
for (const auto& lit : literals) {
values.push_back(std::to_string(std::get<int32_t>(lit.value())));
}
std::ranges::sort(values);
std::string actual = "[";
for (size_t i = 0; i < values.size(); ++i) {
if (i > 0) actual += ", ";
actual += values[i];
}
actual += "]";
EXPECT_EQ(actual, expected_literal);
} else {
// For other operations, check single literal
const auto& literal_predicate =
internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
predicate);
auto literal = literal_predicate->literals().front();
std::string output = std::to_string(std::get<int32_t>(literal.value()));
EXPECT_EQ(output, expected_literal);
}
}
}
void AssertProjectionStrictValue(const PartitionSpec& spec, const Schema& schema,
const std::shared_ptr<Expression>& filter,
Expression::Operation expected_op) {
auto evaluator = Projections::Strict(spec, schema, true);
ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));
AssertProjectionValue(projection, expected_op);
}
void AssertProjectionInclusive(const PartitionSpec& spec, const Schema& schema,
const std::shared_ptr<Expression>& filter,
Expression::Operation expected_op,
const std::string& expected_literal) {
auto evaluator = Projections::Inclusive(spec, schema, true);
ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));
AssertProjectionOperation(projection, expected_op);
if (expected_op != Expression::Operation::kTrue) {
auto predicate = ExtractUnboundPredicate(projection);
ASSERT_NE(predicate, nullptr);
if (predicate->op() == Expression::Operation::kIn) {
// For IN, check literals
const auto& literal_predicate =
internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
predicate);
auto literals = literal_predicate->literals();
std::vector<std::string> values;
for (const auto& lit : literals) {
values.push_back(std::to_string(std::get<int32_t>(lit.value())));
}
std::ranges::sort(values);
std::string actual = "[";
for (size_t i = 0; i < values.size(); ++i) {
if (i > 0) actual += ", ";
actual += values[i];
}
actual += "]";
EXPECT_EQ(actual, expected_literal);
} else {
// For other operations, check single literal
const auto& literal_predicate =
internal::checked_pointer_cast<UnboundPredicateImpl<BoundReference>>(
predicate);
auto literal = literal_predicate->literals().front();
std::string output = std::to_string(std::get<int32_t>(literal.value()));
EXPECT_EQ(output, expected_literal);
}
}
}
void AssertProjectionInclusiveValue(const PartitionSpec& spec, const Schema& schema,
const std::shared_ptr<Expression>& filter,
Expression::Operation expected_op) {
auto evaluator = Projections::Inclusive(spec, schema, true);
ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));
AssertProjectionValue(projection, expected_op);
}
};
// Strict projection of int32 comparisons through bucket[10] on "value":
// != keeps a predicate on the bucket; every other comparison becomes kFalse.
TEST_F(BucketingProjectionTest, BucketIntegerStrict) {
  const int32_t probe = 100;
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", int32())}, 0);
  auto bucket10 = Transform::Bucket(10);
  PartitionField bucket_field(1, 1000, "value_bucket", bucket10);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {bucket_field}));

  // Build each comparison and bind it to the schema (case sensitive).
  auto ne = Expressions::NotEqual("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_ne, ne->Bind(*table_schema, true));
  auto eq = Expressions::Equal("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));
  auto lt = Expressions::LessThan("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto lt_eq = Expressions::LessThanOrEqual("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt_eq, lt_eq->Bind(*table_schema, true));
  auto gt = Expressions::GreaterThan("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_gt, gt->Bind(*table_schema, true));
  auto gt_eq = Expressions::GreaterThanOrEqual("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_gt_eq, gt_eq->Bind(*table_schema, true));

  // 100 falls into bucket 6 of 10.
  AssertProjectionStrict(*partition_spec, *table_schema, bound_ne,
                         Expression::Operation::kNotEq, "6");
  AssertProjectionStrictValue(*partition_spec, *table_schema, bound_eq,
                              Expression::Operation::kFalse);
  AssertProjectionStrictValue(*partition_spec, *table_schema, bound_lt,
                              Expression::Operation::kFalse);
  AssertProjectionStrictValue(*partition_spec, *table_schema, bound_lt_eq,
                              Expression::Operation::kFalse);
  AssertProjectionStrictValue(*partition_spec, *table_schema, bound_gt,
                              Expression::Operation::kFalse);
  AssertProjectionStrictValue(*partition_spec, *table_schema, bound_gt_eq,
                              Expression::Operation::kFalse);
}
// Inclusive projection of int32 comparisons through bucket[10] on "value":
// == keeps a predicate on the bucket; every other comparison becomes kTrue.
TEST_F(BucketingProjectionTest, BucketIntegerInclusive) {
  const int32_t probe = 100;
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", int32())}, 0);
  auto bucket10 = Transform::Bucket(10);
  PartitionField bucket_field(1, 1000, "value_bucket", bucket10);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {bucket_field}));

  // Build each comparison and bind it to the schema (case sensitive).
  auto eq = Expressions::Equal("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));
  auto ne = Expressions::NotEqual("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_ne, ne->Bind(*table_schema, true));
  auto lt = Expressions::LessThan("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto lt_eq = Expressions::LessThanOrEqual("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt_eq, lt_eq->Bind(*table_schema, true));
  auto gt = Expressions::GreaterThan("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_gt, gt->Bind(*table_schema, true));
  auto gt_eq = Expressions::GreaterThanOrEqual("value", Literal::Int(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_gt_eq, gt_eq->Bind(*table_schema, true));

  // 100 falls into bucket 6 of 10.
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_eq,
                            Expression::Operation::kEq, "6");
  AssertProjectionInclusiveValue(*partition_spec, *table_schema, bound_ne,
                                 Expression::Operation::kTrue);
  AssertProjectionInclusiveValue(*partition_spec, *table_schema, bound_lt,
                                 Expression::Operation::kTrue);
  AssertProjectionInclusiveValue(*partition_spec, *table_schema, bound_lt_eq,
                                 Expression::Operation::kTrue);
  AssertProjectionInclusiveValue(*partition_spec, *table_schema, bound_gt,
                                 Expression::Operation::kTrue);
  AssertProjectionInclusiveValue(*partition_spec, *table_schema, bound_gt_eq,
                                 Expression::Operation::kTrue);
}
// Strict projection of int64 predicates through bucket[10] on "value":
// != keeps a predicate on the bucket, == becomes kFalse.
TEST_F(BucketingProjectionTest, BucketLongStrict) {
  const int64_t probe = 100L;
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", int64())}, 0);
  auto bucket10 = Transform::Bucket(10);
  PartitionField bucket_field(1, 1000, "value_bucket", bucket10);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {bucket_field}));

  // Bind both predicates to the schema (case sensitive).
  auto ne = Expressions::NotEqual("value", Literal::Long(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_ne, ne->Bind(*table_schema, true));
  auto eq = Expressions::Equal("value", Literal::Long(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  // 100 falls into bucket 6 of 10.
  AssertProjectionStrict(*partition_spec, *table_schema, bound_ne,
                         Expression::Operation::kNotEq, "6");
  AssertProjectionStrictValue(*partition_spec, *table_schema, bound_eq,
                              Expression::Operation::kFalse);
}
// Inclusive projection of int64 predicates through bucket[10] on "value":
// == keeps a predicate on the bucket, != becomes kTrue.
TEST_F(BucketingProjectionTest, BucketLongInclusive) {
  const int64_t probe = 100L;
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", int64())}, 0);
  auto bucket10 = Transform::Bucket(10);
  PartitionField bucket_field(1, 1000, "value_bucket", bucket10);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {bucket_field}));

  // Bind both predicates to the schema (case sensitive).
  auto eq = Expressions::Equal("value", Literal::Long(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));
  auto ne = Expressions::NotEqual("value", Literal::Long(probe));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_ne, ne->Bind(*table_schema, true));

  // 100 falls into bucket 6 of 10.
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_eq,
                            Expression::Operation::kEq, "6");
  AssertProjectionInclusiveValue(*partition_spec, *table_schema, bound_ne,
                                 Expression::Operation::kTrue);
}
// Strict projection of string predicates through bucket[10] on "value":
// != keeps a predicate on the bucket, == becomes kFalse.
TEST_F(BucketingProjectionTest, BucketStringStrict) {
  std::string text = "abcdefg";
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", string())}, 0);
  auto bucket10 = Transform::Bucket(10);
  PartitionField bucket_field(1, 1000, "value_bucket", bucket10);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {bucket_field}));

  // Bind both predicates to the schema (case sensitive).
  auto ne = Expressions::NotEqual("value", Literal::String(text));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_ne, ne->Bind(*table_schema, true));
  auto eq = Expressions::Equal("value", Literal::String(text));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  // "abcdefg" falls into bucket 4 of 10.
  AssertProjectionStrict(*partition_spec, *table_schema, bound_ne,
                         Expression::Operation::kNotEq, "4");
  AssertProjectionStrictValue(*partition_spec, *table_schema, bound_eq,
                              Expression::Operation::kFalse);
}
// Inclusive projection of string predicates through bucket[10] on "value":
// == keeps a predicate on the bucket, != becomes kTrue.
TEST_F(BucketingProjectionTest, BucketStringInclusive) {
  std::string text = "abcdefg";
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", string())}, 0);
  auto bucket10 = Transform::Bucket(10);
  PartitionField bucket_field(1, 1000, "value_bucket", bucket10);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {bucket_field}));

  // Bind both predicates to the schema (case sensitive).
  auto eq = Expressions::Equal("value", Literal::String(text));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));
  auto ne = Expressions::NotEqual("value", Literal::String(text));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_ne, ne->Bind(*table_schema, true));

  // "abcdefg" falls into bucket 4 of 10.
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_eq,
                            Expression::Operation::kEq, "4");
  AssertProjectionInclusiveValue(*partition_spec, *table_schema, bound_ne,
                                 Expression::Operation::kTrue);
}
// Date projection tests
class DateProjectionTest : public ::testing::Test {
protected:
void AssertProjectionStrict(const PartitionSpec& spec, const Schema& schema,
const std::shared_ptr<Expression>& filter,
Expression::Operation expected_op) {
auto evaluator = Projections::Strict(spec, schema, true);
ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));
AssertProjectionOperation(projection, expected_op);
}
void AssertProjectionInclusive(const PartitionSpec& spec, const Schema& schema,
const std::shared_ptr<Expression>& filter,
Expression::Operation expected_op) {
auto evaluator = Projections::Inclusive(spec, schema, true);
ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));
AssertProjectionOperation(projection, expected_op);
}
};
// Strict projection of date comparisons through a day partition:
// < and <= are expected as kLt, > and >= as kGt, == as kFalse.
TEST_F(DateProjectionTest, DayStrict) {
  const int32_t jan_first_2017 =
      TemporalTestHelper::CreateDate({.year = 2017, .month = 1, .day = 1});
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "date", date())}, 0);
  auto by_day = Transform::Day();
  PartitionField day_field(1, 1000, "date_day", by_day);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {day_field}));

  // Build each comparison and bind it to the schema (case sensitive).
  auto lt = Expressions::LessThan("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto lt_eq = Expressions::LessThanOrEqual("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt_eq, lt_eq->Bind(*table_schema, true));
  auto gt = Expressions::GreaterThan("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_gt, gt->Bind(*table_schema, true));
  auto gt_eq = Expressions::GreaterThanOrEqual("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_gt_eq, gt_eq->Bind(*table_schema, true));
  auto eq = Expressions::Equal("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  AssertProjectionStrict(*partition_spec, *table_schema, bound_lt,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, bound_lt_eq,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, bound_gt,
                         Expression::Operation::kGt);
  AssertProjectionStrict(*partition_spec, *table_schema, bound_gt_eq,
                         Expression::Operation::kGt);
  AssertProjectionStrict(*partition_spec, *table_schema, bound_eq,
                         Expression::Operation::kFalse);
}
// Inclusive projection of date comparisons through a day partition:
// < and <= are expected as kLtEq, > and >= as kGtEq, == as kEq.
TEST_F(DateProjectionTest, DayInclusive) {
  const int32_t jan_first_2017 =
      TemporalTestHelper::CreateDate({.year = 2017, .month = 1, .day = 1});
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "date", date())}, 0);
  auto by_day = Transform::Day();
  PartitionField day_field(1, 1000, "date_day", by_day);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {day_field}));

  // Build each comparison and bind it to the schema (case sensitive).
  auto lt = Expressions::LessThan("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto lt_eq = Expressions::LessThanOrEqual("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt_eq, lt_eq->Bind(*table_schema, true));
  auto gt = Expressions::GreaterThan("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_gt, gt->Bind(*table_schema, true));
  auto gt_eq = Expressions::GreaterThanOrEqual("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_gt_eq, gt_eq->Bind(*table_schema, true));
  auto eq = Expressions::Equal("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  AssertProjectionInclusive(*partition_spec, *table_schema, bound_lt,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_lt_eq,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_gt,
                            Expression::Operation::kGtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_gt_eq,
                            Expression::Operation::kGtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_eq,
                            Expression::Operation::kEq);
}
// Strict projection of date predicates through a month partition:
// < is expected as kLt and == as kFalse.
TEST_F(DateProjectionTest, MonthStrict) {
  const int32_t jan_first_2017 =
      TemporalTestHelper::CreateDate({.year = 2017, .month = 1, .day = 1});
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "date", date())}, 0);
  auto by_month = Transform::Month();
  PartitionField month_field(1, 1000, "date_month", by_month);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {month_field}));

  // Bind both predicates to the schema (case sensitive).
  auto lt = Expressions::LessThan("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto eq = Expressions::Equal("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  AssertProjectionStrict(*partition_spec, *table_schema, bound_lt,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, bound_eq,
                         Expression::Operation::kFalse);
}
// Inclusive projection of date predicates through a month partition:
// < is expected as kLtEq and == as kEq.
TEST_F(DateProjectionTest, MonthInclusive) {
  const int32_t jan_first_2017 =
      TemporalTestHelper::CreateDate({.year = 2017, .month = 1, .day = 1});
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "date", date())}, 0);
  auto by_month = Transform::Month();
  PartitionField month_field(1, 1000, "date_month", by_month);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {month_field}));

  // Bind both predicates to the schema (case sensitive).
  auto lt = Expressions::LessThan("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto eq = Expressions::Equal("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  AssertProjectionInclusive(*partition_spec, *table_schema, bound_lt,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_eq,
                            Expression::Operation::kEq);
}
// Strict projection of date predicates through a year partition:
// < is expected as kLt and == as kFalse.
TEST_F(DateProjectionTest, YearStrict) {
  const int32_t jan_first_2017 =
      TemporalTestHelper::CreateDate({.year = 2017, .month = 1, .day = 1});
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "date", date())}, 0);
  auto by_year = Transform::Year();
  PartitionField year_field(1, 1000, "date_year", by_year);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {year_field}));

  // Bind both predicates to the schema (case sensitive).
  auto lt = Expressions::LessThan("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto eq = Expressions::Equal("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  AssertProjectionStrict(*partition_spec, *table_schema, bound_lt,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, bound_eq,
                         Expression::Operation::kFalse);
}
// Inclusive projection of date predicates through a year partition:
// < is expected as kLtEq and == as kEq.
TEST_F(DateProjectionTest, YearInclusive) {
  const int32_t jan_first_2017 =
      TemporalTestHelper::CreateDate({.year = 2017, .month = 1, .day = 1});
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "date", date())}, 0);
  auto by_year = Transform::Year();
  PartitionField year_field(1, 1000, "date_year", by_year);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {year_field}));

  // Bind both predicates to the schema (case sensitive).
  auto lt = Expressions::LessThan("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto eq = Expressions::Equal("date", Literal::Date(jan_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  AssertProjectionInclusive(*partition_spec, *table_schema, bound_lt,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_eq,
                            Expression::Operation::kEq);
}
// Timestamp projection tests
class TimestampProjectionTest : public ::testing::Test {
protected:
void AssertProjectionStrict(const PartitionSpec& spec, const Schema& schema,
const std::shared_ptr<Expression>& filter,
Expression::Operation expected_op) {
auto evaluator = Projections::Strict(spec, schema, true);
ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));
AssertProjectionOperation(projection, expected_op);
}
void AssertProjectionInclusive(const PartitionSpec& spec, const Schema& schema,
const std::shared_ptr<Expression>& filter,
Expression::Operation expected_op) {
auto evaluator = Projections::Inclusive(spec, schema, true);
ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));
AssertProjectionOperation(projection, expected_op);
}
};
// Strict projection of timestamp predicates through a day partition:
// < is expected as kLt and == as kFalse.
TEST_F(TimestampProjectionTest, DayStrict) {
  // 2017-12-01 00:00:00.000000
  const int64_t dec_first_2017 = TemporalTestHelper::CreateTimestamp({.year = 2017,
                                                                      .month = 12,
                                                                      .day = 1,
                                                                      .hour = 0,
                                                                      .minute = 0,
                                                                      .second = 0,
                                                                      .microsecond = 0});
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "timestamp", timestamp())},
      0);
  auto by_day = Transform::Day();
  PartitionField day_field(1, 1000, "timestamp_day", by_day);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {day_field}));

  // Bind both predicates to the schema (case sensitive).
  auto lt = Expressions::LessThan("timestamp", Literal::Timestamp(dec_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto eq = Expressions::Equal("timestamp", Literal::Timestamp(dec_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  AssertProjectionStrict(*partition_spec, *table_schema, bound_lt,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, bound_eq,
                         Expression::Operation::kFalse);
}
// Inclusive projection of timestamp predicates through a day partition:
// < is expected as kLtEq and == as kEq.
TEST_F(TimestampProjectionTest, DayInclusive) {
  // 2017-12-01 00:00:00.000000
  const int64_t dec_first_2017 = TemporalTestHelper::CreateTimestamp({.year = 2017,
                                                                      .month = 12,
                                                                      .day = 1,
                                                                      .hour = 0,
                                                                      .minute = 0,
                                                                      .second = 0,
                                                                      .microsecond = 0});
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "timestamp", timestamp())},
      0);
  auto by_day = Transform::Day();
  PartitionField day_field(1, 1000, "timestamp_day", by_day);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {day_field}));

  // Bind both predicates to the schema (case sensitive).
  auto lt = Expressions::LessThan("timestamp", Literal::Timestamp(dec_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto eq = Expressions::Equal("timestamp", Literal::Timestamp(dec_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  AssertProjectionInclusive(*partition_spec, *table_schema, bound_lt,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_eq,
                            Expression::Operation::kEq);
}
// Strict projection of timestamp predicates through a month partition:
// < is expected as kLt and == as kFalse.
TEST_F(TimestampProjectionTest, MonthStrict) {
  // 2017-12-01 00:00:00.000000
  const int64_t dec_first_2017 = TemporalTestHelper::CreateTimestamp({.year = 2017,
                                                                      .month = 12,
                                                                      .day = 1,
                                                                      .hour = 0,
                                                                      .minute = 0,
                                                                      .second = 0,
                                                                      .microsecond = 0});
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "timestamp", timestamp())},
      0);
  auto by_month = Transform::Month();
  PartitionField month_field(1, 1000, "timestamp_month", by_month);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {month_field}));

  // Bind both predicates to the schema (case sensitive).
  auto lt = Expressions::LessThan("timestamp", Literal::Timestamp(dec_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto eq = Expressions::Equal("timestamp", Literal::Timestamp(dec_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  AssertProjectionStrict(*partition_spec, *table_schema, bound_lt,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, bound_eq,
                         Expression::Operation::kFalse);
}
// Inclusive projection of timestamp predicates through a month partition:
// < is expected as kLtEq and == as kEq.
TEST_F(TimestampProjectionTest, MonthInclusive) {
  // 2017-12-01 00:00:00.000000
  const int64_t dec_first_2017 = TemporalTestHelper::CreateTimestamp({.year = 2017,
                                                                      .month = 12,
                                                                      .day = 1,
                                                                      .hour = 0,
                                                                      .minute = 0,
                                                                      .second = 0,
                                                                      .microsecond = 0});
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "timestamp", timestamp())},
      0);
  auto by_month = Transform::Month();
  PartitionField month_field(1, 1000, "timestamp_month", by_month);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {month_field}));

  // Bind both predicates to the schema (case sensitive).
  auto lt = Expressions::LessThan("timestamp", Literal::Timestamp(dec_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_lt, lt->Bind(*table_schema, true));
  auto eq = Expressions::Equal("timestamp", Literal::Timestamp(dec_first_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto bound_eq, eq->Bind(*table_schema, true));

  AssertProjectionInclusive(*partition_spec, *table_schema, bound_lt,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, bound_eq,
                            Expression::Operation::kEq);
}
// Strict projection through a year transform on a "timestamp" column, using the
// literal 2017-01-01 00:00:00.000000: `<` is expected to project to kLt and `==`
// is expected to project to kFalse.
TEST_F(TimestampProjectionTest, YearStrict) {
  // Literal used on the right-hand side of both predicates.
  const int64_t start_of_2017 = TemporalTestHelper::CreateTimestamp({.year = 2017,
                                                                     .month = 1,
                                                                     .day = 1,
                                                                     .hour = 0,
                                                                     .minute = 0,
                                                                     .second = 0,
                                                                     .microsecond = 0});

  // Single optional timestamp column (field id 1), partitioned by year.
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "timestamp", timestamp())},
      0);
  auto by_year = Transform::Year();
  PartitionField year_field(1, 1000, "timestamp_year", by_year);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {year_field}));

  // Bind both predicates against the schema before projecting them.
  auto lt_unbound = Expressions::LessThan("timestamp", Literal::Timestamp(start_of_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto lt_bound, lt_unbound->Bind(*table_schema, true));
  auto eq_unbound = Expressions::Equal("timestamp", Literal::Timestamp(start_of_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto eq_bound, eq_unbound->Bind(*table_schema, true));

  AssertProjectionStrict(*partition_spec, *table_schema, lt_bound,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, eq_bound,
                         Expression::Operation::kFalse);
}
// Inclusive projection through a year transform on a "timestamp" column, using
// the literal 2017-01-01 00:00:00.000000: `<` is expected to project to kLtEq and
// `==` is expected to project to kEq.
TEST_F(TimestampProjectionTest, YearInclusive) {
  // Literal used on the right-hand side of both predicates.
  const int64_t start_of_2017 = TemporalTestHelper::CreateTimestamp({.year = 2017,
                                                                     .month = 1,
                                                                     .day = 1,
                                                                     .hour = 0,
                                                                     .minute = 0,
                                                                     .second = 0,
                                                                     .microsecond = 0});

  // Single optional timestamp column (field id 1), partitioned by year.
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "timestamp", timestamp())},
      0);
  auto by_year = Transform::Year();
  PartitionField year_field(1, 1000, "timestamp_year", by_year);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {year_field}));

  // Bind both predicates against the schema before projecting them.
  auto lt_unbound = Expressions::LessThan("timestamp", Literal::Timestamp(start_of_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto lt_bound, lt_unbound->Bind(*table_schema, true));
  auto eq_unbound = Expressions::Equal("timestamp", Literal::Timestamp(start_of_2017));
  ICEBERG_UNWRAP_OR_FAIL(auto eq_bound, eq_unbound->Bind(*table_schema, true));

  AssertProjectionInclusive(*partition_spec, *table_schema, lt_bound,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, eq_bound,
                            Expression::Operation::kEq);
}
// Strict projection through an hour transform on a "timestamp" column, using the
// literal 2017-12-01 10:00:00.000000: `<` is expected to project to kLt and `==`
// is expected to project to kFalse.
TEST_F(TimestampProjectionTest, HourStrict) {
  // Literal used on the right-hand side of both predicates.
  const int64_t ten_oclock = TemporalTestHelper::CreateTimestamp({.year = 2017,
                                                                  .month = 12,
                                                                  .day = 1,
                                                                  .hour = 10,
                                                                  .minute = 0,
                                                                  .second = 0,
                                                                  .microsecond = 0});

  // Single optional timestamp column (field id 1), partitioned by hour.
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "timestamp", timestamp())},
      0);
  auto by_hour = Transform::Hour();
  PartitionField hour_field(1, 1000, "timestamp_hour", by_hour);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {hour_field}));

  // Bind both predicates against the schema before projecting them.
  auto lt_unbound = Expressions::LessThan("timestamp", Literal::Timestamp(ten_oclock));
  ICEBERG_UNWRAP_OR_FAIL(auto lt_bound, lt_unbound->Bind(*table_schema, true));
  auto eq_unbound = Expressions::Equal("timestamp", Literal::Timestamp(ten_oclock));
  ICEBERG_UNWRAP_OR_FAIL(auto eq_bound, eq_unbound->Bind(*table_schema, true));

  AssertProjectionStrict(*partition_spec, *table_schema, lt_bound,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, eq_bound,
                         Expression::Operation::kFalse);
}
// Inclusive projection through an hour transform on a "timestamp" column, using
// the literal 2017-12-01 10:00:00.000000: `<` is expected to project to kLtEq and
// `==` is expected to project to kEq.
TEST_F(TimestampProjectionTest, HourInclusive) {
  // Literal used on the right-hand side of both predicates.
  const int64_t ten_oclock = TemporalTestHelper::CreateTimestamp({.year = 2017,
                                                                  .month = 12,
                                                                  .day = 1,
                                                                  .hour = 10,
                                                                  .minute = 0,
                                                                  .second = 0,
                                                                  .microsecond = 0});

  // Single optional timestamp column (field id 1), partitioned by hour.
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "timestamp", timestamp())},
      0);
  auto by_hour = Transform::Hour();
  PartitionField hour_field(1, 1000, "timestamp_hour", by_hour);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {hour_field}));

  // Bind both predicates against the schema before projecting them.
  auto lt_unbound = Expressions::LessThan("timestamp", Literal::Timestamp(ten_oclock));
  ICEBERG_UNWRAP_OR_FAIL(auto lt_bound, lt_unbound->Bind(*table_schema, true));
  auto eq_unbound = Expressions::Equal("timestamp", Literal::Timestamp(ten_oclock));
  ICEBERG_UNWRAP_OR_FAIL(auto eq_bound, eq_unbound->Bind(*table_schema, true));

  AssertProjectionInclusive(*partition_spec, *table_schema, lt_bound,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, eq_bound,
                            Expression::Operation::kEq);
}
// Truncate projection tests
class TruncateProjectionTest : public ::testing::Test {
protected:
void AssertProjectionStrict(const PartitionSpec& spec, const Schema& schema,
const std::shared_ptr<Expression>& filter,
Expression::Operation expected_op) {
auto evaluator = Projections::Strict(spec, schema, true);
ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));
AssertProjectionOperation(projection, expected_op);
}
void AssertProjectionInclusive(const PartitionSpec& spec, const Schema& schema,
const std::shared_ptr<Expression>& filter,
Expression::Operation expected_op) {
auto evaluator = Projections::Inclusive(spec, schema, true);
ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));
AssertProjectionOperation(projection, expected_op);
}
};
// Strict projection through Transform::Truncate(10) on an int32 "value" column,
// using the literal 100: `<` is expected to project to kLt and `==` is expected
// to project to kFalse.
TEST_F(TruncateProjectionTest, IntegerStrict) {
  const int32_t bound = 100;

  // Single optional int32 column (field id 1) with a truncate partition field.
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", int32())}, 0);
  auto truncate_by_10 = Transform::Truncate(10);
  PartitionField trunc_field(1, 1000, "value_trunc", truncate_by_10);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {trunc_field}));

  // Bind both predicates against the schema before projecting them.
  auto lt_unbound = Expressions::LessThan("value", Literal::Int(bound));
  ICEBERG_UNWRAP_OR_FAIL(auto lt_bound, lt_unbound->Bind(*table_schema, true));
  auto eq_unbound = Expressions::Equal("value", Literal::Int(bound));
  ICEBERG_UNWRAP_OR_FAIL(auto eq_bound, eq_unbound->Bind(*table_schema, true));

  AssertProjectionStrict(*partition_spec, *table_schema, lt_bound,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, eq_bound,
                         Expression::Operation::kFalse);
}
// Inclusive projection through Transform::Truncate(10) on an int32 "value"
// column, using the literal 100: `<` is expected to project to kLtEq and `==` is
// expected to project to kEq.
TEST_F(TruncateProjectionTest, IntegerInclusive) {
  const int32_t bound = 100;

  // Single optional int32 column (field id 1) with a truncate partition field.
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", int32())}, 0);
  auto truncate_by_10 = Transform::Truncate(10);
  PartitionField trunc_field(1, 1000, "value_trunc", truncate_by_10);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {trunc_field}));

  // Bind both predicates against the schema before projecting them.
  auto lt_unbound = Expressions::LessThan("value", Literal::Int(bound));
  ICEBERG_UNWRAP_OR_FAIL(auto lt_bound, lt_unbound->Bind(*table_schema, true));
  auto eq_unbound = Expressions::Equal("value", Literal::Int(bound));
  ICEBERG_UNWRAP_OR_FAIL(auto eq_bound, eq_unbound->Bind(*table_schema, true));

  AssertProjectionInclusive(*partition_spec, *table_schema, lt_bound,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, eq_bound,
                            Expression::Operation::kEq);
}
// Strict projection through Transform::Truncate(10) on an int64 "value" column,
// using the literal 100: `<` is expected to project to kLt and `==` is expected
// to project to kFalse.
TEST_F(TruncateProjectionTest, LongStrict) {
  const int64_t bound = 100L;

  // Single optional int64 column (field id 1) with a truncate partition field.
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", int64())}, 0);
  auto truncate_by_10 = Transform::Truncate(10);
  PartitionField trunc_field(1, 1000, "value_trunc", truncate_by_10);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {trunc_field}));

  // Bind both predicates against the schema before projecting them.
  auto lt_unbound = Expressions::LessThan("value", Literal::Long(bound));
  ICEBERG_UNWRAP_OR_FAIL(auto lt_bound, lt_unbound->Bind(*table_schema, true));
  auto eq_unbound = Expressions::Equal("value", Literal::Long(bound));
  ICEBERG_UNWRAP_OR_FAIL(auto eq_bound, eq_unbound->Bind(*table_schema, true));

  AssertProjectionStrict(*partition_spec, *table_schema, lt_bound,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, eq_bound,
                         Expression::Operation::kFalse);
}
// Inclusive projection through Transform::Truncate(10) on an int64 "value"
// column, using the literal 100: `<` is expected to project to kLtEq and `==` is
// expected to project to kEq.
TEST_F(TruncateProjectionTest, LongInclusive) {
  const int64_t bound = 100L;

  // Single optional int64 column (field id 1) with a truncate partition field.
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", int64())}, 0);
  auto truncate_by_10 = Transform::Truncate(10);
  PartitionField trunc_field(1, 1000, "value_trunc", truncate_by_10);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {trunc_field}));

  // Bind both predicates against the schema before projecting them.
  auto lt_unbound = Expressions::LessThan("value", Literal::Long(bound));
  ICEBERG_UNWRAP_OR_FAIL(auto lt_bound, lt_unbound->Bind(*table_schema, true));
  auto eq_unbound = Expressions::Equal("value", Literal::Long(bound));
  ICEBERG_UNWRAP_OR_FAIL(auto eq_bound, eq_unbound->Bind(*table_schema, true));

  AssertProjectionInclusive(*partition_spec, *table_schema, lt_bound,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, eq_bound,
                            Expression::Operation::kEq);
}
// Strict projection through Transform::Truncate(5) on a string "value" column,
// using the literal "abcdefg": `<` is expected to project to kLt and `==` is
// expected to project to kFalse.
TEST_F(TruncateProjectionTest, StringStrict) {
  const std::string text = "abcdefg";

  // Single optional string column (field id 1) with a truncate partition field.
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", string())}, 0);
  auto truncate_by_5 = Transform::Truncate(5);
  PartitionField trunc_field(1, 1000, "value_trunc", truncate_by_5);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {trunc_field}));

  // Bind both predicates against the schema before projecting them.
  auto lt_unbound = Expressions::LessThan("value", Literal::String(text));
  ICEBERG_UNWRAP_OR_FAIL(auto lt_bound, lt_unbound->Bind(*table_schema, true));
  auto eq_unbound = Expressions::Equal("value", Literal::String(text));
  ICEBERG_UNWRAP_OR_FAIL(auto eq_bound, eq_unbound->Bind(*table_schema, true));

  AssertProjectionStrict(*partition_spec, *table_schema, lt_bound,
                         Expression::Operation::kLt);
  AssertProjectionStrict(*partition_spec, *table_schema, eq_bound,
                         Expression::Operation::kFalse);
}
// Inclusive projection through Transform::Truncate(5) on a string "value" column,
// using the literal "abcdefg": `<` is expected to project to kLtEq and `==` is
// expected to project to kEq.
TEST_F(TruncateProjectionTest, StringInclusive) {
  const std::string text = "abcdefg";

  // Single optional string column (field id 1) with a truncate partition field.
  auto table_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "value", string())}, 0);
  auto truncate_by_5 = Transform::Truncate(5);
  PartitionField trunc_field(1, 1000, "value_trunc", truncate_by_5);
  ICEBERG_UNWRAP_OR_FAIL(auto partition_spec, PartitionSpec::Make(0, {trunc_field}));

  // Bind both predicates against the schema before projecting them.
  auto lt_unbound = Expressions::LessThan("value", Literal::String(text));
  ICEBERG_UNWRAP_OR_FAIL(auto lt_bound, lt_unbound->Bind(*table_schema, true));
  auto eq_unbound = Expressions::Equal("value", Literal::String(text));
  ICEBERG_UNWRAP_OR_FAIL(auto eq_bound, eq_unbound->Bind(*table_schema, true));

  AssertProjectionInclusive(*partition_spec, *table_schema, lt_bound,
                            Expression::Operation::kLtEq);
  AssertProjectionInclusive(*partition_spec, *table_schema, eq_bound,
                            Expression::Operation::kEq);
}
// Complex expression tests
// Inclusive projection of a nested OR/AND filter onto an identity partition of
// "dateint". The filter also references "hour", which is not a partition source;
// the test expects the projected tree to keep only the three "dateint" equality
// predicates, arranged as `eq OR (eq OR eq)`.
//
// Every structural precondition (node is an Or, cast succeeded, literal list is
// non-empty) is checked with a fatal ASSERT_* before the node is dereferenced, so
// an unexpected projection shape is reported as a test failure instead of
// dereferencing a null or mis-typed pointer.
TEST_F(ProjectionsTest, ComplexExpressionWithOr) {
  auto schema = std::make_shared<Schema>(
      std::vector<SchemaField>{
          SchemaField::MakeRequired(1, "id", int64()),
          SchemaField::MakeOptional(2, "data", string()),
          SchemaField::MakeRequired(3, "hour", int32()),
          SchemaField::MakeRequired(4, "dateint", int32()),
      },
      0);
  auto identity_transform = Transform::Identity();
  PartitionField pt_field(4, 1000, "dateint", identity_transform);
  ICEBERG_UNWRAP_OR_FAIL(auto spec, PartitionSpec::Make(*schema, 0, {pt_field}, false));

  // Create filter: dateint = 20180416 OR ((dateint = 20180415 AND hour >= 20) OR
  // (dateint = 20180417 AND hour <= 4))
  auto dateint_eq1 = Expressions::Equal("dateint", Literal::Int(20180416));
  auto dateint_eq2 = Expressions::Equal("dateint", Literal::Int(20180415));
  auto hour_ge = Expressions::GreaterThanOrEqual("hour", Literal::Int(20));
  auto dateint_eq3 = Expressions::Equal("dateint", Literal::Int(20180417));
  auto hour_le = Expressions::LessThanOrEqual("hour", Literal::Int(4));
  auto and1 = Expressions::And(dateint_eq2, hour_ge);
  auto and2 = Expressions::And(dateint_eq3, hour_le);
  auto or1 = Expressions::Or(and1, and2);
  auto filter = Expressions::Or(dateint_eq1, or1);

  // Project
  auto evaluator = Projections::Inclusive(*spec, *schema, true);
  ICEBERG_UNWRAP_OR_FAIL(auto projection, evaluator->Project(filter));

  // The projection should be an OR expression
  // Non-partition predicates (hour) are removed, and AND expressions simplify
  // Expected: dateint = 20180416 OR (dateint = 20180415 OR dateint = 20180417)
  ASSERT_NE(projection, nullptr);
  // Fatal check: the cast below is only valid when the root really is an Or.
  ASSERT_EQ(projection->op(), Expression::Operation::kOr);
  auto or_expr = internal::checked_pointer_cast<Or>(projection);

  // Left side: dateint = 20180416
  auto dateint1_expr =
      std::dynamic_pointer_cast<UnboundPredicateImpl<BoundReference>>(or_expr->left());
  ASSERT_NE(dateint1_expr, nullptr);
  EXPECT_EQ(dateint1_expr->reference()->name(), "dateint");
  EXPECT_EQ(dateint1_expr->op(), Expression::Operation::kEq);
  ASSERT_FALSE(dateint1_expr->literals().empty());
  EXPECT_EQ(dateint1_expr->literals().front(), Literal::Int(20180416));

  // Right side: OR of the two dateint predicates (AND expressions simplified)
  ASSERT_NE(or_expr->right(), nullptr);
  // Verify the operation before casting, for the same reason as at the root.
  ASSERT_EQ(or_expr->right()->op(), Expression::Operation::kOr);
  auto or1_expr = internal::checked_pointer_cast<Or>(or_expr->right());

  // Left of inner OR: dateint = 20180415 (simplified from AND with hour >= 20)
  auto dateint2_expr =
      std::dynamic_pointer_cast<UnboundPredicateImpl<BoundReference>>(or1_expr->left());
  ASSERT_NE(dateint2_expr, nullptr);
  EXPECT_EQ(dateint2_expr->reference()->name(), "dateint");
  EXPECT_EQ(dateint2_expr->op(), Expression::Operation::kEq);
  ASSERT_FALSE(dateint2_expr->literals().empty());
  EXPECT_EQ(dateint2_expr->literals().front(), Literal::Int(20180415));

  // Right of inner OR: dateint = 20180417 (simplified from AND with hour <= 4)
  auto dateint3_expr =
      std::dynamic_pointer_cast<UnboundPredicateImpl<BoundReference>>(or1_expr->right());
  ASSERT_NE(dateint3_expr, nullptr);
  EXPECT_EQ(dateint3_expr->reference()->name(), "dateint");
  EXPECT_EQ(dateint3_expr->op(), Expression::Operation::kEq);
  ASSERT_FALSE(dateint3_expr->literals().empty());
  EXPECT_EQ(dateint3_expr->literals().front(), Literal::Int(20180417));
}
} // namespace iceberg