blob: 798f41e1f02c877b2985509e82ccda188427f688 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "iceberg/expression/evaluator.h"
#include <cstddef>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include <arrow/array.h>
#include <arrow/array/builder_primitive.h>
#include <arrow/c/bridge.h>
#include <arrow/json/from_string.h>
#include <arrow/testing/gtest_util.h>
#include <arrow/type.h>
#include <gtest/gtest.h>
#include "iceberg/arrow_c_data_guard_internal.h"
#include "iceberg/expression/expressions.h"
#include "iceberg/expression/literal.h"
#include "iceberg/result.h"
#include "iceberg/row/arrow_array_wrapper.h"
#include "iceberg/schema.h"
#include "iceberg/test/matchers.h"
#include "iceberg/type.h"
namespace iceberg {
class EvaluatorTest : public ::testing::Test {
protected:
void SetUp() override {
schema_ = std::make_unique<Schema>(std::vector<SchemaField>{
SchemaField::MakeRequired(13, "x", int32()),
SchemaField::MakeRequired(14, "y", float64()),
SchemaField::MakeOptional(15, "z", int32()),
SchemaField::MakeOptional(
16, "s1",
std::make_shared<StructType>(
std::vector<SchemaField>{SchemaField::MakeRequired(
17, "s2",
std::make_shared<StructType>(
std::vector<SchemaField>{SchemaField::MakeRequired(
18, "s3",
std::make_shared<StructType>(
std::vector<SchemaField>{SchemaField::MakeRequired(
19, "s4",
std::make_shared<StructType>(std::vector<SchemaField>{
SchemaField::MakeRequired(20, "i",
int32())}))}))}))})),
SchemaField::MakeOptional(
21, "s5",
std::make_shared<StructType>(
std::vector<SchemaField>{SchemaField::MakeRequired(
22, "s6",
std::make_shared<StructType>(std::vector<SchemaField>{
SchemaField::MakeRequired(23, "f", float32())}))}))});
arrow_data_type_ = ::arrow::struct_({
::arrow::field("x", ::arrow::int32(), /*nullable=*/false),
::arrow::field("y", ::arrow::float64(), /*nullable=*/false),
::arrow::field("z", ::arrow::int32(), /*nullable=*/true),
::arrow::field("s1",
::arrow::struct_({::arrow::field(
"s2",
::arrow::struct_({::arrow::field(
"s3",
::arrow::struct_({::arrow::field(
"s4",
::arrow::struct_({::arrow::field("i", ::arrow::int32(),
/*nullable=*/false)}),
/*nullable=*/false)}),
/*nullable=*/false)}),
/*nullable=*/false)}),
/*nullable=*/true),
::arrow::field("s5",
::arrow::struct_({::arrow::field(
"s6",
::arrow::struct_({::arrow::field("f", ::arrow::float32(),
/*nullable=*/false)}),
/*nullable=*/false)}),
/*nullable=*/true),
});
ASSERT_TRUE(::arrow::ExportType(*arrow_data_type_, &arrow_c_schema_).ok());
}
void TearDown() override {
if (arrow_c_schema_.release != nullptr) {
ArrowSchemaRelease(&arrow_c_schema_);
}
}
void TestData(const std::string& json_data, Evaluator& evaluator,
bool expected_result) {
auto arrow_array =
::arrow::json::ArrayFromJSONString(arrow_data_type_, json_data).ValueOrDie();
ASSERT_EQ(arrow_array->length(), 1)
<< "Expected 1 row, got " << arrow_array->length();
ArrowArray arrow_c_array;
internal::ArrowArrayGuard array_guard(&arrow_c_array);
ASSERT_TRUE(::arrow::ExportArray(*arrow_array, &arrow_c_array).ok());
ICEBERG_UNWRAP_OR_FAIL(auto struct_like,
ArrowArrayStructLike::Make(arrow_c_schema_, arrow_c_array, 0));
ICEBERG_UNWRAP_OR_FAIL(auto result, evaluator.Evaluate(*struct_like));
ASSERT_EQ(result, expected_result);
}
std::unique_ptr<Schema> schema_;
std::shared_ptr<::arrow::DataType> arrow_data_type_;
ArrowSchema arrow_c_schema_;
};
// `<` against a top-level column and against a deeply nested struct field.
TEST_F(EvaluatorTest, LessThan) {
  struct RowCase {
    std::string row;
    bool expected;
  };

  // Predicate: x < 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto column_eval,
      Evaluator::Make(*schema_, Expressions::LessThan("x", Literal::Int(7))));
  const RowCase column_cases[] = {
      // 7 < 7 does not hold
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": null, "s5": null}])", false},
      // 6 < 7 holds
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": null, "s5": null}])", true},
  };
  for (const auto& c : column_cases) {
    TestData(c.row, *column_eval, c.expected);
  }

  // Predicate: s1.s2.s3.s4.i < 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(*schema_, Expressions::LessThan("s1.s2.s3.s4.i", Literal::Int(7))));
  const RowCase nested_cases[] = {
      // 7 < 7 does not hold
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
       false},
      // 6 < 7 holds
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 6}}}}, "s5": null}])",
       true},
  };
  for (const auto& c : nested_cases) {
    TestData(c.row, *nested_eval, c.expected);
  }
}
// `<=` against a top-level column and against a deeply nested struct field.
TEST_F(EvaluatorTest, LessThanOrEqual) {
  struct RowCase {
    std::string row;
    bool expected;
  };

  // Predicate: x <= 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto column_eval,
      Evaluator::Make(*schema_, Expressions::LessThanOrEqual("x", Literal::Int(7))));
  const RowCase column_cases[] = {
      // 7 <= 7 holds
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": null, "s5": null}])", true},
      // 6 <= 7 holds
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": null, "s5": null}])", true},
      // 8 <= 7 does not hold
      {R"([{"x": 8, "y": 8.0, "z": null, "s1": null, "s5": null}])", false},
  };
  for (const auto& c : column_cases) {
    TestData(c.row, *column_eval, c.expected);
  }

  // Predicate: s1.s2.s3.s4.i <= 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(*schema_,
                      Expressions::LessThanOrEqual("s1.s2.s3.s4.i", Literal::Int(7))));
  const RowCase nested_cases[] = {
      // 7 <= 7 holds
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
       true},
      // 6 <= 7 holds
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 6}}}}, "s5": null}])",
       true},
      // 8 <= 7 does not hold
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 8}}}}, "s5": null}])",
       false},
  };
  for (const auto& c : nested_cases) {
    TestData(c.row, *nested_eval, c.expected);
  }
}
// `>` against a top-level column and against a deeply nested struct field.
TEST_F(EvaluatorTest, GreaterThan) {
  struct RowCase {
    std::string row;
    bool expected;
  };

  // Predicate: x > 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto column_eval,
      Evaluator::Make(*schema_, Expressions::GreaterThan("x", Literal::Int(7))));
  const RowCase column_cases[] = {
      // 7 > 7 does not hold
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": null, "s5": null}])", false},
      // 6 > 7 does not hold
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": null, "s5": null}])", false},
      // 8 > 7 holds
      {R"([{"x": 8, "y": 8.0, "z": null, "s1": null, "s5": null}])", true},
  };
  for (const auto& c : column_cases) {
    TestData(c.row, *column_eval, c.expected);
  }

  // Predicate: s1.s2.s3.s4.i > 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(*schema_,
                      Expressions::GreaterThan("s1.s2.s3.s4.i", Literal::Int(7))));
  const RowCase nested_cases[] = {
      // 7 > 7 does not hold
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
       false},
      // 6 > 7 does not hold
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 6}}}}, "s5": null}])",
       false},
      // 8 > 7 holds
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 8}}}}, "s5": null}])",
       true},
  };
  for (const auto& c : nested_cases) {
    TestData(c.row, *nested_eval, c.expected);
  }
}
// `>=` against a top-level column and against a deeply nested struct field.
TEST_F(EvaluatorTest, GreaterThanOrEqual) {
  struct RowCase {
    std::string row;
    bool expected;
  };

  // Predicate: x >= 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto column_eval,
      Evaluator::Make(*schema_, Expressions::GreaterThanOrEqual("x", Literal::Int(7))));
  const RowCase column_cases[] = {
      // 7 >= 7 holds
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": null, "s5": null}])", true},
      // 6 >= 7 does not hold
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": null, "s5": null}])", false},
      // 8 >= 7 holds
      {R"([{"x": 8, "y": 8.0, "z": null, "s1": null, "s5": null}])", true},
  };
  for (const auto& c : column_cases) {
    TestData(c.row, *column_eval, c.expected);
  }

  // Predicate: s1.s2.s3.s4.i >= 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(*schema_,
                      Expressions::GreaterThanOrEqual("s1.s2.s3.s4.i", Literal::Int(7))));
  const RowCase nested_cases[] = {
      // 7 >= 7 holds
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
       true},
      // 6 >= 7 does not hold
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 6}}}}, "s5": null}])",
       false},
      // 8 >= 7 holds
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 8}}}}, "s5": null}])",
       true},
  };
  for (const auto& c : nested_cases) {
    TestData(c.row, *nested_eval, c.expected);
  }
}
// `==` against a top-level column and against a deeply nested struct field.
TEST_F(EvaluatorTest, Equal) {
  struct RowCase {
    std::string row;
    bool expected;
  };

  // Predicate: x == 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto column_eval,
      Evaluator::Make(*schema_, Expressions::Equal("x", Literal::Int(7))));
  const RowCase column_cases[] = {
      // 7 == 7 holds
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": null, "s5": null}])", true},
      // 6 == 7 does not hold
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": null, "s5": null}])", false},
  };
  for (const auto& c : column_cases) {
    TestData(c.row, *column_eval, c.expected);
  }

  // Predicate: s1.s2.s3.s4.i == 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(*schema_, Expressions::Equal("s1.s2.s3.s4.i", Literal::Int(7))));
  const RowCase nested_cases[] = {
      // 7 == 7 holds
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
       true},
      // 6 == 7 does not hold
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 6}}}}, "s5": null}])",
       false},
  };
  for (const auto& c : nested_cases) {
    TestData(c.row, *nested_eval, c.expected);
  }
}
// `!=` against a top-level column and against a deeply nested struct field.
TEST_F(EvaluatorTest, NotEqual) {
  struct RowCase {
    std::string row;
    bool expected;
  };

  // Predicate: x != 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto column_eval,
      Evaluator::Make(*schema_, Expressions::NotEqual("x", Literal::Int(7))));
  const RowCase column_cases[] = {
      // 7 != 7 does not hold
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": null, "s5": null}])", false},
      // 6 != 7 holds
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": null, "s5": null}])", true},
  };
  for (const auto& c : column_cases) {
    TestData(c.row, *column_eval, c.expected);
  }

  // Predicate: s1.s2.s3.s4.i != 7
  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(*schema_, Expressions::NotEqual("s1.s2.s3.s4.i", Literal::Int(7))));
  const RowCase nested_cases[] = {
      // 7 != 7 does not hold
      {R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
       false},
      // 6 != 7 holds
      {R"([{"x": 6, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 6}}}}, "s5": null}])",
       true},
  };
  for (const auto& c : nested_cases) {
    TestData(c.row, *nested_eval, c.expected);
  }
}
// startsWith("s", "abc") evaluated row by row over a six-row string column,
// including a null entry.
TEST_F(EvaluatorTest, StartsWith) {
  auto string_schema = std::make_unique<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(24, "s", string())});
  ICEBERG_UNWRAP_OR_FAIL(
      auto evaluator,
      Evaluator::Make(*string_schema, Expressions::StartsWith("s", "abc")));

  auto arrow_string_type = ::arrow::struct_({::arrow::field("s", ::arrow::utf8())});
  // Check the parse result instead of ValueOrDie(), which would abort the
  // whole test binary on error rather than fail this test.
  auto parsed_rows = ::arrow::json::ArrayFromJSONString(arrow_string_type, R"([
    {"s": "abc"},
    {"s": "xabc"},
    {"s": "Abc"},
    {"s": "a"},
    {"s": "abcd"},
    {"s": null}
  ])");
  ASSERT_TRUE(parsed_rows.ok()) << parsed_rows.status().ToString();
  const auto& arrow_string_array = parsed_rows.ValueOrDie();

  // Zero-initialize the C structs: the guards are armed before the exports,
  // so the structs must not hold indeterminate `release` pointers if an
  // export assertion fails.
  ArrowSchema c_schema{};
  ArrowArray c_array{};
  internal::ArrowSchemaGuard schema_guard(&c_schema);
  internal::ArrowArrayGuard array_guard(&c_array);
  ASSERT_TRUE(::arrow::ExportType(*arrow_string_type, &c_schema).ok());
  ASSERT_TRUE(::arrow::ExportArray(*arrow_string_array, &c_array).ok());

  ICEBERG_UNWRAP_OR_FAIL(auto struct_like,
                         ArrowArrayStructLike::Make(c_schema, c_array, /*row_index=*/0));

  struct RowCase {
    int64_t row;
    bool expected;
    const char* note;
  };
  const RowCase cases[] = {
      {0, true, "abc startsWith abc => true"},
      {1, false, "xabc startsWith abc => false"},
      {2, false, "Abc startsWith abc => false"},
      {3, false, "a startsWith abc => false"},
      {4, true, "abcd startsWith abc => true"},
      {5, false, "null startsWith abc => false"},
  };
  for (const auto& c : cases) {
    SCOPED_TRACE(c.note);
    ASSERT_THAT(struct_like->Reset(c.row), IsOk());
    ICEBERG_UNWRAP_OR_FAIL(auto matched, evaluator->Evaluate(*struct_like));
    EXPECT_EQ(matched, c.expected);
  }
}
// notStartsWith("s", "abc") evaluated row by row over a six-row string column.
TEST_F(EvaluatorTest, NotStartsWith) {
  auto string_schema = std::make_unique<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(24, "s", string())});
  ICEBERG_UNWRAP_OR_FAIL(
      auto evaluator,
      Evaluator::Make(*string_schema, Expressions::NotStartsWith("s", "abc")));

  auto arrow_string_type = ::arrow::struct_({::arrow::field("s", ::arrow::utf8())});
  // Check the parse result instead of ValueOrDie(), which would abort the
  // whole test binary on error rather than fail this test.
  auto parsed_rows = ::arrow::json::ArrayFromJSONString(arrow_string_type, R"([
    {"s": "abc"},
    {"s": "xabc"},
    {"s": "Abc"},
    {"s": "a"},
    {"s": "abcde"},
    {"s": "Abcde"}
  ])");
  ASSERT_TRUE(parsed_rows.ok()) << parsed_rows.status().ToString();
  const auto& arrow_string_array = parsed_rows.ValueOrDie();

  // Zero-initialize the C structs: the guards are armed before the exports,
  // so the structs must not hold indeterminate `release` pointers if an
  // export assertion fails.
  ArrowSchema c_schema{};
  ArrowArray c_array{};
  internal::ArrowSchemaGuard schema_guard(&c_schema);
  internal::ArrowArrayGuard array_guard(&c_array);
  ASSERT_TRUE(::arrow::ExportType(*arrow_string_type, &c_schema).ok());
  ASSERT_TRUE(::arrow::ExportArray(*arrow_string_array, &c_array).ok());

  ICEBERG_UNWRAP_OR_FAIL(auto struct_like,
                         ArrowArrayStructLike::Make(c_schema, c_array, /*row_index=*/0));

  struct RowCase {
    int64_t row;
    bool expected;
    const char* note;
  };
  const RowCase cases[] = {
      {0, false, "abc notStartsWith abc => false"},
      {1, true, "xabc notStartsWith abc => true"},
      {2, true, "Abc notStartsWith abc => true"},
      {3, true, "a notStartsWith abc => true"},
      {4, false, "abcde notStartsWith abc => false"},
      {5, true, "Abcde notStartsWith abc => true"},
  };
  for (const auto& c : cases) {
    SCOPED_TRACE(c.note);
    ASSERT_THAT(struct_like->Reset(c.row), IsOk());
    ICEBERG_UNWRAP_OR_FAIL(auto matched, evaluator->Evaluate(*struct_like));
    EXPECT_EQ(matched, c.expected);
  }
}
// The constant-true expression must evaluate to true for a row.
TEST_F(EvaluatorTest, AlwaysTrue) {
  ICEBERG_UNWRAP_OR_FAIL(auto always_true_eval,
                         Evaluator::Make(*schema_, Expressions::AlwaysTrue()));
  const std::string sample_row =
      R"([{"x": 7, "y": 8.0, "z": null, "s1": null, "s5": null}])";
  TestData(sample_row, *always_true_eval, /*expected_result=*/true);
}
// The constant-false expression must evaluate to false for a row.
TEST_F(EvaluatorTest, AlwaysFalse) {
  ICEBERG_UNWRAP_OR_FAIL(auto always_false_eval,
                         Evaluator::Make(*schema_, Expressions::AlwaysFalse()));
  const std::string sample_row =
      R"([{"x": 7, "y": 8.0, "z": null, "s1": null, "s5": null}])";
  TestData(sample_row, *always_false_eval, /*expected_result=*/false);
}
// isNull on the optional column z and on the nested field s1.s2.s3.s4.i.
TEST_F(EvaluatorTest, IsNull) {
  ICEBERG_UNWRAP_OR_FAIL(auto column_eval,
                         Evaluator::Make(*schema_, Expressions::IsNull("z")));
  // z is null, so isNull(z) holds.
  TestData(R"([{"x": 1, "y": 2.0, "z": null, "s1": null, "s5": null}])", *column_eval,
           /*expected_result=*/true);
  // z is 3, so isNull(z) does not hold.
  TestData(R"([{"x": 1, "y": 2.0, "z": 3, "s1": null, "s5": null}])", *column_eval,
           /*expected_result=*/false);

  ICEBERG_UNWRAP_OR_FAIL(auto nested_eval,
                         Evaluator::Make(*schema_, Expressions::IsNull("s1.s2.s3.s4.i")));
  // The nested value is 3, so isNull does not hold.
  TestData(
      R"([{"x": 1, "y": 2.0, "z": 3, "s1": {"s2": {"s3": {"s4": {"i": 3}}}}, "s5": null}])",
      *nested_eval, /*expected_result=*/false);
}
// notNull on the optional column z and on the nested field s1.s2.s3.s4.i.
TEST_F(EvaluatorTest, NotNull) {
  ICEBERG_UNWRAP_OR_FAIL(auto column_eval,
                         Evaluator::Make(*schema_, Expressions::NotNull("z")));
  // z is null, so notNull(z) does not hold.
  TestData(R"([{"x": 1, "y": 2.0, "z": null, "s1": null, "s5": null}])", *column_eval,
           /*expected_result=*/false);
  // z is 3, so notNull(z) holds.
  TestData(R"([{"x": 1, "y": 2.0, "z": 3, "s1": null, "s5": null}])", *column_eval,
           /*expected_result=*/true);

  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(*schema_, Expressions::NotNull("s1.s2.s3.s4.i")));
  // The nested value is 3, so notNull holds.
  TestData(
      R"([{"x": 1, "y": 2.0, "z": 3, "s1": {"s2": {"s3": {"s4": {"i": 3}}}}, "s5": null}])",
      *nested_eval, /*expected_result=*/true);
}
// isNaN("d") evaluated over a double column holding NaN, 2.0 and +infinity.
TEST_F(EvaluatorTest, IsNaN) {
  auto double_schema = std::make_unique<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(25, "d", float64())});
  ICEBERG_UNWRAP_OR_FAIL(auto evaluator,
                         Evaluator::Make(*double_schema, Expressions::IsNaN("d")));

  auto arrow_double_type = ::arrow::struct_({::arrow::field("d", ::arrow::float64())});

  // The column is assembled with a builder rather than parsed from JSON.
  ::arrow::DoubleBuilder builder;
  ASSERT_TRUE(builder.Append(std::numeric_limits<double>::quiet_NaN()).ok());
  ASSERT_TRUE(builder.Append(2.0).ok());
  ASSERT_TRUE(builder.Append(std::numeric_limits<double>::infinity()).ok());

  // Check each Arrow result instead of ValueOrDie(), which would abort the
  // whole test binary on error rather than fail this test.
  auto finished = builder.Finish();
  ASSERT_TRUE(finished.ok()) << finished.status().ToString();
  std::shared_ptr<::arrow::Array> double_array = finished.ValueOrDie();

  auto maybe_struct =
      ::arrow::StructArray::Make({double_array}, {arrow_double_type->field(0)});
  ASSERT_TRUE(maybe_struct.ok()) << maybe_struct.status().ToString();
  const auto& struct_array = maybe_struct.ValueOrDie();

  // Zero-initialize the C structs: the guards are armed before the exports,
  // so the structs must not hold indeterminate `release` pointers if an
  // export assertion fails.
  ArrowSchema c_schema{};
  ArrowArray c_array{};
  internal::ArrowSchemaGuard schema_guard(&c_schema);
  internal::ArrowArrayGuard array_guard(&c_array);
  ASSERT_TRUE(::arrow::ExportType(*arrow_double_type, &c_schema).ok());
  ASSERT_TRUE(::arrow::ExportArray(*struct_array, &c_array).ok());

  ICEBERG_UNWRAP_OR_FAIL(auto struct_like,
                         ArrowArrayStructLike::Make(c_schema, c_array, /*row_index=*/0));

  struct RowCase {
    int64_t row;
    bool expected;
    const char* note;
  };
  const RowCase cases[] = {
      {0, true, "NaN is NaN => true"},
      {1, false, "2.0 is not NaN => false"},
      {2, false, "Infinity is not NaN => false"},
  };
  for (const auto& c : cases) {
    SCOPED_TRACE(c.note);
    ASSERT_THAT(struct_like->Reset(c.row), IsOk());
    ICEBERG_UNWRAP_OR_FAIL(auto matched, evaluator->Evaluate(*struct_like));
    EXPECT_EQ(matched, c.expected);
  }
}
// notNaN("d") evaluated over a double column holding NaN, 2.0 and +infinity.
TEST_F(EvaluatorTest, NotNaN) {
  auto double_schema = std::make_unique<Schema>(
      std::vector<SchemaField>{SchemaField::MakeRequired(25, "d", float64())});
  ICEBERG_UNWRAP_OR_FAIL(auto evaluator,
                         Evaluator::Make(*double_schema, Expressions::NotNaN("d")));

  auto arrow_double_type = ::arrow::struct_({::arrow::field("d", ::arrow::float64())});

  // The column is assembled with a builder rather than parsed from JSON.
  ::arrow::DoubleBuilder builder;
  ASSERT_TRUE(builder.Append(std::numeric_limits<double>::quiet_NaN()).ok());
  ASSERT_TRUE(builder.Append(2.0).ok());
  ASSERT_TRUE(builder.Append(std::numeric_limits<double>::infinity()).ok());

  // Check each Arrow result instead of ValueOrDie(), which would abort the
  // whole test binary on error rather than fail this test.
  auto finished = builder.Finish();
  ASSERT_TRUE(finished.ok()) << finished.status().ToString();
  std::shared_ptr<::arrow::Array> double_array = finished.ValueOrDie();

  auto maybe_struct =
      ::arrow::StructArray::Make({double_array}, {arrow_double_type->field(0)});
  ASSERT_TRUE(maybe_struct.ok()) << maybe_struct.status().ToString();
  const auto& struct_array = maybe_struct.ValueOrDie();

  // Zero-initialize the C structs: the guards are armed before the exports,
  // so the structs must not hold indeterminate `release` pointers if an
  // export assertion fails.
  ArrowSchema c_schema{};
  ArrowArray c_array{};
  internal::ArrowSchemaGuard schema_guard(&c_schema);
  internal::ArrowArrayGuard array_guard(&c_array);
  ASSERT_TRUE(::arrow::ExportType(*arrow_double_type, &c_schema).ok());
  ASSERT_TRUE(::arrow::ExportArray(*struct_array, &c_array).ok());

  ICEBERG_UNWRAP_OR_FAIL(auto struct_like,
                         ArrowArrayStructLike::Make(c_schema, c_array, /*row_index=*/0));

  struct RowCase {
    int64_t row;
    bool expected;
    const char* note;
  };
  const RowCase cases[] = {
      {0, false, "NaN is NaN => false"},
      {1, true, "2.0 is not NaN => true"},
      {2, true, "Infinity is not NaN => true"},
  };
  for (const auto& c : cases) {
    SCOPED_TRACE(c.note);
    ASSERT_THAT(struct_like->Reset(c.row), IsOk());
    ICEBERG_UNWRAP_OR_FAIL(auto matched, evaluator->Evaluate(*struct_like));
    EXPECT_EQ(matched, c.expected);
  }
}
// Logical AND of an equality predicate and a notNull predicate.
TEST_F(EvaluatorTest, And) {
  struct RowCase {
    std::string row;
    bool expected;
  };

  // Predicate: x == 7 AND z is not null
  ICEBERG_UNWRAP_OR_FAIL(
      auto column_eval,
      Evaluator::Make(*schema_, Expressions::And(Expressions::Equal("x", Literal::Int(7)),
                                                 Expressions::NotNull("z"))));
  const RowCase column_cases[] = {
      // x = 7, z = 3: both sides hold
      {R"([{"x": 7, "y": 0.0, "z": 3, "s1": null, "s5": null}])", true},
      // x = 8, z = 3: equality fails
      {R"([{"x": 8, "y": 0.0, "z": 3, "s1": null, "s5": null}])", false},
      // x = 7, z = null: notNull fails
      {R"([{"x": 7, "y": 0.0, "z": null, "s1": null, "s5": null}])", false},
      // x = 8, z = null: both sides fail
      {R"([{"x": 8, "y": 0.0, "z": null, "s1": null, "s5": null}])", false},
  };
  for (const auto& c : column_cases) {
    TestData(c.row, *column_eval, c.expected);
  }

  // Predicate: s1.s2.s3.s4.i == 7 AND s1.s2.s3.s4.i is not null
  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(
          *schema_, Expressions::And(Expressions::Equal("s1.s2.s3.s4.i", Literal::Int(7)),
                                     Expressions::NotNull("s1.s2.s3.s4.i"))));
  const RowCase nested_cases[] = {
      // i = 7: both sides hold
      {R"([{"x": 7, "y": 0.0, "z": 3, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
       true},
      // i = 8: equality fails
      {R"([{"x": 8, "y": 0.0, "z": 3, "s1": {"s2": {"s3": {"s4": {"i": 8}}}}, "s5": null}])",
       false},
      // i = 8 again; this row differs from the previous one only in z (null)
      {R"([{"x": 8, "y": 0.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 8}}}}, "s5": null}])",
       false},
  };
  for (const auto& c : nested_cases) {
    TestData(c.row, *nested_eval, c.expected);
  }
}
// Logical OR of an equality predicate and a notNull predicate.
TEST_F(EvaluatorTest, Or) {
  struct RowCase {
    std::string row;
    bool expected;
  };

  // Predicate: x == 7 OR z is not null
  ICEBERG_UNWRAP_OR_FAIL(
      auto column_eval,
      Evaluator::Make(*schema_, Expressions::Or(Expressions::Equal("x", Literal::Int(7)),
                                                Expressions::NotNull("z"))));
  const RowCase column_cases[] = {
      // x = 7, z = 3: both sides hold
      {R"([{"x": 7, "y": 0.0, "z": 3, "s1": null, "s5": null}])", true},
      // x = 8, z = 3: notNull holds
      {R"([{"x": 8, "y": 0.0, "z": 3, "s1": null, "s5": null}])", true},
      // x = 7, z = null: equality holds
      {R"([{"x": 7, "y": 0.0, "z": null, "s1": null, "s5": null}])", true},
      // x = 8, z = null: neither side holds
      {R"([{"x": 8, "y": 0.0, "z": null, "s1": null, "s5": null}])", false},
  };
  for (const auto& c : column_cases) {
    TestData(c.row, *column_eval, c.expected);
  }

  // Predicate: s1.s2.s3.s4.i == 7 OR s1.s2.s3.s4.i is not null
  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(
          *schema_, Expressions::Or(Expressions::Equal("s1.s2.s3.s4.i", Literal::Int(7)),
                                    Expressions::NotNull("s1.s2.s3.s4.i"))));
  const RowCase nested_cases[] = {
      // i = 7: both sides hold
      {R"([{"x": 7, "y": 0.0, "z": 3, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
       true},
      // i = 8: notNull holds
      {R"([{"x": 8, "y": 0.0, "z": 3, "s1": {"s2": {"s3": {"s4": {"i": 8}}}}, "s5": null}])",
       true},
      // i = 7 with z = null: still true
      {R"([{"x": 7, "y": 0.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
       true},
  };
  for (const auto& c : nested_cases) {
    TestData(c.row, *nested_eval, c.expected);
  }
}
// Logical NOT wrapped around an equality predicate, on a top-level column
// and on a nested struct field.
TEST_F(EvaluatorTest, Not) {
  ICEBERG_UNWRAP_OR_FAIL(auto evaluator,
                         Evaluator::Make(*schema_, Expressions::Not(Expressions::Equal(
                                                       "x", Literal::Int(7)))));
  // not(7 == 7) => false
  TestData(R"([{"x": 7, "y": 0.0, "z": null, "s1": null, "s5": null}])", *evaluator,
           false);
  // not(8 == 7) => true
  TestData(R"([{"x": 8, "y": 0.0, "z": null, "s1": null, "s5": null}])", *evaluator,
           true);

  ICEBERG_UNWRAP_OR_FAIL(
      auto struct_evaluator,
      Evaluator::Make(*schema_, Expressions::Not(Expressions::Equal("s1.s2.s3.s4.i",
                                                                    Literal::Int(7)))));
  // "y" is a required (non-nullable) field in both the Iceberg schema and the
  // Arrow type, so the rows below carry a real value for it instead of null.
  // not(7 == 7) => false
  TestData(
      R"([{"x": 7, "y": 0.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
      *struct_evaluator, false);
  // not(8 == 7) => true
  TestData(
      R"([{"x": 8, "y": 0.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 8}}}}, "s5": null}])",
      *struct_evaluator, true);
}
// With case_sensitive=false, the reference "X" must bind to column "x".
TEST_F(EvaluatorTest, CaseInsensitiveNot) {
  // Use case-insensitive binding (case_sensitive = false)
  ICEBERG_UNWRAP_OR_FAIL(
      auto evaluator,
      Evaluator::Make(*schema_,
                      Expressions::Not(Expressions::Equal("X", Literal::Int(7))),
                      /*case_sensitive=*/false));
  // not(7 == 7) => false
  TestData(R"([{"x": 7, "y": 0.0, "z": null, "s1": null, "s5": null}])", *evaluator,
           false);
  // not(8 == 7) => true
  TestData(R"([{"x": 8, "y": 0.0, "z": null, "s1": null, "s5": null}])", *evaluator,
           true);

  // Test with nested struct
  ICEBERG_UNWRAP_OR_FAIL(auto struct_evaluator,
                         Evaluator::Make(*schema_,
                                         Expressions::Not(Expressions::Equal(
                                             "s1.s2.s3.s4.i", Literal::Int(7))),
                                         /*case_sensitive=*/false));
  // "y" is a required (non-nullable) field in both the Iceberg schema and the
  // Arrow type, so the rows below carry a real value for it instead of null.
  // not(7 == 7) => false
  TestData(
      R"([{"x": 7, "y": 0.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
      *struct_evaluator, false);
  // not(8 == 7) => true
  TestData(
      R"([{"x": 8, "y": 0.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 8}}}}, "s5": null}])",
      *struct_evaluator, true);
}
// With case_sensitive=true, the reference "X" is expected to fail binding.
TEST_F(EvaluatorTest, CaseSensitiveNot) {
  auto bind_result = Evaluator::Make(
      *schema_, Expressions::Not(Expressions::Equal("X", Literal::Int(7))),
      /*case_sensitive=*/true);

  // Both the error kind and the message text are checked.
  EXPECT_THAT(bind_result, IsError(ErrorKind::kInvalidExpression));
  EXPECT_THAT(bind_result, HasErrorMessage("Cannot find field 'X'"));
}
// IN predicates: literal counts on unbound predicates, then evaluation
// against int, double and nested-struct fields.
TEST_F(EvaluatorTest, In) {
  // Unbound IN predicates report every literal they were given, including
  // mixed types and duplicates.
  const auto three_ints =
      Expressions::In("s", {Literal::Int(7), Literal::Int(8), Literal::Int(9)});
  ASSERT_EQ(three_ints->literals().size(), size_t{3});

  const auto mixed_numeric =
      Expressions::In("s", {Literal::Int(7), Literal::Double(8.1),
                            Literal::Long(std::numeric_limits<int64_t>::max())});
  ASSERT_EQ(mixed_numeric->literals().size(), size_t{3});

  const auto repeated_strings = Expressions::In(
      "s", {Literal::String("abc"), Literal::String("abd"), Literal::String("abc")});
  ASSERT_EQ(repeated_strings->literals().size(), size_t{3});

  const auto single_int = Expressions::In("s", {Literal::Int(5)});
  ASSERT_EQ(single_int->literals().size(), size_t{1});

  const auto repeated_int = Expressions::In("s", {Literal::Int(5), Literal::Int(5)});
  ASSERT_EQ(repeated_int->literals().size(), size_t{2});

  // x IN (7, 8, int64 max)
  ICEBERG_UNWRAP_OR_FAIL(
      auto column_eval,
      Evaluator::Make(
          *schema_,
          Expressions::In("x", {Literal::Int(7), Literal::Int(8),
                                Literal::Long(std::numeric_limits<int64_t>::max())})));
  // x = 7 is a member
  TestData(R"([{"x": 7, "y": 8.0, "z": null, "s1": null, "s5": null}])", *column_eval,
           /*expected_result=*/true);
  // x = 9 is not a member
  TestData(R"([{"x": 9, "y": 8.0, "z": null, "s1": null, "s5": null}])", *column_eval,
           /*expected_result=*/false);

  // x IN (int64 max, int32 max, int64 min)
  ICEBERG_UNWRAP_OR_FAIL(
      auto int_bounds_eval,
      Evaluator::Make(
          *schema_,
          Expressions::In("x", {Literal::Long(std::numeric_limits<int64_t>::max()),
                                Literal::Int(std::numeric_limits<int32_t>::max()),
                                Literal::Long(std::numeric_limits<int64_t>::min())})));
  // x = int32 max is a member
  TestData(R"([{"x": 2147483647, "y": 8.0, "z": null, "s1": null, "s5": null}])",
           *int_bounds_eval, /*expected_result=*/true);
  // x = 6 is not a member
  TestData(R"([{"x": 6, "y": 6.8, "z": null, "s1": null, "s5": null}])",
           *int_bounds_eval, /*expected_result=*/false);

  // y IN (7, 8, 9.1) on the double column
  ICEBERG_UNWRAP_OR_FAIL(
      auto double_eval,
      Evaluator::Make(*schema_, Expressions::In("y", {Literal::Int(7), Literal::Int(8),
                                                      Literal::Double(9.1)})));
  // y = 7.0 is a member
  TestData(R"([{"x": 0, "y": 7.0, "z": null, "s1": null, "s5": null}])", *double_eval,
           /*expected_result=*/true);
  // y = 9.1 is a member
  TestData(R"([{"x": 7, "y": 9.1, "z": null, "s1": null, "s5": null}])", *double_eval,
           /*expected_result=*/true);
  // y = 6.8 is not a member
  TestData(R"([{"x": 6, "y": 6.8, "z": null, "s1": null, "s5": null}])", *double_eval,
           /*expected_result=*/false);

  // s1.s2.s3.s4.i IN (7, 8, 9)
  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(*schema_,
                      Expressions::In("s1.s2.s3.s4.i", {Literal::Int(7), Literal::Int(8),
                                                        Literal::Int(9)})));
  // i = 7 is a member
  TestData(
      R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
      *nested_eval, /*expected_result=*/true);
  // i = 6 is not a member
  TestData(
      R"([{"x": 6, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 6}}}}, "s5": null}])",
      *nested_eval, /*expected_result=*/false);
}
// NOT IN predicates evaluated against int, double and nested-struct fields.
TEST_F(EvaluatorTest, NotIn) {
  // x NOT IN (7, 8, int64 max)
  ICEBERG_UNWRAP_OR_FAIL(
      auto column_eval,
      Evaluator::Make(
          *schema_,
          Expressions::NotIn("x", {Literal::Int(7), Literal::Int(8),
                                   Literal::Long(std::numeric_limits<int64_t>::max())})));
  // x = 7 is a member, so NOT IN is false
  TestData(R"([{"x": 7, "y": 8.0, "z": null, "s1": null, "s5": null}])", *column_eval,
           /*expected_result=*/false);
  // x = 9 is not a member, so NOT IN is true
  TestData(R"([{"x": 9, "y": 8.0, "z": null, "s1": null, "s5": null}])", *column_eval,
           /*expected_result=*/true);

  // y NOT IN (7, 8, 9.1) on the double column
  ICEBERG_UNWRAP_OR_FAIL(
      auto double_eval,
      Evaluator::Make(*schema_, Expressions::NotIn("y", {Literal::Int(7), Literal::Int(8),
                                                         Literal::Double(9.1)})));
  // y = 7.0 is a member, so NOT IN is false
  TestData(R"([{"x": 0, "y": 7.0, "z": null, "s1": null, "s5": null}])", *double_eval,
           /*expected_result=*/false);
  // y = 9.1 is a member, so NOT IN is false
  TestData(R"([{"x": 7, "y": 9.1, "z": null, "s1": null, "s5": null}])", *double_eval,
           /*expected_result=*/false);
  // y = 6.8 is not a member, so NOT IN is true
  TestData(R"([{"x": 6, "y": 6.8, "z": null, "s1": null, "s5": null}])", *double_eval,
           /*expected_result=*/true);

  // s1.s2.s3.s4.i NOT IN (7, 8, 9)
  ICEBERG_UNWRAP_OR_FAIL(
      auto nested_eval,
      Evaluator::Make(
          *schema_, Expressions::NotIn("s1.s2.s3.s4.i", {Literal::Int(7), Literal::Int(8),
                                                         Literal::Int(9)})));
  // i = 7 is a member, so NOT IN is false
  TestData(
      R"([{"x": 7, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 7}}}}, "s5": null}])",
      *nested_eval, /*expected_result=*/false);
  // i = 6 is not a member, so NOT IN is true
  TestData(
      R"([{"x": 6, "y": 8.0, "z": null, "s1": {"s2": {"s3": {"s4": {"i": 6}}}}, "s5": null}])",
      *nested_eval, /*expected_result=*/true);
}
// Error paths when building or binding IN predicates.
TEST_F(EvaluatorTest, InExceptions) {
  // A null literal inside the IN set is expected to be rejected.
  {
    auto with_null_literal = Evaluator::Make(
        *schema_,
        Expressions::In("x", {Literal::Int(7), Literal::Int(8), Literal::Null(int32())}),
        /*case_sensitive=*/false);
    EXPECT_THAT(with_null_literal, IsError(ErrorKind::kInvalidExpression));
    EXPECT_THAT(with_null_literal,
                HasErrorMessage("Invalid value for conversion to type int: null (int)"));
  }

  // A double literal in an IN set on the int column is expected to fail the cast.
  {
    auto with_double_literal = Evaluator::Make(
        *schema_,
        Expressions::In("x", {Literal::Int(7), Literal::Int(8), Literal::Double(9.1)}),
        /*case_sensitive=*/false);
    EXPECT_THAT(with_double_literal, IsError(ErrorKind::kNotSupported));
    EXPECT_THAT(with_double_literal,
                HasErrorMessage("Cast from Double to int is not supported"));
  }

  // An IN predicate with an empty value list is expected to be rejected at creation.
  {
    auto without_values = UnboundPredicateImpl<BoundReference>::Make(
        Expression::Operation::kIn, Expressions::Ref("x"), {});
    EXPECT_THAT(without_values, IsError(ErrorKind::kInvalidExpression));
    EXPECT_THAT(without_values,
                HasErrorMessage("Cannot create IN predicate without a value"));
  }
}
} // namespace iceberg