blob: f1543dd56db28d51196fc36c00ee82f3bf994aad [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "iceberg/util/struct_like_set.h"
#include <limits>
#include <string>
#include <vector>
#include <gtest/gtest.h>
#include "iceberg/schema_field.h"
#include "iceberg/test/matchers.h"
#include "iceberg/type.h"
namespace iceberg {
class SimpleStructLike : public StructLike {
public:
explicit SimpleStructLike(std::vector<Scalar> fields) : fields_(std::move(fields)) {}
Result<Scalar> GetField(size_t pos) const override {
if (pos >= fields_.size()) {
return NotFound("field position {} out of range [0, {})", pos, fields_.size());
}
return fields_[pos];
}
size_t num_fields() const override { return fields_.size(); }
void SetField(size_t pos, Scalar value) { fields_[pos] = std::move(value); }
private:
std::vector<Scalar> fields_;
};
class SimpleArrayLike : public ArrayLike {
public:
explicit SimpleArrayLike(std::vector<Scalar> elements)
: elements_(std::move(elements)) {}
Result<Scalar> GetElement(size_t pos) const override {
if (pos >= elements_.size()) {
return NotFound("element position {} out of range [0, {})", pos, elements_.size());
}
return elements_[pos];
}
size_t size() const override { return elements_.size(); }
private:
std::vector<Scalar> elements_;
};
// Test-only MapLike that keeps keys and values in two parallel vectors.
class SimpleMapLike : public MapLike {
 public:
  SimpleMapLike(std::vector<Scalar> keys, std::vector<Scalar> values)
      : keys_(std::move(keys)), values_(std::move(values)) {}

  // Yields the key at `pos` when it exists; otherwise reports NotFound.
  Result<Scalar> GetKey(size_t pos) const override {
    const size_t key_count = keys_.size();
    if (pos < key_count) {
      return keys_[pos];
    }
    return NotFound("key position {} out of range [0, {})", pos, key_count);
  }

  // Yields the value at `pos` when it exists; otherwise reports NotFound.
  // The bound is the number of values, which is tracked separately from keys.
  Result<Scalar> GetValue(size_t pos) const override {
    const size_t value_count = values_.size();
    if (pos < value_count) {
      return values_[pos];
    }
    return NotFound("value position {} out of range [0, {})", pos, value_count);
  }

  // The map size is reported as the number of keys.
  size_t size() const override { return keys_.size(); }

 private:
  std::vector<Scalar> keys_;
  std::vector<Scalar> values_;
};
// Test-only StructLike that advertises a field count but fails every field
// read with NotFound, used to exercise error propagation.
class FailingStructLike : public StructLike {
 public:
  explicit FailingStructLike(size_t num_fields) : field_count_(num_fields) {}

  // Always fails, regardless of `pos`.
  Result<Scalar> GetField(size_t pos) const override {
    return NotFound("boom at field {}", pos);
  }

  // Reports the count supplied at construction.
  size_t num_fields() const override { return field_count_; }

 private:
  size_t field_count_;
};
// Builds a StructType whose fields are all optional. Field ids are assigned
// sequentially starting at 1, following the order of `fields`.
StructType MakeStructType(
    std::vector<std::pair<std::string, std::shared_ptr<Type>>> fields) {
  std::vector<SchemaField> columns;
  columns.reserve(fields.size());
  for (size_t idx = 0; idx < fields.size(); ++idx) {
    auto& entry = fields[idx];
    const auto field_id = static_cast<int32_t>(idx + 1);
    // The type pointer is moved out of the by-value argument.
    columns.push_back(
        SchemaField::MakeOptional(field_id, entry.first, std::move(entry.second)));
  }
  return StructType(std::move(columns));
}
// A freshly constructed set is empty and reports a probe row as absent.
TEST(StructLikeSetTest, EmptySet) {
  auto schema = MakeStructType({{"id", int32()}});
  StructLikeSet rows(schema);
  SimpleStructLike probe(std::vector<Scalar>{Scalar{int32_t{1}}});

  EXPECT_TRUE(rows.IsEmpty());
  EXPECT_EQ(rows.Size(), 0);
  EXPECT_THAT(rows.Contains(probe), HasValue(::testing::Eq(false)));
}
// Inserted rows are found again; a row that was never inserted is not.
TEST(StructLikeSetTest, InsertAndContains) {
  auto schema = MakeStructType({{"id", int32()}, {"name", string()}});
  StructLikeSet rows(schema);

  // The scalars below hold string_views into these strings.
  std::string alice = "alice";
  std::string bob = "bob";
  std::string charlie = "charlie";

  SimpleStructLike alice_row({Scalar{int32_t{1}}, Scalar{std::string_view(alice)}});
  SimpleStructLike bob_row({Scalar{int32_t{2}}, Scalar{std::string_view(bob)}});
  // Deliberately never inserted.
  SimpleStructLike charlie_row({Scalar{int32_t{3}}, Scalar{std::string_view(charlie)}});

  ASSERT_THAT(rows.Insert(alice_row), IsOk());
  ASSERT_THAT(rows.Insert(bob_row), IsOk());

  EXPECT_EQ(rows.Size(), 2);
  EXPECT_FALSE(rows.IsEmpty());
  EXPECT_THAT(rows.Contains(alice_row), HasValue(::testing::Eq(true)));
  EXPECT_THAT(rows.Contains(bob_row), HasValue(::testing::Eq(true)));
  EXPECT_THAT(rows.Contains(charlie_row), HasValue(::testing::Eq(false)));
}
// Insert and lookup still work when the set is constructed with an explicit
// second constructor argument of 8.
TEST(StructLikeSetTest, InsertAndContainsWithCustomArenaInitialSize) {
  auto schema = MakeStructType({{"id", int32()}, {"name", string()}});
  StructLikeSet rows(schema, 8);

  std::string alice = "alice";
  SimpleStructLike alice_row({Scalar{int32_t{1}}, Scalar{std::string_view(alice)}});

  ASSERT_THAT(rows.Insert(alice_row), IsOk());
  EXPECT_THAT(rows.Contains(alice_row), HasValue(::testing::Eq(true)));
}
// Inserting the same row twice succeeds both times but stores it only once.
TEST(StructLikeSetTest, DuplicateInsert) {
  auto schema = MakeStructType({{"id", int32()}});
  StructLikeSet rows(schema);
  SimpleStructLike answer(std::vector<Scalar>{Scalar{int32_t{42}}});

  // First insertion adds the row.
  ASSERT_THAT(rows.Insert(answer), IsOk());
  EXPECT_EQ(rows.Size(), 1);

  // Second insertion must leave the size unchanged.
  ASSERT_THAT(rows.Insert(answer), IsOk());
  EXPECT_EQ(rows.Size(), 1);
}
// Rows with a null (monostate) field are distinguished by their non-null
// field and are matched by an equal row built independently.
TEST(StructLikeSetTest, FieldsWithNulls) {
  auto schema = MakeStructType({{"id", int32()}, {"data", int64()}});
  StructLikeSet rows(schema);

  // Both rows carry a null in the second position.
  SimpleStructLike first({Scalar{int32_t{1}}, Scalar{std::monostate{}}});
  SimpleStructLike second({Scalar{int32_t{2}}, Scalar{std::monostate{}}});
  // Separate object with the same contents as `first`.
  SimpleStructLike first_twin({Scalar{int32_t{1}}, Scalar{std::monostate{}}});

  ASSERT_THAT(rows.Insert(first), IsOk());
  ASSERT_THAT(rows.Insert(second), IsOk());
  EXPECT_EQ(rows.Size(), 2);

  EXPECT_THAT(rows.Contains(first), HasValue(::testing::Eq(true)));
  EXPECT_THAT(rows.Contains(second), HasValue(::testing::Eq(true)));
  EXPECT_THAT(rows.Contains(first_twin), HasValue(::testing::Eq(true)));
}
// A row inserted from a string that later goes out of scope must still be
// found, so the set is expected to keep its own copy of the string data.
TEST(StructLikeSetTest, StringFieldOwnership) {
  auto schema = MakeStructType({{"name", std::make_shared<StringType>()}});
  StructLikeSet rows(schema);

  {
    // The backing string lives only inside this scope.
    std::string short_lived = "temporary_string_data";
    SimpleStructLike transient(
        std::vector<Scalar>{Scalar{std::string_view(short_lived)}});
    ASSERT_THAT(rows.Insert(transient), IsOk());
  }

  // The original string is gone; the entry must still be counted.
  EXPECT_EQ(rows.Size(), 1);

  // Probe with an independent string that has equal contents.
  std::string same_text = "temporary_string_data";
  SimpleStructLike probe(std::vector<Scalar>{Scalar{std::string_view(same_text)}});
  EXPECT_THAT(rows.Contains(probe), HasValue(::testing::Eq(true)));
}
// A row mixing boolean, integer, floating-point, string and date fields is
// found after insertion; changing only the boolean field makes it a miss.
TEST(StructLikeSetTest, MultipleTypes) {
  auto schema = MakeStructType({{"b", boolean()},
                                {"i", int32()},
                                {"l", int64()},
                                {"f", float32()},
                                {"d", float64()},
                                {"s", string()},
                                {"dt", date()}});
  StructLikeSet rows(schema);

  std::string text = "hello";

  SimpleStructLike stored({Scalar{true}, Scalar{int32_t{1}}, Scalar{int64_t{2}},
                           Scalar{1.0f}, Scalar{2.0}, Scalar{std::string_view(text)},
                           Scalar{int32_t{19000}}});
  // Identical to `stored` except for the leading boolean.
  SimpleStructLike flipped({Scalar{false}, Scalar{int32_t{1}}, Scalar{int64_t{2}},
                            Scalar{1.0f}, Scalar{2.0}, Scalar{std::string_view(text)},
                            Scalar{int32_t{19000}}});

  ASSERT_THAT(rows.Insert(stored), IsOk());
  EXPECT_THAT(rows.Contains(stored), HasValue(::testing::Eq(true)));
  EXPECT_THAT(rows.Contains(flipped), HasValue(::testing::Eq(false)));
}
// Rows containing a nested struct are matched by the nested contents, not by
// the identity of the nested object.
TEST(StructLikeSetTest, NestedStruct) {
  auto inner_type = struct_({SchemaField::MakeOptional(10, "x", int32()),
                             SchemaField::MakeOptional(11, "y", string())});
  auto outer_type = MakeStructType({{"id", int32()}, {"nested", inner_type}});
  StructLikeSet rows(outer_type);

  // Builds a nested struct {x = 10, y = <view of text>}. The returned object
  // refers to `text`, so callers pass named strings that outlive it.
  const auto make_nested = [](const std::string& text) -> std::shared_ptr<StructLike> {
    return std::make_shared<SimpleStructLike>(
        std::vector<Scalar>{Scalar{int32_t{10}}, Scalar{std::string_view(text)}});
  };

  std::string stored_text = "nested_value";
  SimpleStructLike stored({Scalar{int32_t{1}}, Scalar{make_nested(stored_text)}});
  ASSERT_THAT(rows.Insert(stored), IsOk());
  EXPECT_EQ(rows.Size(), 1);

  // Equal nested contents held by a different object: expected hit.
  std::string equal_text = "nested_value";
  SimpleStructLike equal_probe({Scalar{int32_t{1}}, Scalar{make_nested(equal_text)}});
  EXPECT_THAT(rows.Contains(equal_probe), HasValue(::testing::Eq(true)));

  // Nested string differs: expected miss.
  std::string other_text = "different";
  SimpleStructLike other_probe({Scalar{int32_t{1}}, Scalar{make_nested(other_text)}});
  EXPECT_THAT(rows.Contains(other_probe), HasValue(::testing::Eq(false)));
}
// A row with a nested struct must remain findable after the inserted row, its
// nested object and the backing string have all gone out of scope.
TEST(StructLikeSetTest, NestedStructOwnership) {
  auto inner_type = struct_({SchemaField::MakeOptional(10, "s", string())});
  auto outer_type = MakeStructType({{"nested", inner_type}});
  StructLikeSet rows(outer_type);

  {
    // Everything created here is destroyed at the closing brace.
    std::string short_lived = "will_be_destroyed";
    auto nested = std::make_shared<SimpleStructLike>(
        std::vector<Scalar>{Scalar{std::string_view(short_lived)}});
    SimpleStructLike transient(
        std::vector<Scalar>{Scalar{std::shared_ptr<StructLike>(nested)}});
    ASSERT_THAT(rows.Insert(transient), IsOk());
  }

  // The inserted entry must survive the scope above.
  EXPECT_EQ(rows.Size(), 1);

  // Probe with freshly built, equal contents.
  std::string same_text = "will_be_destroyed";
  auto probe_nested = std::make_shared<SimpleStructLike>(
      std::vector<Scalar>{Scalar{std::string_view(same_text)}});
  SimpleStructLike probe(
      std::vector<Scalar>{Scalar{std::shared_ptr<StructLike>(probe_nested)}});
  EXPECT_THAT(rows.Contains(probe), HasValue(::testing::Eq(true)));
}
// A row whose fields are all null can be stored and found, and a second
// all-null row counts as a duplicate of it.
TEST(StructLikeSetTest, AllNullRow) {
  auto schema = MakeStructType({{"a", int32()}, {"b", string()}});
  StructLikeSet rows(schema);

  SimpleStructLike all_null({Scalar{std::monostate{}}, Scalar{std::monostate{}}});
  SimpleStructLike all_null_again({Scalar{std::monostate{}}, Scalar{std::monostate{}}});

  ASSERT_THAT(rows.Insert(all_null), IsOk());
  EXPECT_EQ(rows.Size(), 1);
  EXPECT_THAT(rows.Contains(all_null), HasValue(::testing::Eq(true)));

  // Inserting an equal all-null row must not grow the set.
  ASSERT_THAT(rows.Insert(all_null_again), IsOk());
  EXPECT_EQ(rows.Size(), 1);
}
// Contains() must surface the NotFound error raised while reading a field.
TEST(StructLikeSetTest, ContainsPropagatesFieldAccessError) {
  auto schema = MakeStructType({{"id", int32()}});
  StructLikeSet rows(schema);

  // One advertised field whose read always fails.
  FailingStructLike broken(1);
  EXPECT_THAT(rows.Contains(broken), IsError(ErrorKind::kNotFound));
}
// Insert() must surface the NotFound error raised while reading a field.
TEST(StructLikeSetTest, InsertPropagatesFieldAccessError) {
  auto schema = MakeStructType({{"id", int32()}});
  StructLikeSet rows(schema);

  // One advertised field whose read always fails.
  FailingStructLike broken(1);
  EXPECT_THAT(rows.Insert(broken), IsError(ErrorKind::kNotFound));
}
// A row with fewer fields than the struct type is rejected as an invalid
// argument.
TEST(StructLikeSetTest, InsertRejectsFieldCountMismatch) {
  auto schema = MakeStructType({{"id", int32()}, {"name", string()}});
  StructLikeSet rows(schema);

  // Two fields declared, only one supplied.
  SimpleStructLike too_short(std::vector<Scalar>{Scalar{int32_t{1}}});
  EXPECT_THAT(rows.Insert(too_short), IsError(ErrorKind::kInvalidArgument));
}
// A string scalar supplied for an int32 field is rejected as an invalid
// argument.
TEST(StructLikeSetTest, ContainsRejectsFieldTypeMismatch) {
  auto schema = MakeStructType({{"id", int32()}});
  StructLikeSet rows(schema);

  SimpleStructLike wrong_type(
      std::vector<Scalar>{Scalar{std::string_view("not_an_int")}});
  EXPECT_THAT(rows.Contains(wrong_type), IsError(ErrorKind::kInvalidArgument));
}
// Floating-point keys: a quiet NaN and a signaling NaN are expected to match
// each other, while -0.0 and +0.0 are expected to be distinct entries.
TEST(StructLikeSetTest, FloatAndDoubleFollowJavaEqualitySemantics) {
  auto schema = MakeStructType({{"f", float32()}, {"d", float64()}});
  StructLikeSet rows(schema);

  // Store a row of quiet NaNs.
  float quiet_f = std::numeric_limits<float>::quiet_NaN();
  double quiet_d = std::numeric_limits<double>::quiet_NaN();
  SimpleStructLike quiet_nan_row({Scalar{quiet_f}, Scalar{quiet_d}});
  ASSERT_THAT(rows.Insert(quiet_nan_row), IsOk());

  // Probe with signaling NaNs; the lookup must still hit.
  float signaling_f = std::numeric_limits<float>::signaling_NaN();
  double signaling_d = std::numeric_limits<double>::signaling_NaN();
  SimpleStructLike signaling_nan_row({Scalar{signaling_f}, Scalar{signaling_d}});
  EXPECT_THAT(rows.Contains(signaling_nan_row), HasValue(::testing::Eq(true)));

  // Signed zeros: inserting -0.0 must not make +0.0 present.
  SimpleStructLike minus_zero({Scalar{-0.0f}, Scalar{-0.0}});
  SimpleStructLike plus_zero({Scalar{0.0f}, Scalar{0.0}});
  ASSERT_THAT(rows.Insert(minus_zero), IsOk());
  EXPECT_THAT(rows.Contains(plus_zero), HasValue(::testing::Eq(false)));

  // NaN row, -0.0 row and +0.0 row: three entries in total.
  ASSERT_THAT(rows.Insert(plus_zero), IsOk());
  EXPECT_EQ(rows.Size(), 3);
}
// Rows containing a map are matched by the map's contents: an equal map held
// by another object is a hit, a map with a different key is a miss.
TEST(StructLikeSetTest, NestedMapIsHashedAndComparedRecursively) {
  auto map_type =
      std::make_shared<MapType>(SchemaField::MakeRequired(10, "key", string()),
                                SchemaField::MakeOptional(11, "value", int32()));
  auto schema = MakeStructType({{"m", map_type}});
  StructLikeSet rows(schema);

  // Builds a one-entry map {<view of key> -> 7}. The returned object refers to
  // `key`, so callers pass named strings that outlive it.
  const auto single_entry_map = [](const std::string& key) -> std::shared_ptr<MapLike> {
    return std::make_shared<SimpleMapLike>(
        std::vector<Scalar>{Scalar{std::string_view(key)}},
        std::vector<Scalar>{Scalar{int32_t{7}}});
  };

  std::string stored_key = "a";
  SimpleStructLike stored(std::vector<Scalar>{Scalar{single_entry_map(stored_key)}});
  ASSERT_THAT(rows.Insert(stored), IsOk());

  // Same key and value in a separately built map: expected hit.
  std::string equal_key = "a";
  SimpleStructLike equal_probe(
      std::vector<Scalar>{Scalar{single_entry_map(equal_key)}});
  EXPECT_THAT(rows.Contains(equal_probe), HasValue(::testing::Eq(true)));

  // Different key with the same value: expected miss.
  std::string other_key = "b";
  SimpleStructLike other_probe(
      std::vector<Scalar>{Scalar{single_entry_map(other_key)}});
  EXPECT_THAT(rows.Contains(other_probe), HasValue(::testing::Eq(false)));
}
} // namespace iceberg