blob: 8b31897b7de89a27aef269672f4c91a3a50527cb [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include <initializer_list>
#include <type_traits>
#include <utility>
#include <vector>
#include "core/column/column_vector.h"
#include "core/value/timestamptz_value.h"
#include "exec/common/join_utils.h"
namespace doris {
static ColumnPtr make_int32_column(const std::vector<int32_t>& values) {
auto col = ColumnInt32::create();
for (auto v : values) {
col->insert_value(v);
}
return col;
}
static ColumnPtr make_datev2_column(const std::vector<uint32_t>& values) {
auto col = ColumnDateV2::create();
for (auto v : values) {
col->insert_value(v);
}
return col;
}
template <typename IntType>
static AsofIndexGroup<IntType> make_group(
std::initializer_list<std::pair<IntType, uint32_t>> rows) {
AsofIndexGroup<IntType> group;
for (const auto& [value, row_idx] : rows) {
group.add_row(value, row_idx);
}
group.sort_and_finalize();
return group;
}
template <typename IntType>
static void expect_sorted(const std::vector<IntType>& values) {
for (size_t i = 1; i < values.size(); ++i) {
EXPECT_LE(values[i - 1], values[i]);
}
}
static bool contains_row(uint32_t row_idx, std::initializer_list<uint32_t> candidates) {
for (uint32_t candidate : candidates) {
if (candidate == row_idx) {
return true;
}
}
return false;
}
class AsofIndexGroupFinalizeTest : public ::testing::Test {};
TEST_F(AsofIndexGroupFinalizeTest, EmptyGroup) {
AsofIndexGroup<uint32_t> group;
group.sort_and_finalize();
EXPECT_TRUE(group.entries.empty());
EXPECT_TRUE(group.asof_values.empty());
EXPECT_TRUE(group.row_indexes.empty());
}
TEST_F(AsofIndexGroupFinalizeTest, SortAndFinalizeBuildsSoA) {
auto group = make_group<uint32_t>({{40, 1}, {10, 2}, {30, 3}, {20, 4}, {50, 5}});
EXPECT_TRUE(group.entries.empty());
EXPECT_EQ(group.asof_values, (std::vector<uint32_t> {10, 20, 30, 40, 50}));
EXPECT_EQ(group.row_indexes, (std::vector<uint32_t> {2, 4, 3, 1, 5}));
EXPECT_EQ(group.values_data(), group.asof_values.data());
}
TEST_F(AsofIndexGroupFinalizeTest, SortAndFinalizeRetainsValueRowAlignment) {
std::vector<std::pair<uint32_t, uint32_t>> rows = {{30, 7}, {10, 2}, {30, 5}, {20, 9}, {10, 4}};
AsofIndexGroup<uint32_t> group;
for (const auto& [value, row_idx] : rows) {
group.add_row(value, row_idx);
}
group.sort_and_finalize();
EXPECT_TRUE(group.entries.empty());
expect_sorted(group.asof_values);
ASSERT_EQ(group.asof_values.size(), rows.size());
ASSERT_EQ(group.row_indexes.size(), rows.size());
for (size_t i = 0; i < group.row_indexes.size(); ++i) {
bool found = false;
for (const auto& [value, row_idx] : rows) {
if (row_idx == group.row_indexes[i]) {
EXPECT_EQ(value, group.asof_values[i]);
found = true;
break;
}
}
EXPECT_TRUE(found);
}
}
TEST_F(AsofIndexGroupFinalizeTest, SingleElementSkipsPdqsort) {
auto group = make_group<uint32_t>({{42, 7}});
EXPECT_TRUE(group.entries.empty());
ASSERT_EQ(group.asof_values.size(), 1U);
EXPECT_EQ(group.asof_values[0], 42U);
EXPECT_EQ(group.row_indexes[0], 7U);
}
TEST_F(AsofIndexGroupFinalizeTest, DoubleFinalizeIsIdempotent) {
auto group = make_group<uint32_t>({{20, 2}, {10, 1}});
EXPECT_EQ(group.asof_values, (std::vector<uint32_t> {10, 20}));
group.sort_and_finalize();
EXPECT_EQ(group.asof_values, (std::vector<uint32_t> {10, 20}));
EXPECT_EQ(group.row_indexes, (std::vector<uint32_t> {1, 2}));
}
TEST_F(AsofIndexGroupFinalizeTest, UInt64GroupsAreSupported) {
auto group =
make_group<uint64_t>({{30000000003ULL, 3}, {10000000001ULL, 1}, {20000000002ULL, 2}});
EXPECT_EQ(group.asof_values,
(std::vector<uint64_t> {10000000001ULL, 20000000002ULL, 30000000003ULL}));
EXPECT_EQ(group.row_indexes, (std::vector<uint32_t> {1, 2, 3}));
}
class AsofIndexGroupBoundTest : public ::testing::Test {};
TEST_F(AsofIndexGroupBoundTest, EmptyGroupBoundReturnsZero) {
AsofIndexGroup<uint32_t> group;
group.sort_and_finalize();
EXPECT_EQ(group.lower_bound(42), 0U);
EXPECT_EQ(group.upper_bound(42), 0U);
}
TEST_F(AsofIndexGroupBoundTest, UInt64LowerAndUpperBound) {
auto group = make_group<uint64_t>(
{{10000000001ULL, 1}, {20000000002ULL, 2}, {20000000002ULL, 3}, {40000000004ULL, 4}});
EXPECT_EQ(group.lower_bound(5000000000ULL), 0U);
EXPECT_EQ(group.upper_bound(5000000000ULL), 0U);
EXPECT_EQ(group.lower_bound(20000000002ULL), 1U);
EXPECT_EQ(group.upper_bound(20000000002ULL), 3U);
EXPECT_EQ(group.lower_bound(30000000000ULL), 3U);
EXPECT_EQ(group.upper_bound(30000000000ULL), 3U);
EXPECT_EQ(group.lower_bound(50000000000ULL), 4U);
EXPECT_EQ(group.upper_bound(50000000000ULL), 4U);
}
TEST_F(AsofIndexGroupBoundTest, LowerAndUpperBoundRespectDuplicates) {
auto group = make_group<uint32_t>({{10, 1}, {20, 2}, {20, 3}, {40, 4}});
EXPECT_EQ(group.lower_bound(5), 0U);
EXPECT_EQ(group.upper_bound(5), 0U);
EXPECT_EQ(group.lower_bound(20), 1U);
EXPECT_EQ(group.upper_bound(20), 3U);
EXPECT_EQ(group.lower_bound(30), 3U);
EXPECT_EQ(group.upper_bound(30), 3U);
EXPECT_EQ(group.lower_bound(50), 4U);
EXPECT_EQ(group.upper_bound(50), 4U);
}
class AsofIndexGroupMatchTest : public ::testing::Test {
protected:
AsofIndexGroup<uint32_t> group;
void SetUp() override {
group = make_group<uint32_t>({{10, 1}, {20, 2}, {30, 3}, {40, 4}, {50, 5}});
}
};
TEST_F(AsofIndexGroupMatchTest, FourInequalitiesAcrossRepresentativeValues) {
struct MatchCase {
uint32_t probe;
uint32_t ge;
uint32_t gt;
uint32_t le;
uint32_t lt;
};
std::vector<MatchCase> cases = {{5, 0, 0, 1, 1}, {10, 1, 0, 1, 2}, {25, 2, 2, 3, 3},
{30, 3, 2, 3, 4}, {50, 5, 4, 5, 0}, {100, 5, 5, 0, 0}};
for (const auto& test_case : cases) {
EXPECT_EQ((group.template find_best_match<true, false>(test_case.probe)), test_case.ge);
EXPECT_EQ((group.template find_best_match<true, true>(test_case.probe)), test_case.gt);
EXPECT_EQ((group.template find_best_match<false, false>(test_case.probe)), test_case.le);
EXPECT_EQ((group.template find_best_match<false, true>(test_case.probe)), test_case.lt);
}
}
TEST_F(AsofIndexGroupMatchTest, SingleElementGroupAcrossAllModes) {
auto single = make_group<uint32_t>({{30, 3}});
EXPECT_EQ((single.template find_best_match<true, false>(30)), 3U);
EXPECT_EQ((single.template find_best_match<true, false>(40)), 3U);
EXPECT_EQ((single.template find_best_match<true, false>(20)), 0U);
EXPECT_EQ((single.template find_best_match<true, true>(30)), 0U);
EXPECT_EQ((single.template find_best_match<true, true>(40)), 3U);
EXPECT_EQ((single.template find_best_match<false, false>(30)), 3U);
EXPECT_EQ((single.template find_best_match<false, false>(20)), 3U);
EXPECT_EQ((single.template find_best_match<false, false>(40)), 0U);
EXPECT_EQ((single.template find_best_match<false, true>(30)), 0U);
EXPECT_EQ((single.template find_best_match<false, true>(20)), 3U);
EXPECT_EQ((single.template find_best_match<false, true>(40)), 0U);
}
TEST_F(AsofIndexGroupMatchTest, AllEqualValuesAcrossAllModes) {
auto equal_group = make_group<uint32_t>({{30, 1}, {30, 2}, {30, 3}});
uint32_t row = equal_group.template find_best_match<true, false>(30);
EXPECT_TRUE(contains_row(row, {1, 2, 3}));
EXPECT_EQ((equal_group.template find_best_match<true, true>(30)), 0U);
row = equal_group.template find_best_match<true, false>(40);
EXPECT_TRUE(contains_row(row, {1, 2, 3}));
row = equal_group.template find_best_match<false, false>(30);
EXPECT_TRUE(contains_row(row, {1, 2, 3}));
EXPECT_EQ((equal_group.template find_best_match<false, true>(30)), 0U);
row = equal_group.template find_best_match<false, false>(20);
EXPECT_TRUE(contains_row(row, {1, 2, 3}));
}
TEST_F(AsofIndexGroupMatchTest, LargeGroupBinarySearch) {
AsofIndexGroup<uint32_t> large_group;
for (uint32_t i = 1; i <= 1000; ++i) {
large_group.add_row(i * 2, i);
}
large_group.sort_and_finalize();
EXPECT_EQ((large_group.template find_best_match<true, false>(1)), 0U);
EXPECT_EQ((large_group.template find_best_match<true, false>(500)), 250U);
EXPECT_EQ((large_group.template find_best_match<true, false>(999)), 499U);
EXPECT_EQ((large_group.template find_best_match<true, false>(1000)), 500U);
EXPECT_EQ((large_group.template find_best_match<true, false>(1001)), 500U);
EXPECT_EQ((large_group.template find_best_match<true, false>(2000)), 1000U);
EXPECT_EQ((large_group.template find_best_match<true, false>(2001)), 1000U);
EXPECT_EQ((large_group.template find_best_match<false, false>(1)), 1U);
EXPECT_EQ((large_group.template find_best_match<false, false>(999)), 500U);
EXPECT_EQ((large_group.template find_best_match<false, false>(2001)), 0U);
EXPECT_EQ((large_group.template find_best_match<true, true>(2)), 0U);
EXPECT_EQ((large_group.template find_best_match<true, true>(500)), 249U);
EXPECT_EQ((large_group.template find_best_match<true, true>(1000)), 499U);
EXPECT_EQ((large_group.template find_best_match<true, true>(2000)), 999U);
EXPECT_EQ((large_group.template find_best_match<true, true>(2001)), 1000U);
EXPECT_EQ((large_group.template find_best_match<false, true>(1)), 1U);
EXPECT_EQ((large_group.template find_best_match<false, true>(2)), 2U);
EXPECT_EQ((large_group.template find_best_match<false, true>(999)), 500U);
EXPECT_EQ((large_group.template find_best_match<false, true>(2000)), 0U);
EXPECT_EQ((large_group.template find_best_match<false, true>(2001)), 0U);
}
TEST_F(AsofIndexGroupMatchTest, EmptyGroupFindBestMatchReturnsZero) {
AsofIndexGroup<uint32_t> empty_group;
empty_group.sort_and_finalize();
EXPECT_EQ((empty_group.template find_best_match<true, false>(42)), 0U);
EXPECT_EQ((empty_group.template find_best_match<true, true>(42)), 0U);
EXPECT_EQ((empty_group.template find_best_match<false, false>(42)), 0U);
EXPECT_EQ((empty_group.template find_best_match<false, true>(42)), 0U);
}
TEST_F(AsofIndexGroupMatchTest, UInt64FindBestMatch) {
auto group64 = make_group<uint64_t>(
{{10000000001ULL, 1}, {20000000002ULL, 2}, {30000000003ULL, 3}, {40000000004ULL, 4}});
EXPECT_EQ((group64.template find_best_match<true, false>(25000000000ULL)), 2U);
EXPECT_EQ((group64.template find_best_match<true, true>(30000000003ULL)), 2U);
EXPECT_EQ((group64.template find_best_match<false, false>(25000000000ULL)), 3U);
EXPECT_EQ((group64.template find_best_match<false, true>(40000000004ULL)), 0U);
}
class AsofColumnDispatchTest : public ::testing::Test {};
TEST_F(AsofColumnDispatchTest, DateV2Dispatch) {
auto col = make_datev2_column({1, 2, 3});
bool dispatched_to_datev2 = false;
asof_column_dispatch(col.get(), [&](const auto* typed_col) {
if constexpr (std::is_same_v<std::decay_t<decltype(*typed_col)>, ColumnDateV2>) {
dispatched_to_datev2 = true;
}
});
EXPECT_TRUE(dispatched_to_datev2);
}
TEST_F(AsofColumnDispatchTest, DateTimeV2Dispatch) {
auto col = ColumnDateTimeV2::create();
uint64_t v1 = 1, v2 = 2;
col->insert_data(reinterpret_cast<const char*>(&v1), 0);
col->insert_data(reinterpret_cast<const char*>(&v2), 0);
bool dispatched_to_dtv2 = false;
asof_column_dispatch(col.get(), [&](const auto* typed_col) {
if constexpr (std::is_same_v<std::decay_t<decltype(*typed_col)>, ColumnDateTimeV2>) {
dispatched_to_dtv2 = true;
}
});
EXPECT_TRUE(dispatched_to_dtv2);
}
TEST_F(AsofColumnDispatchTest, TimestampTZDispatch) {
auto col = ColumnTimeStampTz::create();
uint64_t v1 = 1, v2 = 2;
col->insert_data(reinterpret_cast<const char*>(&v1), 0);
col->insert_data(reinterpret_cast<const char*>(&v2), 0);
bool dispatched_to_tstz = false;
asof_column_dispatch(col.get(), [&](const auto* typed_col) {
if constexpr (std::is_same_v<std::decay_t<decltype(*typed_col)>, ColumnTimeStampTz>) {
dispatched_to_tstz = true;
}
});
EXPECT_TRUE(dispatched_to_tstz);
}
TEST_F(AsofColumnDispatchTest, FallbackDispatch) {
auto col = make_int32_column({1, 2, 3});
bool dispatched_to_icol = false;
asof_column_dispatch(col.get(), [&](const auto* typed_col) {
if constexpr (std::is_same_v<std::decay_t<decltype(*typed_col)>, IColumn>) {
dispatched_to_icol = true;
}
});
EXPECT_TRUE(dispatched_to_icol);
}
class AsofJoinHelperTest : public ::testing::Test {};
TEST_F(AsofJoinHelperTest, IsAsofJoinRuntime) {
EXPECT_TRUE(is_asof_join(TJoinOp::ASOF_LEFT_INNER_JOIN));
EXPECT_TRUE(is_asof_join(TJoinOp::ASOF_LEFT_OUTER_JOIN));
EXPECT_FALSE(is_asof_join(TJoinOp::INNER_JOIN));
EXPECT_FALSE(is_asof_join(TJoinOp::LEFT_OUTER_JOIN));
}
TEST_F(AsofJoinHelperTest, IsAsofJoinCompileTime) {
static_assert(is_asof_join_op_v<TJoinOp::ASOF_LEFT_INNER_JOIN>);
static_assert(is_asof_join_op_v<TJoinOp::ASOF_LEFT_OUTER_JOIN>);
static_assert(!is_asof_join_op_v<TJoinOp::INNER_JOIN>);
static_assert(!is_asof_join_op_v<TJoinOp::LEFT_OUTER_JOIN>);
static_assert(!is_asof_outer_join_op_v<TJoinOp::ASOF_LEFT_INNER_JOIN>);
static_assert(is_asof_outer_join_op_v<TJoinOp::ASOF_LEFT_OUTER_JOIN>);
static_assert(!is_asof_outer_join_op_v<TJoinOp::INNER_JOIN>);
}
class AsofIndexVariantTest : public ::testing::Test {};
TEST_F(AsofIndexVariantTest, MonostateIsDefault) {
AsofIndexVariant variant;
EXPECT_TRUE(std::holds_alternative<std::monostate>(variant));
}
TEST_F(AsofIndexVariantTest, EmplaceUint32Groups) {
AsofIndexVariant variant;
auto& groups = variant.emplace<std::vector<AsofIndexGroup<uint32_t>>>();
groups.emplace_back();
groups[0].add_row(10, 1);
groups[0].sort_and_finalize();
EXPECT_TRUE(std::holds_alternative<std::vector<AsofIndexGroup<uint32_t>>>(variant));
EXPECT_EQ(std::get<std::vector<AsofIndexGroup<uint32_t>>>(variant)[0].asof_values[0], 10U);
}
TEST_F(AsofIndexVariantTest, EmplaceUint64Groups) {
AsofIndexVariant variant;
auto& groups = variant.emplace<std::vector<AsofIndexGroup<uint64_t>>>();
groups.emplace_back();
groups[0].add_row(99999999999ULL, 1);
groups[0].sort_and_finalize();
EXPECT_TRUE(std::holds_alternative<std::vector<AsofIndexGroup<uint64_t>>>(variant));
EXPECT_EQ(std::get<std::vector<AsofIndexGroup<uint64_t>>>(variant)[0].asof_values[0],
99999999999ULL);
}
} // namespace doris