blob: 2f64f813c46060ff73daf673d81c1e4efc9bc522 [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "paimon/common/utils/fields_comparator.h"
#include <cstddef>
#include <string>
#include <variant>
#include "arrow/api.h"
#include "gtest/gtest.h"
#include "paimon/common/data/binary_row.h"
#include "paimon/common/data/data_define.h"
#include "paimon/common/types/data_field.h"
#include "paimon/common/utils/date_time_utils.h"
#include "paimon/common/utils/decimal_utils.h"
#include "paimon/data/decimal.h"
#include "paimon/data/timestamp.h"
#include "paimon/memory/bytes.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/status.h"
#include "paimon/testing/utils/binary_row_generator.h"
#include "paimon/testing/utils/testharness.h"
namespace paimon::test {
class FieldsComparatorTest : public ::testing::Test {
public:
void SetUp() override {}
void TearDown() override {}
void CheckResult(const InternalRow& row1, const InternalRow& row2,
const std::vector<std::shared_ptr<arrow::DataType>>& input_types,
const std::vector<int32_t>& sort_fields, bool has_null = false) {
std::vector<DataField> data_fields;
data_fields.reserve(input_types.size());
for (const auto& type : input_types) {
data_fields.emplace_back(/*id=*/0, arrow::field("fake_name", type));
}
ASSERT_OK_AND_ASSIGN(auto comp1, FieldsComparator::Create(data_fields, sort_fields,
/*is_ascending_order=*/true));
ASSERT_EQ(-1, comp1->CompareTo(row1, row2));
ASSERT_EQ(1, comp1->CompareTo(row2, row1));
ASSERT_EQ(0, comp1->CompareTo(row1, row1));
ASSERT_EQ(0, comp1->CompareTo(row2, row2));
if (!has_null) {
ASSERT_OK_AND_ASSIGN(auto comp2,
FieldsComparator::Create(data_fields, sort_fields,
/*is_ascending_order=*/false));
ASSERT_EQ(1, comp2->CompareTo(row1, row2));
ASSERT_EQ(-1, comp2->CompareTo(row2, row1));
ASSERT_EQ(0, comp2->CompareTo(row1, row1));
ASSERT_EQ(0, comp2->CompareTo(row2, row2));
}
}
void CheckResult(const InternalRow& row1, const InternalRow& row2,
const std::vector<std::shared_ptr<arrow::DataType>>& input_types,
bool has_null = false) {
std::vector<int32_t> sort_fields;
for (size_t i = 0; i < input_types.size(); ++i) {
sort_fields.push_back(i);
}
CheckResult(row1, row2, input_types, sort_fields, has_null);
}
};
TEST_F(FieldsComparatorTest, TestSimple) {
auto pool = GetDefaultPool();
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{false, static_cast<int8_t>(10), static_cast<int16_t>(100),
static_cast<int32_t>(100000)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{true, static_cast<int8_t>(10), static_cast<int16_t>(100),
static_cast<int32_t>(100000)},
pool.get());
CheckResult(row1, row2, {arrow::boolean(), arrow::int8(), arrow::int16(), arrow::int32()});
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{true, static_cast<int8_t>(10), static_cast<int16_t>(100),
static_cast<int32_t>(100000)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{true, static_cast<int8_t>(20), static_cast<int16_t>(100),
static_cast<int32_t>(100000)},
pool.get());
CheckResult(row1, row2, {arrow::boolean(), arrow::int8(), arrow::int16(), arrow::int32()});
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{true, static_cast<int8_t>(10), static_cast<int16_t>(100),
static_cast<int32_t>(100000)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{true, static_cast<int8_t>(10), static_cast<int16_t>(100),
static_cast<int32_t>(200000)},
pool.get());
CheckResult(row1, row2, {arrow::boolean(), arrow::int8(), arrow::int16(), arrow::int32()});
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{static_cast<int64_t>(9223372036854775800), static_cast<float>(100.1), 123.45},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{static_cast<int64_t>(9223372036854775801), static_cast<float>(100.1), 123.45},
pool.get());
CheckResult(row1, row2, {arrow::int64(), arrow::float32(), arrow::float64()});
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{static_cast<int64_t>(9223372036854775800), static_cast<float>(100.1), 123.45},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{static_cast<int64_t>(9223372036854775800), static_cast<float>(100.4), 123.45},
pool.get());
CheckResult(row1, row2, {arrow::int64(), arrow::float32(), arrow::float64()});
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{static_cast<int64_t>(9223372036854775800), static_cast<float>(100.1), 1.237E+2},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{static_cast<int64_t>(9223372036854775800), static_cast<float>(100.1), 1.238E+2},
pool.get());
CheckResult(row1, row2, {arrow::int64(), arrow::float32(), arrow::float64()});
}
{
auto bytes = std::make_shared<Bytes>("快乐每一天", pool.get());
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{std::string("abandon"), bytes, TimestampType(Timestamp(1725875365442l, 120000), 9)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{std::string("abandon-"), bytes, TimestampType(Timestamp(1725875365442l, 120000), 9)},
pool.get());
CheckResult(row1, row2,
{arrow::utf8(), arrow::binary(), arrow::timestamp(arrow::TimeUnit::NANO)});
}
{
auto bytes1 = std::make_shared<Bytes>("快乐每一天", pool.get());
auto bytes2 = std::make_shared<Bytes>("快乐每一天!", pool.get());
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{std::string("abandon"), bytes1, TimestampType(Timestamp(1725875365442l, 120000), 9)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{std::string("abandon"), bytes2, TimestampType(Timestamp(1725875365442l, 120000), 9)},
pool.get());
CheckResult(row1, row2,
{arrow::utf8(), arrow::binary(), arrow::timestamp(arrow::TimeUnit::NANO)});
}
{
auto bytes = std::make_shared<Bytes>("快乐每一天", pool.get());
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{std::string("abandon"), bytes, TimestampType(Timestamp(1725875365442l, 120000), 9)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{std::string("abandon"), bytes, TimestampType(Timestamp(1725875365442l, 120100), 9)},
pool.get());
CheckResult(row1, row2,
{arrow::utf8(), arrow::binary(), arrow::timestamp(arrow::TimeUnit::NANO)});
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{Decimal(38, 10, DecimalUtils::StrToInt128("12345678998765432145678").value()),
static_cast<int32_t>(10)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{Decimal(38, 10, DecimalUtils::StrToInt128("12345678998765432145679").value()),
static_cast<int32_t>(10)},
pool.get());
CheckResult(row1, row2, {arrow::decimal128(38, 10), arrow::date32()});
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{Decimal(38, 10, DecimalUtils::StrToInt128("12345678998765432145678").value()),
static_cast<int32_t>(10)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{Decimal(38, 10, DecimalUtils::StrToInt128("12345678998765432145678").value()),
static_cast<int32_t>(20)},
pool.get());
CheckResult(row1, row2, {arrow::decimal128(38, 10), arrow::date32()});
}
}
TEST_F(FieldsComparatorTest, TestTimestampType) {
auto pool = GetDefaultPool();
// test ts with different precision
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{TimestampType(Timestamp(1000l, 0), 0), TimestampType(Timestamp(1000l, 0), 3),
TimestampType(Timestamp(1000l, 1000), 6)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{TimestampType(Timestamp(2000l, 0), 0), TimestampType(Timestamp(1000l, 0), 3),
TimestampType(Timestamp(1000l, 1000), 6)},
pool.get());
CheckResult(
row1, row2,
{arrow::timestamp(arrow::TimeUnit::SECOND), arrow::timestamp(arrow::TimeUnit::MILLI),
arrow::timestamp(arrow::TimeUnit::MICRO)});
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{TimestampType(Timestamp(1000l, 0), 0), TimestampType(Timestamp(1000l, 0), 3),
TimestampType(Timestamp(1000l, 1000), 6)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{TimestampType(Timestamp(1000l, 0), 0), TimestampType(Timestamp(1001l, 0), 3),
TimestampType(Timestamp(1000l, 1000), 6)},
pool.get());
CheckResult(
row1, row2,
{arrow::timestamp(arrow::TimeUnit::SECOND), arrow::timestamp(arrow::TimeUnit::MILLI),
arrow::timestamp(arrow::TimeUnit::MICRO)});
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{TimestampType(Timestamp(1000l, 0), 0), TimestampType(Timestamp(1000l, 0), 3),
TimestampType(Timestamp(1000l, 1000), 6)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{TimestampType(Timestamp(1000l, 0), 0), TimestampType(Timestamp(1000l, 0), 3),
TimestampType(Timestamp(1000l, 2000), 6)},
pool.get());
CheckResult(
row1, row2,
{arrow::timestamp(arrow::TimeUnit::SECOND), arrow::timestamp(arrow::TimeUnit::MILLI),
arrow::timestamp(arrow::TimeUnit::MICRO)});
}
{
auto timezone = DateTimeUtils::GetLocalTimezoneName();
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{TimestampType(Timestamp(1000l, 0), 0), TimestampType(Timestamp(1000l, 0), 3),
TimestampType(Timestamp(1000l, 1000), 6)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{TimestampType(Timestamp(1000l, 0), 0), TimestampType(Timestamp(1000l, 0), 3),
TimestampType(Timestamp(1000l, 2000), 6)},
pool.get());
CheckResult(row1, row2,
{arrow::timestamp(arrow::TimeUnit::SECOND, timezone),
arrow::timestamp(arrow::TimeUnit::MILLI, timezone),
arrow::timestamp(arrow::TimeUnit::MICRO, timezone)});
}
}
TEST_F(FieldsComparatorTest, TestNull) {
auto pool = GetDefaultPool();
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow({false, NullType()}, pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow({false, 21}, pool.get());
CheckResult(row1, row2, {arrow::boolean(), arrow::int32()}, /*has_null=*/true);
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow({NullType(), false}, pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow({NullType(), true}, pool.get());
CheckResult(row1, row2, {arrow::int32(), arrow::boolean()}, /*has_null=*/true);
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow({true, NullType()}, pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow({false, 21}, pool.get());
CheckResult(row1, row2, {arrow::boolean(), arrow::int32()}, {1, 0}, /*has_null=*/true);
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow({21, false}, pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow({NullType(), true}, pool.get());
CheckResult(row1, row2, {arrow::int32(), arrow::boolean()}, {1, 0}, /*has_null=*/true);
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow({false, NullType()}, pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow({true, NullType()}, pool.get());
CheckResult(row1, row2, {arrow::int32(), arrow::boolean()}, {1, 0}, /*has_null=*/true);
}
}
TEST_F(FieldsComparatorTest, TestWithSortFields) {
auto pool = GetDefaultPool();
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{true, static_cast<int8_t>(10), static_cast<int16_t>(200),
static_cast<int32_t>(100000)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{false, static_cast<int8_t>(5), static_cast<int16_t>(100),
static_cast<int32_t>(200000)},
pool.get());
CheckResult(row1, row2, {arrow::boolean(), arrow::int8(), arrow::int16(), arrow::int32()},
{3, 2});
}
{
BinaryRow row1 = BinaryRowGenerator::GenerateRow(
{true, static_cast<int8_t>(10), static_cast<int16_t>(100),
static_cast<int32_t>(100000)},
pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow(
{false, static_cast<int8_t>(5), static_cast<int16_t>(200),
static_cast<int32_t>(100000)},
pool.get());
CheckResult(row1, row2, {arrow::boolean(), arrow::int8(), arrow::int16(), arrow::int32()},
{3, 2});
}
}
TEST_F(FieldsComparatorTest, TestInvalidType) {
auto map_type = arrow::map(arrow::int8(), arrow::int16());
ASSERT_NOK_WITH_MSG(FieldsComparator::Create({DataField(0, arrow::field("f0", arrow::int32())),
DataField(1, arrow::field("f1", map_type))},
/*is_ascending_order=*/true),
"Do not support comparing map<int8, int16> type in idx 1");
}
} // namespace paimon::test