blob: fc5ca42a2db12f54d8f454f4a70dc15defc34855 [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "paimon/common/data/binary_row_writer.h"
#include <string>
#include <string_view>
#include <variant>
#include "arrow/type_fwd.h"
#include "gtest/gtest.h"
#include "paimon/common/data/binary_array.h"
#include "paimon/common/data/binary_map.h"
#include "paimon/common/data/binary_string.h"
#include "paimon/common/data/internal_row.h"
#include "paimon/common/utils/date_time_utils.h"
#include "paimon/data/decimal.h"
#include "paimon/data/timestamp.h"
#include "paimon/memory/bytes.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/status.h"
#include "paimon/testing/utils/binary_row_generator.h"
#include "paimon/testing/utils/testharness.h"
namespace paimon::test {
TEST(BinaryRowWriterTest, TestFieldSetter) {
int32_t arity = 24;
BinaryRow row(arity);
BinaryRowWriter writer(&row, /*initial_size=*/0, GetDefaultPool().get());
writer.Reset();
auto pool = GetDefaultPool();
// set process
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter0,
BinaryRowWriter::CreateFieldSetter(0, arrow::boolean()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter1,
BinaryRowWriter::CreateFieldSetter(1, arrow::int8()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter2,
BinaryRowWriter::CreateFieldSetter(2, arrow::int16()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter3,
BinaryRowWriter::CreateFieldSetter(3, arrow::int32()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter4,
BinaryRowWriter::CreateFieldSetter(4, arrow::int64()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter5,
BinaryRowWriter::CreateFieldSetter(5, arrow::float32()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter6,
BinaryRowWriter::CreateFieldSetter(6, arrow::float64()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter7,
BinaryRowWriter::CreateFieldSetter(7, arrow::utf8()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter8,
BinaryRowWriter::CreateFieldSetter(8, arrow::binary()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter9,
BinaryRowWriter::CreateFieldSetter(9, arrow::date32()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter10,
BinaryRowWriter::CreateFieldSetter(10, arrow::decimal128(5, 2)));
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter11,
BinaryRowWriter::CreateFieldSetter(11, arrow::timestamp(arrow::TimeUnit::NANO)));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter12,
BinaryRowWriter::CreateFieldSetter(12, arrow::utf8()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter13,
BinaryRowWriter::CreateFieldSetter(13, arrow::binary()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter14,
BinaryRowWriter::CreateFieldSetter(14, arrow::decimal128(37, 2)));
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter15,
BinaryRowWriter::CreateFieldSetter(15, arrow::timestamp(arrow::TimeUnit::NANO)));
// for timestamp type
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter16,
BinaryRowWriter::CreateFieldSetter(16, arrow::timestamp(arrow::TimeUnit::SECOND)));
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter17,
BinaryRowWriter::CreateFieldSetter(17, arrow::timestamp(arrow::TimeUnit::MILLI)));
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter18,
BinaryRowWriter::CreateFieldSetter(18, arrow::timestamp(arrow::TimeUnit::MICRO)));
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter19,
BinaryRowWriter::CreateFieldSetter(19, arrow::timestamp(arrow::TimeUnit::NANO)));
auto timezone = DateTimeUtils::GetLocalTimezoneName();
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter20,
BinaryRowWriter::CreateFieldSetter(
20, arrow::timestamp(arrow::TimeUnit::SECOND, timezone)));
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter21,
BinaryRowWriter::CreateFieldSetter(21, arrow::timestamp(arrow::TimeUnit::MILLI, timezone)));
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter22,
BinaryRowWriter::CreateFieldSetter(22, arrow::timestamp(arrow::TimeUnit::MICRO, timezone)));
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter23,
BinaryRowWriter::CreateFieldSetter(23, arrow::timestamp(arrow::TimeUnit::NANO, timezone)));
setter0(true, &writer);
setter1(static_cast<char>(1), &writer);
setter2(static_cast<int16_t>(2), &writer);
setter3(static_cast<int32_t>(3), &writer);
setter4(static_cast<int64_t>(4), &writer);
setter5(static_cast<float>(5.5), &writer);
setter6(6.66, &writer);
std::string data = "abc";
setter7(std::string_view(data.data(), data.size()), &writer);
setter8(std::string_view(data.data(), data.size()), &writer);
setter9(9, &writer);
setter10(Decimal(5, 2, 123), &writer);
setter11(Timestamp(11, 12), &writer);
setter12(BinaryString::FromString("hello", pool.get()), &writer);
auto bytes = std::make_shared<Bytes>("world", pool.get());
setter13(bytes, &writer);
setter14(NullType(), &writer);
setter15(NullType(), &writer);
setter16(Timestamp(1758030901000l, 0), &writer);
setter17(Timestamp(1758030901001l, 0), &writer);
setter18(NullType(), &writer);
setter19(Timestamp(1758030901001l, 1001), &writer);
setter20(NullType(), &writer);
setter21(Timestamp(1758030902002l, 0), &writer);
setter22(Timestamp(1758030902002l, 2000), &writer);
setter23(Timestamp(1758030902002l, 2002), &writer);
writer.Complete();
// get process
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter0,
InternalRow::CreateFieldGetter(0, arrow::boolean(), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter1,
InternalRow::CreateFieldGetter(1, arrow::int8(), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter2,
InternalRow::CreateFieldGetter(2, arrow::int16(), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter3,
InternalRow::CreateFieldGetter(3, arrow::int32(), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter4,
InternalRow::CreateFieldGetter(4, arrow::int64(), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter5,
InternalRow::CreateFieldGetter(5, arrow::float32(), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter6,
InternalRow::CreateFieldGetter(6, arrow::float64(), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter7,
InternalRow::CreateFieldGetter(7, arrow::utf8(), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter8,
InternalRow::CreateFieldGetter(8, arrow::binary(),
/*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter9,
InternalRow::CreateFieldGetter(9, arrow::date32(), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(
InternalRow::FieldGetterFunc getter10,
InternalRow::CreateFieldGetter(10, arrow::decimal128(5, 2), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter11,
InternalRow::CreateFieldGetter(11, arrow::timestamp(arrow::TimeUnit::NANO),
/*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter12,
InternalRow::CreateFieldGetter(12, arrow::utf8(), /*use_view=*/false));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter13,
InternalRow::CreateFieldGetter(13, arrow::binary(),
/*use_view=*/false));
ASSERT_OK_AND_ASSIGN(
InternalRow::FieldGetterFunc getter14,
InternalRow::CreateFieldGetter(14, arrow::decimal128(37, 2), /*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter15,
InternalRow::CreateFieldGetter(15, arrow::timestamp(arrow::TimeUnit::NANO),
/*use_view=*/true));
// for timestamp type
ASSERT_OK_AND_ASSIGN(
InternalRow::FieldGetterFunc getter16,
InternalRow::CreateFieldGetter(16, arrow::timestamp(arrow::TimeUnit::SECOND),
/*use_view=*/true));
ASSERT_OK_AND_ASSIGN(
InternalRow::FieldGetterFunc getter17,
InternalRow::CreateFieldGetter(17, arrow::timestamp(arrow::TimeUnit::MILLI),
/*use_view=*/true));
ASSERT_OK_AND_ASSIGN(
InternalRow::FieldGetterFunc getter18,
InternalRow::CreateFieldGetter(18, arrow::timestamp(arrow::TimeUnit::MICRO),
/*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter19,
InternalRow::CreateFieldGetter(19, arrow::timestamp(arrow::TimeUnit::NANO),
/*use_view=*/true));
ASSERT_OK_AND_ASSIGN(
InternalRow::FieldGetterFunc getter20,
InternalRow::CreateFieldGetter(20, arrow::timestamp(arrow::TimeUnit::SECOND),
/*use_view=*/true));
ASSERT_OK_AND_ASSIGN(
InternalRow::FieldGetterFunc getter21,
InternalRow::CreateFieldGetter(21, arrow::timestamp(arrow::TimeUnit::MILLI),
/*use_view=*/true));
ASSERT_OK_AND_ASSIGN(
InternalRow::FieldGetterFunc getter22,
InternalRow::CreateFieldGetter(22, arrow::timestamp(arrow::TimeUnit::MICRO),
/*use_view=*/true));
ASSERT_OK_AND_ASSIGN(InternalRow::FieldGetterFunc getter23,
InternalRow::CreateFieldGetter(23, arrow::timestamp(arrow::TimeUnit::NANO),
/*use_view=*/true));
ASSERT_EQ(DataDefine::GetVariantValue<bool>(getter0(row)), true);
ASSERT_EQ(DataDefine::GetVariantValue<char>(getter1(row)), static_cast<char>(1));
ASSERT_EQ(DataDefine::GetVariantValue<int16_t>(getter2(row)), static_cast<int16_t>(2));
ASSERT_EQ(DataDefine::GetVariantValue<int32_t>(getter3(row)), static_cast<int32_t>(3));
ASSERT_EQ(DataDefine::GetVariantValue<int64_t>(getter4(row)), static_cast<int64_t>(4));
ASSERT_EQ(DataDefine::GetVariantValue<float>(getter5(row)), static_cast<float>(5.5));
ASSERT_EQ(DataDefine::GetVariantValue<double>(getter6(row)), static_cast<double>(6.66));
ASSERT_EQ(DataDefine::GetVariantValue<std::string_view>(getter7(row)), std::string_view(data));
ASSERT_EQ(DataDefine::GetVariantValue<std::string_view>(getter8(row)), std::string_view(data));
ASSERT_EQ(DataDefine::GetVariantValue<int32_t>(getter9(row)), 9);
ASSERT_EQ(DataDefine::GetVariantValue<Decimal>(getter10(row)), Decimal(5, 2, 123));
ASSERT_EQ(DataDefine::GetVariantValue<Timestamp>(getter11(row)), Timestamp(11, 12));
ASSERT_EQ(DataDefine::GetVariantValue<BinaryString>(getter12(row)).ToString(), "hello");
ASSERT_EQ(*DataDefine::GetVariantValue<std::shared_ptr<Bytes>>(getter13(row)),
Bytes("world", pool.get()));
ASSERT_TRUE(DataDefine::IsVariantNull(getter14(row)));
ASSERT_TRUE(DataDefine::IsVariantNull(getter15(row)));
ASSERT_EQ(DataDefine::GetVariantValue<Timestamp>(getter16(row)), Timestamp(1758030901000l, 0));
ASSERT_EQ(DataDefine::GetVariantValue<Timestamp>(getter17(row)), Timestamp(1758030901001l, 0));
ASSERT_TRUE(DataDefine::IsVariantNull(getter18(row)));
ASSERT_EQ(DataDefine::GetVariantValue<Timestamp>(getter19(row)),
Timestamp(1758030901001l, 1001));
ASSERT_TRUE(DataDefine::IsVariantNull(getter20(row)));
ASSERT_EQ(DataDefine::GetVariantValue<Timestamp>(getter21(row)), Timestamp(1758030902002l, 0));
ASSERT_EQ(DataDefine::GetVariantValue<Timestamp>(getter22(row)),
Timestamp(1758030902002l, 2000));
ASSERT_EQ(DataDefine::GetVariantValue<Timestamp>(getter23(row)),
Timestamp(1758030902002l, 2002));
}
TEST(BinaryRowWriterTest, TestFieldSetterWithNull) {
int32_t arity = 5;
BinaryRow row(arity);
BinaryRowWriter writer(&row, /*initial_size=*/0, GetDefaultPool().get());
writer.Reset();
auto timezone = DateTimeUtils::GetLocalTimezoneName();
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter0,
BinaryRowWriter::CreateFieldSetter(0, arrow::boolean()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter1,
BinaryRowWriter::CreateFieldSetter(1, arrow::int8()));
ASSERT_OK_AND_ASSIGN(BinaryRowWriter::FieldSetterFunc setter2,
BinaryRowWriter::CreateFieldSetter(2, arrow::decimal128(5, 2)));
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter3,
BinaryRowWriter::CreateFieldSetter(3, arrow::timestamp(arrow::TimeUnit::SECOND)));
ASSERT_OK_AND_ASSIGN(
BinaryRowWriter::FieldSetterFunc setter4,
BinaryRowWriter::CreateFieldSetter(4, arrow::timestamp(arrow::TimeUnit::NANO, timezone)));
setter0(true, &writer);
setter1(NullType(), &writer);
setter2(NullType(), &writer);
setter3(NullType(), &writer);
setter4(NullType(), &writer);
writer.Complete();
ASSERT_EQ(row.GetBoolean(0), true);
ASSERT_TRUE(row.IsNullAt(1));
ASSERT_TRUE(row.IsNullAt(2));
ASSERT_TRUE(row.IsNullAt(3));
ASSERT_TRUE(row.IsNullAt(4));
}
TEST(BinaryRowWriterTest, TestWriteNested) {
auto pool = GetDefaultPool();
BinaryRow inner_row =
BinaryRowGenerator::GenerateRow({std::string("Alice"), 30, 12.1, NullType()}, pool.get());
BinaryArray inner_array = BinaryArray::FromIntArray({10, 20, 30}, pool.get());
auto key = BinaryArray::FromIntArray({1, 2, 3, 5}, pool.get());
auto value = BinaryArray::FromLongArray({100ll, 200ll, 300ll, 500ll}, pool.get());
auto inner_map = BinaryMap::ValueOf(key, value, pool.get());
BinaryRow row(3);
BinaryRowWriter writer(&row, /*initial_size=*/1024, pool.get());
writer.WriteRow(0, inner_row);
writer.WriteArray(1, inner_array);
writer.WriteMap(2, *inner_map);
writer.Complete();
ASSERT_EQ(3, row.GetFieldCount());
ASSERT_FALSE(row.IsNullAt(0));
auto de_row = row.GetRow(0, 4);
ASSERT_EQ(std::dynamic_pointer_cast<BinaryRow>(de_row)->HashCode(), inner_row.HashCode());
ASSERT_EQ(4, de_row->GetFieldCount());
ASSERT_EQ(de_row->GetString(0).ToString(), "Alice");
ASSERT_EQ(de_row->GetInt(1), 30);
ASSERT_EQ(de_row->GetDouble(2), 12.1);
ASSERT_TRUE(de_row->IsNullAt(3));
auto de_array = row.GetArray(1);
ASSERT_EQ(std::dynamic_pointer_cast<BinaryArray>(de_array)->HashCode(), inner_array.HashCode());
ASSERT_EQ(de_array->ToIntArray().value(), std::vector<int32_t>({10, 20, 30}));
auto de_map = row.GetMap(2);
ASSERT_EQ(std::dynamic_pointer_cast<BinaryArray>(de_map->KeyArray())->HashCode(),
key.HashCode());
ASSERT_EQ(std::dynamic_pointer_cast<BinaryArray>(de_map->ValueArray())->HashCode(),
value.HashCode());
ASSERT_EQ(de_map->KeyArray()->ToIntArray().value(), std::vector<int32_t>({1, 2, 3, 5}));
ASSERT_EQ(de_map->ValueArray()->ToLongArray().value(),
std::vector<int64_t>({100ll, 200ll, 300ll, 500ll}));
}
} // namespace paimon::test