blob: 47b7f54bec3910600913eee05274d68d24b31ce7 [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "paimon/common/reader/data_evolution_row.h"
#include <cstdint>
#include <memory>
#include <string>
#include <variant>
#include <vector>
#include "gtest/gtest.h"
#include "paimon/common/data/binary_array_writer.h"
#include "paimon/common/data/binary_map.h"
#include "paimon/common/data/binary_row_writer.h"
#include "paimon/common/data/data_define.h"
#include "paimon/common/types/row_kind.h"
#include "paimon/data/decimal.h"
#include "paimon/data/timestamp.h"
#include "paimon/memory/bytes.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/status.h"
#include "paimon/testing/utils/binary_row_generator.h"
#include "paimon/testing/utils/testharness.h"
namespace paimon::test {
// Builds a six-field DataEvolutionRow on top of three two-field rows and checks
// that every logical field is read from the source row / field named by the
// offset tables, then exercises reading and replacing the row kind.
TEST(DataEvolutionRowTest, TestSimple) {
    auto memory_pool = GetDefaultPool();

    // Physical rows the merged view reads from.
    BinaryRow src0 = BinaryRowGenerator::GenerateRow({0, std::string("00")}, memory_pool.get());
    BinaryRow src1 = BinaryRowGenerator::GenerateRow({10, 110}, memory_pool.get());
    BinaryRow src2 = BinaryRowGenerator::GenerateRow({20, std::string("20")}, memory_pool.get());

    // Logical field -> source row[field in that row]
    //   f0:int32  -> src0[0]
    //   f1:int32  -> src2[0]
    //   f2:string -> src0[1]
    //   f3:int32  -> src1[1]
    //   f4:string -> src2[1]
    //   f5:int32  -> src1[0]
    std::vector<int32_t> source_row_of = {0, 2, 0, 1, 2, 1};
    std::vector<int32_t> field_in_source = {0, 0, 1, 1, 1, 0};

    DataEvolutionRow merged({src0, src1, src2}, source_row_of, field_in_source);

    ASSERT_EQ(merged.GetFieldCount(), 6);
    ASSERT_FALSE(merged.IsNullAt(0));

    // Values come back in logical order, not in source-row order.
    ASSERT_EQ(merged.GetInt(0), 0);
    ASSERT_EQ(merged.GetInt(1), 20);
    ASSERT_EQ(merged.GetString(2).ToString(), "00");
    ASSERT_EQ(merged.GetInt(3), 110);
    ASSERT_EQ(merged.GetString(4).ToString(), "20");
    ASSERT_EQ(merged.GetInt(5), 10);

    // The row kind starts as Insert and reflects a later SetRowKind call.
    ASSERT_OK_AND_ASSIGN(const RowKind* initial_kind, merged.GetRowKind());
    ASSERT_EQ(*initial_kind, *RowKind::Insert());

    merged.SetRowKind(RowKind::UpdateAfter());
    ASSERT_OK_AND_ASSIGN(const RowKind* updated_kind, merged.GetRowKind());
    ASSERT_EQ(*updated_kind, *RowKind::UpdateAfter());

    ASSERT_EQ(merged.ToString(), "DataEvolutionRow");
}
// Checks null reporting of DataEvolutionRow for three situations:
//   * a field whose source row stores a value (must not be null),
//   * a field whose source row stores an explicit null,
//   * a field mapped with negative offsets, i.e. backed by no source row.
TEST(DataEvolutionRowTest, TestNull) {
    // f0:int32
    // f1:int32
    // f2:string
    // f3:int32
    // f4:string (stored as null in row3)
    // f5:int32
    // f6:non-exist (row offset -1)
    // f7:non-exist (row offset -2)
    std::vector<int32_t> row_offsets = {0, 2, 0, 1, 2, 1, -1, -2};
    std::vector<int32_t> field_offsets = {0, 0, 1, 1, 1, 0, -1, -1};
    auto pool = GetDefaultPool();
    BinaryRow row1 = BinaryRowGenerator::GenerateRow({0, std::string("00")}, pool.get());
    BinaryRow row2 = BinaryRowGenerator::GenerateRow({10, 110}, pool.get());
    BinaryRow row3 = BinaryRowGenerator::GenerateRow({20, NullType()}, pool.get());
    DataEvolutionRow row({row1, row2, row3}, row_offsets, field_offsets);
    ASSERT_EQ(row.GetFieldCount(), 8);

    // Fields backed by a stored value: each must report non-null and yield that value.
    ASSERT_FALSE(row.IsNullAt(0));
    ASSERT_EQ(row.GetInt(0), 0);
    ASSERT_FALSE(row.IsNullAt(1));
    ASSERT_EQ(row.GetInt(1), 20);
    ASSERT_FALSE(row.IsNullAt(2));
    ASSERT_EQ(row.GetString(2).ToString(), "00");
    ASSERT_FALSE(row.IsNullAt(3));
    ASSERT_EQ(row.GetInt(3), 110);
    ASSERT_FALSE(row.IsNullAt(5));
    ASSERT_EQ(row.GetInt(5), 10);

    // Field 4 maps to row3[1], which was generated as NullType.
    ASSERT_TRUE(row.IsNullAt(4));

    // Fields 6 and 7 have negative offsets; both are expected to read as null.
    ASSERT_TRUE(row.IsNullAt(6));
    ASSERT_TRUE(row.IsNullAt(7));
}
// Exercises every typed getter of DataEvolutionRow. Six source rows hold values
// of different types; the offset tables expose them as one 16-field row and each
// getter is checked against the value written into the corresponding source row.
TEST(DataEvolutionRowTest, TestAllFieldTypes) {
    auto pool = GetDefaultPool();
    // Row0: boolean(true), byte(42), short(1024), int(100000), date(19000)
    BinaryRow row0 =
        BinaryRowGenerator::GenerateRow({true, static_cast<int8_t>(42), static_cast<int16_t>(1024),
                                         static_cast<int32_t>(100000), static_cast<int32_t>(19000)},
                                        pool.get());
    // Row1: long(-987654321), float(3.14f), double(-9.87654321)
    BinaryRow row1 = BinaryRowGenerator::GenerateRow(
        {static_cast<int64_t>(-987654321LL), static_cast<float>(3.14f),
         static_cast<double>(-9.87654321)},
        pool.get());
    // Row2: string("hello"), string("world")
    BinaryRow row2 =
        BinaryRowGenerator::GenerateRow({std::string("hello"), std::string("world")}, pool.get());
    // Row3: decimal(10,2,67890), timestamp(1000,500) with precision 9
    BinaryRow row3 = BinaryRowGenerator::GenerateRow(
        {Decimal(10, 2, 67890), TimestampType(Timestamp(1000, 500), 9)}, pool.get());
    // Row4: binary("fghij")
    auto bytes_val = std::make_shared<Bytes>("fghij", pool.get());
    BinaryRow row4 = BinaryRowGenerator::GenerateRow({bytes_val}, pool.get());
    // Row5: nested array [10,20], nested map {1:100, 2:200}, nested row ("Alice", 30)
    // Built manually via BinaryRowWriter since BinaryRowGenerator doesn't support nested types
    BinaryRow row5(3);
    {
        BinaryRowWriter writer(&row5, 0, pool.get());
        // field 0: array [10, 20]
        auto inner_array = BinaryArray::FromIntArray({10, 20}, pool.get());
        writer.WriteArray(0, inner_array);
        // field 1: map {1->100, 2->200}
        auto map_key = BinaryArray::FromIntArray({1, 2}, pool.get());
        auto map_val = BinaryArray::FromLongArray({100LL, 200LL}, pool.get());
        auto map_obj = BinaryMap::ValueOf(map_key, map_val, pool.get());
        writer.WriteMap(1, *map_obj);
        // field 2: row ("Alice", 30)
        BinaryRow inner_row = BinaryRowGenerator::GenerateRow(
            {std::string("Alice"), static_cast<int32_t>(30)}, pool.get());
        writer.WriteRow(2, inner_row);
        writer.Complete();
    }
    // Build the DataEvolutionRow that picks specific fields from each row.
    // evolution field -> (row_index, field_index_within_row)
    // pos 0: boolean -> row0[0] = true
    // pos 1: byte -> row0[1] = 42
    // pos 2: short -> row0[2] = 1024
    // pos 3: int -> row0[3] = 100000
    // pos 4: date -> row0[4] = 19000
    // pos 5: long -> row1[0] = -987654321
    // pos 6: float -> row1[1] = 3.14f
    // pos 7: double -> row1[2] = -9.87654321
    // pos 8: string -> row2[0] = "hello"
    // pos 9: stringview-> row2[1] = "world"
    // pos 10: decimal -> row3[0] = Decimal(10,2,67890)
    // pos 11: timestamp -> row3[1] = Timestamp(1000,500)
    // pos 12: binary -> row4[0] = "fghij"
    // pos 13: array -> row5[0] = [10,20]
    // pos 14: map -> row5[1] = {1:100, 2:200}
    // pos 15: row -> row5[2] = ("Alice", 30)
    std::vector<int32_t> row_offsets = {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5};
    std::vector<int32_t> field_offsets = {0, 1, 2, 3, 4, 0, 1, 2, 0, 1, 0, 1, 0, 0, 1, 2};
    DataEvolutionRow evolution({row0, row1, row2, row3, row4, row5}, row_offsets, field_offsets);
    ASSERT_EQ(evolution.GetFieldCount(), 16);
    // Boolean
    ASSERT_FALSE(evolution.IsNullAt(0));
    ASSERT_TRUE(evolution.GetBoolean(0));
    // Byte
    ASSERT_EQ(evolution.GetByte(1), 42);
    // Short
    ASSERT_EQ(evolution.GetShort(2), 1024);
    // Int
    ASSERT_EQ(evolution.GetInt(3), 100000);
    // Date
    ASSERT_EQ(evolution.GetDate(4), 19000);
    // Long
    ASSERT_EQ(evolution.GetLong(5), -987654321LL);
    // Float
    ASSERT_FLOAT_EQ(evolution.GetFloat(6), 3.14f);
    // Double
    ASSERT_DOUBLE_EQ(evolution.GetDouble(7), -9.87654321);
    // String
    ASSERT_EQ(evolution.GetString(8).ToString(), "hello");
    // StringView
    ASSERT_EQ(evolution.GetStringView(9), "world");
    // Decimal
    ASSERT_EQ(evolution.GetDecimal(10, 10, 2), Decimal(10, 2, 67890));
    // Timestamp
    ASSERT_EQ(evolution.GetTimestamp(11, 9), Timestamp(1000, 500));
    // Binary
    // The pointer-like results below are checked before being dereferenced so that
    // a null return is reported as a test failure rather than crashing the binary.
    auto retrieved_binary = evolution.GetBinary(12);
    ASSERT_TRUE(retrieved_binary != nullptr) << "GetBinary(12) returned null";
    Bytes expected_binary("fghij", pool.get());
    ASSERT_EQ(*retrieved_binary, expected_binary);
    // Nested Array
    auto retrieved_array = evolution.GetArray(13);
    ASSERT_TRUE(retrieved_array != nullptr) << "GetArray(13) returned null";
    ASSERT_OK_AND_ASSIGN(auto inner_values, retrieved_array->ToIntArray());
    ASSERT_EQ(inner_values, std::vector<int32_t>({10, 20}));
    // Map (only the entry count is verified here)
    auto retrieved_map = evolution.GetMap(14);
    ASSERT_TRUE(retrieved_map != nullptr) << "GetMap(14) returned null";
    ASSERT_EQ(retrieved_map->Size(), 2);
    // Nested Row
    auto retrieved_row = evolution.GetRow(15, 2);
    ASSERT_TRUE(retrieved_row != nullptr) << "GetRow(15, 2) returned null";
    ASSERT_EQ(retrieved_row->GetString(0).ToString(), "Alice");
    ASSERT_EQ(retrieved_row->GetInt(1), 30);
    // ToString
    ASSERT_EQ(evolution.ToString(), "DataEvolutionRow");
}
} // namespace paimon::test