blob: b0dcdcff282ee1c6a77c66db941bc1909b117d97 [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "paimon/common/utils/projected_array.h"
#include <algorithm>
#include <optional>
#include <string>
#include "gtest/gtest.h"
#include "paimon/common/data/binary_array.h"
#include "paimon/common/data/binary_array_writer.h"
#include "paimon/common/data/binary_map.h"
#include "paimon/common/data/binary_row.h"
#include "paimon/memory/bytes.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/testing/utils/binary_row_generator.h"
namespace paimon::test {
TEST(ProjectedArrayTest, TestSimple) {
auto pool = GetDefaultPool();
{
std::vector<bool> arr = {true, false};
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), sizeof(bool), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteBoolean(i, arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetBoolean(1), true);
ASSERT_EQ(projected_array.GetBoolean(0), false);
ASSERT_TRUE(projected_array.IsNullAt(2));
}
{
std::vector<int8_t> arr = {1, 2, 3, 4, 5};
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), sizeof(int8_t), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteByte(i, arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {4, 3, 2, 1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetByte(0), 5);
ASSERT_EQ(projected_array.GetByte(1), 4);
ASSERT_EQ(projected_array.GetByte(4), 1);
ASSERT_TRUE(projected_array.IsNullAt(5));
}
{
std::vector<int16_t> arr = {1, 2, 3, 4, 5};
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), sizeof(int16_t), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteShort(i, arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {4, 3, 2, 1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetShort(0), 5);
ASSERT_EQ(projected_array.GetShort(1), 4);
ASSERT_EQ(projected_array.GetShort(4), 1);
ASSERT_TRUE(projected_array.IsNullAt(5));
}
{
std::vector<int32_t> arr = {1, 2, 3, 4, 5};
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), sizeof(int32_t), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteInt(i, arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {4, 3, 2, 1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetInt(0), 5);
ASSERT_EQ(projected_array.GetInt(1), 4);
ASSERT_EQ(projected_array.GetInt(4), 1);
ASSERT_TRUE(projected_array.IsNullAt(5));
}
{
// test date
std::vector<int32_t> arr = {10000, 20006};
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), sizeof(int32_t), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteInt(i, arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetDate(0), 20006);
ASSERT_EQ(projected_array.GetDate(1), 10000);
ASSERT_TRUE(projected_array.IsNullAt(2));
}
{
std::vector<float> arr = {1.0, 2.0};
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), sizeof(float), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteFloat(i, arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetFloat(0), 2.0);
ASSERT_EQ(projected_array.GetFloat(1), 1.0);
ASSERT_TRUE(projected_array.IsNullAt(2));
}
{
std::vector<double> arr = {1.0, 2.0};
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), sizeof(double), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteDouble(i, arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetDouble(0), 2.0);
ASSERT_EQ(projected_array.GetDouble(1), 1.0);
ASSERT_TRUE(projected_array.IsNullAt(2));
}
// decimal
{
// not compact (precision <= 18)
std::vector<Decimal> arr = {Decimal(6, 2, 123456), Decimal(6, 3, 123456)};
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), /*element_size=*/8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteDecimal(i, arr[i], 6);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetDecimal(0, 6, 3), Decimal(6, 3, 123456));
ASSERT_EQ(projected_array.GetDecimal(1, 6, 2), Decimal(6, 2, 123456));
ASSERT_TRUE(projected_array.IsNullAt(2));
}
{
// compact (precision > 18)
std::vector<Decimal> arr = {Decimal(/*precision=*/20, /*scale=*/3, 123456),
Decimal(/*precision=*/20, /*scale=*/3, 123456789)};
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), /*element_size=*/8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteDecimal(i, arr[i], /*precision=*/20);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetDecimal(0, 20, 3), Decimal(20, 3, 123456789));
ASSERT_EQ(projected_array.GetDecimal(1, 20, 3), Decimal(20, 3, 123456));
ASSERT_TRUE(projected_array.IsNullAt(2));
}
// timestamp
{
std::vector<Timestamp> arr = {Timestamp(0, 0), Timestamp(12345, 1)};
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), /*element_size=*/8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteTimestamp(i, arr[i], 9);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetTimestamp(0, 9), Timestamp(12345, 1));
ASSERT_EQ(projected_array.GetTimestamp(1, 9), Timestamp(0, 0));
ASSERT_TRUE(projected_array.IsNullAt(2));
}
// binary
{
std::vector<Bytes> arr;
arr.emplace_back("hello", pool.get());
arr.emplace_back("world", pool.get());
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), /*element_size=*/8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteBinary(i, arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(*projected_array.GetBinary(0), arr[1]);
ASSERT_EQ(*projected_array.GetBinary(1), arr[0]);
ASSERT_TRUE(projected_array.IsNullAt(2));
}
// string
{
std::vector<BinaryString> arr;
arr.push_back(BinaryString::FromString("hello", pool.get()));
arr.push_back(BinaryString::FromString("world", pool.get()));
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), /*element_size=*/8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteString(i, arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetString(0), arr[1]);
ASSERT_EQ(projected_array.GetString(1), arr[0]);
ASSERT_TRUE(projected_array.IsNullAt(2));
}
// array
{
// element1
std::vector<int32_t> arr1 = {1, 2};
auto array1 = std::make_shared<BinaryArray>();
BinaryArrayWriter writer1 =
BinaryArrayWriter(array1.get(), arr1.size(), sizeof(int32_t), pool.get());
for (size_t i = 0; i < arr1.size(); i++) {
writer1.WriteInt(i, arr1[i]);
}
writer1.Complete();
// element2
std::vector<int32_t> arr2 = {100, 200};
auto array2 = std::make_shared<BinaryArray>();
BinaryArrayWriter writer2 =
BinaryArrayWriter(array2.get(), arr2.size(), sizeof(int32_t), pool.get());
for (size_t i = 0; i < arr2.size(); i++) {
writer2.WriteInt(i, arr2[i]);
}
writer2.Complete();
// array
std::vector<std::shared_ptr<BinaryArray>> arr;
arr.push_back(array1);
arr.push_back(array2);
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer = BinaryArrayWriter(array.get(), arr.size(), 8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteArray(i, *arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
auto ret0 = std::dynamic_pointer_cast<BinaryArray>(projected_array.GetArray(0));
auto ret1 = std::dynamic_pointer_cast<BinaryArray>(projected_array.GetArray(1));
ASSERT_TRUE(ret0);
ASSERT_TRUE(ret1);
ASSERT_EQ(*ret0, *arr[1]);
ASSERT_EQ(*ret1, *arr[0]);
ASSERT_TRUE(projected_array.IsNullAt(2));
}
}
TEST(ProjectedArrayTest, TestGetStringView) {
auto pool = GetDefaultPool();
std::vector<BinaryString> arr;
arr.push_back(BinaryString::FromString("hello", pool.get()));
arr.push_back(BinaryString::FromString("world", pool.get()));
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), arr.size(), /*element_size=*/8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteString(i, arr[i]);
}
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
ASSERT_EQ(projected_array.GetStringView(0), "world");
ASSERT_EQ(projected_array.GetStringView(1), "hello");
ASSERT_TRUE(projected_array.IsNullAt(2));
}
TEST(ProjectedArrayTest, TestGetMap) {
auto pool = GetDefaultPool();
auto key = BinaryArray::FromIntArray({1, 2, 3}, pool.get());
auto value = BinaryArray::FromLongArray({100ll, 200ll, 300ll}, pool.get());
auto map1 = BinaryMap::ValueOf(key, value, pool.get());
auto key2 = BinaryArray::FromIntArray({10, 20}, pool.get());
auto value2 = BinaryArray::FromLongArray({1000ll, 2000ll}, pool.get());
auto map2 = BinaryMap::ValueOf(key2, value2, pool.get());
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), /*num_elements=*/2, /*element_size=*/8, pool.get());
writer.WriteMap(0, *map1);
writer.WriteMap(1, *map2);
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
// mapping[0] -> original index 1 -> map2
auto result0 = projected_array.GetMap(0);
ASSERT_EQ(result0->Size(), 2);
ASSERT_EQ(result0->KeyArray()->ToIntArray().value(), (std::vector<int32_t>{10, 20}));
// mapping[1] -> original index 0 -> map1
auto result1 = projected_array.GetMap(1);
ASSERT_EQ(result1->Size(), 3);
ASSERT_EQ(result1->KeyArray()->ToIntArray().value(), (std::vector<int32_t>{1, 2, 3}));
ASSERT_TRUE(projected_array.IsNullAt(2));
}
TEST(ProjectedArrayTest, TestGetRow) {
auto pool = GetDefaultPool();
BinaryRow row1 = BinaryRowGenerator::GenerateRow({std::string("Alice"), 30}, pool.get());
BinaryRow row2 = BinaryRowGenerator::GenerateRow({std::string("Bob"), 40}, pool.get());
auto array = std::make_shared<BinaryArray>();
BinaryArrayWriter writer =
BinaryArrayWriter(array.get(), /*num_elements=*/2, /*element_size=*/8, pool.get());
writer.WriteRow(0, row1);
writer.WriteRow(1, row2);
writer.Complete();
std::vector<int32_t> mapping = {1, 0, -1};
ProjectedArray projected_array(array, mapping);
// mapping[0] -> original index 1 -> row2
auto result0 = std::dynamic_pointer_cast<BinaryRow>(projected_array.GetRow(0, 2));
ASSERT_TRUE(result0);
ASSERT_EQ(*result0, row2);
// mapping[1] -> original index 0 -> row1
auto result1 = std::dynamic_pointer_cast<BinaryRow>(projected_array.GetRow(1, 2));
ASSERT_TRUE(result1);
ASSERT_EQ(*result1, row1);
ASSERT_TRUE(projected_array.IsNullAt(2));
}
} // namespace paimon::test