blob: 2bba3b7f187db3fad2c4ada8cbed05d097b9296e [file] [log] [blame]
/*
* Copyright 2024-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "paimon/common/data/binary_array.h"
#include <cstdlib>
#include <optional>
#include <utility>
#include "arrow/api.h"
#include "arrow/array/array_nested.h"
#include "arrow/ipc/json_simple.h"
#include "arrow/util/checked_cast.h"
#include "gtest/gtest.h"
#include "paimon/common/data/binary_array_writer.h"
#include "paimon/common/data/columnar/columnar_array.h"
#include "paimon/common/utils/date_time_utils.h"
#include "paimon/testing/utils/testharness.h"
namespace paimon::test {
TEST(BinaryArrayTest, TestBinaryArraySimple) {
auto pool = GetDefaultPool();
int64_t seed = DateTimeUtils::GetCurrentUTCTimeUs();
std::srand(seed);
std::vector<int64_t> values(100, 0);
for (auto& value : values) {
value = paimon::test::RandomNumber(0, 10000000);
}
auto binary_array = BinaryArray::FromLongArray(values, pool.get());
ASSERT_EQ(values.size(), binary_array.Size()) << "seed:" << seed;
for (size_t i = 0; i < values.size(); i++) {
ASSERT_EQ(values[i], binary_array.GetLong(i)) << "seed:" << seed << ", idx: " << i;
}
ASSERT_FALSE(binary_array.IsNullAt(40)) << "seed:" << seed;
ASSERT_FALSE(binary_array.IsNullAt(70)) << "seed:" << seed;
auto res = binary_array.ToLongArray();
ASSERT_EQ(res.value(), values);
}
TEST(BinaryArrayTest, TestSetAndGet) {
auto pool = GetDefaultPool();
{
std::vector<bool> arr = {true, false};
BinaryArray array;
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), sizeof(bool), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteBoolean(i, arr[i]);
}
writer.Complete();
ASSERT_EQ(true, array.GetBoolean(0));
ASSERT_EQ(false, array.GetBoolean(1));
ASSERT_OK_AND_ASSIGN(auto res, array.ToBooleanArray());
std::vector<char> char_arr = {1, 0};
ASSERT_EQ(res, char_arr);
}
{
std::vector<int8_t> arr = {1, 2, 3, 4, 5};
BinaryArray array;
BinaryArrayWriter writer =
BinaryArrayWriter(&array, arr.size(), sizeof(int8_t), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteByte(i, arr[i]);
}
writer.Complete();
ASSERT_EQ(1, array.GetByte(0));
ASSERT_EQ(5, array.GetByte(4));
ASSERT_OK_AND_ASSIGN(auto res, array.ToByteArray());
std::vector<char> char_arr = {1, 2, 3, 4, 5};
ASSERT_EQ(res, char_arr);
}
{
std::vector<int16_t> arr = {1, 2, 3, 4, 5};
BinaryArray array;
BinaryArrayWriter writer =
BinaryArrayWriter(&array, arr.size(), sizeof(int16_t), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteShort(i, arr[i]);
}
writer.Complete();
ASSERT_EQ(1, array.GetShort(0));
ASSERT_EQ(5, array.GetShort(4));
ASSERT_OK_AND_ASSIGN(auto res, array.ToShortArray());
ASSERT_EQ(res, arr);
}
{
std::vector<int32_t> arr = {1, 2, 3, 4, 5};
BinaryArray array;
BinaryArrayWriter writer =
BinaryArrayWriter(&array, arr.size(), sizeof(int32_t), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteInt(i, arr[i]);
}
writer.Complete();
ASSERT_EQ(1, array.GetInt(0));
ASSERT_EQ(5, array.GetInt(4));
ASSERT_OK_AND_ASSIGN(auto res, array.ToIntArray());
ASSERT_EQ(res, arr);
}
{
// test date
std::vector<int32_t> arr = {10000, 20006};
BinaryArray array;
BinaryArrayWriter writer =
BinaryArrayWriter(&array, arr.size(), sizeof(int32_t), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteInt(i, arr[i]);
}
writer.Complete();
ASSERT_EQ(10000, array.GetDate(0));
ASSERT_EQ(20006, array.GetDate(1));
ASSERT_OK_AND_ASSIGN(auto res, array.ToIntArray());
ASSERT_EQ(res, arr);
}
{
std::vector<float> arr = {1.0, 2.0};
BinaryArray array;
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), sizeof(float), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteFloat(i, arr[i]);
}
writer.Complete();
ASSERT_EQ(1.0, array.GetFloat(0));
ASSERT_EQ(2.0, array.GetFloat(1));
ASSERT_OK_AND_ASSIGN(auto res, array.ToFloatArray());
ASSERT_EQ(res, arr);
}
{
std::vector<double> arr = {1.0, 2.0};
BinaryArray array;
BinaryArrayWriter writer =
BinaryArrayWriter(&array, arr.size(), sizeof(double), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteDouble(i, arr[i]);
}
writer.Complete();
ASSERT_EQ(2, writer.GetNumElements());
ASSERT_EQ(1.0, array.GetDouble(0));
ASSERT_EQ(2.0, array.GetDouble(1));
ASSERT_OK_AND_ASSIGN(auto res, array.ToDoubleArray());
ASSERT_EQ(res, arr);
}
// decimal
{
// not compact (precision <= 18)
std::vector<Decimal> arr = {Decimal(6, 2, 123456), Decimal(6, 3, 123456)};
BinaryArray array;
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), 8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteDecimal(i, arr[i], 6);
}
writer.Complete();
ASSERT_EQ(arr[0], array.GetDecimal(0, 6, 2));
ASSERT_EQ(arr[1], array.GetDecimal(1, 6, 3));
}
{
// compact (precision > 18)
std::vector<Decimal> arr = {Decimal(/*precision=*/20, /*scale=*/3, 123456),
Decimal(/*precision=*/20, /*scale=*/3, 123456789)};
BinaryArray array;
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), 8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteDecimal(i, arr[i], /*precision=*/20);
}
writer.Complete();
ASSERT_EQ(arr[0], array.GetDecimal(0, /*precision=*/20, /*scale=*/3));
ASSERT_EQ(arr[1], array.GetDecimal(1, /*precision=*/20, /*scale=*/3));
}
// timestamp
{
std::vector<Timestamp> arr = {Timestamp(0, 0), Timestamp(12345, 1)};
BinaryArray array;
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), 8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteTimestamp(i, arr[i], 9);
}
writer.Complete();
ASSERT_EQ(arr[0], array.GetTimestamp(0, 9));
ASSERT_EQ(arr[1], array.GetTimestamp(1, 9));
}
// binary
{
std::vector<Bytes> arr;
arr.emplace_back("hello", pool.get());
arr.emplace_back("world", pool.get());
BinaryArray array;
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), 8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteBinary(i, arr[i]);
}
writer.Complete();
ASSERT_EQ(arr[0], *array.GetBinary(0));
ASSERT_EQ(arr[1], *array.GetBinary(1));
}
// array
{
// element1
std::vector<int32_t> arr1 = {1, 2};
BinaryArray array1;
BinaryArrayWriter writer1 =
BinaryArrayWriter(&array1, arr1.size(), sizeof(int32_t), pool.get());
for (size_t i = 0; i < arr1.size(); i++) {
writer1.WriteInt(i, arr1[i]);
}
writer1.Complete();
// element2
std::vector<int32_t> arr2 = {100, 200};
BinaryArray array2;
BinaryArrayWriter writer2 =
BinaryArrayWriter(&array2, arr2.size(), sizeof(int32_t), pool.get());
for (size_t i = 0; i < arr2.size(); i++) {
writer2.WriteInt(i, arr2[i]);
}
writer2.Complete();
// array
std::vector<BinaryArray> arr;
arr.push_back(array1);
arr.push_back(array2);
BinaryArray array;
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), 8, pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteArray(i, arr[i]);
}
writer.Complete();
ASSERT_EQ(arr[0].ToIntArray().value(), array.GetArray(0)->ToIntArray().value());
ASSERT_EQ(arr[1].ToIntArray().value(), array.GetArray(1)->ToIntArray().value());
}
}
TEST(BinaryArrayTest, TestCopy) {
auto pool = GetDefaultPool();
std::vector<bool> arr = {true, false};
BinaryArray array;
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), sizeof(bool), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteBoolean(i, arr[i]);
}
writer.Complete();
auto copy_array = array.Copy(pool.get());
ASSERT_EQ(array.ToBooleanArray().value(), copy_array.ToBooleanArray().value());
}
TEST(BinaryArrayTest, TestNullValue) {
auto pool = GetDefaultPool();
std::vector<int64_t> arr = {1, 2, 3, 4, 5};
BinaryArray array;
BinaryArrayWriter writer =
BinaryArrayWriter(&array, arr.size() + 2, sizeof(int64_t), pool.get());
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteLong(i, arr[i]);
}
// last two element is null
writer.SetNullValue<int64_t>(5);
writer.SetNullAt(6);
writer.Complete();
ASSERT_TRUE(array.AnyNull());
auto ret = BinaryArray::FromLongArray(&array, pool.get());
ASSERT_EQ(ret, array);
}
TEST(BinaryArrayTest, TestFromLongArray) {
auto pool = GetDefaultPool();
auto f1 = arrow::ipc::internal::json::ArrayFromJSON(arrow::list(arrow::int64()),
R"([[123, null], [789], [12345], [12]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 2);
BinaryArray ret = BinaryArray::FromLongArray(&array, pool.get());
BinaryArray expected_array;
BinaryArrayWriter writer = BinaryArrayWriter(&expected_array, 2, sizeof(int64_t), pool.get());
writer.Reset();
writer.WriteLong(0, 123);
writer.SetNullAt(1);
writer.Complete();
ASSERT_EQ(ret, expected_array);
ASSERT_NOK_WITH_MSG(expected_array.ToLongArray(),
"Primitive array must not contain a null value.");
}
TEST(BinaryArrayTest, TestFromIntArray) {
auto pool = GetDefaultPool();
std::vector<int32_t> values = {10, 20, 30, 100};
BinaryArray array = BinaryArray::FromIntArray(values, pool.get());
ASSERT_OK_AND_ASSIGN(std::vector<int32_t> result, array.ToIntArray());
ASSERT_EQ(result, values);
}
TEST(BinaryArrayTest, TestFromAllNullLongArray) {
auto pool = GetDefaultPool();
auto f1 = arrow::ipc::internal::json::ArrayFromJSON(arrow::list(arrow::int64()),
R"([[null, null], [789], [12345], [12]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 2);
BinaryArray ret = BinaryArray::FromLongArray(&array, pool.get());
BinaryArray expected_array;
BinaryArrayWriter writer = BinaryArrayWriter(&expected_array, 2, sizeof(int64_t), pool.get());
writer.Reset();
writer.SetNullAt(0);
writer.SetNullAt(1);
writer.Complete();
ASSERT_EQ(ret, expected_array);
ASSERT_NOK_WITH_MSG(expected_array.ToLongArray(),
"Primitive array must not contain a null value.");
}
TEST(BinaryArrayTest, TestReset) {
auto pool = GetDefaultPool();
std::vector<int64_t> arr = {1, 2, 3, 4, 5};
BinaryArray array;
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), sizeof(int64_t), pool.get());
writer.Reset();
for (size_t i = 0; i < arr.size(); i++) {
writer.WriteLong(i, arr[i]);
}
writer.Complete();
ASSERT_EQ(arr, array.ToLongArray().value());
}
} // namespace paimon::test