blob: 650d7eb252d36e3e012373f51a362d15f875f551 [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "paimon/common/data/columnar/columnar_array.h"

#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "arrow/api.h"
#include "arrow/array/array_nested.h"
#include "arrow/ipc/json_simple.h"
#include "arrow/util/checked_cast.h"
#include "gtest/gtest.h"
#include "paimon/common/data/internal_map.h"
#include "paimon/common/data/internal_row.h"
#include "paimon/common/utils/date_time_utils.h"
#include "paimon/memory/bytes.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/testing/utils/testharness.h"
namespace paimon::test {
// Covers the primitive element accessors (GetBoolean, GetByte, ...) and the bulk conversions
// (ToBooleanArray, ToByteArray, ...) of ColumnarArray. Every scenario parses a list-typed Arrow
// array from JSON and wraps a window of its flattened child values, selected by the trailing
// (offset, length) constructor arguments, in a ColumnarArray.
TEST(ColumnarArrayTest, TestSimple) {
    auto pool = GetDefaultPool();

    // Parses `json` as an Arrow list<element_type> array. ColumnarArray is handed a raw pointer
    // to the child values, so each scenario keeps the returned ListArray alive in a local.
    const auto parse_list = [](const std::shared_ptr<arrow::DataType>& element_type,
                               const char* json) {
        auto parsed =
            arrow::ipc::internal::json::ArrayFromJSON(arrow::list(element_type), json).ValueOrDie();
        return arrow::internal::checked_pointer_cast<arrow::ListArray>(parsed);
    };

    {
        // Flattened booleans: [true, false, true, false, false, true].
        auto list =
            parse_list(arrow::boolean(), "[[true, false], [true], [false], [false, true]]");
        ColumnarArray window(list->values().get(), pool, /*offset=*/2, 1);
        ASSERT_EQ(window.Size(), 1);
        ASSERT_EQ(window.GetBoolean(0), true);
        const std::vector<char> want = {static_cast<char>(1)};
        ASSERT_EQ(window.ToBooleanArray().value(), want);
    }
    {
        // Flattened int8 values: [1, 1, 2, 3, 2, 2]; the window is the last element.
        auto list = parse_list(arrow::int8(), "[[1, 1, 2], [3], [2], [2]]");
        ColumnarArray window(list->values().get(), pool, /*offset=*/5, 1);
        ASSERT_EQ(window.GetByte(0), 2);
        const std::vector<char> want = {static_cast<char>(2)};
        ASSERT_EQ(window.ToByteArray().value(), want);
    }
    {
        // The window spans the first three int16 values.
        auto list = parse_list(arrow::int16(), "[[1, 1, 2], [3], [2], [-4]]");
        ColumnarArray window(list->values().get(), pool, /*offset=*/0, 3);
        ASSERT_EQ(window.GetShort(0), 1);
        ASSERT_EQ(window.GetShort(1), 1);
        ASSERT_EQ(window.GetShort(2), 2);
        const std::vector<int16_t> want = {1, 1, 2};
        ASSERT_EQ(window.ToShortArray().value(), want);
    }
    {
        auto list = parse_list(arrow::int32(), "[[1, 1, 2], [3], [2], [-4]]");
        ColumnarArray window(list->values().get(), pool, /*offset=*/3, 1);
        ASSERT_EQ(window.GetInt(0), 3);
        const std::vector<int32_t> want = {3};
        ASSERT_EQ(window.ToIntArray().value(), want);
    }
    {
        auto list = parse_list(arrow::int64(), "[[1, 1, 2], [3], [2], [-4]]");
        ColumnarArray window(list->values().get(), pool, /*offset=*/4, 1);
        ASSERT_EQ(window.GetLong(0), 2);
        const std::vector<int64_t> want = {2};
        ASSERT_EQ(window.ToLongArray().value(), want);
    }
    {
        // Flattened values are [1, 1, 2, 3, null]; the window covers the null element, so the
        // bulk conversion is expected to fail with a message mentioning "is null".
        auto list = parse_list(arrow::int64(), "[[1, 1, 2], [3], [null], null]");
        ColumnarArray window(list->values().get(), pool, /*offset=*/4, 1);
        ASSERT_NOK_WITH_MSG(window.ToLongArray(), "is null");
    }
    {
        auto list = parse_list(arrow::float32(), "[[0.0, 1.1, 2.2], [3.3], [4.4], [5.5]]");
        ColumnarArray window(list->values().get(), pool, /*offset=*/0, 3);
        ASSERT_NEAR(window.GetFloat(1), 1.1, 0.001);
        const std::vector<float> want = {0.0, 1.1, 2.2};
        ASSERT_EQ(window.ToFloatArray().value(), want);
    }
    {
        auto list = parse_list(arrow::float64(), "[[0.0, 1.1, 2.2], [3.3], [4.4], [5.5]]");
        ColumnarArray window(list->values().get(), pool, /*offset=*/3, 1);
        ASSERT_NEAR(window.GetDouble(0), 3.3, 0.001);
        const std::vector<double> want = {3.3};
        ASSERT_EQ(window.ToDoubleArray().value(), want);
    }
    {
        // Strings are checked through both the owning accessor and the view accessor.
        auto list =
            parse_list(arrow::utf8(), R"([["abc", "def"], ["efg"], ["hello"], ["hi"]])");
        ColumnarArray window(list->values().get(), pool, /*offset=*/4, 1);
        ASSERT_EQ(window.GetString(0).ToString(), "hi");
        ASSERT_EQ(std::string(window.GetStringView(0)), "hi");
    }
}
// Covers the non-primitive accessors of ColumnarArray: date, decimal, timestamp, binary, and
// the nested row / array / map accessors. Every scenario parses a list-typed Arrow array from
// JSON and wraps a window of its flattened child values, selected by the trailing
// (offset, length) constructor arguments, in a ColumnarArray.
TEST(ColumnarArrayTest, TestComplexAndNestedType) {
    auto pool = GetDefaultPool();

    // Parses `json` as an Arrow list<element_type> array. ColumnarArray is handed a raw pointer
    // to the child values, so each scenario keeps the returned ListArray alive in a local.
    const auto parse_list = [](const std::shared_ptr<arrow::DataType>& element_type,
                               const char* json) {
        auto parsed =
            arrow::ipc::internal::json::ArrayFromJSON(arrow::list(element_type), json).ValueOrDie();
        return arrow::internal::checked_pointer_cast<arrow::ListArray>(parsed);
    };

    {
        // date32: flattened values are [1, 1, 2, 3, 2, -4].
        auto list = parse_list(arrow::date32(), "[[1, 1, 2], [3], [2], [-4]]");
        ColumnarArray window(list->values().get(), pool, /*offset=*/3, 1);
        ASSERT_EQ(window.GetDate(0), 3);
    }
    {
        // decimal128(10, 3): "1.234" is compared against Decimal(10, 3, 1234).
        auto list = parse_list(arrow::decimal128(10, 3),
                               R"([["1.234", "1234.000"], ["-9876.543"], ["666.888"]])");
        ColumnarArray window(list->values().get(), pool, /*offset=*/0, 2);
        ASSERT_EQ(window.GetDecimal(0, 10, 3), Decimal(10, 3, 1234));
    }
    {
        // Nanosecond timestamp read with precision 9; 59 seconds after the epoch is expected
        // to compare equal to Timestamp(59000, 0).
        auto list = parse_list(arrow::timestamp(arrow::TimeUnit::NANO),
                               R"([["1970-01-01T00:00:59"],["2000-02-29T23:23:23",
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])");
        ColumnarArray window(list->values().get(), pool, /*offset=*/0, 1);
        auto ts = window.GetTimestamp(0, 9);
        ASSERT_EQ(ts, Timestamp(59000, 0));
    }
    {
        // Binary values are checked through both GetBinary and GetStringView.
        auto list = parse_list(arrow::binary(), R"([["aaa", "bb"], ["ccc"], ["bbb"]])");
        ColumnarArray window(list->values().get(), pool, /*offset=*/0, 2);
        ASSERT_EQ(*window.GetBinary(1), Bytes("bb", pool.get()));
        ASSERT_EQ(std::string(window.GetStringView(1)), "bb");
    }
    {
        // Struct elements: read the second row of the window and check its four fields.
        // `arrow::field` is spelled out instead of relying on argument-dependent lookup.
        const auto row_type = arrow::struct_({
            arrow::field("sub1", arrow::int64()),
            arrow::field("sub2", arrow::int64()),
            arrow::field("sub3", arrow::int64()),
            arrow::field("sub4", arrow::int64()),
        });
        auto list = parse_list(row_type,
                               R"([
[[1, 3, 2, 5],
[2, 2, 1, 3]],
[[3, 2, 1, 3]],
[[4, 1, 0, 2]]
])");
        ColumnarArray window(list->values().get(), pool, /*offset=*/0, 2);
        auto row = window.GetRow(1, 4);
        ASSERT_EQ(row->GetLong(0), 2);
        ASSERT_EQ(row->GetLong(1), 2);
        ASSERT_EQ(row->GetLong(2), 1);
        ASSERT_EQ(row->GetLong(3), 3);
    }
    {
        // list<list<int64>>: the first element of the window is the inner list [1, 2, 3].
        // GetArray(0) is fetched once; the original held a second, never-read copy.
        auto list = parse_list(arrow::list(arrow::int64()), "[[[1, 2, 3], [4, 5, 6]], []]");
        ColumnarArray window(list->values().get(), pool, /*offset=*/0, 1);
        auto inner = window.GetArray(0);
        const std::vector<int64_t> want = {1, 2, 3};
        ASSERT_EQ(inner->ToLongArray().value(), want);
    }
    {
        // list<map<int32, int64>>: the map is materialised once and held in a local, so the
        // key and value arrays are both read from the same map object.
        auto list = parse_list(arrow::map(arrow::int32(), arrow::int64()),
                               R"([
[[[1, 3], [4, 4]]], []
])");
        ASSERT_TRUE(list);
        ColumnarArray window(list->values().get(), pool, /*offset=*/0, 1);
        auto map = window.GetMap(0);
        auto keys = map->KeyArray();
        auto values = map->ValueArray();
        ASSERT_EQ(keys->ToIntArray().value(), std::vector<int32_t>({1, 4}));
        ASSERT_EQ(values->ToLongArray().value(), std::vector<int64_t>({3, 4}));
    }
}
// Checks ColumnarArray::GetTimestamp for every Arrow time unit, first with a plain timestamp
// type and then with a timestamp type carrying the local timezone name. Each scenario wraps
// the window (offset 1, length 2) of the flattened child values and reads its first element,
// i.e. the "2000-02-29T23:23:23[.fraction]" entry.
TEST(ColumnarArrayTest, TestTimestampType) {
    auto pool = GetDefaultPool();
    auto timezone = DateTimeUtils::GetLocalTimezoneName();

    // One row per time unit: the JSON input, the precision passed to GetTimestamp, and the two
    // arguments of the Timestamp the result is compared against.
    struct Scenario {
        arrow::TimeUnit::type unit;
        const char* json;
        int precision;
        int64_t millis;
        int nanos;
    };
    const Scenario scenarios[] = {
        {arrow::TimeUnit::SECOND,
         R"([["1970-01-01T00:00:59"],["2000-02-29T23:23:23",
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])",
         0, 951866603000, 0},
        {arrow::TimeUnit::MILLI,
         R"([["1970-01-01T00:00:59"],["2000-02-29T23:23:23.001",
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])",
         3, 951866603001, 0},
        {arrow::TimeUnit::MICRO,
         R"([["1970-01-01T00:00:59"],["2000-02-29T23:23:23.001001",
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])",
         6, 951866603001, 1000},
        {arrow::TimeUnit::NANO,
         R"([["1970-01-01T00:00:59"],["2000-02-29T23:23:23.001001001",
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])",
         9, 951866603001, 1001},
    };

    // First pass: timestamp types without a timezone; second pass: with the local timezone.
    // The expected values are the same in both passes.
    for (const bool zoned : {false, true}) {
        for (const Scenario& scenario : scenarios) {
            auto element_type = zoned ? arrow::timestamp(scenario.unit, timezone)
                                      : arrow::timestamp(scenario.unit);
            auto parsed =
                arrow::ipc::internal::json::ArrayFromJSON(arrow::list(element_type), scenario.json)
                    .ValueOrDie();
            // Keep the ListArray alive: ColumnarArray receives a raw pointer to its values.
            auto list = arrow::internal::checked_pointer_cast<arrow::ListArray>(parsed);
            ColumnarArray window(list->values().get(), pool, /*offset=*/1, 2);
            auto ts = window.GetTimestamp(0, scenario.precision);
            ASSERT_EQ(ts, Timestamp(scenario.millis, scenario.nanos)) << ts.GetMillisecond();
        }
    }
}
} // namespace paimon::test