blob: b0078552c45faeefed963ed922f39e4ac8c424eb [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "paimon/common/utils/data_converter_utils.h"
#include <cstddef>
#include <memory>
#include <vector>
#include "arrow/type_fwd.h"
#include "gtest/gtest.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/testing/utils/testharness.h"
namespace paimon::test {
// Round-trips partition values through DataConverterUtils with legacy partition
// names enabled: every string is written into a BinaryRow by a converter created
// for its arrow type, read back through the typed BinaryRow getters, and finally
// rendered to a string again by the matching reverse converter.
// In this mode the DATE32 field is supplied, and expected back, as the plain
// number "10440".
TEST(DataConverterUtilsTest, TestDataToBinaryRowConverterWithLegacyPartitionName) {
    auto pool = GetDefaultPool();
    // (input string, arrow type) for each field, in field order.
    const std::vector<std::pair<std::string, arrow::Type::type>> data = {
        {"true", arrow::Type::BOOL},
        {"10", arrow::Type::INT8},
        {"-20", arrow::Type::INT8},
        {"1556", arrow::Type::INT16},
        {"-2556", arrow::Type::INT16},
        {"348489", arrow::Type::INT32},
        {"-448489", arrow::Type::INT32},
        {"279039", arrow::Type::INT64},
        {"1234567", arrow::Type::INT64},
        {"abcde", arrow::Type::STRING},
        {"这是一个很长很长的中文", arrow::Type::STRING},
        {"10440", arrow::Type::DATE32}};

    std::vector<DataConverterUtils::StrToBinaryRowConverter> converters;
    std::vector<DataConverterUtils::BinaryRowFieldToStrConverter> reconverters;
    converters.reserve(data.size());
    reconverters.reserve(data.size());
    // Only the type is needed to build the converters; the values are used later.
    for (const auto& entry : data) {
        const arrow::Type::type type = entry.second;
        ASSERT_OK_AND_ASSIGN(auto converter,
                             DataConverterUtils::CreateDataToBinaryRowConverter(type, pool.get()));
        converters.emplace_back(std::move(converter));
        ASSERT_OK_AND_ASSIGN(auto reconverter,
                             DataConverterUtils::CreateBinaryRowFieldToStringConverter(
                                 type, /*legacy_partition_name_enabled=*/true));
        // Move rather than copy, matching how `converter` is stored above.
        reconverters.emplace_back(std::move(reconverter));
    }

    // Creating a converter for an unsupported type (LIST) must fail.
    ASSERT_NOK(DataConverterUtils::CreateDataToBinaryRowConverter(arrow::Type::LIST, pool.get()));

    BinaryRow row(data.size());
    BinaryRowWriter writer(&row, 0, pool.get());
    for (size_t idx = 0; idx < data.size(); idx++) {
        ASSERT_OK(converters[idx](data[idx].first, idx, &writer));
    }

    // A string that is not a valid boolean must be rejected. The checks below
    // still expect field 0 to hold the value written earlier.
    ASSERT_NOK(converters[0]("abc", /*idx=*/0, &writer));
    writer.Complete();

    // Typed reads must match the values parsed from the input strings.
    ASSERT_EQ(data.size(), row.GetFieldCount());
    ASSERT_TRUE(row.GetBoolean(0));
    ASSERT_EQ(10, row.GetByte(1));
    ASSERT_EQ(-20, row.GetByte(2));
    ASSERT_EQ(1556, row.GetShort(3));
    ASSERT_EQ(-2556, row.GetShort(4));
    ASSERT_EQ(348489, row.GetInt(5));
    ASSERT_EQ(-448489, row.GetInt(6));
    ASSERT_EQ(279039, row.GetLong(7));
    ASSERT_EQ(1234567, row.GetLong(8));
    ASSERT_EQ("abcde", row.GetString(9).ToString());
    ASSERT_EQ("这是一个很长很长的中文", row.GetString(10).ToString());
    ASSERT_EQ(10440, row.GetDate(11));

    // Converting each field back to a string must reproduce the original input.
    for (size_t idx = 0; idx < data.size(); idx++) {
        ASSERT_OK_AND_ASSIGN(auto partition_field_str, reconverters[idx](row, idx));
        ASSERT_EQ(data[idx].first, partition_field_str);
    }
}
// Round-trips partition values through DataConverterUtils with legacy partition
// names disabled: every string is written into a BinaryRow by a converter created
// for its arrow type, read back through the typed BinaryRow getters, and finally
// rendered to a string again by the matching reverse converter.
// In this mode the DATE32 field is supplied, and expected back, as "1998-08-02",
// while the stored date value is expected to be 10440.
TEST(DataConverterUtilsTest, TestDataToBinaryRowConverterWithNoLegacyPartitionName) {
    auto pool = GetDefaultPool();
    // (input string, arrow type) for each field, in field order.
    const std::vector<std::pair<std::string, arrow::Type::type>> data = {
        {"true", arrow::Type::BOOL},
        {"10", arrow::Type::INT8},
        {"-20", arrow::Type::INT8},
        {"1556", arrow::Type::INT16},
        {"-2556", arrow::Type::INT16},
        {"348489", arrow::Type::INT32},
        {"-448489", arrow::Type::INT32},
        {"279039", arrow::Type::INT64},
        {"1234567", arrow::Type::INT64},
        {"abcde", arrow::Type::STRING},
        {"这是一个很长很长的中文", arrow::Type::STRING},
        {"1998-08-02", arrow::Type::DATE32}};

    std::vector<DataConverterUtils::StrToBinaryRowConverter> converters;
    std::vector<DataConverterUtils::BinaryRowFieldToStrConverter> reconverters;
    converters.reserve(data.size());
    reconverters.reserve(data.size());
    // Only the type is needed to build the converters; the values are used later.
    for (const auto& entry : data) {
        const arrow::Type::type type = entry.second;
        ASSERT_OK_AND_ASSIGN(auto converter,
                             DataConverterUtils::CreateDataToBinaryRowConverter(type, pool.get()));
        converters.emplace_back(std::move(converter));
        ASSERT_OK_AND_ASSIGN(auto reconverter,
                             DataConverterUtils::CreateBinaryRowFieldToStringConverter(
                                 type, /*legacy_partition_name_enabled=*/false));
        // Move rather than copy, matching how `converter` is stored above.
        reconverters.emplace_back(std::move(reconverter));
    }

    BinaryRow row(data.size());
    BinaryRowWriter writer(&row, 0, pool.get());
    for (size_t idx = 0; idx < data.size(); idx++) {
        ASSERT_OK(converters[idx](data[idx].first, idx, &writer));
    }
    writer.Complete();

    // Typed reads must match the values parsed from the input strings.
    ASSERT_EQ(data.size(), row.GetFieldCount());
    ASSERT_TRUE(row.GetBoolean(0));
    ASSERT_EQ(10, row.GetByte(1));
    ASSERT_EQ(-20, row.GetByte(2));
    ASSERT_EQ(1556, row.GetShort(3));
    ASSERT_EQ(-2556, row.GetShort(4));
    ASSERT_EQ(348489, row.GetInt(5));
    ASSERT_EQ(-448489, row.GetInt(6));
    ASSERT_EQ(279039, row.GetLong(7));
    ASSERT_EQ(1234567, row.GetLong(8));
    ASSERT_EQ("abcde", row.GetString(9).ToString());
    ASSERT_EQ("这是一个很长很长的中文", row.GetString(10).ToString());
    ASSERT_EQ(10440, row.GetDate(11));

    // Converting each field back to a string must reproduce the original input.
    for (size_t idx = 0; idx < data.size(); idx++) {
        ASSERT_OK_AND_ASSIGN(auto partition_field_str, reconverters[idx](row, idx));
        ASSERT_EQ(data[idx].first, partition_field_str);
    }
}
} // namespace paimon::test