// blob: dc40e70f7d3b4b3fd044aa5e71d99a17b378eec8 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <vector>
#include "common/status.h"
#include "gtest/gtest_pred_impl.h"
#include "olap/field.h"
#include "runtime/define_primitive_type.h"
#include "runtime/primitive_type.h"
#include "runtime/runtime_state.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_variant.h"
#include "vec/core/field.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/data_type_variant.h"
#include "vec/functions/simple_function_factory.h"
namespace doris::vectorized {
// Builds a TYPE_VARIANT Field from a list of (path string, value) pairs.
//
// Every key string is wrapped in a PathInData and added to the Field's
// VariantMap with try_emplace (assuming std::map-like semantics, a path that
// is already present is left untouched).
//
// @param key_and_values  path/value pairs to store in the variant map.
// @return the populated variant Field.
static doris::vectorized::Field construct_variant_map(
        const std::vector<std::pair<std::string, doris::vectorized::Field>>& key_and_values) {
    doris::vectorized::Field variant_field = Field::create_field<TYPE_VARIANT>(VariantMap {});
    auto& entries = variant_field.get<VariantMap&>();
    for (const auto& key_value : key_and_values) {
        PathInData subcolumn_path(key_value.first);
        entries.try_emplace(subcolumn_path, key_value.second);
    }
    return variant_field;
}
static auto construct_basic_varint_column() {
// 1. create an empty variant column
auto variant = ColumnVariant::create(5);
std::vector<std::pair<std::string, doris::vectorized::Field>> data;
// 2. subcolumn path
data.emplace_back("v.a", Field::create_field<TYPE_INT>(20));
data.emplace_back("v.b", Field::create_field<TYPE_STRING>("20"));
data.emplace_back("v.c", Field::create_field<TYPE_INT>(20));
data.emplace_back("v.f", Field::create_field<TYPE_INT>(20));
data.emplace_back("v.e", Field::create_field<TYPE_STRING>("50"));
for (int i = 0; i < 5; ++i) {
auto field = construct_variant_map(data);
variant->try_insert(field);
}
return variant;
}
// Casts Int32, String and Array(Int32) source columns to the variant type and
// checks that the resulting variant column has one row per input row.
TEST(FunctionVariantCast, CastToVariant) {
    // Case 1: Int32 -> variant (three rows).
    {
        auto source_type = std::make_shared<DataTypeInt32>();
        auto target_type = std::make_shared<DataTypeVariant>();

        auto source_col = ColumnInt32::create();
        source_col->insert(Field::create_field<TYPE_INT>(42));
        source_col->insert(Field::create_field<TYPE_INT>(100));
        source_col->insert(Field::create_field<TYPE_INT>(-1));

        // Second argument carries only the target type; it has no column.
        ColumnsWithTypeAndName cast_args {{source_col->get_ptr(), source_type, "int32_col"},
                                          {nullptr, target_type, "variant_type"}};
        auto cast_fn =
                SimpleFunctionFactory::instance().get_function("CAST", cast_args, target_type);
        ASSERT_NE(cast_fn, nullptr);

        // The result slot is appended after the argument columns.
        Block work_block {cast_args};
        size_t result_pos = work_block.columns();
        work_block.insert({nullptr, target_type, "result"});

        RuntimeState runtime_state;
        auto fn_ctx = FunctionContext::create_context(&runtime_state, {}, {});
        ASSERT_TRUE(cast_fn->execute(fn_ctx.get(), work_block, {0}, result_pos, 3).ok());

        auto output = work_block.get_by_position(result_pos).column;
        ASSERT_NE(output.get(), nullptr);
        const auto* output_variant = assert_cast<const ColumnVariant*>(output.get());
        ASSERT_EQ(output_variant->size(), 3);
    }

    // Case 2: String -> variant (two rows). The function is requested with a
    // nullable variant return type, so the result is unwrapped before the cast.
    {
        auto source_type = std::make_shared<DataTypeString>();
        auto target_type = std::make_shared<DataTypeVariant>();

        auto source_col = ColumnString::create();
        source_col->insert_data("hello", 5);
        source_col->insert_data("world", 5);

        ColumnsWithTypeAndName cast_args {{source_col->get_ptr(), source_type, "string_col"},
                                          {nullptr, target_type, "variant_type"}};
        auto cast_fn = SimpleFunctionFactory::instance().get_function(
                "CAST", cast_args, make_nullable(target_type));
        ASSERT_NE(cast_fn, nullptr);

        Block work_block {cast_args};
        size_t result_pos = work_block.columns();
        work_block.insert({nullptr, target_type, "result"});

        RuntimeState runtime_state;
        auto fn_ctx = FunctionContext::create_context(&runtime_state, {}, {});
        ASSERT_TRUE(cast_fn->execute(fn_ctx.get(), work_block, {0}, result_pos, 2).ok());

        auto output = work_block.get_by_position(result_pos).column;
        ASSERT_NE(output.get(), nullptr);
        const auto* output_variant =
                assert_cast<const ColumnVariant*>(remove_nullable(output).get());
        ASSERT_EQ(output_variant->size(), 2);
    }

    // Case 3: Array(Int32) -> variant (a single row holding [1, 2, 3]).
    {
        auto source_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>());
        auto target_type = std::make_shared<DataTypeVariant>();

        auto source_col =
                ColumnArray::create(ColumnInt32::create(), ColumnArray::ColumnOffsets::create());
        auto& elements = assert_cast<ColumnInt32&>(source_col->get_data());
        auto& row_offsets = source_col->get_offsets();
        elements.insert(Field::create_field<TYPE_INT>(1));
        elements.insert(Field::create_field<TYPE_INT>(2));
        elements.insert(Field::create_field<TYPE_INT>(3));
        // One array row that ends after the third element.
        row_offsets.push_back(3);

        ColumnsWithTypeAndName cast_args {{source_col->get_ptr(), source_type, "array_col"},
                                          {nullptr, target_type, "variant_type"}};
        auto cast_fn =
                SimpleFunctionFactory::instance().get_function("CAST", cast_args, target_type);
        ASSERT_NE(cast_fn, nullptr);

        Block work_block {cast_args};
        size_t result_pos = work_block.columns();
        work_block.insert({nullptr, target_type, "result"});

        RuntimeState runtime_state;
        auto fn_ctx = FunctionContext::create_context(&runtime_state, {}, {});
        ASSERT_TRUE(cast_fn->execute(fn_ctx.get(), work_block, {0}, result_pos, 1).ok());

        auto output = work_block.get_by_position(result_pos).column;
        ASSERT_NE(output.get(), nullptr);
        const auto* output_variant =
                assert_cast<const ColumnVariant*>(remove_nullable(output).get());
        ASSERT_EQ(output_variant->size(), 1);
    }
}
// Casts variant columns whose root holds Int32, String or Array(Int32) data
// back to the matching concrete type and checks every value round-trips.
TEST(FunctionVariantCast, CastFromVariant) {
    // Case 1: variant (Int32 root) -> Int32.
    {
        auto source_type = std::make_shared<DataTypeVariant>();
        auto target_type = std::make_shared<DataTypeInt32>();

        // Root column contents: [42, 100, -1].
        auto source_col = ColumnVariant::create(0);
        source_col->create_root(target_type, ColumnInt32::create());
        MutableColumnPtr root = source_col->get_root();
        root->insert(Field::create_field<TYPE_INT>(42));
        root->insert(Field::create_field<TYPE_INT>(100));
        root->insert(Field::create_field<TYPE_INT>(-1));

        ColumnsWithTypeAndName cast_args {{source_col->get_ptr(), source_type, "variant_col"},
                                          {nullptr, target_type, "int32_type"}};
        auto cast_fn =
                SimpleFunctionFactory::instance().get_function("CAST", cast_args, target_type);
        ASSERT_NE(cast_fn, nullptr);

        Block work_block {cast_args};
        size_t result_pos = work_block.columns();
        work_block.insert({nullptr, target_type, "result"});

        RuntimeState runtime_state;
        auto fn_ctx = FunctionContext::create_context(&runtime_state, {}, {});
        ASSERT_TRUE(cast_fn->execute(fn_ctx.get(), work_block, {0}, result_pos, 3).ok());

        auto output = work_block.get_by_position(result_pos).column;
        ASSERT_NE(output.get(), nullptr);
        // The result is always nullable, so strip the wrapper first.
        const auto* output_ints = assert_cast<const ColumnInt32*>(remove_nullable(output).get());
        ASSERT_EQ(output_ints->size(), 3);
        ASSERT_EQ(output_ints->get_element(0), 42);
        ASSERT_EQ(output_ints->get_element(1), 100);
        ASSERT_EQ(output_ints->get_element(2), -1);
    }

    // Case 2: variant (String root) -> String.
    {
        auto source_type = std::make_shared<DataTypeVariant>();
        auto target_type = std::make_shared<DataTypeString>();

        // Root column contents: ["hello", "world"].
        auto source_col = ColumnVariant::create(0);
        source_col->create_root(target_type, ColumnString::create());
        MutableColumnPtr root = source_col->get_root();
        root->insert_data("hello", 5);
        root->insert_data("world", 5);

        ColumnsWithTypeAndName cast_args {{source_col->get_ptr(), source_type, "variant_col"},
                                          {nullptr, target_type, "string_type"}};
        auto cast_fn =
                SimpleFunctionFactory::instance().get_function("CAST", cast_args, target_type);
        ASSERT_NE(cast_fn, nullptr);

        Block work_block {cast_args};
        size_t result_pos = work_block.columns();
        work_block.insert({nullptr, target_type, "result"});

        RuntimeState runtime_state;
        auto fn_ctx = FunctionContext::create_context(&runtime_state, {}, {});
        ASSERT_TRUE(cast_fn->execute(fn_ctx.get(), work_block, {0}, result_pos, 2).ok());

        auto output = work_block.get_by_position(result_pos).column;
        ASSERT_NE(output.get(), nullptr);
        const auto* output_strings =
                assert_cast<const ColumnString*>(remove_nullable(output).get());
        ASSERT_EQ(output_strings->size(), 2);
        ASSERT_EQ(output_strings->get_data_at(0).to_string(), "hello");
        ASSERT_EQ(output_strings->get_data_at(1).to_string(), "world");
    }

    // Case 3: variant (Array(Int32) root) -> Array(Int32).
    {
        auto source_type = std::make_shared<DataTypeVariant>();
        auto target_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>());

        // Root column contents: a single row holding [1, 2, 3].
        auto source_col = ColumnVariant::create(0);
        source_col->create_root(
                target_type,
                ColumnArray::create(ColumnInt32::create(), ColumnArray::ColumnOffsets::create()));
        MutableColumnPtr root = source_col->get_root();
        Field array_value = Field::create_field<TYPE_ARRAY>(
                Array {Field::create_field<TYPE_INT>(1), Field::create_field<TYPE_INT>(2),
                       Field::create_field<TYPE_INT>(3)});
        root->insert(array_value);

        ColumnsWithTypeAndName cast_args {{source_col->get_ptr(), source_type, "variant_col"},
                                          {nullptr, target_type, "array_type"}};
        auto cast_fn =
                SimpleFunctionFactory::instance().get_function("CAST", cast_args, target_type);
        ASSERT_NE(cast_fn, nullptr);

        Block work_block {cast_args};
        size_t result_pos = work_block.columns();
        work_block.insert({nullptr, target_type, "result"});

        RuntimeState runtime_state;
        auto fn_ctx = FunctionContext::create_context(&runtime_state, {}, {});
        ASSERT_TRUE(cast_fn->execute(fn_ctx.get(), work_block, {0}, result_pos, 1).ok());

        auto output = work_block.get_by_position(result_pos).column;
        ASSERT_NE(output.get(), nullptr);
        const auto* output_array = assert_cast<const ColumnArray*>(remove_nullable(output).get());
        ASSERT_EQ(output_array->size(), 1);
        // Inspect the flattened element column of the array result.
        const auto& output_elements = assert_cast<const ColumnInt32&>(output_array->get_data());
        ASSERT_EQ(output_elements.size(), 3);
        ASSERT_EQ(output_elements.get_element(0), 1);
        ASSERT_EQ(output_elements.get_element(1), 2);
        ASSERT_EQ(output_elements.get_element(2), 3);
    }
}
// Casts a variant column whose root is Nullable(Int32) with contents
// [42, NULL, 100] to Nullable(Int32) and checks values and null flags.
TEST(FunctionVariantCast, CastVariantWithNull) {
    auto source_type = std::make_shared<DataTypeVariant>();
    auto nested_type = std::make_shared<DataTypeInt32>();
    auto target_type = std::make_shared<DataTypeNullable>(nested_type);

    // Build the variant column on top of a nullable Int32 root.
    auto source_col = ColumnVariant::create(0);
    source_col->create_root(target_type,
                            ColumnNullable::create(ColumnInt32::create(), ColumnUInt8::create()));
    MutableColumnPtr root = source_col->get_root();
    root->insert(Field::create_field<TYPE_INT>(42));
    root->insert(Field::create_field<TYPE_NULL>(Null()));
    root->insert(Field::create_field<TYPE_INT>(100));

    ColumnsWithTypeAndName cast_args {{source_col->get_ptr(), source_type, "variant_col"},
                                      {nullptr, target_type, "nullable_int32_type"}};
    // finalize() runs after the argument list captured the column pointer,
    // exactly as in the original statement order.
    source_col->finalize();

    auto cast_fn = SimpleFunctionFactory::instance().get_function("CAST", cast_args, target_type);
    ASSERT_NE(cast_fn, nullptr);

    Block work_block {cast_args};
    size_t result_pos = work_block.columns();
    work_block.insert({nullptr, target_type, "result"});

    RuntimeState runtime_state;
    auto fn_ctx = FunctionContext::create_context(&runtime_state, {}, {});
    ASSERT_TRUE(cast_fn->execute(fn_ctx.get(), work_block, {0}, result_pos, 3).ok());

    auto output = work_block.get_by_position(result_pos).column;
    ASSERT_NE(output.get(), nullptr);
    const auto* output_nullable = assert_cast<const ColumnNullable*>(output.get());
    ASSERT_EQ(output_nullable->size(), 3);

    const auto& output_values =
            assert_cast<const ColumnInt32&>(output_nullable->get_nested_column());
    const auto& output_null_map = output_nullable->get_null_map_data();
    // Row 0 is a real value, row 1 is NULL, row 2 is a real value again.
    ASSERT_EQ(output_values.get_element(0), 42);
    ASSERT_EQ(output_null_map[0], 0);
    ASSERT_EQ(output_null_map[1], 1);
    ASSERT_EQ(output_values.get_element(2), 100);
}
// Exercises the branches of the variant -> concrete-type cast that are taken
// when the variant has no usable scalar root:
//   1. a variant whose only subcolumn is the dummy column,
//   2. a multi-subcolumn variant cast to a non-nullable, non-string type,
//   3. a multi-subcolumn variant cast to String,
//   4. a nullable multi-subcolumn variant cast to a nullable type.
TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) {
    // Test case 1: variant.empty() branch
    {
        auto variant_type = std::make_shared<DataTypeVariant>();
        auto int32_type = std::make_shared<DataTypeInt32>();

        // Single-row Int32 column registered under the dummy path.
        MutableColumnPtr root = ColumnInt32::create();
        root->insert(Field::create_field<TYPE_INT>(42));
        vectorized::ColumnVariant::Subcolumns dynamic_subcolumns;
        dynamic_subcolumns.add(
                vectorized::PathInData(ColumnVariant::COLUMN_NAME_DUMMY),
                vectorized::ColumnVariant::Subcolumn {root->get_ptr(), int32_type, true, true});
        auto variant_col = ColumnVariant::create(0, std::move(dynamic_subcolumns));
        variant_col->finalize();

        ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), variant_type, "variant_col"},
                                          {nullptr, int32_type, "int32_type"}};
        auto function =
                SimpleFunctionFactory::instance().get_function("CAST", arguments, int32_type);
        ASSERT_NE(function, nullptr);

        Block block {arguments};
        size_t result_column = block.columns();
        block.insert({nullptr, int32_type, "result"});

        RuntimeState state;
        auto ctx = FunctionContext::create_context(&state, {}, {});
        ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok());

        auto result_col = block.get_by_position(result_column).column;
        ASSERT_NE(result_col.get(), nullptr);
        // always nullable
        const auto* int32_result =
                assert_cast<const ColumnInt32*>(remove_nullable(result_col).get());
        ASSERT_EQ(int32_result->size(), 1);
        // because of variant.empty() we insert_default with data_type_to
        ASSERT_EQ(int32_result->get_element(0), 0);
    }

    // Test case 2: !data_type_to->is_nullable() && !WhichDataType(data_type_to).is_string() branch
    {
        // object has sparse column
        auto int32_type = std::make_shared<DataTypeInt32>();
        auto variant_col = construct_basic_varint_column();
        auto variant_type = std::make_shared<DataTypeVariant>();

        ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), variant_type, "variant_col"},
                                          {nullptr, int32_type, "int32_type"}};
        variant_col->finalize();
        auto function =
                SimpleFunctionFactory::instance().get_function("CAST", arguments, int32_type);
        ASSERT_NE(function, nullptr);

        Block block {arguments};
        size_t result_column = block.columns();
        block.insert({nullptr, int32_type, "result"});

        RuntimeState state;
        auto ctx = FunctionContext::create_context(&state, {}, {});
        ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok());

        auto result_col = block.get_by_position(result_column).column;
        ASSERT_NE(result_col.get(), nullptr);
        // The single result row is expected to be NULL.
        const auto* nullable_result = assert_cast<const ColumnNullable*>(result_col.get());
        ASSERT_EQ(nullable_result->size(), 1);
        ASSERT_TRUE(nullable_result->is_null_at(0));
    }

    // Test case 3: WhichDataType(data_type_to).is_string() branch
    {
        // variant has sparse column
        auto variant_col = construct_basic_varint_column();
        auto string_type = std::make_shared<DataTypeString>();
        auto variant_type = std::make_shared<DataTypeVariant>();

        ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), variant_type, "variant_col"},
                                          {nullptr, string_type, "string_type"}};
        variant_col->finalize();
        auto function =
                SimpleFunctionFactory::instance().get_function("CAST", arguments, string_type);
        ASSERT_NE(function, nullptr);

        Block block {arguments};
        size_t result_column = block.columns();
        block.insert({nullptr, string_type, "result"});

        RuntimeState state;
        auto ctx = FunctionContext::create_context(&state, {}, {});
        ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok());

        auto result_col = block.get_by_position(result_column).column;
        ASSERT_NE(result_col.get(), nullptr);
        const auto* string_result = assert_cast<const ColumnString*>(result_col.get());
        // just call ConvertImplGenericToString which will insert all source column data to ColumnString
        ASSERT_EQ(string_result->size(), variant_col->size());
        ASSERT_EQ(string_result->get_data_at(0).to_string(),
                  "{\"v\":{\"a\":20,\"b\":\"20\",\"c\":20,\"e\":\"50\",\"f\":20}}");
    }

    // Test case 4: else branch (nullable type)
    {
        auto variant_col = construct_basic_varint_column();
        variant_col->finalize();
        auto nullable_variant_col = make_nullable(variant_col->get_ptr());

        auto nullable_string_type = make_nullable(std::make_shared<DataTypeString>());
        auto variant_type = std::make_shared<DataTypeVariant>();
        auto nullable_variant_type = make_nullable(variant_type);

        ColumnsWithTypeAndName arguments {
                {nullable_variant_col->get_ptr(), nullable_variant_type, "variant_col"},
                {nullptr, nullable_string_type, "nullable_string_type"}};
        auto function = SimpleFunctionFactory::instance().get_function("CAST", arguments,
                                                                       nullable_string_type);
        ASSERT_NE(function, nullptr);

        Block block {arguments};
        size_t result_column = block.columns();
        block.insert({nullptr, nullable_string_type, "result"});

        RuntimeState state;
        auto ctx = FunctionContext::create_context(&state, {}, {});
        ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok());

        auto result_col = block.get_by_position(result_column).column;
        ASSERT_NE(result_col.get(), nullptr);
        const auto* nullable_result = assert_cast<const ColumnNullable*>(result_col.get());
        ASSERT_EQ(nullable_result->size(), 1);
        // The result holds exactly one row (asserted above), so row 0 is the
        // only valid index to inspect; index 1 would read past the end.
        ASSERT_TRUE(nullable_result->is_null_at(0));
    }
}
} // namespace doris::vectorized