blob: 01eda5ba3648c7e531d972126eff1b1e0a2aff26 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
<<<<<<< HEAD
<<<<<<<< HEAD:be/test/vec/columns/column_variant_test.cpp
#include "vec/columns/column_variant.h"
#include <gen_cpp/internal_service.pb.h>
========
<<<<<<< HEAD
<<<<<<< HEAD
=======
#include "vec/columns/column_object.h"
#include <gmock/gmock-more-matchers.h>
>>>>>>> 954311c1aad ([feature](semi-structure) support variant and index with many features)
=======
#include <gen_cpp/internal_service.pb.h>
>>>>>>> 67769750f96 ([Fix](Variant) add implementation `update_XXXhash` for IColumnDummy (#52610))
>>>>>>>> b4f01947a44 ([feature](semi-structure) support variant and index with many features):be/test/vec/columns/column_object_test.cpp
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
#include <gtest/gtest.h>
#include <rapidjson/prettywriter.h>
#include <stdio.h>
<<<<<<<< HEAD:be/test/vec/columns/column_variant_test.cpp
#include <memory>
#include "runtime/define_primitive_type.h"
#include "runtime/jsonb_value.h"
#include "vec/columns/common_column_test.h"
#include "vec/core/field.h"
#include "vec/data_types/data_type_factory.hpp"
#include "vec/data_types/data_type_nothing.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/json/path_in_data.h"
========
<<<<<<< HEAD
<<<<<<< HEAD
=======
#include <memory>
#include "runtime/define_primitive_type.h"
>>>>>>> 67769750f96 ([Fix](Variant) add implementation `update_XXXhash` for IColumnDummy (#52610))
#include "vec/columns/column_variant.h"
=======
#include "common/cast_set.h"
#include "runtime/jsonb_value.h"
#include "testutil/variant_util.h"
>>>>>>> 954311c1aad ([feature](semi-structure) support variant and index with many features)
#include "vec/columns/common_column_test.h"
<<<<<<< HEAD
#include "vec/common/string_ref.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_factory.hpp"
=======
#include "vec/data_types/data_type_factory.hpp"
#include "vec/data_types/data_type_nothing.h"
#include "vec/json/path_in_data.h"
>>>>>>> 67769750f96 ([Fix](Variant) add implementation `update_XXXhash` for IColumnDummy (#52610))
>>>>>>>> b4f01947a44 ([feature](semi-structure) support variant and index with many features):be/test/vec/columns/column_object_test.cpp
namespace doris::vectorized {
class ColumnVariantTest : public ::testing::Test {};
<<<<<<< HEAD
auto construct_dst_varint_column() {
// 1. create an empty variant column
vectorized::ColumnVariant::Subcolumns dynamic_subcolumns;
dynamic_subcolumns.create_root(vectorized::ColumnVariant::Subcolumn(0, true, true /*root*/));
dynamic_subcolumns.add(vectorized::PathInData("v.f"),
vectorized::ColumnVariant::Subcolumn {0, true});
dynamic_subcolumns.add(vectorized::PathInData("v.e"),
vectorized::ColumnVariant::Subcolumn {0, true});
dynamic_subcolumns.add(vectorized::PathInData("v.b"),
vectorized::ColumnVariant::Subcolumn {0, true});
dynamic_subcolumns.add(vectorized::PathInData("v.b.d"),
vectorized::ColumnVariant::Subcolumn {0, true});
dynamic_subcolumns.add(vectorized::PathInData("v.c.d"),
vectorized::ColumnVariant::Subcolumn {0, true});
return ColumnVariant::create(std::move(dynamic_subcolumns), true);
=======
void convert_field_to_rapidjson(const vectorized::Field& field, rapidjson::Value& target,
rapidjson::Document::AllocatorType& allocator) {
switch (field.get_type()) {
case vectorized::Field::Types::Null:
target.SetNull();
break;
case vectorized::Field::Types::Int64:
target.SetInt64(field.get<Int64>());
break;
case vectorized::Field::Types::Float64:
target.SetDouble(field.get<Float64>());
break;
case vectorized::Field::Types::JSONB: {
const auto& val = field.get<JsonbField>();
JsonbValue* json_val = JsonbDocument::createValue(val.get_value(), val.get_size());
convert_jsonb_to_rapidjson(*json_val, target, allocator);
break;
}
case vectorized::Field::Types::String: {
const String& val = field.get<String>();
target.SetString(val.data(), cast_set<rapidjson::SizeType>(val.size()));
break;
}
case vectorized::Field::Types::Array: {
const vectorized::Array& array = field.get<Array>();
target.SetArray();
for (const vectorized::Field& item : array) {
rapidjson::Value val;
convert_field_to_rapidjson(item, val, allocator);
target.PushBack(val, allocator);
}
break;
}
case vectorized::Field::Types::VariantMap: {
const vectorized::VariantMap& map = field.get<VariantMap>();
target.SetObject();
for (const auto& item : map) {
if (item.second.is_null()) {
continue;
}
rapidjson::Value key;
key.SetString(item.first.get_path().data(),
cast_set<rapidjson::SizeType>(item.first.get_path().size()));
rapidjson::Value val;
convert_field_to_rapidjson(item.second, val, allocator);
if (val.IsNull() && item.first.empty()) {
// skip null value with empty key, indicate the null json value of root in variant map,
// usally padding in nested arrays
continue;
}
target.AddMember(key, val, allocator);
}
break;
}
default:
throw doris::Exception(ErrorCode::INTERNAL_ERROR, "unkown field type: {}",
field.get_type_name());
break;
}
>>>>>>> 954311c1aad ([feature](semi-structure) support variant and index with many features)
}
<<<<<<<< HEAD:be/test/vec/columns/column_variant_test.cpp
TEST_F(ColumnVariantTest, permute) {
auto column_variant = construct_dst_varint_column();
=======
#include <gmock/gmock-more-matchers.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include "testutil/test_util.h"
#include "testutil/variant_util.h"
#include "vec/columns/column_object.cpp"
#include "vec/columns/column_object.h"
#include "vec/columns/common_column_test.h"
#include "vec/columns/subcolumn_tree.h"
#include "vec/common/schema_util.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_factory.hpp"
using namespace doris;
namespace doris::vectorized {
static std::string root_dir;
static std::string test_data_dir;
static std::string test_result_dir;
static std::string test_data_dir_json;
static DataTypePtr dt_variant =
DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_VARIANT, 0, 0);
DataTypeSerDeSPtrs serde;
static ColumnObject::MutablePtr column_variant;
class ColumnObjectTest : public CommonColumnTest {
protected:
static void SetUpTestSuite() {
root_dir = std::string(getenv("ROOT"));
std::cout << "root_dir: " << root_dir << std::endl;
test_data_dir = root_dir + "/be/test/data/vec/columns";
test_result_dir = root_dir + "/be/test/expected_result/vec/columns";
column_variant = ColumnObject::create(true);
std::cout << dt_variant->get_name() << std::endl;
load_json_columns_data();
}
static void load_json_columns_data() {
std::cout << "loading json dataset : " << FLAGS_gen_out << std::endl;
{
MutableColumns columns;
columns.push_back(column_variant->get_ptr());
serde = {dt_variant->get_serde()};
test_data_dir_json = root_dir + "/regression-test/data/nereids_function_p0/";
std::vector<string> json_files = {
test_data_dir_json + "json_variant/boolean_boundary.jsonl",
test_data_dir_json + "json_variant/null_boundary.jsonl",
test_data_dir_json + "json_variant/number_boundary.jsonl",
test_data_dir_json + "json_variant/string_boundary.jsonl",
test_data_dir_json + "json_variant/array_boolean_boundary.jsonl",
test_data_dir_json + "json_variant/array_nullable_null_boundary.jsonl",
test_data_dir_json + "json_variant/array_number_boundary.jsonl",
test_data_dir_json + "json_variant/array_string_boundary.jsonl",
test_data_dir_json + "json_variant/array_object_boundary.jsonl",
test_data_dir_json + "json_variant/array_nullable_boolean_boundary.jsonl",
test_data_dir_json + "json_variant/array_nullable_number_boundary.jsonl",
test_data_dir_json + "json_variant/array_nullable_string_boundary.jsonl",
test_data_dir_json + "json_variant/array_nullable_object_boundary.jsonl",
test_data_dir_json + "json_variant/array_array_boolean_boundary.jsonl",
test_data_dir_json + "json_variant/array_array_number_boundary.jsonl",
test_data_dir_json +
"json_variant/array_nullable_array_nullable_boolean_boundary.jsonl",
test_data_dir_json +
"json_variant/array_nullable_array_nullable_null_boundary.jsonl",
test_data_dir_json +
"json_variant/array_nullable_array_nullable_number_boundary.jsonl",
test_data_dir_json + "json_variant/object_boundary.jsonl",
test_data_dir_json + "json_variant/object_nested_1025.jsonl"};
for (const auto& json_file : json_files) {
load_columns_data_from_file(columns, serde, '\n', {0}, json_file);
EXPECT_TRUE(!column_variant->empty());
column_variant->insert_default();
std::cout << "column variant size: " << column_variant->size() << std::endl;
}
column_variant->finalize();
std::cout << "column variant finalize size: " << column_variant->size() << std::endl;
}
}
template <typename T>
void column_common_test(T callback) {
callback(ColumnObject(true), column_variant->get_ptr());
}
void hash_common_test(
const std::string& function_name,
std::function<void(const MutableColumns& load_cols, DataTypeSerDeSPtrs serders,
const std::string& res_file_name)>
assert_callback) {
{
MutableColumns columns;
columns.push_back(column_variant->get_ptr());
DataTypeSerDeSPtrs serdes = {dt_variant->get_serde()};
assert_callback(columns, serdes,
test_result_dir + "/column_variant_" + function_name + ".out");
}
}
};
TEST_F(ColumnObjectTest, is_variable_length) {
EXPECT_TRUE(column_variant->is_variable_length());
}
TEST_F(ColumnObjectTest, byte_size) {
hash_common_test("byte_size", assert_byte_size_with_file_callback);
}
//TEST_F(ColumnObjectTest, has_enough_capacity) {
// auto test_func = [](const auto& src_col) {
// auto src_size = src_col->size();
// // variant always return fasle
// auto assert_col = src_col->clone_empty();
// ASSERT_FALSE(assert_col->has_enough_capacity(*src_col));
// assert_col->reserve(src_size);
// ASSERT_FALSE(assert_col->has_enough_capacity(*src_col));
// };
// test_func(column_variant);
//}
TEST_F(ColumnObjectTest, allocated_bytes) {
hash_common_test("allocated_bytes", assert_allocated_bytes_with_file_callback);
}
TEST_F(ColumnObjectTest, clone_resized) {
auto src_size = column_variant->size();
auto test_func = [&](size_t clone_count) {
auto target_column = column_variant->clone_resized(clone_count);
EXPECT_NE(target_column.get(), column_variant.get());
EXPECT_EQ(target_column->size(), clone_count);
size_t same_count = std::min(clone_count, src_size);
size_t i = 0;
for (; i < same_count; ++i) {
checkField(*target_column, *column_variant, i, i);
}
for (; i < clone_count; ++i) {
// more than source size
Field target_field;
Field source_field = column_variant->get_root_type()->get_default();
target_column->get(i, target_field);
EXPECT_EQ(target_field, source_field)
<< "target_field: " << target_field.get_type_name()
<< ", source_field: " << source_field.get_type_name();
}
};
test_func(0);
test_func(3);
test_func(src_size);
test_func(src_size + 10);
// test clone_empty
auto target_column = column_variant->clone_empty();
EXPECT_NE(target_column.get(), column_variant.get());
// assert subcolumns
auto target_subcolumns = assert_cast<ColumnObject*>(target_column.get())->get_subcolumns();
// always has root for ColumnObject(0)
EXPECT_EQ(target_subcolumns.size(), 1);
}
TEST_F(ColumnObjectTest, field_test) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
{
auto assert_col = source_column->clone();
for (size_t i = 0; i != src_size; ++i) {
Field f;
source_column->get(i, f);
assert_col->insert(f);
}
for (size_t i = 0; i != src_size; ++i) {
Field assert_field;
assert_col->get(i, assert_field);
Field source_field;
source_column->get(i, source_field);
ASSERT_EQ(assert_field, source_field);
}
}
{
auto assert_col = source_column->clone();
std::cout << source_column->size() << std::endl;
for (size_t i = 0; i != src_size; ++i) {
VariantMap jsonbf;
Field f(std::move(jsonbf));
source_column->get(i, f);
assert_col->insert(f);
}
for (size_t i = 0; i != src_size; ++i) {
VariantMap jsonbf;
Field f(std::move(jsonbf));
assert_col->get(i, f);
const auto& real_field = vectorized::get<const VariantMap&>(f);
Field source_field;
source_column->get(i, source_field);
ASSERT_EQ(real_field, source_field);
}
}
};
ColumnObject::MutablePtr obj;
obj = ColumnObject::create(1);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl";
load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj);
EXPECT_TRUE(!obj->empty());
test_func(obj);
}
// is seri
TEST_F(ColumnObjectTest, is_column_string64) {
EXPECT_FALSE(column_variant->is_column_string64());
}
TEST_F(ColumnObjectTest, is_column_string) {
EXPECT_FALSE(column_variant->is_column_string());
}
TEST_F(ColumnObjectTest, serialize_one_row_to_string) {
{
const auto* variant = assert_cast<const ColumnObject*>(column_variant.get());
// Serialize hierarchy types to json format
std::string buffer;
for (size_t row_idx = 2000; row_idx < variant->size(); ++row_idx) {
variant->serialize_one_row_to_string(row_idx, &buffer);
}
{
// TEST buffer
auto tmp_col = ColumnString::create();
VectorBufferWriter write_buffer(*tmp_col.get());
for (size_t row_idx = 2000; row_idx < variant->size(); ++row_idx) {
variant->serialize_one_row_to_string(row_idx, write_buffer);
}
}
}
{
// TEST SCALA_VARAINT
// 1. create an empty variant column
auto v = ColumnObject::create(true);
auto dt = DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_STRING, 0,
0);
auto cs = dt->create_column();
cs->insert(Field("amory"));
cs->insert(Field("doris"));
v->create_root(dt, std::move(cs));
EXPECT_TRUE(v->is_scalar_variant());
// 3. serialize
std::string buf2;
for (size_t row_idx = 0; row_idx < v->size(); ++row_idx) {
v->serialize_one_row_to_string(row_idx, &buf2);
}
auto tmp_col = ColumnString::create();
VectorBufferWriter write_buffer(*tmp_col.get());
for (size_t row_idx = 0; row_idx < v->size(); ++row_idx) {
v->serialize_one_row_to_string(row_idx, write_buffer);
}
}
}
// insert interface
// not implemented: insert_many_fix_len_data, insert_many_dict_data, insert_many_continuous_binary_data, insert_from_multi_column
// insert_many_strings, insert_many_strings_overflow, insert_range_from_ignore_overflow, insert_many_raw_data, insert_data, get_data_at, replace_column_data,
// serialize_value_into_arena, deserialize_and_insert_from_arena
TEST_F(ColumnObjectTest, insert_many_fix_len_data) {
EXPECT_ANY_THROW(column_variant->insert_many_fix_len_data(nullptr, 0));
}
TEST_F(ColumnObjectTest, insert_many_dict_data) {
EXPECT_ANY_THROW(column_variant->insert_many_dict_data(nullptr, 0, nullptr, 0, 0));
}
TEST_F(ColumnObjectTest, insert_many_continuous_binary_data) {
EXPECT_ANY_THROW(column_variant->insert_many_continuous_binary_data(nullptr, 0, 0));
}
//TEST_F(ColumnObjectTest, insert_from_multi_column) {
// EXPECT_ANY_THROW(column_variant->insert_from_multi_column({column_variant.get()}, {0}));
//}
TEST_F(ColumnObjectTest, insert_many_strings) {
EXPECT_ANY_THROW(column_variant->insert_many_strings(nullptr, 0));
}
TEST_F(ColumnObjectTest, insert_many_strings_overflow) {
EXPECT_ANY_THROW(column_variant->insert_many_strings_overflow(nullptr, 0, 0));
}
TEST_F(ColumnObjectTest, insert_many_raw_data) {
EXPECT_ANY_THROW(column_variant->insert_many_raw_data(nullptr, 0));
}
TEST_F(ColumnObjectTest, insert_data) {
EXPECT_ANY_THROW(column_variant->insert_data(nullptr, 0));
}
TEST_F(ColumnObjectTest, get_data_at) {
EXPECT_ANY_THROW(column_variant->get_data_at(0));
}
TEST_F(ColumnObjectTest, replace_column_data) {
EXPECT_ANY_THROW(
column_variant->replace_column_data(column_variant->assume_mutable_ref(), 0, 0));
}
TEST_F(ColumnObjectTest, serialize_value_into_arena) {
Arena a;
const char* begin = nullptr;
EXPECT_ANY_THROW(column_variant->serialize_value_into_arena(0, a, begin));
}
TEST_F(ColumnObjectTest, deserialize_and_insert_from_arena) {
EXPECT_ANY_THROW(column_variant->deserialize_and_insert_from_arena(nullptr));
}
// insert series:
// insert_from, insert_many_from, insert_range_from, insert_range_from_ignore_overflow, insert_indices_from
// insert_default, insert_many_defaults
TEST_F(ColumnObjectTest, insert_many_from) {
assert_insert_many_from_with_field_callback(column_variant->get_ptr());
}
TEST_F(ColumnObjectTest, insert_from) {
assert_insert_from_with_field_callback(column_variant->get_ptr());
}
TEST_F(ColumnObjectTest, insert_range_from) {
// insert_range_from_ignore_overflow call insert_range_from
assert_insert_range_from_with_field_callback(column_variant->get_ptr());
}
TEST_F(ColumnObjectTest, insert_indices_from) {
assert_insert_indices_from_with_field_callback(column_variant->get_ptr());
}
TEST_F(ColumnObjectTest, insert_default_insert_many_defaults) {
assert_insert_default_with_field_callback(column_variant->get_ptr());
}
TEST_F(ColumnObjectTest, get_name) {
EXPECT_TRUE(column_variant->get_name().find("variant") != std::string::npos);
}
// pop_back interface
TEST_F(ColumnObjectTest, pop_back_test) {
assert_pop_back_with_field_callback(column_variant->get_ptr());
}
// serialize and deserialize is not implemented
// serialize_vec, deserialize_vec, serialize_vec_with_null_map, deserialize_vec_with_null_map, get_max_row_byte_size
TEST_F(ColumnObjectTest, ser_deser_test) {
std::vector<StringRef> keys;
EXPECT_ANY_THROW(column_variant->get_max_row_byte_size());
EXPECT_ANY_THROW(column_variant->serialize_vec(keys, 0, 0));
EXPECT_ANY_THROW(column_variant->deserialize_vec(keys, 0));
EXPECT_ANY_THROW(column_variant->serialize_vec_with_null_map(keys, 0, nullptr));
EXPECT_ANY_THROW(column_variant->deserialize_vec_with_null_map(keys, 0, nullptr));
}
// hash interface
TEST_F(ColumnObjectTest, update_xxHash_with_value) {
hash_common_test("update_xxHash_with_value", assert_update_xxHash_with_value_callback);
}
// hang
//TEST_F(ColumnObjectTest, update_sip_hash_with_value_test) {
// hash_common_test("update_sip_hash_with_value",
// assert_column_vector_update_siphashes_with_value_callback);
//}
TEST_F(ColumnObjectTest, update_hashes_with_value_test) {
hash_common_test("update_hashes_with_value",
assert_column_vector_update_hashes_with_value_callback);
}
TEST_F(ColumnObjectTest, update_crc_with_value_test) {
hash_common_test("update_crc_with_value", assert_update_crc_with_value_callback);
}
TEST_F(ColumnObjectTest, update_crcs_with_value_test) {
std::string function_name = "update_crcs_with_value";
MutableColumns columns;
columns.push_back(column_variant->get_ptr());
DataTypeSerDeSPtrs serdes = {dt_variant->get_serde()};
std::vector<PrimitiveType> pts(columns.size(), PrimitiveType::TYPE_VARIANT);
assert_column_vector_update_crc_hashes_callback(
columns, serdes, pts, test_result_dir + "/column_variant_" + function_name + ".out");
}
// filter interface
TEST_F(ColumnObjectTest, filter) {
assert_filter_with_field_callback(column_variant->get_ptr());
}
TEST_F(ColumnObjectTest, filter_by_selector) {
auto test_func = [&](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size <= UINT16_MAX);
auto target_column = source_column->clone_empty();
std::vector<uint16_t> indices(src_size);
std::iota(indices.begin(), indices.end(), 0);
std::random_device rd;
std::mt19937 g(rd());
std::shuffle(indices.begin(), indices.end(), g);
size_t sel_size = src_size / 2;
indices.resize(sel_size);
std::sort(indices.begin(), indices.end());
EXPECT_ANY_THROW(Status st = source_column->filter_by_selector(indices.data(), 0,
target_column.get()));
};
test_func(column_variant);
}
TEST_F(ColumnObjectTest, permute) {
>>>>>>> b4f01947a44 ([feature](semi-structure) support variant and index with many features)
{
// test empty column and limit == 0
IColumn::Permutation permutation(0);
auto col = column_variant->clone_empty();
col->permute(permutation, 0);
EXPECT_EQ(col->size(), 0);
<<<<<<< HEAD
========
void convert_variant_map_to_rapidjson(const vectorized::VariantMap& map, rapidjson::Value& target,
rapidjson::Document::AllocatorType& allocator) {
target.SetObject();
for (const auto& item : map) {
if (item.second.is_null()) {
continue;
}
rapidjson::Value key;
key.SetString(item.first.get_path().data(),
cast_set<rapidjson::SizeType>(item.first.get_path().size()));
rapidjson::Value val;
convert_field_to_rapidjson(item.second, val, allocator);
if (val.IsNull() && item.first.empty()) {
// skip null value with empty key, indicate the null json value of root in variant map,
// usally padding in nested arrays
continue;
}
target.AddMember(key, val, allocator);
}
}
void convert_array_to_rapidjson(const vectorized::Array& array, rapidjson::Value& target,
rapidjson::Document::AllocatorType& allocator) {
target.SetArray();
for (const vectorized::Field& item : array) {
rapidjson::Value val;
convert_field_to_rapidjson(item, val, allocator);
target.PushBack(val, allocator);
}
}
TEST(ColumnVariantTest, insert_try_insert) {
auto v = VariantUtil::construct_dst_varint_column();
FieldInfo info;
info.scalar_type_id = TypeIndex::Nothing;
info.num_dimensions = 0;
PathInData path("v.f");
auto sub = v->get_subcolumn(path);
Int64 value = 43;
sub->insert(value, info);
info.num_dimensions = 1;
sub->insert(value, info);
info.num_dimensions = 2;
sub->insert(value, info);
}
TEST(ColumnVariantTest, basic_finalize) {
auto variant = VariantUtil::construct_basic_varint_column();
// 4. finalize
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
EXPECT_TRUE(variant->pick_subcolumns_to_sparse_column({}).ok());
EXPECT_EQ(variant->size(), 10);
// check finalized subcolumn
// 5 subcolumn + 1 root
EXPECT_EQ(variant->subcolumns.size(), 6);
for (const auto& column : variant->subcolumns) {
if (column->data.is_root) {
continue;
}
EXPECT_EQ(column->data.data.size(), 1);
>>>>>>>> b4f01947a44 ([feature](semi-structure) support variant and index with many features):be/test/vec/columns/column_object_test.cpp
}
// check sparse column
const auto& offsets = variant->serialized_sparse_column_offsets();
for (int row = 0; row < 5; ++row) {
EXPECT_EQ(offsets[row], 0);
}
for (int row = 5; row < 10; ++row) {
EXPECT_EQ(offsets[row] - offsets[row - 1], 3);
}
}
<<<<<<<< HEAD:be/test/vec/columns/column_variant_test.cpp
// test ColumnVariant with ColumnNothing using update_hash_with_value
TEST_F(ColumnVariantTest, updateHashValueWithColumnNothingTest) {
========
<<<<<<< HEAD
<<<<<<< HEAD
=======
// test ColumnVariant with ColumnNothing using update_hash_with_value
TEST_F(ColumnObjectTest, updateHashValueWithColumnNothingTest) {
>>>>>>>> b4f01947a44 ([feature](semi-structure) support variant and index with many features):be/test/vec/columns/column_object_test.cpp
// Create a subcolumn with ColumnNothing type
auto type = std::make_shared<DataTypeNothing>();
auto column = type->create_column();
column->insert_many_defaults(3);
// Create a ColumnVariant with a subcolumn that contains ColumnNothing
auto variant = ColumnVariant::create(true, type, std::move(column));
// Finalize the variant column to ensure proper structure
EXPECT_EQ(variant->size(), 3);
// Test update_hash_with_value with ColumnNothing
SipHash hash1, hash2, hash3;
// Test that update_hash_with_value doesn't crash with ColumnNothing
EXPECT_NO_THROW(variant->update_hash_with_value(0, hash1));
EXPECT_NO_THROW(variant->update_hash_with_value(1, hash2));
EXPECT_NO_THROW(variant->update_hash_with_value(2, hash3));
// For ColumnNothing, the hash should be consistent since it doesn't contain actual data
// However, the hash might include structural information, so we just verify it doesn't crash
// and produces some hash value
EXPECT_NE(hash1.get64(), 0);
EXPECT_NE(hash2.get64(), 0);
EXPECT_NE(hash3.get64(), 0);
// Test update_hashes_with_value with ColumnNothing
std::vector<uint64_t> hashes(3, 0);
EXPECT_NO_THROW(variant->update_hashes_with_value(hashes.data()));
// Test update_xxHash_with_value with ColumnNothing
uint64_t xxhash = 0;
EXPECT_NO_THROW(variant->update_xxHash_with_value(0, 3, xxhash, nullptr));
// Test update_crc_with_value with ColumnNothing
uint32_t crc_hash = 0;
EXPECT_NO_THROW(variant->update_crc_with_value(0, 3, crc_hash, nullptr));
// Test with null map
std::vector<uint8_t> null_map(3, 0);
null_map[1] = 1; // Mark second row as null
std::vector<uint64_t> hashes_with_null(3, 0);
EXPECT_NO_THROW(variant->update_hashes_with_value(hashes_with_null.data(), null_map.data()));
uint64_t xxhash_with_null = 0;
EXPECT_NO_THROW(variant->update_xxHash_with_value(0, 3, xxhash_with_null, null_map.data()));
uint32_t crc_hash_with_null = 0;
EXPECT_NO_THROW(variant->update_crc_with_value(0, 3, crc_hash_with_null, null_map.data()));
}
<<<<<<<< HEAD:be/test/vec/columns/column_variant_test.cpp
========
>>>>>>> 67769750f96 ([Fix](Variant) add implementation `update_XXXhash` for IColumnDummy (#52610))
>>>>>>>> b4f01947a44 ([feature](semi-structure) support variant and index with many features):be/test/vec/columns/column_object_test.cpp
// TEST
TEST_F(ColumnVariantTest, test_pop_back) {
ColumnVariant::Subcolumn subcolumn(0, true /* is_nullable */, false /* is_root */);
Field field_int = Field::create_field<TYPE_INT>(123);
Field field_string = Field::create_field<TYPE_STRING>("hello");
=======
TEST(ColumnVariantTest, basic_deserialize) {
auto variant = VariantUtil::construct_basic_varint_column();
// 4. finalize
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
EXPECT_TRUE(variant->pick_subcolumns_to_sparse_column({}).ok());
EXPECT_EQ(variant->size(), 10);
>>>>>>> 954311c1aad ([feature](semi-structure) support variant and index with many features)
const auto& [path, value] = variant->get_sparse_data_paths_and_values();
const auto& offsets = variant->serialized_sparse_column_offsets();
for (size_t row = 5; row < 10; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
<<<<<<< HEAD
subcolumn.pop_back(1);
EXPECT_EQ(subcolumn.size(), 1);
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(TINYINT)");
=======
auto data = path->get_data_at(start);
EXPECT_EQ(data, StringRef("v.b.d", 5));
auto pair = variant->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair.first.get<Int64>(), 30);
>>>>>>> 954311c1aad ([feature](semi-structure) support variant and index with many features)
auto data2 = path->get_data_at(start);
auto pair2 = variant->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data2, StringRef("v.c.d", 5));
EXPECT_EQ(pair2.first.get<Int64>(), 30);
auto data3 = path->get_data_at(start);
auto pair3 = variant->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data3, StringRef("v.d.d", 5));
EXPECT_EQ(pair3.first.get<String>(), "50");
EXPECT_EQ(start, end);
}
}
<<<<<<<< HEAD:be/test/vec/columns/column_variant_test.cpp
TEST_F(ColumnVariantTest, test_pop_back_multiple_types) {
========
<<<<<<< HEAD
TEST_F(ColumnObjectTest, test_pop_back_multiple_types) {
>>>>>>>> b4f01947a44 ([feature](semi-structure) support variant and index with many features):be/test/vec/columns/column_object_test.cpp
ColumnVariant::Subcolumn subcolumn(0, true /* is_nullable */, false /* is_root */);
Field field_int8 = Field::create_field<TYPE_TINYINT>(42);
subcolumn.insert(field_int8);
EXPECT_EQ(subcolumn.size(), 1);
EXPECT_EQ(subcolumn.data_types.size(), 1);
EXPECT_EQ(subcolumn.data_types[0]->get_name(), "Nullable(TINYINT)");
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(TINYINT)");
Field field_int16 = Field::create_field<TYPE_SMALLINT>(12345);
subcolumn.insert(field_int16);
EXPECT_EQ(subcolumn.size(), 2);
EXPECT_EQ(subcolumn.data_types.size(), 2);
EXPECT_EQ(subcolumn.data_types[0]->get_name(), "Nullable(TINYINT)");
EXPECT_EQ(subcolumn.data_types[1]->get_name(), "Nullable(SMALLINT)");
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(SMALLINT)");
Field field_int32 = Field::create_field<TYPE_INT>(1234567);
subcolumn.insert(field_int32);
EXPECT_EQ(subcolumn.size(), 3);
EXPECT_EQ(subcolumn.data_types.size(), 3);
EXPECT_EQ(subcolumn.data_types[0]->get_name(), "Nullable(TINYINT)");
EXPECT_EQ(subcolumn.data_types[1]->get_name(), "Nullable(SMALLINT)");
EXPECT_EQ(subcolumn.data_types[2]->get_name(), "Nullable(INT)");
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(INT)");
subcolumn.pop_back(1);
EXPECT_EQ(subcolumn.size(), 2);
EXPECT_EQ(subcolumn.data_types.size(), 2);
EXPECT_EQ(subcolumn.data_types[0]->get_name(), "Nullable(TINYINT)");
EXPECT_EQ(subcolumn.data_types[1]->get_name(), "Nullable(SMALLINT)");
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(SMALLINT)");
subcolumn.pop_back(1);
EXPECT_EQ(subcolumn.size(), 1);
EXPECT_EQ(subcolumn.data_types.size(), 1);
EXPECT_EQ(subcolumn.data_types[0]->get_name(), "Nullable(TINYINT)");
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(TINYINT)");
subcolumn.pop_back(1);
EXPECT_EQ(subcolumn.size(), 0);
EXPECT_EQ(subcolumn.data_types.size(), 0);
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nothing");
subcolumn.insert(field_int32);
EXPECT_EQ(subcolumn.size(), 1);
EXPECT_EQ(subcolumn.data_types.size(), 1);
EXPECT_EQ(subcolumn.data_types[0]->get_name(), "Nullable(INT)");
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(INT)");
subcolumn.insert(field_int16);
EXPECT_EQ(subcolumn.size(), 2);
EXPECT_EQ(subcolumn.data_types.size(), 1);
EXPECT_EQ(subcolumn.data_types[0]->get_name(), "Nullable(INT)");
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(INT)");
subcolumn.insert(field_int8);
EXPECT_EQ(subcolumn.size(), 3);
EXPECT_EQ(subcolumn.data_types.size(), 1);
EXPECT_EQ(subcolumn.data_types[0]->get_name(), "Nullable(INT)");
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(INT)");
subcolumn.pop_back(1);
EXPECT_EQ(subcolumn.size(), 2);
EXPECT_EQ(subcolumn.data_types.size(), 1);
EXPECT_EQ(subcolumn.data_types[0]->get_name(), "Nullable(INT)");
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(INT)");
Field field_string = Field::create_field<TYPE_STRING>("hello");
subcolumn.insert(field_string);
EXPECT_EQ(subcolumn.size(), 3);
EXPECT_EQ(subcolumn.data_types.size(), 2);
EXPECT_EQ(subcolumn.data_types[0]->get_name(), "Nullable(INT)");
EXPECT_EQ(subcolumn.data_types[1]->get_name(), "Nullable(JSONB)");
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nullable(JSONB)");
subcolumn.pop_back(3);
EXPECT_EQ(subcolumn.size(), 0);
EXPECT_EQ(subcolumn.data_types.size(), 0);
EXPECT_EQ(subcolumn.get_least_common_type()->get_name(), "Nothing");
=======
TEST(ColumnVariantTest, basic_inset_range_from) {
auto src = VariantUtil::construct_basic_varint_column();
EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
EXPECT_TRUE(src->pick_subcolumns_to_sparse_column({}).ok());
EXPECT_EQ(src->size(), 10);
// dst is an empty column, has 5 subcolumn + 1 root
auto dst = VariantUtil::construct_dst_varint_column();
// subcolumn->subcolumn v.b v.f v.e
// subcolumn->sparse_column v.a v.c
// sparse_column->subcolumn v.b.d v.c.d
// sparse_column->sparse_column v.d.d
dst->insert_range_from(*src, 0, 10);
dst->finalize();
EXPECT_EQ(dst->size(), 10);
// 5 subcolumn
EXPECT_EQ(dst->subcolumns.size(), 6);
ColumnObject::Subcolumns dst_subcolumns = dst->subcolumns;
std::sort(
dst_subcolumns.begin(), dst_subcolumns.end(),
[](const auto& lhsItem, const auto& rhsItem) { return lhsItem->path < rhsItem->path; });
for (const auto& column : dst_subcolumns) {
if (column->data.is_root) {
continue;
}
EXPECT_EQ(column->data.data.size(), 1);
EXPECT_EQ(column->data.data[0]->size(), 10);
if (column->path.get_path().size() == 3) {
EXPECT_EQ(column->data.get_non_null_value_size(), 10);
} else {
EXPECT_EQ(column->path.get_path().size(), 5);
EXPECT_EQ(column->data.get_non_null_value_size(), 5);
for (size_t row = 0; row != 5; ++row) {
EXPECT_TRUE(column->data.data[0]->is_null_at(row));
}
for (size_t row = 5; row != 10; ++row) {
EXPECT_EQ((*column->data.data[0])[row].get<Int64>(), 30);
}
}
}
// check sparse column
const auto& [path, value] = dst->get_sparse_data_paths_and_values();
const auto& offsets = dst->serialized_sparse_column_offsets();
// v.a v.c
for (int row = 0; row < 5; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
auto data = path->get_data_at(start);
EXPECT_EQ(data, StringRef("v.a", 3));
auto pair = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair.first.get<Int64>(), 20);
auto data2 = path->get_data_at(start);
EXPECT_EQ(data2, StringRef("v.c", 3));
auto pair2 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair2.first.get<Int64>(), 20);
EXPECT_EQ(start, end);
}
// v.a v.c v.d.d
for (int row = 5; row < 10; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
auto data = path->get_data_at(start);
EXPECT_EQ(data, StringRef("v.a", 3));
auto pair = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair.first.get<Int64>(), 20);
auto data2 = path->get_data_at(start);
EXPECT_EQ(data2, StringRef("v.c", 3));
auto pair2 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair2.first.get<Int64>(), 20);
auto data3 = path->get_data_at(start);
EXPECT_EQ(data3, StringRef("v.d.d", 5));
auto pair3 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair3.first.get<String>(), "50");
EXPECT_EQ(start, end);
}
}
auto convert_to_jsonb_field(auto serde, auto& column) {
vectorized::DataTypeSerDe::FormatOptions options;
options.escape_char = '\\';
auto tmp_col = ColumnString::create();
VectorBufferWriter write_buffer(*tmp_col.get());
EXPECT_TRUE(serde->serialize_column_to_json(column, 0, 1, write_buffer, options).ok());
write_buffer.commit();
auto str_ref = tmp_col->get_data_at(0);
Slice data((char*)(str_ref.data), str_ref.size);
auto jsonb_type = doris::vectorized::DataTypeFactory::instance().create_data_type(
TypeIndex::JSONB, false);
auto jsonb_serde = jsonb_type->get_serde();
auto jsonb_column = jsonb_type->create_column();
DataTypeSerDe::FormatOptions format_options;
format_options.converted_from_string = true;
EXPECT_TRUE(
jsonb_serde->deserialize_one_cell_from_json(*jsonb_column, data, format_options).ok());
auto res = jsonb_column->get_data_at(0);
return JsonbField(res.data, res.size);
}
auto convert_string_to_jsonb_field(auto& column) {
auto str_ref = column.get_data_at(0);
Slice data((char*)(str_ref.data), str_ref.size);
auto jsonb_type = doris::vectorized::DataTypeFactory::instance().create_data_type(
TypeIndex::JSONB, false);
auto jsonb_serde = jsonb_type->get_serde();
auto jsonb_column = jsonb_type->create_column();
DataTypeSerDe::FormatOptions format_options;
format_options.converted_from_string = true;
format_options.escape_char = '\\';
EXPECT_TRUE(
jsonb_serde->deserialize_one_cell_from_json(*jsonb_column, data, format_options).ok());
auto res = jsonb_column->get_data_at(0);
return JsonbField(res.data, res.size);
}
doris::vectorized::Field get_jsonb_field(std::string_view type) {
static std::unordered_map<std::string_view, doris::vectorized::Field> field_map;
if (field_map.empty()) {
DataTypePtr data_type_int = doris::vectorized::DataTypeFactory::instance().create_data_type(
TypeIndex::Int8, false);
DataTypePtr data_type_array_int =
std::make_shared<doris::vectorized::DataTypeArray>(data_type_int);
auto array_column_int = data_type_array_int->create_column();
array_column_int->insert(VariantUtil::get_field("array_int"));
auto array_serde_int = data_type_array_int->get_serde();
field_map["array_int"] = convert_to_jsonb_field(array_serde_int, *array_column_int);
DataTypePtr data_type_str = doris::vectorized::DataTypeFactory::instance().create_data_type(
TypeIndex::String, false);
DataTypePtr data_type_array_str =
std::make_shared<doris::vectorized::DataTypeArray>(data_type_str);
auto array_column_str = data_type_array_str->create_column();
array_column_str->insert(VariantUtil::get_field("array_str"));
auto array_serde_str = data_type_array_str->get_serde();
field_map["array_str"] = convert_to_jsonb_field(array_serde_str, *array_column_str);
auto column_int = data_type_int->create_column();
column_int->insert(VariantUtil::get_field("int"));
auto serde_int = data_type_int->get_serde();
field_map["int"] = convert_to_jsonb_field(serde_int, *column_int);
// auto column_str = data_type_str->create_column();
// column_str->insert(VariantUtil::get_field("string"));
// field_map["string"] = convert_string_to_jsonb_field(*column_str);
=======
}
{
IColumn::Permutation permutation(0);
EXPECT_THROW(column_variant->permute(permutation, 10), Exception);
}
MutableColumns columns;
columns.push_back(column_variant->get_ptr());
assert_column_vector_permute(columns, 0, false);
assert_column_vector_permute(columns, 1, false);
assert_column_vector_permute(columns, column_variant->size(), false);
assert_column_vector_permute(columns, UINT64_MAX, false);
}
// not support
TEST_F(ColumnObjectTest, get_permutation) {
EXPECT_ANY_THROW(assert_column_permutations2(*column_variant, dt_variant));
}
TEST_F(ColumnObjectTest, structure_equals) {
auto cl = column_variant->clone_empty();
EXPECT_ANY_THROW(column_variant->structure_equals(*cl));
}
TEST_F(ColumnObjectTest, replicate) {
assert_replicate_with_field(column_variant->get_ptr());
}
// Compare Interface not implement: compare_at, compare_internal
TEST_F(ColumnObjectTest, compare_at) {
EXPECT_ANY_THROW(column_variant->compare_at(0, 0, *column_variant, -1));
std::vector<uint8> com_res(column_variant->size());
EXPECT_ANY_THROW(column_variant->compare_internal(0, *column_variant, 0, 0, com_res, nullptr));
}
TEST_F(ColumnObjectTest, clear) {
auto tmp_col = column_variant->clone();
EXPECT_EQ(tmp_col->size(), column_variant->size());
tmp_col->clear();
EXPECT_EQ(tmp_col->size(), 0);
}
TEST_F(ColumnObjectTest, convert_column_if_overflow) {
// convert_column_if_overflow may need impl in ColumnObject, like ColumnArray?
auto ret = column_variant->convert_column_if_overflow();
EXPECT_EQ(ret.get(), column_variant.get());
}
TEST_F(ColumnObjectTest, resize) {
auto test_func = [](const auto& source_column, size_t add_count) {
{
auto source_size = source_column->size();
auto tmp_col = source_column->clone();
auto default_col = source_column->clone_empty();
default_col->insert_default();
tmp_col->resize(source_size + add_count);
EXPECT_EQ(tmp_col->size(), source_size + add_count);
for (size_t i = 0; i != source_size; ++i) {
checkField(*tmp_col, *source_column, i, i);
}
for (size_t i = 0; i != add_count; ++i) {
checkField(*tmp_col, *default_col, source_size + i, 0);
}
}
{
// resize in self
auto ptr = source_column.get();
source_column->resize(add_count);
EXPECT_EQ(source_column.get(), ptr);
EXPECT_EQ(source_column->size(), add_count);
}
};
test_func(column_variant, 0);
test_func(column_variant, 10);
}
// ================= variant specific interface =================
// meta info related interface
TEST_F(ColumnObjectTest, get_least_common_type) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test get_least_common_type for root column
const auto& root = source_column->get_subcolumns().get_root();
EXPECT_TRUE(root != nullptr);
EXPECT_TRUE(root->data.get_least_common_type() != nullptr);
// Test get_least_common_type for subcolumns
for (const auto& subcolumn : source_column->get_subcolumns()) {
EXPECT_TRUE(subcolumn != nullptr);
EXPECT_TRUE(subcolumn->data.get_least_common_type() != nullptr);
}
};
test_func(column_variant);
}
TEST_F(ColumnObjectTest, get_dimensions) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test get_dimensions for root column
const auto& root = source_column->get_subcolumns().get_root();
EXPECT_TRUE(root != nullptr);
EXPECT_GE(root->data.get_dimensions(), 0);
// Test get_dimensions for subcolumns
for (auto& entry : source_column->get_subcolumns()) {
EXPECT_TRUE(entry != nullptr);
EXPECT_GE(entry->data.get_dimensions(), 0);
}
};
test_func(column_variant);
}
TEST_F(ColumnObjectTest, get_last_field) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test get_last_field for root column
const auto& root = source_column->get_subcolumns().get_root();
EXPECT_TRUE(root != nullptr);
Field last_field;
root->data.get_last_field();
// Test get_last_field for subcolumns
for (const auto& subcolumn : source_column->get_subcolumns()) {
EXPECT_TRUE(subcolumn != nullptr);
subcolumn->data.get_last_field();
}
};
test_func(column_variant);
}
// sub column op related interface
TEST_F(ColumnObjectTest, get_finalized_column) {
auto test_func = [](const auto& source_column) {
// do not clone and then get , will case heap-after-use-free cause of defined in COW as temporary Ptr
// auto source_column = assert_cast<ColumnObject*>(var_column->clone_resized(var_column->size()).get());
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test get_finalized_column for root column
auto root = source_column->get_subcolumns().get_root();
EXPECT_TRUE(root != nullptr);
source_column->finalize();
root = source_column->get_subcolumns().get_root();
const auto& finalized_col = root->data.get_finalized_column();
EXPECT_TRUE(source_column->is_finalized());
Field rf;
finalized_col.get(0, rf);
EXPECT_TRUE(strlen(rf.get_type_name()) > 0);
// Test get_finalized_column for subcolumns
for (const auto& subcolumn : source_column->get_subcolumns()) {
EXPECT_TRUE(subcolumn != nullptr);
EXPECT_TRUE(subcolumn->data.is_finalized());
const auto& subcolumn_finalized = subcolumn->data.get_finalized_column();
// Verify finalized column data
Field field;
subcolumn_finalized.get(0, field);
EXPECT_TRUE(strlen(field.get_type_name()) > 0);
// Verify column size
EXPECT_EQ(subcolumn_finalized.size(), src_size);
}
};
auto temp = column_variant->clone();
auto cloned_object = assert_cast<ColumnObject*>(temp.get());
EXPECT_NE(cloned_object, column_variant.get());
test_func(std::move(cloned_object));
}
TEST_F(ColumnObjectTest, get_finalized_column_ptr) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test get_finalized_column_ptr for root column
auto root = source_column->get_subcolumns().get_root();
EXPECT_TRUE(root != nullptr);
source_column->finalize();
// when finalized , the root will be changed
root = source_column->get_subcolumns().get_root();
const auto& finalized_col_ptr = root->data.get_finalized_column_ptr();
EXPECT_TRUE(finalized_col_ptr.get() != nullptr);
// Test get_finalized_column_ptr for subcolumns
for (const auto& subcolumn : source_column->get_subcolumns()) {
EXPECT_TRUE(subcolumn != nullptr);
const auto& subcolumn_finalized_ptr = subcolumn->data.get_finalized_column_ptr();
EXPECT_TRUE(subcolumn_finalized_ptr.get() != nullptr);
EXPECT_TRUE(subcolumn->data.is_finalized());
// Verify finalized column data
Field field;
subcolumn_finalized_ptr->get(0, field);
EXPECT_TRUE(strlen(field.get_type_name()) > 0);
// Verify column size
EXPECT_EQ(subcolumn_finalized_ptr->size(), src_size);
}
};
auto temp = column_variant->clone();
auto cloned_object = assert_cast<ColumnObject*>(temp.get());
test_func(std::move(cloned_object));
}
TEST_F(ColumnObjectTest, remove_nullable) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test remove_nullable for subcolumns
for (const auto& subcolumn : source_column->get_subcolumns()) {
EXPECT_TRUE(subcolumn != nullptr);
subcolumn->data.finalize();
auto subcolumn_type_before = subcolumn->data.get_least_common_type();
subcolumn->data.remove_nullable();
auto subcolumn_type_after = subcolumn->data.get_least_common_type();
EXPECT_TRUE(remove_nullable(subcolumn_type_before)->equals(*subcolumn_type_after));
}
};
auto temp = column_variant->clone();
auto cloned_object = assert_cast<ColumnObject*>(temp.get());
test_func(std::move(cloned_object));
}
TEST_F(ColumnObjectTest, add_new_column_part) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test add_new_column_part for subcolumns
for (const auto& subcolumn : source_column->get_subcolumns()) {
EXPECT_TRUE(subcolumn != nullptr);
// Store original type before adding new part
auto original_type = subcolumn->data.get_least_common_type();
// The add_new_column_part interface must be added to the minimum common type of the data type vector in the current subcolumn,
// otherwise an error will be reported: [E3] Not implemeted
subcolumn->data.add_new_column_part(original_type);
// Verify the type is updated
auto updated_type = subcolumn->data.get_least_common_type();
EXPECT_TRUE(updated_type != nullptr);
// Verify column size
EXPECT_EQ(subcolumn->data.size(), src_size);
}
};
auto temp = column_variant->clone();
auto cloned_object = assert_cast<ColumnObject*>(temp.get());
test_func(std::move(cloned_object));
}
TEST_F(ColumnObjectTest, get_subcolumn) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test get_subcolumn
for (const auto& subcolumn : source_column->get_subcolumns()) {
EXPECT_TRUE(subcolumn != nullptr);
// Verify subcolumn properties
EXPECT_TRUE(subcolumn->data.get_least_common_type() != nullptr);
EXPECT_GE(subcolumn->data.get_dimensions(), 0);
// Verify subcolumn data
Field field;
subcolumn->data.get(0, field);
EXPECT_TRUE(strlen(field.get_type_name()) > 0);
EXPECT_EQ(subcolumn->data.size(), src_size);
}
};
auto temp = column_variant->clone();
auto cloned_object = assert_cast<ColumnObject*>(temp.get());
test_func(std::move(cloned_object));
}
TEST_F(ColumnObjectTest, ensure_root_node_type) {
ColumnObject::MutablePtr obj;
obj = ColumnObject::create(1);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl";
load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj);
EXPECT_TRUE(!obj->empty());
// Store original root type
auto root = obj->get_subcolumns().get_root();
auto original_root_type = root->data.get_least_common_type();
obj->finalize();
// Test ensure_root_node_type
auto new_type =
DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_STRING, 0, 0);
obj->ensure_root_node_type(new_type);
// Verify root type is updated
root = obj->get_subcolumns().get_root();
auto updated_root_type = root->data.get_least_common_type();
EXPECT_TRUE(updated_root_type->equals(*new_type));
};
TEST_F(ColumnObjectTest, create_root) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test case 1: Create root with string type
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
auto type = DataTypeFactory::instance().create_data_type(
FieldType::OLAP_FIELD_TYPE_STRING, 0, 0);
auto column = type->create_column();
obj->create_root(type, std::move(column));
// Verify root is created with correct type
const auto& root = obj->get_subcolumns().get_root();
EXPECT_TRUE(root != nullptr);
EXPECT_TRUE(root->data.get_least_common_type()->equals(*type));
}
// Test case 2: Create root with int type
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
auto type = DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_INT,
0, 0);
auto column = type->create_column();
obj->create_root(type, std::move(column));
// Verify root is created with correct type
const auto& root = obj->get_subcolumns().get_root();
EXPECT_TRUE(root != nullptr);
EXPECT_TRUE(root->data.get_least_common_type()->equals(*type));
}
// Test case 3: Create root on existing column
{
auto col = source_column->clone();
auto obj = assert_cast<ColumnObject*>(col.get());
auto original_root = obj->get_subcolumns().get_root();
EXPECT_TRUE(original_root != nullptr);
// Create root with new type
auto type = DataTypeFactory::instance().create_data_type(
FieldType::OLAP_FIELD_TYPE_STRING, 0, 0);
auto column = type->create_column();
EXPECT_ANY_THROW(obj->create_root(type, std::move(column)));
// Verify root is replaced with new type
const auto& new_root = obj->get_subcolumns().get_root();
EXPECT_TRUE(new_root != nullptr);
EXPECT_EQ(new_root, original_root);
}
// Test case 4: Create root and verify data operations
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
auto type = DataTypeFactory::instance().create_data_type(
FieldType::OLAP_FIELD_TYPE_STRING, 0, 0);
auto column = type->create_column();
obj->create_root(type, std::move(column));
// Insert some data
Field field;
source_column->get(0, field);
obj->insert(field);
// Verify data is inserted correctly
Field inserted_field;
obj->get(0, inserted_field);
}
// Test case 5: Create root with nullable type
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
auto type = DataTypeFactory::instance().create_data_type(
FieldType::OLAP_FIELD_TYPE_STRING, 0, 0);
auto nullable_type = make_nullable(type);
auto column = nullable_type->create_column();
obj->create_root(nullable_type, std::move(column));
// Verify root is created with nullable type
const auto& root = obj->get_subcolumns().get_root();
EXPECT_TRUE(root != nullptr);
EXPECT_TRUE(root->data.get_least_common_type()->equals(*nullable_type));
}
};
auto temp = column_variant->clone();
auto cloned_object = assert_cast<ColumnObject*>(temp.get());
test_func(std::move(cloned_object));
}
TEST_F(ColumnObjectTest, get_most_common_type) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test get_most_common_type
DataTypePtr most_common_type = source_column->get_most_common_type();
EXPECT_TRUE(most_common_type != nullptr);
};
test_func(column_variant);
}
TEST_F(ColumnObjectTest, is_null_root) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test is_null_root
bool is_null = source_column->is_null_root();
EXPECT_FALSE(is_null); // Since we have data, root should not be null
};
test_func(column_variant);
}
TEST_F(ColumnObjectTest, is_scalar_variant) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test is_scalar_variant
bool is_scalar = source_column->is_scalar_variant();
// The result depends on the actual data structure
EXPECT_FALSE(is_scalar);
};
test_func(column_variant);
}
TEST_F(ColumnObjectTest, is_exclusive) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test is_exclusive
bool is_exclusive = source_column->is_exclusive();
// The result depends on the actual data structure
EXPECT_TRUE(is_exclusive);
};
test_func(column_variant);
}
TEST_F(ColumnObjectTest, get_root_type) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test get_root_type
DataTypePtr root_type = source_column->get_root_type();
EXPECT_TRUE(root_type != nullptr);
};
test_func(column_variant);
}
TEST_F(ColumnObjectTest, has_subcolumn) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test has_subcolumn
for (const auto& subcolumn : source_column->get_subcolumns()) {
EXPECT_TRUE(subcolumn != nullptr);
bool has_subcolumn = source_column->has_subcolumn(subcolumn->path);
EXPECT_TRUE(has_subcolumn);
}
};
test_func(column_variant);
}
TEST_F(ColumnObjectTest, finalize) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test case 1: Test finalize with READ_MODE
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
// Insert data from source column
for (size_t i = 0; i < src_size; ++i) {
Field field;
source_column->get(i, field);
obj->insert(field);
}
// Verify initial state
EXPECT_FALSE(obj->is_finalized());
// Finalize in READ_MODE
Status st = obj->finalize(ColumnObject::FinalizeMode::READ_MODE);
EXPECT_TRUE(st.ok());
EXPECT_TRUE(obj->is_finalized());
// Verify data integrity
for (size_t i = 0; i < src_size; ++i) {
Field original_field, finalized_field;
source_column->get(i, original_field);
obj->get(i, finalized_field);
EXPECT_EQ(original_field, finalized_field);
}
}
// Test case 2: Test finalize with WRITE_MODE
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
// Insert data from source column
for (size_t i = 0; i < src_size; ++i) {
Field field;
source_column->get(i, field);
obj->insert(field);
}
// Verify initial state
EXPECT_FALSE(obj->is_finalized());
// Finalize in WRITE_MODE
Status st = obj->finalize(ColumnObject::FinalizeMode::WRITE_MODE);
EXPECT_TRUE(st.ok());
EXPECT_TRUE(obj->is_finalized());
// Verify data integrity
for (size_t i = 0; i < src_size; ++i) {
Field original_field, finalized_field;
source_column->get(i, original_field);
obj->get(i, finalized_field);
EXPECT_EQ(original_field, finalized_field);
}
}
// Test case 3: Test finalize without mode (default READ_MODE)
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
// Insert data from source column
for (size_t i = 0; i < src_size; ++i) {
Field field;
source_column->get(i, field);
obj->insert(field);
}
// Verify initial state
EXPECT_FALSE(obj->is_finalized());
// Finalize without mode
obj->finalize();
EXPECT_TRUE(obj->is_finalized());
// Verify data integrity
for (size_t i = 0; i < src_size; ++i) {
Field original_field, finalized_field;
source_column->get(i, original_field);
obj->get(i, finalized_field);
EXPECT_EQ(original_field, finalized_field);
}
}
// Test case 4: Test finalize on empty column
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
// Empty column always finalized
EXPECT_TRUE(obj->is_finalized());
// Finalize empty column
Status st = obj->finalize(ColumnObject::FinalizeMode::READ_MODE);
EXPECT_TRUE(st.ok());
EXPECT_TRUE(obj->is_finalized());
EXPECT_EQ(obj->size(), 0);
}
// Test case 5: Test finalize preserves column structure
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
// Insert data from source column
for (size_t i = 0; i < src_size; ++i) {
Field field;
source_column->get(i, field);
obj->insert(field);
}
// Verify initial state
EXPECT_FALSE(obj->is_finalized());
// Store original structure
auto original_subcolumns = obj->get_subcolumns();
// Finalize
Status st = obj->finalize(ColumnObject::FinalizeMode::READ_MODE);
EXPECT_TRUE(st.ok());
EXPECT_TRUE(obj->is_finalized());
// Verify structure is preserved
auto final_subcolumns = obj->get_subcolumns();
EXPECT_EQ(final_subcolumns.size(), original_subcolumns.size());
// Verify each subcolumn is finalized
for (const auto& subcolumn : final_subcolumns) {
EXPECT_TRUE(subcolumn->data.is_finalized());
}
}
// Test case 6: Test finalize with WRITE_MODE on sparse columns
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
// Insert data from source column
for (size_t i = 0; i < src_size; ++i) {
Field field;
source_column->get(i, field);
obj->insert(field);
}
// Verify initial state
EXPECT_FALSE(obj->is_finalized());
// Finalize in WRITE_MODE
Status st = obj->finalize(ColumnObject::FinalizeMode::WRITE_MODE);
EXPECT_TRUE(st.ok());
EXPECT_TRUE(obj->is_finalized());
// Verify sparse columns are handled
auto sparse_column = obj->get_sparse_column().get();
EXPECT_TRUE(sparse_column != nullptr);
// Verify data integrity
for (size_t i = 0; i < src_size; ++i) {
Field original_field, finalized_field;
source_column->get(i, original_field);
obj->get(i, finalized_field);
EXPECT_EQ(original_field, finalized_field);
}
}
};
auto temp = column_variant->clone();
auto cloned_object = assert_cast<ColumnObject*>(temp.get());
test_func(std::move(cloned_object));
}
TEST_F(ColumnObjectTest, clone_finalized) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Store original data for comparison
auto original_subcolumns = source_column->get_subcolumns();
// Test clone_finalized
auto cloned = source_column->clone_finalized();
EXPECT_TRUE(cloned.get() != nullptr);
EXPECT_EQ(cloned->size(), src_size);
// Verify cloned column has same subcolumns
auto cloned_subcolumns = assert_cast<ColumnObject*>(cloned.get())->get_subcolumns();
EXPECT_EQ(cloned_subcolumns.size(), original_subcolumns.size());
// Verify data integrity
for (size_t i = 0; i < src_size; ++i) {
Field original_field, cloned_field;
source_column->get(i, original_field);
cloned->get(i, cloned_field);
EXPECT_EQ(original_field, cloned_field);
}
};
auto temp = column_variant->clone();
auto cloned_object = assert_cast<ColumnObject*>(temp.get());
test_func(std::move(cloned_object));
}
TEST_F(ColumnObjectTest, sanitize) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Store original data for comparison
auto original_subcolumns = source_column->get_subcolumns();
// Test sanitize
Status status = source_column->sanitize();
EXPECT_TRUE(status.ok());
// Verify data integrity after sanitization
auto subcolumns_after = source_column->get_subcolumns();
EXPECT_EQ(subcolumns_after.size(), original_subcolumns.size());
// Verify all subcolumns are valid
for (const auto& subcolumn : subcolumns_after) {
EXPECT_TRUE(subcolumn != nullptr);
}
};
auto temp = column_variant->clone();
auto cloned_object = assert_cast<ColumnObject*>(temp.get());
test_func(std::move(cloned_object));
}
TEST_F(ColumnObjectTest, debug_string) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test debug_string
std::string debug = source_column->debug_string();
EXPECT_FALSE(debug.empty());
};
test_func(column_variant);
}
// used in function_element_at for variant
TEST_F(ColumnObjectTest, find_path_lower_bound_in_sparse_data) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
auto* mutable_ptr = assert_cast<ColumnObject*>(source_column.get());
// auto [sparse_data_paths, sparse_data_values] = mutable_ptr->get_sparse_data_paths_and_values();
// forloop
PathInData pat("object.array");
StringRef prefix_ref(pat.get_path());
std::string_view path_prefix(prefix_ref.data, prefix_ref.size);
const auto& sparse_data_map =
assert_cast<const ColumnMap&>(*mutable_ptr->get_sparse_column());
const auto& src_sparse_data_offsets = sparse_data_map.get_offsets();
const auto& src_sparse_data_paths =
assert_cast<const ColumnString&>(sparse_data_map.get_keys());
for (size_t i = 0; i != src_sparse_data_offsets.size(); ++i) {
size_t start = src_sparse_data_offsets[ssize_t(i) - 1];
size_t end = src_sparse_data_offsets[ssize_t(i)];
size_t lower_bound_index =
vectorized::ColumnObject::find_path_lower_bound_in_sparse_data(
prefix_ref, src_sparse_data_paths, start, end);
for (; lower_bound_index != end; ++lower_bound_index) {
auto path_ref = src_sparse_data_paths.get_data_at(lower_bound_index);
std::string_view path(path_ref.data, path_ref.size);
std::cout << "path : " << path << std::endl;
}
}
};
ColumnObject::MutablePtr obj;
obj = ColumnObject::create(1);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl";
load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj);
EXPECT_TRUE(!obj->empty());
std::cout << "column variant size: " << obj->size() << std::endl;
test_func(obj);
}
// used in SparseColumnExtractIterator::_fill_path_column
TEST_F(ColumnObjectTest, fill_path_column_from_sparse_data) {
ColumnObject::MutablePtr obj;
obj = ColumnObject::create(1);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl";
load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj);
EXPECT_TRUE(!obj->empty());
auto sparse_col = obj->get_sparse_column();
auto cloned_sparse = sparse_col->clone_empty();
auto& offsets = obj->serialized_sparse_column_offsets();
for (size_t i = 0; i != offsets.size(); ++i) {
auto start = offsets[i - 1];
auto end = offsets[i];
vectorized::ColumnObject::fill_path_column_from_sparse_data(
*obj->get_subcolumn({}) /*root*/, nullptr, StringRef {"array"},
cloned_sparse->get_ptr(), start, end);
}
EXPECT_NE(cloned_sparse->size(), sparse_col->size());
vectorized::ColumnObject::fill_path_column_from_sparse_data(
*obj->get_subcolumn({}) /*root*/, nullptr, StringRef {"array"}, sparse_col->get_ptr(),
0, sparse_col->size());
EXPECT_ANY_THROW(obj->check_consistency());
}
TEST_F(ColumnObjectTest, not_finalized) {
ColumnObject::MutablePtr obj;
obj = ColumnObject::create(1);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl";
load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj);
const auto& json_file_arr = test_data_dir_json + "json_variant/array_object_boundary.jsonl";
load_columns_data_from_file(cols, serde, '\n', {0}, json_file_arr);
EXPECT_TRUE(obj->size() == 200);
EXPECT_FALSE(obj->is_finalized());
// test get_finalized_column_ptr/ get_finalized_column for subColumn
auto subcolumns = obj->get_subcolumns();
for (const auto& subcolumn : subcolumns) {
EXPECT_TRUE(subcolumn != nullptr);
EXPECT_FALSE(subcolumn->data.is_finalized());
EXPECT_ANY_THROW(subcolumn->data.get_finalized_column_ptr());
EXPECT_ANY_THROW(subcolumn->data.get_finalized_column());
}
}
doris::vectorized::Field get_field_v2(std::string_view type, size_t array_element_cnt = 0) {
static std::unordered_map<std::string_view, doris::vectorized::Field> field_map;
if (field_map.empty()) {
doris::vectorized::Field int_field = 20;
doris::vectorized::Field str_field(String("str", 3));
doris::vectorized::Field arr_int_field = Array();
doris::vectorized::Field arr_str_field = Array();
auto& array1 = arr_int_field.get<Array>();
auto& array2 = arr_str_field.get<Array>();
for (size_t i = 0; i < array_element_cnt; ++i) {
array1.emplace_back(int_field);
array2.emplace_back(str_field);
}
field_map["int"] = int_field;
field_map["string"] = str_field;
field_map["ai"] = arr_int_field;
field_map["as"] = arr_str_field;
>>>>>>> b4f01947a44 ([feature](semi-structure) support variant and index with many features)
}
return field_map[type];
}
<<<<<<< HEAD
// std::string convert_jsonb_field_to_string(doris::vectorized::Field jsonb) {
// const auto& val = jsonb.get<JsonbField>();
// const JsonbValue* json_val = JsonbDocument::createValue(val.get_value(), val.get_size());
// rapidjson::Document doc;
// doc.SetObject();
// rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
// rapidjson::Value json_value;
// convert_jsonb_to_rapidjson(*json_val, json_value, allocator);
// doc.AddMember("value", json_value, allocator);
// rapidjson::StringBuffer buffer;
// rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
// doc.Accept(writer);
// return std::string(buffer.GetString());
// }
std::string convert_field_to_string(doris::vectorized::Field array) {
rapidjson::Document doc;
doc.SetObject();
rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
rapidjson::Value json_value;
// DataTypeSerDe::convert_field_to_rapidjson(array, json_value, allocator);
doc.AddMember("value", json_value, allocator);
rapidjson::StringBuffer buffer;
rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
doc.Accept(writer);
return std::string(buffer.GetString());
}
TEST(ColumnVariantTest, is_null_at) {
auto v = VariantUtil::construct_dst_varint_column();
PathInData path("v.f");
auto sub = v->get_subcolumn(path);
std::cout << sub->get_least_common_typeBase()->get_name() << std::endl;
EXPECT_TRUE(sub->is_null_at(0));
auto v1 = VariantUtil::construct_advanced_varint_column();
PathInData path1("v.b.d");
auto sub1 = v1->get_subcolumn(path1);
EXPECT_TRUE(sub1->is_null_at(2));
EXPECT_ANY_THROW(sub1->is_null_at(16));
vectorized::Field f;
EXPECT_ANY_THROW(sub1->get(16, f));
std::cout << sub1->num_rows << std::endl;
EXPECT_NO_THROW(sub1->resize(sub1->num_rows));
auto [sparse_column_keys, sparse_column_values] = v1->get_sparse_data_paths_and_values();
std::string_view pa("v.a");
EXPECT_NO_THROW(
sub1->serialize_to_sparse_column(sparse_column_keys, pa, sparse_column_values, 2));
EXPECT_ANY_THROW(
sub1->serialize_to_sparse_column(sparse_column_keys, pa, sparse_column_values, 16));
}
TEST(ColumnVariantTest, advanced_finalize) {
auto variant = VariantUtil::construct_advanced_varint_column();
// 4. finalize
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
EXPECT_TRUE(variant->pick_subcolumns_to_sparse_column({}).ok());
EXPECT_EQ(variant->size(), 15);
// check finalized subcolumn
// 5 subcolumn + 1 root
EXPECT_EQ(variant->subcolumns.size(), 6);
for (const auto& column : variant->subcolumns) {
if (column->data.is_root) {
continue;
}
EXPECT_EQ(column->data.data.size(), 1);
}
// check sparse column
const auto& offsets = variant->serialized_sparse_column_offsets();
for (int row = 0; row < 5; ++row) {
EXPECT_EQ(offsets[row] - offsets[row - 1], 0);
}
for (int row = 5; row < 15; ++row) {
EXPECT_EQ(offsets[row] - offsets[row - 1], 3);
}
{
// Test fill_path_column_from_sparse_data
auto map = std::make_unique<NullMap>(15, 0);
vectorized::ColumnObject::fill_path_column_from_sparse_data(
*variant->get_subcolumn({}) /*root*/, map.get(), StringRef {"array"},
variant->get_sparse_column(), 0, 5);
vectorized::ColumnObject::fill_path_column_from_sparse_data(
*variant->get_subcolumn({}) /*root*/, map.get(), StringRef {"array"},
variant->get_sparse_column(), 5, 15);
}
}
TEST(ColumnVariantTest, advanced_deserialize) {
auto variant = VariantUtil::construct_advanced_varint_column();
// 4. finalize
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
EXPECT_TRUE(variant->pick_subcolumns_to_sparse_column({}).ok());
EXPECT_EQ(variant->size(), 15);
const auto& [path, value] = variant->get_sparse_data_paths_and_values();
const auto& offsets = variant->serialized_sparse_column_offsets();
for (size_t row = 5; row < 10; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
auto data = path->get_data_at(start);
auto pair = variant->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data, StringRef("v.b.d", 5));
EXPECT_EQ(convert_field_to_string(pair.first),
convert_field_to_string(get_jsonb_field("array_int")));
auto data2 = path->get_data_at(start);
auto pair2 = variant->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data2, StringRef("v.c.d", 5));
EXPECT_EQ(convert_field_to_string(pair2.first),
convert_field_to_string(VariantUtil::get_field("string")));
auto data3 = path->get_data_at(start);
auto pair3 = variant->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data3, StringRef("v.d.d", 5));
EXPECT_EQ(convert_field_to_string(pair3.first),
convert_field_to_string(get_jsonb_field("array_int")));
EXPECT_EQ(start, end);
}
for (size_t row = 10; row < 15; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
auto data = path->get_data_at(start);
auto pair = variant->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data, StringRef("v.b.d", 5));
EXPECT_EQ(convert_field_to_string(pair.first),
convert_field_to_string(get_jsonb_field("array_str")));
auto data2 = path->get_data_at(start);
auto pair2 = variant->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data2, StringRef("v.c.d", 5));
EXPECT_EQ(convert_field_to_string(pair2.first),
convert_field_to_string(get_jsonb_field("int")));
auto data3 = path->get_data_at(start);
auto pair3 = variant->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data3, StringRef("v.d.d", 5));
EXPECT_EQ(convert_field_to_string(pair3.first),
convert_field_to_string(get_jsonb_field("array_str")));
EXPECT_EQ(start, end);
}
}
TEST(ColumnVariantTest, advanced_insert_range_from) {
auto src = VariantUtil::construct_advanced_varint_column();
EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
EXPECT_TRUE(src->pick_subcolumns_to_sparse_column({}).ok());
EXPECT_EQ(src->size(), 15);
auto dst = VariantUtil::construct_dst_varint_column();
// subcolumn->subcolumn v.b v.f v.e
// subcolumn->sparse_column v.a v.c
// sparse_column->subcolumn v.b.d v.c.d
// sparse_column->sparse_column v.d.d
dst->insert_range_from(*src, 0, src->size());
dst->finalize();
EXPECT_EQ(dst->size(), 15);
EXPECT_EQ(dst->subcolumns.size(), 6);
ColumnObject::Subcolumns dst_subcolumns = dst->subcolumns;
std::sort(
dst_subcolumns.begin(), dst_subcolumns.end(),
[](const auto& lhsItem, const auto& rhsItem) { return lhsItem->path < rhsItem->path; });
// subcolumns
for (const auto& column : dst_subcolumns) {
if (column->data.is_root) {
continue;
}
EXPECT_EQ(column->data.data.size(), 1);
EXPECT_EQ(column->data.data[0]->size(), 15);
if (column->path.get_path().size() == 3) {
EXPECT_EQ(column->data.get_non_null_value_size(), 15);
if (column->path.get_path() == "v.b") {
EXPECT_EQ(assert_cast<const DataTypeNullable*>(column->data.data_types[0].get())
->get_nested_type()
->get_type_id(),
TypeIndex::JSONB);
}
} else if (column->path.get_path().size() == 5) {
EXPECT_EQ(column->data.get_non_null_value_size(), 10);
EXPECT_EQ(assert_cast<const DataTypeNullable*>(column->data.data_types[0].get())
->get_nested_type()
->get_type_id(),
TypeIndex::JSONB);
for (size_t row = 0; row < 5; ++row) {
EXPECT_TRUE(column->data.data[0]->is_null_at(row));
}
}
}
// sparse columns
const auto& [path, value] = dst->get_sparse_data_paths_and_values();
const auto& offsets = dst->serialized_sparse_column_offsets();
// v.a v.c
for (int row = 0; row < 5; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
auto data = path->get_data_at(start);
EXPECT_EQ(data, StringRef("v.a", 3));
auto pair = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair.first.get<Int64>(), 20);
auto data2 = path->get_data_at(start);
EXPECT_EQ(data2, StringRef("v.c", 3));
auto pair2 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(convert_field_to_string(pair2.first),
convert_field_to_string(VariantUtil::get_field("array_int")));
EXPECT_EQ(start, end);
}
for (int row = 5; row < 10; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
auto data = path->get_data_at(start);
auto pair = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data, StringRef("v.a", 3));
EXPECT_EQ(pair.first.get<Int64>(), 20);
auto data2 = path->get_data_at(start);
auto pair2 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data2, StringRef("v.c", 3));
EXPECT_EQ(convert_field_to_string(pair2.first),
convert_field_to_string(VariantUtil::get_field("array_int")));
auto data3 = path->get_data_at(start);
auto pair3 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data3, StringRef("v.d.d", 5));
EXPECT_EQ(convert_field_to_string(pair3.first),
convert_field_to_string(get_jsonb_field("array_int")));
EXPECT_EQ(start, end);
}
for (int row = 10; row < 15; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
auto data = path->get_data_at(start);
auto pair = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data, StringRef("v.a", 3));
EXPECT_EQ(pair.first.get<Int64>(), 20);
auto data2 = path->get_data_at(start);
auto pair2 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data2, StringRef("v.c", 3));
EXPECT_EQ(convert_field_to_string(pair2.first),
convert_field_to_string(VariantUtil::get_field("array_int")));
auto data3 = path->get_data_at(start);
auto pair3 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data3, StringRef("v.d.d", 5));
EXPECT_EQ(convert_field_to_string(pair3.first),
convert_field_to_string(get_jsonb_field("array_str")));
EXPECT_EQ(start, end);
}
}
TEST(ColumnVariantTest, empty_inset_range_from) {
auto src = VariantUtil::construct_varint_column_only_subcolumns();
EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
EXPECT_TRUE(src->pick_subcolumns_to_sparse_column({}).ok());
EXPECT_EQ(src->size(), 6);
// dst is an empty column
auto dst = ColumnObject::create(5);
// subcolumn->subcolumn v.a v.b v.c v.f v.e
dst->insert_range_from(*src, 0, 6);
EXPECT_EQ(dst->size(), 6);
// 5 subcolumn
EXPECT_EQ(dst->subcolumns.size(), 6);
for (const auto& column : dst->subcolumns) {
if (column->data.is_root) {
EXPECT_EQ(column->data.data.size(), 1);
EXPECT_EQ(column->data.data[0]->size(), 6);
EXPECT_EQ(column->data.get_non_null_value_size(), 1);
continue;
}
EXPECT_EQ(column->data.data.size(), 1);
EXPECT_EQ(column->data.data[0]->size(), 6);
EXPECT_EQ(column->data.get_non_null_value_size(), 5);
}
// empty sparse column
const auto& [path, value] = dst->get_sparse_data_paths_and_values();
const auto& offsets = dst->serialized_sparse_column_offsets();
EXPECT_EQ(offsets[4], offsets[-1]);
EXPECT_EQ(path->size(), value->size());
auto src_contains_seven_subcolumns = VariantUtil::construct_varint_column_more_subcolumns();
EXPECT_TRUE(
src_contains_seven_subcolumns->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
EXPECT_TRUE(src_contains_seven_subcolumns->pick_subcolumns_to_sparse_column({}).ok());
EXPECT_EQ(src_contains_seven_subcolumns->size(), 5);
// subcolumn->subcolumn v.a v.b v.c v.f v.e
// add sprase columns v.s v.x v.y v.z
dst->insert_range_from(*src_contains_seven_subcolumns, 0, 5);
EXPECT_EQ(dst->size(), 11);
// 5 subcolumn
EXPECT_EQ(dst->subcolumns.size(), 6);
for (int row = 0; row < 6; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
EXPECT_EQ(start, end);
}
// v.s v.x v.y v.z
for (int row = 6; row < 11; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
auto data0 = path->get_data_at(start);
EXPECT_EQ(data0, StringRef("v.s", 3));
auto pair0 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(convert_field_to_string(pair0.first),
convert_field_to_string(VariantUtil::get_field("string")));
auto data = path->get_data_at(start);
EXPECT_EQ(data, StringRef("v.x", 3));
auto pair = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair.first.get<Int16>(), std::numeric_limits<Int16>::max());
auto data2 = path->get_data_at(start);
EXPECT_EQ(data2, StringRef("v.y", 3));
auto pair2 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair2.first.get<Int32>(), std::numeric_limits<Int32>::max());
auto data3 = path->get_data_at(start);
EXPECT_EQ(data3, StringRef("v.z", 3));
auto pair3 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair3.first.get<Int64>(),
Int64(static_cast<Int64>(std::numeric_limits<Int32>::max()) + 1));
EXPECT_EQ(start, end);
}
auto src_contains_subcoumns_and_sparse_columns = VariantUtil::construct_basic_varint_column();
EXPECT_TRUE(src_contains_subcoumns_and_sparse_columns
->finalize(ColumnObject::FinalizeMode::WRITE_MODE)
.ok());
EXPECT_TRUE(
src_contains_subcoumns_and_sparse_columns->pick_subcolumns_to_sparse_column({}).ok());
EXPECT_EQ(src_contains_subcoumns_and_sparse_columns->size(), 10);
// subcolumn->subcolumn v.a v.b v.c v.f v.e
// add sprase columns v.s v.x v.y v.b.d v.c.d v.d.d
dst->insert_range_from(*src_contains_subcoumns_and_sparse_columns, 0, 10);
EXPECT_EQ(dst->size(), 21);
// 5 subcolumn
EXPECT_EQ(dst->subcolumns.size(), 6);
for (int row = 0; row < 6; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
EXPECT_EQ(start, end);
}
// v.x v.y
for (int row = 6; row < 11; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
auto data0 = path->get_data_at(start);
EXPECT_EQ(data0, StringRef("v.s", 3));
auto pair0 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(convert_field_to_string(pair0.first),
convert_field_to_string(VariantUtil::get_field("string")));
auto data = path->get_data_at(start);
EXPECT_EQ(data, StringRef("v.x", 3));
auto pair = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair.first.get<Int16>(), std::numeric_limits<Int16>::max());
auto data2 = path->get_data_at(start);
EXPECT_EQ(data2, StringRef("v.y", 3));
auto pair2 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair2.first.get<Int32>(), std::numeric_limits<Int32>::max());
auto data3 = path->get_data_at(start);
EXPECT_EQ(data3, StringRef("v.z", 3));
auto pair3 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair3.first.get<Int64>(),
Int64(static_cast<Int64>(std::numeric_limits<Int32>::max()) + 1));
EXPECT_EQ(start, end);
}
for (int row = 11; row < 16; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
EXPECT_EQ(start, end);
}
//v.b.d v.c.d v.d.d
for (int row = 16; row < 21; ++row) {
size_t start = offsets[row - 1];
size_t end = offsets[row];
auto data = path->get_data_at(start);
EXPECT_EQ(data, StringRef("v.b.d", 5));
auto pair = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(pair.first.get<Int64>(), 30);
auto data2 = path->get_data_at(start);
auto pair2 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data2, StringRef("v.c.d", 5));
EXPECT_EQ(pair2.first.get<Int64>(), 30);
auto data3 = path->get_data_at(start);
auto pair3 = dst->deserialize_from_sparse_column(value, start++);
EXPECT_EQ(data3, StringRef("v.d.d", 5));
EXPECT_EQ(pair3.first.get<String>(), "50");
EXPECT_EQ(start, end);
}
}
TEST(ColumnVariantTest, insert_null_to_decimal_column) {
ColumnObject::Subcolumn subcolumn(0, true /* is_nullable */, false /* is_root */);
Field null_field;
subcolumn.insert(null_field);
subcolumn.finalize();
EXPECT_EQ(subcolumn.data.size(), 1);
EXPECT_EQ(subcolumn.data[0]->size(), 1);
EXPECT_EQ(subcolumn.data_types.size(), 1);
EXPECT_EQ(subcolumn.least_common_type.get_base_type_id(), TypeIndex::Nothing);
Field decimal_field(DecimalField<Decimal128V2>(10, 2));
subcolumn.insert(decimal_field);
subcolumn.finalize();
EXPECT_EQ(subcolumn.get_non_null_value_size(), 1);
EXPECT_EQ(subcolumn.data.size(), 1);
EXPECT_EQ(subcolumn.data[0]->size(), 2);
EXPECT_EQ(subcolumn.data[0]->is_null_at(0), true);
EXPECT_EQ(subcolumn.data[0]->is_null_at(1), false);
EXPECT_EQ(subcolumn.data_types.size(), 1);
EXPECT_EQ(subcolumn.least_common_type.get_base_type_id(), TypeIndex::Decimal128V2);
>>>>>>> 954311c1aad ([feature](semi-structure) support variant and index with many features)
}
TEST_F(ColumnVariantTest, test_insert_indices_from) {
// Test case 1: Insert from scalar variant source to empty destination
{
// Create source column with scalar values
auto src_column = ColumnVariant::create(true);
Field field_int = Field::create_field<TYPE_INT>(123);
src_column->try_insert(field_int);
Field field_int2 = Field::create_field<TYPE_INT>(456);
src_column->try_insert(field_int2);
src_column->finalize();
EXPECT_TRUE(src_column->is_scalar_variant());
EXPECT_TRUE(src_column->is_finalized());
EXPECT_EQ(src_column->size(), 2);
// Create empty destination column
auto dst_column = ColumnVariant::create(true);
EXPECT_EQ(dst_column->size(), 0);
// Create indices
std::vector<uint32_t> indices = {0, 1};
// Insert using indices
dst_column->insert_indices_from(*src_column, indices.data(),
indices.data() + indices.size());
// Verify results
EXPECT_EQ(dst_column->size(), 2);
EXPECT_TRUE(dst_column->is_scalar_variant());
EXPECT_TRUE(dst_column->is_finalized());
EXPECT_EQ(dst_column->get_root_type()->get_name(), src_column->get_root_type()->get_name());
Field result1;
dst_column->get(0, result1);
EXPECT_EQ(result1.get<VariantMap>().at({}).get<Int64>(), 123);
Field result2;
dst_column->get(1, result2);
EXPECT_EQ(result2.get<VariantMap>().at({}).get<Int64>(), 456);
}
// Test case 2: Insert from scalar variant source to non-empty destination of same type
{
// Create source column with scalar values
auto src_column = ColumnVariant::create(true);
Field field_int = Field::create_field<TYPE_INT>(123);
src_column->try_insert(field_int);
Field field_int2 = Field::create_field<TYPE_INT>(456);
src_column->try_insert(field_int2);
src_column->finalize();
EXPECT_TRUE(src_column->is_scalar_variant());
// Create destination column with same type
auto dst_column = ColumnVariant::create(true);
Field field_int3 = Field::create_field<TYPE_INT>(789);
dst_column->try_insert(field_int3);
dst_column->finalize();
EXPECT_TRUE(dst_column->is_scalar_variant());
EXPECT_EQ(dst_column->size(), 1);
// Create indices for selecting specific elements
std::vector<uint32_t> indices = {1, 0};
// Insert using indices (reversed order)
dst_column->insert_indices_from(*src_column, indices.data(),
indices.data() + indices.size());
// Verify results
EXPECT_EQ(dst_column->size(), 3);
Field result1, result2, result3;
dst_column->get(0, result1);
dst_column->get(1, result2);
dst_column->get(2, result3);
EXPECT_EQ(result1.get<VariantMap>().at({}).get<Int64>(), 789);
EXPECT_EQ(result2.get<VariantMap>().at({}).get<Int64>(), 456);
EXPECT_EQ(result3.get<VariantMap>().at({}).get<Int64>(), 123);
}
// Test case 3: Insert from non-scalar or different type source (fallback to try_insert)
{
// Create source column with object values (non-scalar)
auto src_column = ColumnVariant::create(true);
// Create a map with {"a": 123}
Field field_map = Field::create_field<TYPE_VARIANT>(VariantMap());
auto& map1 = field_map.get<VariantMap&>();
map1[PathInData("a")] = Field::create_field<TYPE_INT>(123);
src_column->try_insert(field_map);
// Create another map with {"b": "hello"}
field_map = Field::create_field<TYPE_VARIANT>(VariantMap());
auto& map2 = field_map.get<VariantMap&>();
map2[PathInData("b")] = Field::create_field<TYPE_STRING>(String("hello"));
src_column->try_insert(field_map);
src_column->finalize();
EXPECT_FALSE(src_column->is_scalar_variant());
// Create destination column (empty)
auto dst_column = ColumnVariant::create(true);
// Create indices
std::vector<uint32_t> indices = {1, 0};
// Insert using indices
dst_column->insert_indices_from(*src_column, indices.data(),
indices.data() + indices.size());
// Verify results
EXPECT_EQ(dst_column->size(), 2);
Field result1, result2;
dst_column->get(0, result1);
dst_column->get(1, result2);
EXPECT_TRUE(result1.get_type() == PrimitiveType::TYPE_VARIANT);
EXPECT_TRUE(result2.get_type() == PrimitiveType::TYPE_VARIANT);
const auto& result1_map = result1.get<const VariantMap&>();
const auto& result2_map = result2.get<const VariantMap&>();
EXPECT_EQ(result1_map.at(PathInData("b")).get<const String&>(), "hello");
EXPECT_EQ(result2_map.at(PathInData("a")).get<Int64>(), 123);
}
}
TEST_F(ColumnVariantTest, test_nested_array_of_jsonb_get) {
// Test case: Create a ColumnVariant with subcolumn type Array<JSONB>
// Create a ColumnVariant with subcolumns
auto variant_column = ColumnVariant::create(true);
// Add subcolumn with path "nested.array"
variant_column->add_sub_column(PathInData("nested.array"), 0);
// Get the subcolumn and manually set its type to Array<JSONB>
auto* subcolumn = variant_column->get_subcolumn(PathInData("nested.array"));
ASSERT_NE(subcolumn, nullptr);
// Create test data: Array of strings
Field array_of_strings = Field::create_field<TYPE_ARRAY>(Array());
// Add string elements to the array
std::string test_data1 = R"("a")";
std::string test_data2 = R"(b)";
array_of_strings.get<Array&>().push_back(Field::create_field<TYPE_STRING>(test_data1));
array_of_strings.get<Array&>().push_back(Field::create_field<TYPE_STRING>(test_data2));
// Insert the array field into the subcolumn
subcolumn->insert(array_of_strings);
// Test 1: the column and test get method
{
EXPECT_TRUE(variant_column->is_finalized());
// check the subcolumn get method
Field result;
EXPECT_NO_THROW(subcolumn->get(0, result));
// Verify the result is still an array
EXPECT_EQ(result.get_type(), PrimitiveType::TYPE_ARRAY);
const auto& result_array = result.get<const Array&>();
EXPECT_EQ(result_array.size(), 2);
// Check that all elements are JSONB fields
for (const auto& item : result_array) {
EXPECT_EQ(item.get_type(), PrimitiveType::TYPE_STRING);
}
// Verify string content is preserved
const auto& string1 = result_array[0].get<const String&>();
const auto& string2 = result_array[1].get<const String&>();
EXPECT_EQ(string1, R"("a")"); // "\"a\""
EXPECT_EQ(string2, R"(b)"); // "b"
}
// Test 2: Test with a row of different type of array to test the subcolumn get method
{
// Add another row with different int array
Field int_array = Field::create_field<TYPE_ARRAY>(Array());
int_array.get<Array&>().push_back(Field::create_field<TYPE_INT>(1));
int_array.get<Array&>().push_back(Field::create_field<TYPE_INT>(2));
int_array.get<Array&>().push_back(Field::create_field<TYPE_INT>(3));
// and we should add more data to the subcolumn column
subcolumn->insert(int_array);
EXPECT_FALSE(variant_column->is_finalized());
// check the subcolumn get method
Field result;
EXPECT_NO_THROW(subcolumn->get(1, result));
EXPECT_EQ(result.get_type(), PrimitiveType::TYPE_ARRAY);
const auto& result_array = result.get<const Array&>();
EXPECT_EQ(result_array.size(), 3);
EXPECT_EQ(result_array[0].get_type(), PrimitiveType::TYPE_JSONB);
EXPECT_EQ(result_array[1].get_type(), PrimitiveType::TYPE_JSONB);
EXPECT_EQ(result_array[2].get_type(), PrimitiveType::TYPE_JSONB);
// check the first row Field is a string
Field result_string;
EXPECT_NO_THROW(subcolumn->get(0, result_string));
EXPECT_EQ(result_string.get_type(), PrimitiveType::TYPE_ARRAY);
const auto& result_string_array = result_string.get<const Array&>();
EXPECT_EQ(result_string_array.size(), 2);
EXPECT_EQ(result_string_array[0].get_type(), PrimitiveType::TYPE_JSONB);
EXPECT_EQ(result_string_array[1].get_type(), PrimitiveType::TYPE_JSONB);
// Finalize -> we should get the least common type of the subcolumn
variant_column->finalize();
EXPECT_TRUE(variant_column->is_finalized());
// we should get another subcolumn from the variant column
auto* subcolumn_finalized = variant_column->get_subcolumn(PathInData("nested.array"));
ASSERT_NE(subcolumn_finalized, nullptr);
// check the subcolumn_finalized get method
Field result1, result2;
EXPECT_NO_THROW(subcolumn_finalized->get(0, result1));
EXPECT_NO_THROW(subcolumn_finalized->get(1, result2));
// Verify both results are arrays
EXPECT_EQ(result1.get_type(), PrimitiveType::TYPE_ARRAY);
EXPECT_EQ(result2.get_type(), PrimitiveType::TYPE_ARRAY);
const auto& array1 = result1.get<const Array&>();
const auto& array2 = result2.get<const Array&>();
EXPECT_EQ(array1.size(), 2);
EXPECT_EQ(array2.size(), 3);
// Verify all elements are JSONB
for (const auto& item : array1) {
EXPECT_EQ(item.get_type(), PrimitiveType::TYPE_JSONB);
}
for (const auto& item : array2) {
EXPECT_EQ(item.get_type(), PrimitiveType::TYPE_JSONB);
}
}
// Test 4: Test with empty array
{
auto* subcolumn = variant_column->get_subcolumn(PathInData("nested.array"));
ASSERT_NE(subcolumn, nullptr);
Field empty_array_field = Field::create_field<TYPE_ARRAY>(Array());
subcolumn->insert(empty_array_field);
EXPECT_TRUE(variant_column->is_finalized());
// check the subcolumn get method
Field result;
EXPECT_NO_THROW(subcolumn->get(2, result));
EXPECT_EQ(result.get_type(), PrimitiveType::TYPE_ARRAY);
const auto& result_array = result.get<const Array&>();
EXPECT_EQ(result_array.size(), 0);
}
}
} // namespace doris::vectorized
=======
TEST_F(ColumnObjectTest, array_field_operations) {
auto test_func = [](const auto& source_column) {
auto src_size = source_column->size();
EXPECT_TRUE(src_size > 0);
// Test case 1: Test create_empty_array_field
{
EXPECT_ANY_THROW(create_empty_array_field(0));
// Test with different dimensions
for (size_t dim = 1; dim <= 3; ++dim) {
Field array_field = create_empty_array_field(dim);
EXPECT_TRUE(array_field.get_type() == Field::Types::Array);
const Array& array = array_field.get<Array>();
if (dim > 1) {
EXPECT_FALSE(array.empty());
} else {
EXPECT_TRUE(array.empty());
}
}
}
// Test case 2: Test create_array
{
// Test with different types
std::vector<TypeIndex> types = {TypeIndex::Int8, TypeIndex::String, TypeIndex::Float64};
for (const auto& type : types) {
for (size_t dim = 1; dim <= 3; ++dim) {
DataTypePtr array_type = create_array(type, dim);
EXPECT_TRUE(array_type != nullptr);
}
}
// Test create_array_of_type with TypeIndex::Nothing
auto dt_ptr = create_array_of_type(TypeIndex::Nothing, 0, false);
EXPECT_TRUE(dt_ptr->get_type_id() == TypeIndex::Nothing);
}
// Test case 3: Test recreate_column_with_default_values
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
// Create a subcolumn with array type
PathInData path("array_field");
auto array_type = create_array(TypeIndex::Int8, 2);
auto column = array_type->create_column();
auto column_a = array_type->create_column();
column_a->insert(Array(1));
obj->add_sub_column(path, std::move(column));
// Get the subcolumn
const auto* subcolumn = obj->get_subcolumn(path);
EXPECT_TRUE(subcolumn != nullptr);
EXPECT_ANY_THROW(subcolumn->get_finalized_column_ptr());
// Recreate column with default values
auto new_column = recreate_column_with_default_values(
column_a->convert_to_full_column_if_const(), TypeIndex::Int8, 2);
EXPECT_TRUE(new_column->get_name().find("Array") != std::string::npos);
EXPECT_EQ(new_column->size(), subcolumn->size());
}
// Test case 4: Test clone_with_default_values
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
// Create a subcolumn with array type
PathInData path("array_field");
auto array_type = create_array(TypeIndex::Int8, 1);
auto column = array_type->create_column();
Array array1 = {1, 2, 3};
column->insert(array1);
obj->add_sub_column(path, std::move(column), array_type);
// Get the subcolumn
const auto* subcolumn = obj->get_subcolumn(path);
EXPECT_TRUE(subcolumn != nullptr);
EXPECT_TRUE(subcolumn->size() > 0);
std::cout << "subcolumn size: " << subcolumn->size() << std::endl;
Field f = subcolumn->get_last_field();
EXPECT_TRUE(f.get_type() == Field::Types::Array);
// Create field info
FieldInfo info;
info.scalar_type_id = TypeIndex::Int8;
info.num_dimensions = 1;
info.have_nulls = false;
info.need_convert = false;
// Clone with default values
auto cloned = subcolumn->clone_with_default_values(info);
std::cout << "cloned size: " << cloned.size() << std::endl;
EXPECT_TRUE(cloned.size() == subcolumn->size());
}
// Test case 5: Test Subcolumn::resize
{
auto col = source_column->clone_empty();
auto obj = assert_cast<ColumnObject*>(col.get());
// Create a subcolumn
PathInData path("test_field");
obj->add_sub_column(path, src_size);
// Get the subcolumn
auto* subcolumn = obj->get_subcolumn(path);
EXPECT_TRUE(subcolumn != nullptr);
// Test resize to larger size
size_t new_size = src_size + 10;
subcolumn->resize(new_size);
EXPECT_EQ(subcolumn->size(), new_size);
// Test resize to smaller size
new_size = src_size / 2;
subcolumn->resize(new_size);
EXPECT_EQ(subcolumn->size(), new_size);
// Test resize to zero
subcolumn->resize(0);
EXPECT_EQ(subcolumn->size(), 0);
}
{
// Test wrapp_array_nullable
// 1. create an empty variant column
auto variant = ColumnObject::create(2);
std::vector<std::pair<std::string, doris::vectorized::Field>> data;
// 2. subcolumn path
data.emplace_back("v.ai", get_field_v2("ai", 1));
data.emplace_back("v.as", get_field_v2("as", 1));
for (int i = 0; i < 2; ++i) {
auto field = VariantUtil::construct_variant_map(data);
variant->try_insert(field);
}
EXPECT_FALSE(variant->is_finalized());
Status st = variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE);
EXPECT_TRUE(st.ok());
EXPECT_TRUE(variant->is_finalized());
std::cout << "sub: " << variant->get_subcolumns().size() << std::endl;
for (auto& entry : variant->get_subcolumns()) {
std::cout << "entry path: " << entry->path.get_path() << std::endl;
std::cout << "entry type: " << entry->data.get_least_common_typeBase()->get_name()
<< std::endl;
std::cout << "entry dimension " << entry->data.get_dimensions() << std::endl;
}
// then clear
variant->clear_column_data();
EXPECT_TRUE(variant->size() == 0);
}
};
auto temp = column_variant->clone();
auto cloned_object = assert_cast<ColumnObject*>(temp.get());
test_func(std::move(cloned_object));
}
TEST_F(ColumnObjectTest, assert_exception_happen) {
// Test case 1: Test assert_exception_happen
{
// 1. create an empty variant column
vectorized::ColumnObject::Subcolumns dynamic_subcolumns;
dynamic_subcolumns.create_root(vectorized::ColumnObject::Subcolumn(0, true, true /*root*/));
dynamic_subcolumns.add(vectorized::PathInData("v.f"),
vectorized::ColumnObject::Subcolumn {0, true});
dynamic_subcolumns.add(vectorized::PathInData("v.e"),
vectorized::ColumnObject::Subcolumn {0, true});
dynamic_subcolumns.add(vectorized::PathInData("v.b"),
vectorized::ColumnObject::Subcolumn {0, true});
dynamic_subcolumns.add(vectorized::PathInData("v.b.d"),
vectorized::ColumnObject::Subcolumn {0, true});
dynamic_subcolumns.add(vectorized::PathInData("v.c.d"),
vectorized::ColumnObject::Subcolumn {0, true});
std::cout << "dynamic_subcolumns size: " << dynamic_subcolumns.size() << std::endl;
EXPECT_ANY_THROW(ColumnObject::create(2, std::move(dynamic_subcolumns)));
}
{
// 1. create an empty variant column
auto variant = ColumnObject::create(5);
std::vector<std::pair<std::string, doris::vectorized::Field>> data;
// 2. subcolumn path
data.emplace_back("v.a", get_field_v2("int"));
data.emplace_back("v.b", get_field_v2("string"));
data.emplace_back("v.c", get_field_v2("ai", 2));
data.emplace_back("v.f", get_field_v2("as", 2));
data.emplace_back("v.e", get_field_v2("string"));
for (int i = 0; i < 5; ++i) {
auto field = VariantUtil::construct_variant_map(data);
variant->try_insert(field);
}
// 3. sparse column path
data.emplace_back("v.d.d", get_field_v2("ai", 2));
data.emplace_back("v.c.d", get_field_v2("string"));
data.emplace_back("v.b.d", get_field_v2("ai", 2));
for (int i = 0; i < 5; ++i) {
auto field = VariantUtil::construct_variant_map(data);
variant->try_insert(field);
}
data.clear();
data.emplace_back("v.a", get_field_v2("int"));
data.emplace_back("v.b", get_field_v2("int"));
data.emplace_back("v.c", get_field_v2("ai", 2));
data.emplace_back("v.f", get_field_v2("as", 2));
data.emplace_back("v.e", get_field_v2("string"));
data.emplace_back("v.d.d", get_field_v2("as", 2));
data.emplace_back("v.c.d", get_field_v2("int"));
data.emplace_back("v.b.d", get_field_v2("as", 2));
for (int i = 0; i < 5; ++i) {
auto field = VariantUtil::construct_variant_map(data);
variant->try_insert(field);
}
EXPECT_FALSE(variant->is_finalized());
for (const auto& column : variant->get_subcolumns()) {
if (!column->data.is_finalized()) {
EXPECT_ANY_THROW(column->data.remove_nullable());
EXPECT_ANY_THROW(column->data.get_finalized_column());
} else {
std::cout << "column path: " << column->path.get_path() << std::endl;
EXPECT_NO_THROW(column->data.remove_nullable());
EXPECT_NO_THROW(column->data.get_finalized_column());
}
}
}
}
TEST_F(ColumnObjectTest, try_insert_default_from_nested) {
// 1. create an empty variant column
vectorized::ColumnObject::Subcolumns dynamic_subcolumns;
auto array_type = create_array(TypeIndex::String, 1);
auto column = array_type->create_column();
Array array1 = {"amory", "commit"};
Array array2 = {"amory", "doris"};
column->insert(array1);
column->insert(array2);
auto array_type2 = create_array(TypeIndex::String, 2);
auto column2 = array_type2->create_column();
Array array22, array23;
array22.push_back(array1);
array22.push_back(array2);
array23.push_back(array2);
array23.push_back(array1);
column2->insert(array22);
column2->insert(array23);
dynamic_subcolumns.create_root(vectorized::ColumnObject::Subcolumn(0, true, true /*root*/));
dynamic_subcolumns.add(vectorized::PathInData("v.f"),
vectorized::ColumnObject::Subcolumn {0, true});
dynamic_subcolumns.add(
vectorized::PathInData("v.a"),
vectorized::ColumnObject::Subcolumn {std::move(column2), array_type2, false, false});
dynamic_subcolumns.add(vectorized::PathInData("v.b"),
vectorized::ColumnObject::Subcolumn {0, true});
dynamic_subcolumns.add(
vectorized::PathInData("v.b.a"),
vectorized::ColumnObject::Subcolumn {std::move(column), array_type, false, false});
dynamic_subcolumns.add(vectorized::PathInData("v.c.d"),
vectorized::ColumnObject::Subcolumn {0, true});
std::cout << "dynamic_subcolumns size: " << dynamic_subcolumns.size() << std::endl;
auto obj = ColumnObject::create(5, std::move(dynamic_subcolumns));
for (auto& entry : obj->get_subcolumns()) {
std::cout << "entry path: " << entry->path.get_path() << std::endl;
std::cout << "entry type: " << entry->data.get_least_common_typeBase()->get_name()
<< std::endl;
std::cout << "entry dimension " << entry->data.get_dimensions() << std::endl;
bool inserted = obj->try_insert_default_from_nested(entry);
if (!inserted) {
entry->data.insert_default();
}
}
}
// unnest, clear_column_data
TEST_F(ColumnObjectTest, unnest) {
// 1. create an empty variant column
vectorized::ColumnObject::Subcolumns dynamic_subcolumns;
auto nested_col = ColumnObject::NESTED_TYPE->create_column();
Array array1 = {"amory", "commit"};
Array array2 = {"amory", "doris"};
std::cout << "array: " << array1.size() << std::endl;
nested_col->insert(array1);
nested_col->insert(array2);
std::cout << nested_col->size() << std::endl;
// 2. subcolumn path
dynamic_subcolumns.create_root(vectorized::ColumnObject::Subcolumn(2, true, true /*root*/));
dynamic_subcolumns.add(vectorized::PathInData("v.f"),
vectorized::ColumnObject::Subcolumn {2, true});
dynamic_subcolumns.add(vectorized::PathInData("v.a"),
vectorized::ColumnObject::Subcolumn {
std::move(nested_col), ColumnObject::NESTED_TYPE, true, false});
std::cout << "dynamic_subcolumns size: " << dynamic_subcolumns.size() << std::endl;
auto obj = ColumnObject::create(2, std::move(dynamic_subcolumns));
obj->set_num_rows(2);
EXPECT_TRUE(!obj->empty());
std::cout << obj->size() << std::endl;
Status st = obj->finalize(ColumnObject::FinalizeMode::WRITE_MODE);
EXPECT_TRUE(st.ok());
}
TEST_F(ColumnObjectTest, path_in_data_builder_test) {
// Create a ColumnObject with nested subcolumns
auto variant = ColumnObject::create(5);
// Test case 1: Build a nested path with PathInDataBuilder
{
PathInDataBuilder builder;
builder.append("v", false); // First part is not array
builder.append("a", true); // Second part is array
builder.append("b", true); // Third part is array
builder.append("c", false); // Fourth part is not array
PathInData path = builder.build();
EXPECT_TRUE(path.has_nested_part());
// Create field info for nested type
FieldInfo field_info;
field_info.scalar_type_id = TypeIndex::Int8;
field_info.have_nulls = true;
field_info.need_convert = false;
field_info.num_dimensions = 2; // Array of Array
// Test add_nested_subcolumn
variant->add_nested_subcolumn(path, field_info, 5);
// Verify the subcolumn was added correctly
const auto* subcolumn = variant->get_subcolumn(path);
EXPECT_TRUE(subcolumn != nullptr);
// then clear
variant->clear_column_data();
EXPECT_TRUE(variant->size() == 0);
}
}
TEST_F(ColumnObjectTest, get_field_info_all_types) {
// Test Int32
{
Int32 field(42);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int8);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 0);
}
// Test Int64
{
Int64 field(42);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int8);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 0);
}
// Test UInt64
{
Field field(UInt64(42));
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int8);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 0);
}
// Test Int64 with different ranges
// Test Int64 with different ranges
{
// Test Int64 <= Int8::max()
Int64 field1(std::numeric_limits<Int8>::max());
FieldInfo info1;
schema_util::get_field_info(field1, &info1);
EXPECT_EQ(info1.scalar_type_id, TypeIndex::Int8);
EXPECT_FALSE(info1.have_nulls);
EXPECT_FALSE(info1.need_convert);
EXPECT_EQ(info1.num_dimensions, 0);
// Test Int64 <= Int16::max()
Int64 field2(std::numeric_limits<Int16>::max());
FieldInfo info2;
schema_util::get_field_info(field2, &info2);
EXPECT_EQ(info2.scalar_type_id, TypeIndex::Int16);
EXPECT_FALSE(info2.have_nulls);
EXPECT_FALSE(info2.need_convert);
EXPECT_EQ(info2.num_dimensions, 0);
// Test Int64 <= Int32::max()
Int64 field3(std::numeric_limits<Int32>::max());
FieldInfo info3;
schema_util::get_field_info(field3, &info3);
EXPECT_EQ(info3.scalar_type_id, TypeIndex::Int32);
EXPECT_FALSE(info3.have_nulls);
EXPECT_FALSE(info3.need_convert);
EXPECT_EQ(info3.num_dimensions, 0);
// Test Int64 > Int32::max()
Int64 field4(static_cast<Int64>(std::numeric_limits<Int32>::max()) + 1);
FieldInfo info4;
schema_util::get_field_info(field4, &info4);
EXPECT_EQ(info4.scalar_type_id, TypeIndex::Int64);
EXPECT_FALSE(info4.have_nulls);
EXPECT_FALSE(info4.need_convert);
EXPECT_EQ(info4.num_dimensions, 0);
// Test Int64 <= Int8::min()
Int64 field5(std::numeric_limits<Int8>::min());
FieldInfo info5;
schema_util::get_field_info(field5, &info5);
EXPECT_EQ(info5.scalar_type_id, TypeIndex::Int8);
EXPECT_FALSE(info5.have_nulls);
EXPECT_FALSE(info5.need_convert);
EXPECT_EQ(info5.num_dimensions, 0);
// Test Int64 <= Int16::min()
Int64 field6(std::numeric_limits<Int16>::min());
FieldInfo info6;
schema_util::get_field_info(field6, &info6);
EXPECT_EQ(info6.scalar_type_id, TypeIndex::Int16);
EXPECT_FALSE(info6.have_nulls);
EXPECT_FALSE(info6.need_convert);
EXPECT_EQ(info6.num_dimensions, 0);
// Test Int64 <= Int32::min()
Int64 field7(std::numeric_limits<Int32>::min());
FieldInfo info7;
schema_util::get_field_info(field7, &info7);
EXPECT_EQ(info7.scalar_type_id, TypeIndex::Int32);
EXPECT_FALSE(info7.have_nulls);
EXPECT_FALSE(info7.need_convert);
EXPECT_EQ(info7.num_dimensions, 0);
// Test Int64 < Int32::min()
Int64 field8(static_cast<Int64>(std::numeric_limits<Int32>::min()) - 1);
FieldInfo info8;
schema_util::get_field_info(field8, &info8);
EXPECT_EQ(info8.scalar_type_id, TypeIndex::Int64);
}
// Test UInt64 with different ranges
{
// Test UInt64 <= UInt8::max()
UInt64 field1(std::numeric_limits<UInt8>::max());
FieldInfo info1;
schema_util::get_field_info(field1, &info1);
EXPECT_EQ(info1.scalar_type_id, TypeIndex::Int16);
EXPECT_FALSE(info1.have_nulls);
EXPECT_FALSE(info1.need_convert);
EXPECT_EQ(info1.num_dimensions, 0);
// Test UInt64 <= UInt16::max()
UInt64 field2(std::numeric_limits<UInt16>::max());
FieldInfo info2;
schema_util::get_field_info(field2, &info2);
EXPECT_EQ(info2.scalar_type_id, TypeIndex::Int32);
EXPECT_FALSE(info2.have_nulls);
EXPECT_FALSE(info2.need_convert);
EXPECT_EQ(info2.num_dimensions, 0);
// Test UInt64 <= UInt32::max()
UInt64 field3(std::numeric_limits<UInt32>::max());
FieldInfo info3;
schema_util::get_field_info(field3, &info3);
EXPECT_EQ(info3.scalar_type_id, TypeIndex::Int64);
EXPECT_FALSE(info3.have_nulls);
EXPECT_FALSE(info3.need_convert);
EXPECT_EQ(info3.num_dimensions, 0);
// Test UInt64 > UInt32::max()
UInt64 field4(static_cast<UInt64>(std::numeric_limits<UInt32>::max()) + 1);
FieldInfo info4;
schema_util::get_field_info(field4, &info4);
EXPECT_EQ(info4.scalar_type_id, TypeIndex::Int64);
}
// Test Float32
{
Field field(Float32(42.0f));
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Float64);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 0);
}
// Test Float64
{
Field field(Float64(42.0));
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Float64);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 0);
}
// Test String
{
Field field(String("test"));
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::String);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 0);
}
{
Slice slice("\"amory is cute\"");
JsonBinaryValue value;
Status st = value.from_json_string(slice.data, slice.size);
EXPECT_TRUE(st.ok()) << st.to_string();
JsonbField field(value.value(), value.size());
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::JSONB);
}
// Test Array
{
Array array;
array.push_back(Int64(1));
array.push_back(Int64(2));
Field field(array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int8);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 1);
}
// Test nested Array
{
Array inner_array;
inner_array.push_back(Int64(1));
inner_array.push_back(Int64(2));
Array outer_array;
outer_array.push_back(inner_array);
outer_array.push_back(inner_array);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int8);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
// Test Tuple
{
Tuple t1;
t1.push_back(Field("amory cute"));
t1.push_back(__int128_t(37));
t1.push_back(true);
FieldInfo info;
schema_util::get_field_info(t1, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::JSONB)
<< "info.scalar_type_id: " << getTypeName(info.scalar_type_id);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 0);
}
// Test Map will throw exception: Bad type of Field 25
{
Array k1 = {"a", "b", "c"};
Array v1 = {1, 2, 3};
Map map;
map.push_back(k1);
map.push_back(v1);
FieldInfo info;
EXPECT_ANY_THROW(schema_util::get_field_info(map, &info));
}
// Test VariantMap
{
VariantMap variant_map;
variant_map[PathInData("key1")] = Int64(1);
variant_map[PathInData("key2")] = String("value");
Field field(variant_map);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::VARIANT);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 0);
}
// Test Array with different types
{
Array array;
array.push_back(Int64(1));
Field field(array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int8)
<< "info.scalar_type_id: " << getTypeName(info.scalar_type_id);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 1);
}
// Test Array with nulls
{
Array array;
array.push_back(Int64(1));
array.push_back(Null());
Field field(array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int8);
EXPECT_TRUE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 1);
}
// Test nested Array with Int64 in different ranges
{// Test nested Array with Int64 <= Int8::max()
{Array inner_array;
inner_array.push_back(Int64(std::numeric_limits<Int8>::max()));
inner_array.push_back(Int64(std::numeric_limits<Int8>::max()));
Array outer_array;
outer_array.push_back(inner_array);
outer_array.push_back(inner_array);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int8);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
// Test nested Array with Int64 <= Int16::max()
{
Array inner_array;
inner_array.push_back(Int64(std::numeric_limits<Int16>::max()));
inner_array.push_back(Int64(std::numeric_limits<Int16>::max()));
Array outer_array;
outer_array.push_back(inner_array);
outer_array.push_back(inner_array);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int16);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
// Test nested Array with Int64 <= Int32::max()
{
Array inner_array;
inner_array.push_back(Int64(std::numeric_limits<Int32>::max()));
inner_array.push_back(Int64(std::numeric_limits<Int32>::max()));
Array outer_array;
outer_array.push_back(inner_array);
outer_array.push_back(inner_array);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int32);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
// Test nested Array with Int64 > Int32::max()
{
Array inner_array;
inner_array.push_back(Int64(static_cast<Int64>(std::numeric_limits<Int32>::max()) + 1));
inner_array.push_back(Int64(static_cast<Int64>(std::numeric_limits<Int32>::max()) + 1));
Array outer_array;
outer_array.push_back(inner_array);
outer_array.push_back(inner_array);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int64);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
} // namespace doris::vectorized
// Test nested Array with UInt64 in different ranges
{// Test nested Array with UInt64 <= UInt8::max()
{Array inner_array;
inner_array.push_back(UInt64(std::numeric_limits<UInt8>::max()));
inner_array.push_back(UInt64(std::numeric_limits<UInt8>::max()));
Array outer_array;
outer_array.push_back(inner_array);
outer_array.push_back(inner_array);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int16);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
// Test nested Array with UInt64 <= UInt16::max()
{
Array inner_array;
inner_array.push_back(UInt64(std::numeric_limits<UInt16>::max()));
inner_array.push_back(UInt64(std::numeric_limits<UInt16>::max()));
Array outer_array;
outer_array.push_back(inner_array);
outer_array.push_back(inner_array);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int32);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
// Test nested Array with UInt64 <= UInt32::max()
{
Array inner_array;
inner_array.push_back(UInt64(std::numeric_limits<UInt32>::max()));
inner_array.push_back(UInt64(std::numeric_limits<UInt32>::max()));
Array outer_array;
outer_array.push_back(inner_array);
outer_array.push_back(inner_array);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int64);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
// Test nested Array with UInt64 > UInt32::max()
{
Array inner_array;
inner_array.push_back(UInt64(static_cast<UInt64>(std::numeric_limits<UInt32>::max()) + 1));
inner_array.push_back(UInt64(static_cast<UInt64>(std::numeric_limits<UInt32>::max()) + 1));
Array outer_array;
outer_array.push_back(inner_array);
outer_array.push_back(inner_array);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int64);
EXPECT_FALSE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
}
// Test nested Array with mixed Int64 and UInt64
{
Array inner_array1;
inner_array1.push_back(Int64(std::numeric_limits<Int32>::max()));
inner_array1.push_back(Int64(std::numeric_limits<Int32>::max()));
Array inner_array2;
inner_array2.push_back(UInt64(std::numeric_limits<UInt32>::max()));
inner_array2.push_back(UInt64(std::numeric_limits<UInt32>::max()));
Array outer_array;
outer_array.push_back(inner_array1);
outer_array.push_back(inner_array2);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int64);
EXPECT_FALSE(info.have_nulls);
EXPECT_TRUE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
// Test nested Array with nulls
{
Array inner_array1;
inner_array1.push_back(Int64(1));
inner_array1.push_back(Int64(2));
Array inner_array2;
inner_array2.push_back(Int64(3));
inner_array2.push_back(Null());
Array outer_array;
outer_array.push_back(inner_array1);
outer_array.push_back(inner_array2);
Field field(outer_array);
FieldInfo info;
schema_util::get_field_info(field, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::Int8);
EXPECT_TRUE(info.have_nulls);
EXPECT_FALSE(info.need_convert);
EXPECT_EQ(info.num_dimensions, 2);
}
// Test Array with JsonbField
{
Slice slice("\"amory is cute\"");
JsonBinaryValue value;
Status st = value.from_json_string(slice.data, slice.size);
EXPECT_TRUE(st.ok()) << st.to_string();
JsonbField field(value.value(), value.size());
Array array;
array.push_back(field);
array.push_back(field);
FieldInfo info;
schema_util::get_field_info(array, &info);
EXPECT_EQ(info.scalar_type_id, TypeIndex::JSONB);
}
}
TEST_F(ColumnObjectTest, field_visitor) {
// Test replacing scalar values in a flat array
{
Array array;
array.push_back(Int64(1));
array.push_back(Int64(2));
array.push_back(Int64(3));
Field field(array);
Field replacement(Int64(42));
Field result = apply_visitor(FieldVisitorReplaceScalars(replacement, 0), field);
EXPECT_EQ(result.get<Int64>(), 42);
Field replacement1(Int64(42));
Field result1 = apply_visitor(FieldVisitorReplaceScalars(replacement, 1), field);
EXPECT_EQ(result1.get<Array>().size(), 3);
EXPECT_EQ(result1.get<Array>()[0].get<Int64>(), 42);
EXPECT_EQ(result1.get<Array>()[1].get<Int64>(), 42);
EXPECT_EQ(result1.get<Array>()[2].get<Int64>(), 42);
}
}
TEST_F(ColumnObjectTest, subcolumn_operations_coverage) {
// Test insert_range_from
{
auto src_column = VariantUtil::construct_basic_varint_column();
auto dst_column = VariantUtil::construct_dst_varint_column();
// Test normal case
auto* dst_subcolumn = const_cast<ColumnObject::Subcolumn*>(
&dst_column->get_subcolumns().get_root()->data);
dst_subcolumn->insert_range_from(src_column->get_subcolumns().get_root()->data, 0, 2);
// Test empty range
dst_subcolumn->insert_range_from(src_column->get_subcolumns().get_root()->data, 0, 0);
// Test with different types
auto src_column2 = VariantUtil::construct_advanced_varint_column();
dst_subcolumn->insert_range_from(src_column2->get_subcolumns().get_root()->data, 0, 1);
}
// Test parse_binary_from_sparse_column
{
auto column = VariantUtil::construct_basic_varint_column();
vectorized::Field res;
FieldInfo field_info;
// Test String type
{
std::string test_str = "test_data";
std::vector<char> binary_data;
size_t str_size = test_str.size();
binary_data.resize(sizeof(size_t) + test_str.size());
memcpy(binary_data.data(), &str_size, sizeof(size_t));
memcpy(binary_data.data() + sizeof(size_t), test_str.data(), test_str.size());
const char* data = binary_data.data();
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_STRING, data, res,
field_info);
EXPECT_EQ(res.get<String>(), "test_data");
}
// Test integer types
{
Int8 int8_val = 42;
const char* data = reinterpret_cast<const char*>(&int8_val);
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_TINYINT, data, res,
field_info);
EXPECT_EQ(res.get<Int8>(), 42);
}
{
Int16 int16_val = 12345;
const char* data = reinterpret_cast<const char*>(&int16_val);
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_SMALLINT, data, res,
field_info);
EXPECT_EQ(res.get<Int16>(), 12345);
}
{
Int32 int32_val = 123456789;
const char* data = reinterpret_cast<const char*>(&int32_val);
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_INT, data, res, field_info);
EXPECT_EQ(res.get<Int32>(), 123456789);
}
{
Int64 int64_val = 1234567890123456789LL;
const char* data = reinterpret_cast<const char*>(&int64_val);
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_BIGINT, data, res,
field_info);
EXPECT_EQ(res.get<Int64>(), 1234567890123456789LL);
}
// Test floating point types
{
Float32 float32_val = 3.1415901f;
const char* data = reinterpret_cast<const char*>(&float32_val);
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_FLOAT, data, res,
field_info);
EXPECT_FLOAT_EQ(res.get<Float32>(), 0);
}
{
Float64 float64_val = 3.141592653589793;
const char* data = reinterpret_cast<const char*>(&float64_val);
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_DOUBLE, data, res,
field_info);
EXPECT_DOUBLE_EQ(res.get<Float64>(), 3.141592653589793);
}
// Test JSONB type
{
std::string json_str = "{\"key\": \"value\"}";
std::vector<char> binary_data;
size_t json_size = json_str.size();
binary_data.resize(sizeof(size_t) + json_str.size());
memcpy(binary_data.data(), &json_size, sizeof(size_t));
memcpy(binary_data.data() + sizeof(size_t), json_str.data(), json_str.size());
const char* data = binary_data.data();
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_JSONB, data, res,
field_info);
}
// Test Array type
{
std::vector<char> binary_data;
size_t array_size = 2;
binary_data.resize(sizeof(size_t) + 2 * (sizeof(uint8_t) + sizeof(Int32)));
char* data_ptr = binary_data.data();
// Write array size
memcpy(data_ptr, &array_size, sizeof(size_t));
data_ptr += sizeof(size_t);
// Write first element (Int32)
*data_ptr++ = static_cast<uint8_t>(FieldType::OLAP_FIELD_TYPE_INT);
Int32 val1 = 42;
memcpy(data_ptr, &val1, sizeof(Int32));
data_ptr += sizeof(Int32);
// Write second element (Int32)
*data_ptr++ = static_cast<uint8_t>(FieldType::OLAP_FIELD_TYPE_INT);
Int32 val2 = 43;
memcpy(data_ptr, &val2, sizeof(Int32));
const char* data = binary_data.data();
parse_binary_from_sparse_column(FieldType::OLAP_FIELD_TYPE_ARRAY, data, res,
field_info);
const Array& array = res.get<Array>();
EXPECT_EQ(array.size(), 2);
EXPECT_EQ(array[0].get<Int32>(), 42);
EXPECT_EQ(array[1].get<Int32>(), 43);
}
}
// Test add_sub_column
{
auto column = VariantUtil::construct_basic_varint_column();
PathInData path("test.path");
// Test normal case
column->add_sub_column(path, 10);
// Test with existing path
column->add_sub_column(path, 10);
// Test with max subcolumns limit
for (int i = 0; i < 1000; i++) {
PathInData new_path("test.path." + std::to_string(i));
column->add_sub_column(new_path, 10);
}
}
// Test wrapp_array_nullable
{
auto column = VariantUtil::construct_advanced_varint_column();
EXPECT_TRUE(column->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
PathInData path("v.f");
auto* subcolumn = column->get_subcolumn(path);
subcolumn->wrapp_array_nullable();
EXPECT_TRUE(subcolumn->get_least_common_type()->is_nullable());
}
// Test is_empty_nested
{
vectorized::ColumnObject container_variant(1, true);
// v: {"k": [1,2,3]} ==》 [{"k": 1}, {"k": 2}, {"k": 3}]
// {"k": []} => [{}] vs {"k": null} -> [null]
// {"k": [4]} => [{"k": 4}]
auto col_arr =
ColumnArray::create(ColumnInt64::create(), ColumnArray::ColumnOffsets::create());
// Array array1 = {1, 2, 3};
// Array array2 = {4};
// col_arr->insert(array1);
// col_arr->insert(array2);
Array an;
an.push_back(Null());
col_arr->insert(an);
col_arr->insert(an);
col_arr->insert(an);
MutableColumnPtr nested_object = ColumnObject::create(
container_variant.max_subcolumns_count(), col_arr->get_data().size());
MutableColumnPtr offset = col_arr->get_offsets_ptr()->assume_mutable(); // [3, 3, 4]
auto* nested_object_ptr = assert_cast<ColumnObject*>(nested_object.get());
// flatten nested arrays
MutableColumnPtr flattend_column = col_arr->get_data_ptr()->assume_mutable();
DataTypePtr flattend_type = DataTypeFactory::instance().create_data_type(
FieldType::OLAP_FIELD_TYPE_BIGINT, 0, 0);
// add sub path without parent prefix
PathInData sub_path("k");
nested_object_ptr->add_sub_column(sub_path, std::move(flattend_column),
std::move(flattend_type));
nested_object = make_nullable(nested_object->get_ptr())->assume_mutable();
auto array =
make_nullable(ColumnArray::create(std::move(nested_object), std::move(offset)));
PathInData path("v.k");
container_variant.add_sub_column(path, array->assume_mutable(),
container_variant.NESTED_TYPE);
container_variant.set_num_rows(3);
for (auto subcolumn : container_variant.get_subcolumns()) {
if (subcolumn->data.is_root) {
// Nothing
EXPECT_TRUE(subcolumn->data.is_empty_nested(0));
continue;
}
for (int i = 0; i < 3; ++i) {
EXPECT_FALSE(subcolumn->data.is_empty_nested(i));
}
}
}
// Test is_empty_nested
{
auto v = ColumnObject::create(1);
auto sub_dt = make_nullable(std::make_unique<DataTypeArray>(
make_nullable(std::make_unique<DataTypeObject>(1))));
auto sub_col = sub_dt->create_column();
std::vector<std::pair<std::string, doris::vectorized::Field>> data;
Array an;
an.push_back(Null());
data.emplace_back("v.a", an);
// 2. subcolumn path
auto vf = VariantUtil::construct_variant_map(data);
v->try_insert(vf);
for (auto subcolumn : v->get_subcolumns()) {
for (int i = 0; i < v->size(); ++i) {
if (subcolumn->data.is_root) {
EXPECT_TRUE(subcolumn->data.is_empty_nested(i));
}
EXPECT_TRUE(subcolumn->data.is_empty_nested(i));
}
}
Status st = v->finalize(ColumnObject::FinalizeMode::WRITE_MODE);
EXPECT_TRUE(st.ok());
PathInData path("v.a");
for (auto sub : v->get_subcolumns()) {
if (sub->data.is_root) {
continue;
}
sub->kind = SubcolumnsTree<ColumnObject::Subcolumn>::Node::NESTED;
EXPECT_FALSE(v->try_insert_default_from_nested(sub));
}
}
}
}
>>>>>>> b4f01947a44 ([feature](semi-structure) support variant and index with many features)