blob: e0c9cb8247b17dd4ae64dd4ef1a7fe7d8137d318 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/core/block.h"
#include <concurrentqueue.h>
#include <gen_cpp/Descriptors_types.h>
#include <gen_cpp/Metrics_types.h>
#include <gen_cpp/segment_v2.pb.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
#include <gtest/gtest.h>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include "agent/be_exec_version_manager.h"
#include "common/config.h"
#include "common/object_pool.h"
#include "gen_cpp/data.pb.h"
#include "runtime/define_primitive_type.h"
#include "runtime/descriptor_helper.h"
#include "runtime/descriptors.h"
#include "testutil/column_helper.h"
#include "util/bitmap_value.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_complex.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
#include "vec/common/sip_hash.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_date.h"
#include "vec/data_types/data_type_date_or_datetime_v2.h"
#include "vec/data_types/data_type_date_time.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/data_type_struct.h"
#include "vec/runtime/vdatetime_value.h"
namespace doris {
using vectorized::Int32;
void block_to_pb(
const vectorized::Block& block, PBlock* pblock,
segment_v2::CompressionTypePB compression_type = segment_v2::CompressionTypePB::SNAPPY) {
size_t uncompressed_bytes = 0;
size_t compressed_bytes = 0;
int64_t compress_time = 0;
Status st =
block.serialize(BeExecVersionManager::get_newest_version(), pblock, &uncompressed_bytes,
&compressed_bytes, &compress_time, compression_type);
EXPECT_TRUE(st.ok());
// const column maybe uncompressed_bytes<compressed_bytes
// as the serialize_bytes add some additional byets: STREAMVBYTE_PADDING=16;
// EXPECT_TRUE(uncompressed_bytes >= compressed_bytes);
EXPECT_EQ(compressed_bytes, pblock->column_values().size());
const vectorized::ColumnWithTypeAndName& type_and_name =
block.get_columns_with_type_and_name()[0];
EXPECT_EQ(type_and_name.name, pblock->column_metas()[0].name());
}
void fill_block_with_array_int(vectorized::Block& block) {
auto off_column = vectorized::ColumnOffset64::create();
auto data_column = vectorized::ColumnInt32::create();
// init column array with [[1,2,3],[],[4],[5,6]]
std::vector<vectorized::ColumnArray::Offset64> offs = {0, 3, 3, 4, 6};
std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6};
for (size_t i = 1; i < offs.size(); ++i) {
off_column->insert_data((const char*)(&offs[i]), 0);
}
for (auto& v : vals) {
data_column->insert_data((const char*)(&v), 0);
}
auto column_array_ptr =
vectorized::ColumnArray::create(std::move(data_column), std::move(off_column));
vectorized::DataTypePtr nested_type(std::make_shared<vectorized::DataTypeInt32>());
vectorized::DataTypePtr array_type(std::make_shared<vectorized::DataTypeArray>(nested_type));
vectorized::ColumnWithTypeAndName test_array_int(std::move(column_array_ptr), array_type,
"test_array_int");
block.insert(test_array_int);
}
void fill_block_with_array_string(vectorized::Block& block) {
auto off_column = vectorized::ColumnOffset64::create();
auto data_column = vectorized::ColumnString::create();
// init column array with [["abc","de"],["fg"],[], [""]];
std::vector<vectorized::ColumnArray::Offset64> offs = {0, 2, 3, 3, 4};
std::vector<std::string> vals = {"abc", "de", "fg", ""};
for (size_t i = 1; i < offs.size(); ++i) {
off_column->insert_data((const char*)(&offs[i]), 0);
}
for (auto& v : vals) {
data_column->insert_data(v.data(), v.size());
}
auto column_array_ptr =
vectorized::ColumnArray::create(std::move(data_column), std::move(off_column));
vectorized::DataTypePtr nested_type(std::make_shared<vectorized::DataTypeString>());
vectorized::DataTypePtr array_type(std::make_shared<vectorized::DataTypeArray>(nested_type));
vectorized::ColumnWithTypeAndName test_array_string(std::move(column_array_ptr), array_type,
"test_array_string");
block.insert(test_array_string);
}
void serialize_and_deserialize_test(segment_v2::CompressionTypePB compression_type) {
// int
{
auto vec = vectorized::ColumnInt32::create();
auto& data = vec->get_data();
for (int i = 0; i < 1024; ++i) {
data.push_back(i);
}
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeInt32>());
vectorized::ColumnWithTypeAndName type_and_name(vec->get_ptr(), data_type, "test_int");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, compression_type);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, compression_type);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
// string
{
auto strcol = vectorized::ColumnString::create();
for (int i = 0; i < 1024; ++i) {
std::string is = std::to_string(i);
strcol->insert_data(is.c_str(), is.size());
}
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeString>());
vectorized::ColumnWithTypeAndName type_and_name(strcol->get_ptr(), data_type,
"test_string");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, compression_type);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, compression_type);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
// decimal
{
vectorized::DataTypePtr decimal_data_type(doris::vectorized::create_decimal(27, 9, true));
auto decimal_column = decimal_data_type->create_column();
auto& data = ((vectorized::ColumnDecimal128V2*)decimal_column.get())->get_data();
for (int i = 0; i < 1024; ++i) {
__int128_t value = __int128_t(i * pow(10, 9) + i * pow(10, 8));
data.push_back(value);
}
vectorized::ColumnWithTypeAndName type_and_name(decimal_column->get_ptr(),
decimal_data_type, "test_decimal");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, compression_type);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, compression_type);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
// bitmap
{
vectorized::DataTypePtr bitmap_data_type(std::make_shared<vectorized::DataTypeBitMap>());
auto bitmap_column = bitmap_data_type->create_column();
std::vector<BitmapValue>& container =
((vectorized::ColumnBitmap*)bitmap_column.get())->get_data();
for (int i = 0; i < 1024; ++i) {
BitmapValue bv;
for (int j = 0; j <= i; ++j) {
bv.add(j);
}
container.push_back(bv);
}
vectorized::ColumnWithTypeAndName type_and_name(bitmap_column->get_ptr(), bitmap_data_type,
"test_bitmap");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, compression_type);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, compression_type);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
// nullable string
{
vectorized::DataTypePtr string_data_type(std::make_shared<vectorized::DataTypeString>());
vectorized::DataTypePtr nullable_data_type(
std::make_shared<vectorized::DataTypeNullable>(string_data_type));
auto nullable_column = nullable_data_type->create_column();
((vectorized::ColumnNullable*)nullable_column.get())->insert_many_defaults(1024);
vectorized::ColumnWithTypeAndName type_and_name(nullable_column->get_ptr(),
nullable_data_type, "test_nullable");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, compression_type);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, compression_type);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
// nullable decimal
{
vectorized::DataTypePtr decimal_data_type(doris::vectorized::create_decimal(27, 9, true));
vectorized::DataTypePtr nullable_data_type(
std::make_shared<vectorized::DataTypeNullable>(decimal_data_type));
auto nullable_column = nullable_data_type->create_column();
((vectorized::ColumnNullable*)nullable_column.get())->insert_many_defaults(1024);
vectorized::ColumnWithTypeAndName type_and_name(
nullable_column->get_ptr(), nullable_data_type, "test_nullable_decimal");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, compression_type);
EXPECT_EQ(1, pblock.column_metas_size());
EXPECT_TRUE(pblock.column_metas()[0].has_decimal_param());
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, compression_type);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
// int with 4096 batch size
{
auto column_vector_int32 = vectorized::ColumnInt32::create();
auto column_nullable_vector = vectorized::make_nullable(std::move(column_vector_int32));
auto mutable_nullable_vector = std::move(*column_nullable_vector).mutate();
for (int i = 0; i < 4096; i++) {
mutable_nullable_vector->insert(vectorized::Field::create_field<TYPE_INT>(
vectorized::cast_to_nearest_field_type(i)));
}
auto data_type = vectorized::make_nullable(std::make_shared<vectorized::DataTypeInt32>());
vectorized::ColumnWithTypeAndName type_and_name(mutable_nullable_vector->get_ptr(),
data_type, "test_nullable_int32");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, compression_type);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, compression_type);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
// array int and array string
{
vectorized::Block block;
fill_block_with_array_int(block);
fill_block_with_array_string(block);
PBlock pblock;
block_to_pb(block, &pblock, compression_type);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, compression_type);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
EXPECT_GT(block.dump_data_json().size(), 1);
}
}
void serialize_and_deserialize_test_one() {
// const int
{
auto vec = vectorized::ColumnInt32::create();
auto& data = vec->get_data();
data.push_back(111);
auto const_column = vectorized::ColumnConst::create(vec->get_ptr(), 1);
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeInt32>());
vectorized::ColumnWithTypeAndName type_and_name(const_column->get_ptr(), data_type,
"test_int");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
}
void serialize_and_deserialize_test_int() {
// const int
{
auto vec = vectorized::ColumnInt32::create();
auto& data = vec->get_data();
data.push_back(111);
auto const_column = vectorized::ColumnConst::create(vec->get_ptr(), 10);
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeInt32>());
vectorized::ColumnWithTypeAndName type_and_name(const_column->get_ptr(), data_type,
"test_int");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
// int
{
auto vec = vectorized::ColumnInt32::create();
auto& data = vec->get_data();
for (int i = 0; i < 1024; ++i) {
data.push_back(i);
}
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeInt32>());
vectorized::ColumnWithTypeAndName type_and_name(vec->get_ptr(), data_type, "test_int");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
}
void serialize_and_deserialize_test_long() {
// const long
{
auto vec = vectorized::ColumnInt64::create();
auto& data = vec->get_data();
data.push_back(111);
auto const_column = vectorized::ColumnConst::create(vec->get_ptr(), 10);
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeInt64>());
vectorized::ColumnWithTypeAndName type_and_name(const_column->get_ptr(), data_type,
"test_int");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
// long
{
auto vec = vectorized::ColumnInt64::create();
auto& data = vec->get_data();
for (int i = 0; i < 1024; ++i) {
data.push_back(i);
}
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeInt64>());
vectorized::ColumnWithTypeAndName type_and_name(vec->get_ptr(), data_type, "test_int");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
}
void serialize_and_deserialize_test_string() {
// const_string
{
auto strcol = vectorized::ColumnString::create();
std::string val = "doris";
strcol->insert_data(val.c_str(), val.size());
auto const_column = vectorized::ColumnConst::create(strcol->get_ptr(), 10);
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeString>());
vectorized::ColumnWithTypeAndName type_and_name(const_column->get_ptr(), data_type,
"test_string");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::SNAPPY);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::SNAPPY);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
// string
{
auto strcol = vectorized::ColumnString::create();
for (int i = 0; i < 1024; ++i) {
std::string is = std::to_string(i);
strcol->insert_data(is.c_str(), is.size());
}
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeString>());
vectorized::ColumnWithTypeAndName type_and_name(strcol->get_ptr(), data_type,
"test_string");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::SNAPPY);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::SNAPPY);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
}
void serialize_and_deserialize_test_nullable() {
// nullable(const int)
{
auto vec = vectorized::ColumnInt32::create();
auto& data = vec->get_data();
data.push_back(111);
auto nullable_column = vectorized::make_nullable(vec->get_ptr());
auto const_column = vectorized::ColumnConst::create(nullable_column, 10);
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeInt32>());
auto nullable_data_type = vectorized::make_nullable(data_type);
vectorized::ColumnWithTypeAndName type_and_name(const_column->get_ptr(), nullable_data_type,
"test_int");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
// nullable(int)
{
auto vec = vectorized::ColumnInt32::create();
auto& data = vec->get_data();
for (int i = 0; i < 1024; ++i) {
data.push_back(i);
}
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeInt32>());
vectorized::ColumnWithTypeAndName type_and_name(make_nullable(vec->get_ptr()),
make_nullable(data_type), "test_int");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
// nullable(const_string)
{
auto strcol = vectorized::ColumnString::create();
std::string val = "doris";
strcol->insert_data(val.c_str(), val.size());
auto const_column = vectorized::ColumnConst::create(strcol->get_ptr(), 10);
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeString>());
vectorized::ColumnWithTypeAndName type_and_name(make_nullable(const_column->get_ptr()),
make_nullable(data_type), "test_string");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::SNAPPY);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::SNAPPY);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
// nullable(string)
{
auto strcol = vectorized::ColumnString::create();
for (int i = 0; i < 1024; ++i) {
std::string is = std::to_string(i);
strcol->insert_data(is.c_str(), is.size());
}
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeString>());
vectorized::ColumnWithTypeAndName type_and_name(make_nullable(strcol->get_ptr()),
make_nullable(data_type), "test_string");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::SNAPPY);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::SNAPPY);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
}
void serialize_and_deserialize_test_decimal() {
// const decimal
{
auto vec = vectorized::ColumnDecimal32::create(0, 3);
vectorized::Decimal<int> value = 111234;
vec->insert_value(value);
auto const_column = vectorized::ColumnConst::create(vec->get_ptr(), 10);
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeDecimal32>(6, 3));
vectorized::ColumnWithTypeAndName type_and_name(const_column->get_ptr(), data_type,
"test_int");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
// decimal
{
auto vec = vectorized::ColumnDecimal32::create(0, 3);
for (int i = 0; i < 1024; ++i) {
vectorized::Decimal<int> value = 111000 + i;
vec->insert_value(value);
}
vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeDecimal32>(6, 3));
vectorized::ColumnWithTypeAndName type_and_name(vec->get_ptr(), data_type, "test_int");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(block.dump_data(), block2.dump_data());
EXPECT_EQ(block.dump_data_json(), block2.dump_data_json());
EXPECT_EQ(s1, s2);
}
}
void serialize_and_deserialize_test_bitmap() {
// const bitmap
{
vectorized::DataTypePtr bitmap_data_type(std::make_shared<vectorized::DataTypeBitMap>());
auto bitmap_column = bitmap_data_type->create_column();
std::vector<BitmapValue>& container =
((vectorized::ColumnBitmap*)bitmap_column.get())->get_data();
BitmapValue bv;
for (int j = 0; j <= 2; ++j) {
bv.add(j);
}
container.push_back(bv);
auto const_column = vectorized::ColumnConst::create(bitmap_column->get_ptr(), 10);
vectorized::ColumnWithTypeAndName type_and_name(const_column->get_ptr(), bitmap_data_type,
"test_bitmap");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
std::string bb1 = block.dump_data(0, 1024);
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
std::string bb2 = block2.dump_data(0, 1024);
EXPECT_EQ(bb1, bb2);
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
// bitmap
{
vectorized::DataTypePtr bitmap_data_type(std::make_shared<vectorized::DataTypeBitMap>());
auto bitmap_column = bitmap_data_type->create_column();
std::vector<BitmapValue>& container =
((vectorized::ColumnBitmap*)bitmap_column.get())->get_data();
for (int i = 0; i < 1024; ++i) {
BitmapValue bv;
for (int j = 0; j <= i; ++j) {
bv.add(j);
}
container.push_back(bv);
}
vectorized::ColumnWithTypeAndName type_and_name(bitmap_column->get_ptr(), bitmap_data_type,
"test_bitmap");
vectorized::Block block({type_and_name});
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::LZ4);
std::string s1 = pblock.DebugString();
std::string bb1 = block.dump_data(0, 1024);
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
std::string bb2 = block2.dump_data(0, 1024);
EXPECT_EQ(bb1, bb2);
EXPECT_EQ(block.dump_data_json(0, 1024), block2.dump_data_json(0, 1024));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::LZ4);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
}
}
void serialize_and_deserialize_test_array() {
// array int and array string
{
vectorized::Block block;
fill_block_with_array_int(block);
fill_block_with_array_string(block);
PBlock pblock;
block_to_pb(block, &pblock, segment_v2::CompressionTypePB::SNAPPY);
std::string s1 = pblock.DebugString();
vectorized::Block block2;
size_t uncompress_size = 0;
int64_t uncompressed_time = 0;
static_cast<void>(block2.deserialize(pblock, &uncompress_size, &uncompressed_time));
PBlock pblock2;
block_to_pb(block2, &pblock2, segment_v2::CompressionTypePB::SNAPPY);
std::string s2 = pblock2.DebugString();
EXPECT_EQ(s1, s2);
EXPECT_GT(block.dump_data_json().size(), 1);
}
}
TEST(BlockTest, SerializeAndDeserializeBlock) {
serialize_and_deserialize_test(segment_v2::CompressionTypePB::SNAPPY);
serialize_and_deserialize_test(segment_v2::CompressionTypePB::LZ4);
serialize_and_deserialize_test_string();
serialize_and_deserialize_test_int();
serialize_and_deserialize_test_nullable();
serialize_and_deserialize_test_decimal();
serialize_and_deserialize_test_bitmap();
serialize_and_deserialize_test_array();
serialize_and_deserialize_test_long();
serialize_and_deserialize_test_one();
}
TEST(BlockTest, dump_data) {
auto vec = vectorized::ColumnInt32::create();
auto& int32_data = vec->get_data();
for (int i = 0; i < 1024; ++i) {
int32_data.push_back(i);
}
vectorized::DataTypePtr int32_type(std::make_shared<vectorized::DataTypeInt32>());
vectorized::ColumnWithTypeAndName test_int(vec->get_ptr(), int32_type, "test_int");
auto strcol = vectorized::ColumnString::create();
for (int i = 0; i < 1024; ++i) {
std::string is = std::to_string(i);
strcol->insert_data(is.c_str(), is.size());
}
vectorized::DataTypePtr string_type(std::make_shared<vectorized::DataTypeString>());
vectorized::ColumnWithTypeAndName test_string(strcol->get_ptr(), string_type, "test_string");
vectorized::DataTypePtr decimal_data_type(doris::vectorized::create_decimal(27, 9, true));
auto decimal_column = decimal_data_type->create_column();
auto& decimal_data = ((vectorized::ColumnDecimal128V2*)decimal_column.get())->get_data();
for (int i = 0; i < 1024; ++i) {
__int128_t value = __int128_t(i * pow(10, 9) + i * pow(10, 8));
decimal_data.push_back(value);
}
vectorized::ColumnWithTypeAndName test_decimal(decimal_column->get_ptr(), decimal_data_type,
"test_decimal");
auto column_vector_int32 = vectorized::ColumnInt32::create();
auto column_nullable_vector = vectorized::make_nullable(std::move(column_vector_int32));
auto mutable_nullable_vector = std::move(*column_nullable_vector).mutate();
for (int i = 0; i < 4096; i++) {
mutable_nullable_vector->insert(vectorized::Field::create_field<TYPE_INT>(
vectorized::cast_to_nearest_field_type(i)));
}
auto nint32_type = vectorized::make_nullable(std::make_shared<vectorized::DataTypeInt32>());
vectorized::ColumnWithTypeAndName test_nullable_int32(mutable_nullable_vector->get_ptr(),
nint32_type, "test_nullable_int32");
auto column_vector_date = vectorized::ColumnDate::create();
auto& date_data = column_vector_date->get_data();
for (int i = 0; i < 1024; ++i) {
VecDateTimeValue value;
value.from_date_int64(20210501);
date_data.push_back(*reinterpret_cast<vectorized::Int64*>(&value));
}
vectorized::DataTypePtr date_type(std::make_shared<vectorized::DataTypeDate>());
vectorized::ColumnWithTypeAndName test_date(column_vector_date->get_ptr(), date_type,
"test_date");
auto column_vector_datetime = vectorized::ColumnDateTime::create();
auto& datetime_data = column_vector_datetime->get_data();
for (int i = 0; i < 1024; ++i) {
VecDateTimeValue value;
value.from_date_int64(20210501080910);
datetime_data.push_back(*reinterpret_cast<vectorized::Int64*>(&value));
}
vectorized::DataTypePtr datetime_type(std::make_shared<vectorized::DataTypeDateTime>());
vectorized::ColumnWithTypeAndName test_datetime(column_vector_datetime->get_ptr(),
datetime_type, "test_datetime");
auto column_vector_date_v2 = vectorized::ColumnDateV2::create();
auto& date_v2_data = column_vector_date_v2->get_data();
for (int i = 0; i < 1024; ++i) {
DateV2Value<DateV2ValueType> value;
value.unchecked_set_time(2022, 6, 6, 0, 0, 0, 0);
date_v2_data.push_back(*reinterpret_cast<vectorized::UInt32*>(&value));
}
vectorized::DataTypePtr date_v2_type(std::make_shared<vectorized::DataTypeDateV2>());
vectorized::ColumnWithTypeAndName test_date_v2(column_vector_date_v2->get_ptr(), date_v2_type,
"test_datev2");
vectorized::Block block({test_int, test_string, test_decimal, test_nullable_int32, test_date,
test_datetime, test_date_v2});
EXPECT_GT(block.dump_data().size(), 1);
EXPECT_GT(block.dump_data_json().size(), 1);
// test dump array int and array string
vectorized::Block block1;
fill_block_with_array_int(block1);
fill_block_with_array_string(block1);
// Note: here we should set 'row_num' in dump_data
EXPECT_GT(block1.dump_data(10).size(), 1);
EXPECT_GT(block.dump_data_json(10).size(), 1);
vectorized::IColumn::Filter filter;
int size = block1.rows() / 2;
for (int i = 0; i < block1.rows(); i++) {
filter.push_back(i % 2);
}
vectorized::Block::filter_block_internal(&block1, filter, block1.columns());
EXPECT_EQ(size, block1.rows());
}
TEST(BlockTest, merge_with_shared_columns) {
auto vec = vectorized::ColumnInt32::create();
auto& int32_data = vec->get_data();
for (int i = 0; i < 1024; ++i) {
int32_data.push_back(i);
}
vectorized::DataTypePtr int32_type(std::make_shared<vectorized::DataTypeInt32>());
vectorized::ColumnWithTypeAndName test_k1(vec->get_ptr(), int32_type, "k1");
auto strcol = vectorized::ColumnString::create();
for (int i = 0; i < 1024; ++i) {
std::string is = std::to_string(i);
strcol->insert_data(is.c_str(), is.size());
}
vectorized::DataTypePtr string_type(std::make_shared<vectorized::DataTypeString>());
vectorized::ColumnWithTypeAndName test_v1(strcol->get_ptr(), string_type, "v1");
vectorized::ColumnWithTypeAndName test_v2(strcol->get_ptr(), string_type, "v2");
vectorized::Block src_block({test_k1, test_v1, test_v2});
auto vec_temp = vectorized::ColumnInt32::create();
auto& int32_data_temp = vec_temp->get_data();
for (int i = 0; i < 10; ++i) {
int32_data_temp.push_back(i);
}
vectorized::ColumnWithTypeAndName test_k1_temp(vec_temp->get_ptr(), int32_type, "k1");
auto strcol_temp = vectorized::ColumnString::create();
for (int i = 0; i < 10; ++i) {
std::string is = std::to_string(i);
strcol_temp->insert_data(is.c_str(), is.size());
}
vectorized::ColumnWithTypeAndName test_v1_temp(strcol_temp->get_ptr(), string_type, "v1");
vectorized::ColumnWithTypeAndName test_v2_temp(strcol_temp->get_ptr(), string_type, "v2");
vectorized::Block temp_block({test_k1_temp, test_v1_temp, test_v2_temp});
vectorized::MutableBlock mutable_block(&src_block);
auto status = mutable_block.merge(temp_block);
ASSERT_TRUE(status.ok());
src_block.set_columns(std::move(mutable_block.mutable_columns()));
for (auto& column : src_block.get_columns()) {
EXPECT_EQ(1034, column->size());
}
EXPECT_EQ(1034, src_block.rows());
}
namespace vectorized {
template <typename T>
void clear_blocks(moodycamel::ConcurrentQueue<T>& blocks,
RuntimeProfile::Counter* memory_used_counter = nullptr);
}
TEST(BlockTest, clear_blocks) {
{
moodycamel::ConcurrentQueue<vectorized::Block> queue;
RuntimeProfile::Counter counter(TUnit::BYTES);
auto block1 = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
auto block2 = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
counter.update(block1.allocated_bytes());
counter.update(block2.allocated_bytes());
ASSERT_EQ(counter.value(), block1.allocated_bytes() + block2.allocated_bytes());
queue.enqueue(std::move(block1));
queue.enqueue(std::move(block2));
vectorized::clear_blocks(queue, &counter);
EXPECT_EQ(counter.value(), 0);
ASSERT_FALSE(queue.try_dequeue(block1));
}
{
moodycamel::ConcurrentQueue<std::unique_ptr<vectorized::Block>> queue;
RuntimeProfile::Counter counter(TUnit::BYTES);
auto block1 = std::make_unique<vectorized::Block>(
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3}));
auto block2 = std::make_unique<vectorized::Block>(
vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({4, 5, 6}));
counter.update(block1->allocated_bytes());
counter.update(block2->allocated_bytes());
ASSERT_EQ(counter.value(), block1->allocated_bytes() + block2->allocated_bytes());
queue.enqueue(std::move(block1));
queue.enqueue(std::move(block2));
vectorized::clear_blocks(queue, &counter);
EXPECT_EQ(counter.value(), 0);
ASSERT_FALSE(queue.try_dequeue(block1));
}
}
TEST(BlockTest, replace_by_position) {
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
block.insert(0, vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
{"a", "b", "c"}));
auto col = vectorized::ColumnHelper::create_column<vectorized::DataTypeInt32>({4, 5, 6});
block.replace_by_position(0, std::move(col));
ASSERT_EQ(block.get_by_position(0).column->get_int(0), 4);
ASSERT_EQ(block.get_by_position(0).column->get_int(1), 5);
ASSERT_EQ(block.get_by_position(0).column->get_int(2), 6);
}
TEST(BlockTest, compare_at) {
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
auto ret = block.compare_at(0, 0, block, -1);
ASSERT_EQ(ret, 0);
ret = block.compare_at(0, 1, block, -1);
ASSERT_LT(ret, 0);
ret = block.compare_at(2, 1, block, -1);
ASSERT_GT(ret, 0);
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeInt32>(
{2, 2, 1}));
std::vector<uint32_t> compare_columns {1, 0};
ret = block.compare_at(0, 1, &compare_columns, block, -1);
ASSERT_LT(ret, 0);
ret = block.compare_at(1, 2, &compare_columns, block, -1);
ASSERT_GT(ret, 0);
}
TEST(BlockTest, dump) {
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
auto types = block.dump_types();
ASSERT_TRUE(types.find("INT") != std::string::npos);
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
types = block.dump_types();
ASSERT_TRUE(types.find("String") != std::string::npos);
block.insert(
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({}));
types = block.dump_types();
ASSERT_TRUE(types.find("DOUBLE") != std::string::npos);
auto names = block.get_names();
for (const auto& name : names) {
ASSERT_EQ(name, "column");
}
auto dumped_names = block.dump_names();
ASSERT_TRUE(dumped_names.find(", ") != std::string::npos);
ASSERT_TRUE(dumped_names.find("column") != std::string::npos);
block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
{"a", "b", "c"}));
auto dumped_line = block.dump_one_line(1, 2);
ASSERT_EQ(dumped_line, "2 b");
}
TEST(BlockTest, merge_impl_ignore_overflow) {
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
block.insert(
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({}));
auto block2 = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
auto mutable_block = vectorized::MutableBlock::build_mutable_block(&block);
auto st = mutable_block.merge_ignore_overflow(std::move(block2));
ASSERT_FALSE(st.ok());
block2 = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
block2.insert(
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
block2.insert(
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat32>({}));
EXPECT_ANY_THROW(st = mutable_block.merge_impl_ignore_overflow(std::move(block2)));
}
TEST(BlockTest, merge_impl) {
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
block.insert(
vectorized::ColumnHelper::create_nullable_column_with_name<vectorized::DataTypeFloat64>(
{1.0}, {0}));
block.get_by_position(2).column = nullptr;
auto mutable_block = vectorized::MutableBlock::build_mutable_block(nullptr);
auto st = mutable_block.merge(std::move(block));
ASSERT_TRUE(st.ok()) << st.to_string();
auto block2 = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
block2.insert(
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({}));
st = mutable_block.merge_impl(std::move(block2));
ASSERT_FALSE(st);
block2 = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1});
block2.insert(
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({"a"}));
block2.insert(
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({}));
EXPECT_ANY_THROW(st = mutable_block.merge_impl(std::move(block2)));
}
TEST(BlockTest, ctor) {
TDescriptorTableBuilder builder;
TTupleDescriptorBuilder tuple_builder;
tuple_builder
.add_slot(
TSlotDescriptorBuilder().type(PrimitiveType::TYPE_INT).nullable(false).build())
.add_slot(TSlotDescriptorBuilder().type(PrimitiveType::TYPE_STRING).build());
tuple_builder.build(&builder);
auto t_table = builder.desc_tbl();
ObjectPool pool;
DescriptorTbl* tbl;
auto st = DescriptorTbl::create(&pool, t_table, &tbl);
ASSERT_TRUE(st.ok()) << st.to_string();
auto slots = tbl->get_tuple_descriptor(0)->slots();
std::vector<SlotDescriptor> slot_descs;
for (auto* slot : slots) {
slot_descs.push_back(*slot);
}
vectorized::Block block(slot_descs, 1);
ASSERT_EQ(block.columns(), 2);
ASSERT_EQ(block.get_by_position(0).type->get_primitive_type(), TYPE_INT);
ASSERT_TRUE(block.get_by_position(1).type->is_nullable());
}
TEST(BlockTest, insert_erase) {
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
auto column_with_name =
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({});
EXPECT_ANY_THROW(block.insert(2, column_with_name));
block.insert(0, column_with_name);
ASSERT_EQ(block.columns(), 2);
ASSERT_EQ(block.get_by_position(0).type->get_primitive_type(), TYPE_STRING);
EXPECT_ANY_THROW(block.insert(3, std::move(column_with_name)));
column_with_name =
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeFloat64>({});
block.insert(0, std::move(column_with_name));
ASSERT_EQ(block.columns(), 3);
ASSERT_EQ(block.get_by_position(0).type->get_primitive_type(), TYPE_DOUBLE);
std::set<size_t> positions = {0, 2};
block.erase(positions);
ASSERT_EQ(block.columns(), 1);
ASSERT_EQ(block.get_by_position(0).type->get_primitive_type(), TYPE_STRING);
block.erase_tail(0);
ASSERT_EQ(block.columns(), 0);
column_with_name =
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({});
block.insert(0, column_with_name);
ASSERT_EQ(block.columns(), 1);
block.insert(0, column_with_name);
EXPECT_EQ(block.get_position_by_name("column"), 0);
{
// test const block
const auto const_block = block;
EXPECT_EQ(const_block.get_position_by_name("column2"), -1);
EXPECT_EQ(const_block.get_position_by_name("column"), 0);
}
block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
block.insert({nullptr, std::make_shared<vectorized::DataTypeString>(), "col1"});
block.insert({nullptr, std::make_shared<vectorized::DataTypeString>(), "col2"});
ASSERT_EQ(block.columns(), 3);
}
TEST(BlockTest, check_number_of_rows) {
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({});
block.insert({nullptr, nullptr, "null_col"});
EXPECT_NO_THROW(block.check_number_of_rows(true));
EXPECT_ANY_THROW(block.check_number_of_rows(false));
block.insert(
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({"abc"}));
EXPECT_ANY_THROW(block.check_number_of_rows(true));
EXPECT_ANY_THROW(block.columns_bytes());
ASSERT_GT(block.allocated_bytes(), 0);
}
TEST(BlockTest, clear_column_mem_not_keep) {
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
{"abc", "efg", "hij"}));
std::vector<bool> column_keep_flags {false, true};
auto block2 = block;
block2.clear_column_mem_not_keep(column_keep_flags, false);
ASSERT_EQ(block2.get_by_position(0).column->size(), 0);
ASSERT_EQ(block2.get_by_position(1).column->size(), 3);
block.clear_column_mem_not_keep(column_keep_flags, true);
ASSERT_EQ(block.get_by_position(0).column->size(), 3);
ASSERT_EQ(block.get_by_position(1).column->size(), 3);
}
TEST(BlockTest, filter) {
{
// normal filter
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
{"abc", "efg", "hij"}));
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeUInt8>(
{1, 1, 1}));
auto st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(block.rows(), 3);
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeUInt8>(
{0, 1, 0}));
st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(block.rows(), 1);
block.insert(
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeUInt8>({0}));
auto block2 = block;
st = vectorized::Block::filter_block(&block2, {0, 1}, 2, 2);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(block2.rows(), 0);
}
{
// nullable filter column
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
{"abc", "efg", "hij"}));
block.insert(vectorized::ColumnHelper::create_nullable_column_with_name<
vectorized::DataTypeUInt8>({1, 1, 1}, {0, 1, 0}));
auto st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(block.rows(), 2);
}
{
// const filter column
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
{"abc", "efg", "hij"}));
block.insert({vectorized::ColumnConst::create(vectorized::ColumnUInt8::create(1, 1), 3),
std::make_shared<vectorized::DataTypeUInt8>(), "filter"});
auto st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(block.rows(), 3);
block.insert({vectorized::ColumnConst::create(vectorized::ColumnUInt8::create(1, 0), 3),
std::make_shared<vectorized::DataTypeUInt8>(), "filter2"});
st = vectorized::Block::filter_block(&block, {0, 1}, 2, 2);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(block.rows(), 0);
}
{
// append_to_block_by_selector
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
{"abc", "efg", "hij"}));
vectorized::IColumn::Selector selector(2);
selector[0] = 0;
selector[1] = 2;
vectorized::MutableBlock mutable_block(block.clone_empty());
auto st = block.append_to_block_by_selector(&mutable_block, selector);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(mutable_block.rows(), 2);
ASSERT_EQ(mutable_block.mutable_columns()[0]->get_int(0), 1);
ASSERT_EQ(mutable_block.mutable_columns()[0]->get_int(1), 3);
}
}
TEST(BlockTest, add_rows) {
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
{"abc", "efg", "hij"}));
auto block2 = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({4});
block2.insert(
vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>({"lmn"}));
vectorized::MutableBlock mutable_block(&block);
mutable_block.add_row(&block2, 0);
ASSERT_EQ(mutable_block.rows(), 4);
vectorized::MutableBlock mutable_block2(&block2);
auto st = mutable_block2.add_rows(&block, {0, 2});
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(mutable_block2.rows(), 3);
}
TEST(BlockTest, others) {
auto block = vectorized::ColumnHelper::create_block<vectorized::DataTypeInt32>({1, 2, 3});
block.insert(vectorized::ColumnHelper::create_column_with_name<vectorized::DataTypeString>(
{"abc", "efg", "hij"}));
block.shrink_char_type_column_suffix_zero({1, 2});
SipHash hash;
block.update_hash(hash);
auto hash_value = hash.get64();
ASSERT_EQ(hash_value, 15311230698310402164ULL);
auto column = vectorized::ColumnHelper::create_column<vectorized::DataTypeString>(
{"lmn", "opq", "rst"});
block.replace_by_position(1, std::move(column));
SipHash hash2;
block.update_hash(hash2);
hash_value = hash2.get64();
ASSERT_EQ(hash_value, 16980601693179295243ULL);
std::vector<int> result_column_ids = {0, 1};
block.shuffle_columns(result_column_ids);
ASSERT_EQ(block.get_by_position(0).type->get_primitive_type(), TYPE_INT);
ASSERT_EQ(block.get_by_position(1).type->get_primitive_type(), TYPE_STRING);
result_column_ids = {1, 0};
block.shuffle_columns(result_column_ids);
ASSERT_EQ(block.get_by_position(1).type->get_primitive_type(), TYPE_INT);
ASSERT_EQ(block.get_by_position(0).type->get_primitive_type(), TYPE_STRING);
result_column_ids = {1};
block.shuffle_columns(result_column_ids);
ASSERT_EQ(block.get_by_position(0).type->get_primitive_type(), TYPE_INT);
ASSERT_EQ(block.columns(), 1);
vectorized::MutableBlock mutable_block(&block);
auto dumped = mutable_block.dump_data();
ASSERT_GT(dumped.size(), 0) << "Dumped data size: " << dumped.size();
auto dumped_json = mutable_block.dump_data_json();
ASSERT_GT(dumped_json.size(), 0) << "Dumped data json size: " << dumped_json.size();
mutable_block.clear_column_data();
ASSERT_EQ(mutable_block.get_column_by_position(0)->size(), 0);
auto dumped_names = mutable_block.dump_names();
ASSERT_TRUE(dumped_names.find("column") != std::string::npos);
block.clear_names();
dumped_names = block.dump_names();
ASSERT_TRUE(dumped_names.empty()) << "Dumped names: " << dumped_names;
}
} // namespace doris