// blob: faca751270bf4ec9ae5e79c6d4d99a09b4b9d2c5 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gen_cpp/olap_file.pb.h>
#include <gen_cpp/segment_v2.pb.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "common/config.h"
#include "io/fs/local_file_system.h"
#include "olap/cumulative_compaction.h"
#include "olap/rowset/rowset_factory.h"
#include "olap/rowset/segment_v2/segment.h"
#include "olap/rowset/segment_v2/segment_writer.h"
#include "olap/storage_engine.h"
#include "olap/tablet_meta.h"
#include "olap/tablet_reader.h"
#include "olap/tablet_schema.h"
#include "runtime/exec_env.h"
#include "util/key_util.h"
#include "vec/olap/block_reader.h"
namespace doris {
// Scratch directory used by every test in this file. The fixture deletes and recreates it in
// SetUp() and deletes it again in TearDown(). Declared const: nothing in this file reassigns it,
// and the `k` prefix already advertises it as a constant.
static const std::string kSegmentDir = "./ut_dir/segments_key_bounds_truncation_test";
class SegmentsKeyBoundsTruncationTest : public testing::Test {
private:
StorageEngine* engine_ref = nullptr;
std::string absolute_dir;
std::unique_ptr<DataDir> data_dir;
int cur_version {2};
public:
void SetUp() override {
auto st = io::global_local_filesystem()->delete_directory(kSegmentDir);
ASSERT_TRUE(st.ok()) << st;
st = io::global_local_filesystem()->create_directory(kSegmentDir);
ASSERT_TRUE(st.ok()) << st;
doris::EngineOptions options;
auto engine = std::make_unique<StorageEngine>(options);
engine_ref = engine.get();
data_dir = std::make_unique<DataDir>(*engine_ref, kSegmentDir);
ASSERT_TRUE(data_dir->update_capacity().ok());
ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
}
void TearDown() override {
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kSegmentDir).ok());
engine_ref = nullptr;
ExecEnv::GetInstance()->set_storage_engine(nullptr);
}
void disable_segments_key_bounds_truncation() {
config::segments_key_bounds_truncation_threshold = -1;
}
TabletSchemaSPtr create_schema(int varchar_length) {
TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
TabletSchemaPB tablet_schema_pb;
tablet_schema_pb.set_keys_type(DUP_KEYS);
tablet_schema_pb.set_num_short_key_columns(1);
tablet_schema_pb.set_num_rows_per_row_block(1024);
tablet_schema_pb.set_compress_kind(COMPRESS_NONE);
tablet_schema_pb.set_next_column_unique_id(4);
ColumnPB* column_1 = tablet_schema_pb.add_column();
column_1->set_unique_id(1);
column_1->set_name("k1");
column_1->set_type("VARCHAR");
column_1->set_is_key(true);
column_1->set_length(varchar_length);
column_1->set_index_length(36);
column_1->set_is_nullable(false);
column_1->set_is_bf_column(false);
ColumnPB* column_2 = tablet_schema_pb.add_column();
column_2->set_unique_id(2);
column_2->set_name("c1");
column_2->set_type("INT");
column_2->set_length(4);
column_2->set_index_length(4);
column_2->set_is_nullable(true);
column_2->set_is_key(false);
column_2->set_is_nullable(true);
column_2->set_is_bf_column(false);
tablet_schema->init_from_pb(tablet_schema_pb);
return tablet_schema;
}
TabletSharedPtr create_tablet(const TabletSchema& tablet_schema,
bool enable_unique_key_merge_on_write) {
std::vector<TColumn> cols;
std::unordered_map<uint32_t, uint32_t> col_ordinal_to_unique_id;
for (auto i = 0; i < tablet_schema.num_columns(); i++) {
const TabletColumn& column = tablet_schema.column(i);
TColumn col;
col.column_type.type = TPrimitiveType::INT;
col.__set_column_name(column.name());
col.__set_is_key(column.is_key());
cols.push_back(col);
col_ordinal_to_unique_id[i] = column.unique_id();
}
TTabletSchema t_tablet_schema;
t_tablet_schema.__set_short_key_column_count(tablet_schema.num_short_key_columns());
t_tablet_schema.__set_schema_hash(3333);
if (tablet_schema.keys_type() == UNIQUE_KEYS) {
t_tablet_schema.__set_keys_type(TKeysType::UNIQUE_KEYS);
} else if (tablet_schema.keys_type() == DUP_KEYS) {
t_tablet_schema.__set_keys_type(TKeysType::DUP_KEYS);
} else if (tablet_schema.keys_type() == AGG_KEYS) {
t_tablet_schema.__set_keys_type(TKeysType::AGG_KEYS);
}
t_tablet_schema.__set_storage_type(TStorageType::COLUMN);
t_tablet_schema.__set_columns(cols);
TabletMetaSharedPtr tablet_meta {std::make_shared<TabletMeta>(
2, 2, 2, 2, 2, 2, t_tablet_schema, 2, col_ordinal_to_unique_id, UniqueId(1, 2),
TTabletType::TABLET_TYPE_DISK, TCompressionType::LZ4F, 0,
enable_unique_key_merge_on_write)};
TabletSharedPtr tablet {std::make_shared<Tablet>(*engine_ref, tablet_meta, data_dir.get())};
EXPECT_TRUE(tablet->init().ok());
return tablet;
}
RowsetWriterContext create_rowset_writer_context(TabletSchemaSPtr tablet_schema,
const SegmentsOverlapPB& overlap,
uint32_t max_rows_per_segment,
Version version) {
RowsetWriterContext rowset_writer_context;
rowset_writer_context.rowset_id = engine_ref->next_rowset_id();
rowset_writer_context.rowset_type = BETA_ROWSET;
rowset_writer_context.rowset_state = VISIBLE;
rowset_writer_context.tablet_schema = tablet_schema;
rowset_writer_context.tablet_path = kSegmentDir;
rowset_writer_context.version = version;
rowset_writer_context.segments_overlap = overlap;
rowset_writer_context.max_rows_per_segment = max_rows_per_segment;
return rowset_writer_context;
}
void create_and_init_rowset_reader(Rowset* rowset, RowsetReaderContext& context,
RowsetReaderSharedPtr* result) {
auto s = rowset->create_reader(result);
EXPECT_TRUE(s.ok());
EXPECT_TRUE(*result != nullptr);
s = (*result)->init(&context);
EXPECT_TRUE(s.ok());
}
std::vector<vectorized::Block> generate_blocks(
TabletSchemaSPtr tablet_schema, const std::vector<std::vector<std::string>>& data) {
std::vector<vectorized::Block> ret;
int const_value = 999;
for (const auto& segment_rows : data) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (const auto& row : segment_rows) {
columns[0]->insert_data(row.data(), row.size());
columns[1]->insert_data(reinterpret_cast<const char*>(&const_value),
sizeof(const_value));
}
ret.emplace_back(std::move(block));
}
return ret;
}
std::vector<std::vector<std::string>> get_expected_key_bounds(
const std::vector<std::vector<std::string>>& data) {
std::vector<std::vector<std::string>> ret;
for (const auto& rows : data) {
auto& cur = ret.emplace_back();
auto min_key = rows.front();
auto max_key = rows.front();
for (const auto& row : rows) {
if (row < min_key) {
min_key = row;
}
if (row > max_key) {
max_key = row;
}
}
// segments key bounds have marker
min_key = std::string {KEY_NORMAL_MARKER} + min_key;
max_key = std::string {KEY_NORMAL_MARKER} + max_key;
cur.emplace_back(do_trunacte(min_key));
cur.emplace_back(do_trunacte(max_key));
}
return ret;
}
RowsetSharedPtr create_rowset(TabletSchemaSPtr tablet_schema, SegmentsOverlapPB overlap,
const std::vector<vectorized::Block> blocks, int64_t version,
bool is_vertical) {
auto writer_context = create_rowset_writer_context(tablet_schema, overlap, UINT32_MAX,
{version, version});
auto res = RowsetFactory::create_rowset_writer(*engine_ref, writer_context, is_vertical);
EXPECT_TRUE(res.has_value()) << res.error();
auto rowset_writer = std::move(res).value();
uint32_t num_rows = 0;
for (const auto& block : blocks) {
num_rows += block.rows();
EXPECT_TRUE(rowset_writer->add_block(&block).ok());
EXPECT_TRUE(rowset_writer->flush().ok());
}
RowsetSharedPtr rowset;
EXPECT_EQ(Status::OK(), rowset_writer->build(rowset));
EXPECT_EQ(blocks.size(), rowset->rowset_meta()->num_segments());
EXPECT_EQ(num_rows, rowset->rowset_meta()->num_rows());
return rowset;
}
std::string do_trunacte(std::string key) {
if (segments_key_bounds_truncation_enabled()) {
auto threshold = config::segments_key_bounds_truncation_threshold;
if (key.size() > threshold) {
key.resize(threshold);
}
}
return key;
}
bool segments_key_bounds_truncation_enabled() {
return (config::segments_key_bounds_truncation_threshold > 0);
}
void check_key_bounds(const std::vector<std::vector<std::string>>& data,
const std::vector<KeyBoundsPB>& segments_key_bounds) {
// 1. check size
for (const auto& segments_key_bound : segments_key_bounds) {
const auto& min_key = segments_key_bound.min_key();
const auto& max_key = segments_key_bound.max_key();
if (segments_key_bounds_truncation_enabled()) {
EXPECT_LE(min_key.size(), config::segments_key_bounds_truncation_threshold);
EXPECT_LE(max_key.size(), config::segments_key_bounds_truncation_threshold);
}
}
// 2. check content
auto expected_key_bounds = get_expected_key_bounds(data);
for (std::size_t i = 0; i < expected_key_bounds.size(); i++) {
const auto& min_key = segments_key_bounds[i].min_key();
const auto& max_key = segments_key_bounds[i].max_key();
EXPECT_EQ(min_key, expected_key_bounds[i][0]);
EXPECT_EQ(max_key, expected_key_bounds[i][1]);
std::cout << fmt::format("min_key={}, size={}\nmax_key={}, size={}\n",
hexdump(min_key.data(), min_key.size()), min_key.size(),
hexdump(max_key.data(), max_key.size()), max_key.size());
}
}
std::vector<RowsetSharedPtr> create_rowsets(TabletSchemaSPtr tablet_schema,
const std::vector<std::vector<std::string>>& data,
const std::vector<int64_t>& truncate_lengths = {}) {
std::vector<RowsetSharedPtr> rowsets;
for (size_t i {0}; i < data.size(); i++) {
const auto rows = data[i];
if (!truncate_lengths.empty()) {
config::segments_key_bounds_truncation_threshold = truncate_lengths[i];
}
std::vector<std::vector<std::string>> rowset_data {rows};
auto blocks = generate_blocks(tablet_schema, rowset_data);
RowsetSharedPtr rowset =
create_rowset(tablet_schema, NONOVERLAPPING, blocks, cur_version++, false);
std::vector<KeyBoundsPB> segments_key_bounds;
rowset->rowset_meta()->get_segments_key_bounds(&segments_key_bounds);
for (const auto& segments_key_bound : segments_key_bounds) {
const auto& min_key = segments_key_bound.min_key();
const auto& max_key = segments_key_bound.max_key();
LOG(INFO) << fmt::format(
"\n==== rowset_id={}, segment_key_bounds_truncated={} ====\nmin_key={}, "
"size={}\nmax_key={}, size={}\n",
rowset->rowset_id().to_string(), rowset->is_segments_key_bounds_truncated(),
min_key, min_key.size(), max_key, max_key.size());
}
rowsets.push_back(rowset);
RowsetReaderSharedPtr rs_reader;
EXPECT_TRUE(rowset->create_reader(&rs_reader));
}
for (std::size_t i {0}; i < truncate_lengths.size(); i++) {
EXPECT_EQ((truncate_lengths[i] > 0), rowsets[i]->is_segments_key_bounds_truncated());
}
return rowsets;
}
TabletReader::ReaderParams create_reader_params(
TabletSchemaSPtr tablet_schema, const std::vector<std::vector<std::string>>& data,
const std::vector<int64_t>& truncate_lengths = {}) {
TabletReader::ReaderParams reader_params;
std::vector<RowsetSharedPtr> rowsets =
create_rowsets(tablet_schema, data, truncate_lengths);
std::vector<RowSetSplits> rs_splits;
for (size_t i {0}; i < rowsets.size(); i++) {
RowsetReaderSharedPtr rs_reader;
EXPECT_TRUE(rowsets[i]->create_reader(&rs_reader));
RowSetSplits rs_split;
rs_split.rs_reader = rs_reader;
rs_splits.emplace_back(rs_split);
}
reader_params.rs_splits = std::move(rs_splits);
return reader_params;
}
};
TEST_F(SegmentsKeyBoundsTruncationTest, CompareFuncTest) {
    // Exhaustive check of `Slice::lhs_is_strictly_less_than_rhs`.
    //
    // Every pair of binary strings of length 1..n is compared, each side either untouched
    // (length argument 0) or flagged as truncated with a length limit of 1..n. Whenever the
    // function claims "strictly less" for the (possibly shortened) slices, the full strings
    // must compare strictly less as well.
    // This test is redundant: n = 3 would already be enough.
    constexpr int n = 8;

    std::vector<std::string> samples;
    for (int width = 1; width <= n; ++width) {
        const int combinations = 1 << width;
        for (int bits = 0; bits < combinations; ++bits) {
            samples.emplace_back(fmt::format("{:0{width}b}", bits, fmt::arg("width", width)));
        }
    }
    std::cout << "datas.size()=" << samples.size() << "\n";

    // hits:       claimed "less" and the full strings really are less
    // full_less:  the full strings are less (upper bound for `hits`)
    int hits = 0;
    int full_less = 0;
    int total = 0;

    for (const std::string& lhs_text : samples) {
        for (const std::string& rhs_text : samples) {
            Slice X {lhs_text};
            Slice Y {rhs_text};
            // Ground truth on the untruncated strings; independent of l1/l2.
            const bool res2 = (X.compare(Y) < 0);

            for (int l1 = 0; l1 <= n; ++l1) {
                const bool X_is_truncated = (l1 != 0);
                Slice a {X};
                if (X_is_truncated && X.get_size() >= static_cast<size_t>(l1)) {
                    a.truncate(l1);
                }

                for (int l2 = 0; l2 <= n; ++l2) {
                    const bool Y_is_truncated = (l2 != 0);
                    Slice b {Y};
                    if (Y_is_truncated && Y.get_size() >= static_cast<size_t>(l2)) {
                        b.truncate(l2);
                    }

                    const bool res1 = Slice::lhs_is_strictly_less_than_rhs(a, X_is_truncated, b,
                                                                           Y_is_truncated);
                    ++total;
                    if (res2) {
                        ++full_less;
                        if (res1) {
                            ++hits;
                        }
                    }
                    // A positive answer must never contradict the full comparison.
                    EXPECT_FALSE(res1 && !res2) << fmt::format(
                            "X={}, a={}, l1={}, Y={}, b={}, l2={}, res1={}, res2={}", X.to_string(),
                            a.to_string(), l1, Y.to_string(), b.to_string(), l2, res1, res2);
                }
            }
        }
    }

    std::cout << fmt::format("count1={}, count2={}, count1/count2={}, total={}\n", hits, full_less,
                             static_cast<double>(hits) / full_less, total);
}
TEST_F(SegmentsKeyBoundsTruncationTest, BasicTruncationTest) {
    // Four segments; with the one-byte key marker, the longest keys in segments 2-4 exceed
    // 10 bytes, and every key stays well below 100 bytes.
    const std::vector<std::vector<std::string>> data {{std::string(2, 'x'), std::string(3, 'y')},
                                                      {std::string(4, 'a'), std::string(15, 'b')},
                                                      {std::string(18, 'c'), std::string(5, 'z')},
                                                      {std::string(20, '0'), std::string(22, '1')}};

    // Applies `threshold` to the config and writes `data` as one rowset (version 2).
    auto write_rowset = [&](int threshold) {
        config::segments_key_bounds_truncation_threshold = threshold;
        auto tablet_schema = create_schema(100);
        auto blocks = generate_blocks(tablet_schema, data);
        return create_rowset(tablet_schema, NONOVERLAPPING, blocks, 2, false);
    };

    // Fetches the stored key bounds and compares them with the expectation for `data`.
    auto verify_key_bounds = [&](const auto& rowset_meta) {
        std::vector<KeyBoundsPB> segments_key_bounds;
        rowset_meta->get_segments_key_bounds(&segments_key_bounds);
        EXPECT_EQ(segments_key_bounds.size(), data.size());
        check_key_bounds(data, segments_key_bounds);
    };

    {
        // 1. don't do segments key bounds truncation when the config is off
        RowsetSharedPtr rowset = write_rowset(-1);
        auto rowset_meta = rowset->rowset_meta();
        EXPECT_EQ(false, rowset_meta->is_segments_key_bounds_truncated());
        verify_key_bounds(rowset_meta);
    }
    {
        // 2. do segments key bounds truncation when the config is on
        RowsetSharedPtr rowset = write_rowset(10);
        auto rowset_meta = rowset->rowset_meta();
        EXPECT_EQ(true, rowset_meta->is_segments_key_bounds_truncated());
        verify_key_bounds(rowset_meta);
    }
    {
        // 3. segments_key_bounds_truncated should be set to false if no actual truncation
        //    happened
        RowsetSharedPtr rowset = write_rowset(100);
        auto rowset_meta = rowset->rowset_meta();
        EXPECT_EQ(false, rowset_meta->is_segments_key_bounds_truncated());
    }
}
// Checks `BlockReader::_rowsets_not_mono_asc_disjoint` on rowsets whose key bounds were written
// with various truncation thresholds. Each scenario builds one single-segment rowset per inner
// vector of its data and asserts the verdict the reader gives for that rowset sequence.
TEST_F(SegmentsKeyBoundsTruncationTest, BlockReaderJudgeFuncTest) {
    using Rows = std::vector<std::vector<std::string>>;
    using Lengths = std::vector<int64_t>;

    auto tablet_schema = create_schema(100);

    // Human-readable form of a per-rowset threshold list, used to label failures.
    auto describe = [](const Lengths& truncate_lengths) {
        std::string text = "truncate_lengths={";
        for (std::size_t i = 0; i < truncate_lengths.size(); ++i) {
            if (i != 0) {
                text += ", ";
            }
            text += std::to_string(truncate_lengths[i]);
        }
        text += "}";
        return text;
    };

    // Builds the rowsets (per-rowset thresholds if `truncate_lengths` is non-empty, otherwise
    // the currently configured threshold) and returns the reader's verdict.
    auto judge = [&](const Rows& rows, const Lengths& truncate_lengths) -> bool {
        TabletReader::ReaderParams read_params =
                create_reader_params(tablet_schema, rows, truncate_lengths);
        vectorized::BlockReader block_reader;
        return block_reader._rowsets_not_mono_asc_disjoint(read_params);
    };

    // All rowsets share one threshold; -1 disables truncation.
    auto expect_uniform = [&](const Rows& rows, int threshold, bool expected) {
        SCOPED_TRACE("segments_key_bounds_truncation_threshold=" + std::to_string(threshold));
        config::segments_key_bounds_truncation_threshold = threshold;
        EXPECT_EQ(expected, judge(rows, {}));
    };

    // Rowset i is written with threshold truncate_lengths[i].
    auto expect_mixed = [&](const Rows& rows, const Lengths& truncate_lengths, bool expected) {
        SCOPED_TRACE(describe(truncate_lengths));
        EXPECT_EQ(expected, judge(rows, truncate_lengths));
    };

    constexpr int kTruncationDisabled = -1;

    {
        // all rowsets are truncated to same size
        // keys are distinguishable from any index
        const Rows data {{"aaaaaaaaa", "bbbbb"},
                         {"cccccc", "dddddd"},
                         {"eeeeeee", "fffffff"},
                         {"xxxxxxx", "yyyyyyyy"}};
        expect_uniform(data, kTruncationDisabled, false);
        // can still determine that segments are mono ascending and disjoint after truncation
        expect_uniform(data, 3, false);
    }
    {
        // all rowsets are truncated to same size
        // keys are distinguishable from any index before truncation
        // some keys are not comparable after truncation
        const Rows data {{"aaaaaaaaa", "bbbbb"},
                         {"cccccccccccc", "ccdddddddd"},
                         {"cceeeeeeee", "fffffff"},
                         {"xxxxxxx", "yyyyyyyy"}};
        expect_uniform(data, kTruncationDisabled, false);
        expect_uniform(data, 6, false);
        // can not determine whether rowset 2 and rowset 3 are mono ascending
        expect_uniform(data, 3, true);
    }
    {
        // all rowsets are truncated to same size
        // keys are not mono ascending before truncation
        const Rows data {{"aaaaaaaaa", "bbbbb"},
                         {"bbbbb", "cccccccc"},
                         {"cccccccc", "xxxxxxx"},
                         {"xxxxxxx", "yyyyyyyy"}};
        expect_uniform(data, kTruncationDisabled, true);
        expect_uniform(data, 3, true);
    }
    {
        // some rowsets are truncated, some are not
        const Rows data {{"aaaaaaaaa", "bbbbbbccccccc"}, {"bbbbbbddddddd", "dddddd"}};
        expect_mixed(data, {-1, 9}, false);
        expect_mixed(data, {-1, 4}, true);
        expect_mixed(data, {9, -1}, false);
        expect_mixed(data, {4, -1}, true);
    }
    {
        // some rowsets are truncated, some are not, truncated lengths may be different
        const Rows data {{"aaaaaaaaa", "bbbbbbbb"},
                         {"ccccccccc", "dddddd"},
                         {"eeeeeee", "ffffffggggg"},
                         {"ffffffhhhhhh", "hhhhhhh"},
                         {"iiiiiiii", "jjjjjjjjj"}};

        struct MixedCase {
            Lengths truncate_lengths;
            bool expected;
        };
        // Only the thresholds of rowsets 3 and 4 ("...ffffffggggg" / "ffffffhhhhhh...") vary.
        const std::vector<MixedCase> cases {
                {{4, 5, 4, -1, 6}, true},  {{4, 5, 8, -1, 6}, false}, {{4, 5, -1, 4, 6}, true},
                {{4, 5, -1, 8, 6}, false}, {{4, 5, 8, 4, 6}, true},   {{4, 5, 4, 8, 6}, true},
                {{4, 5, 8, 9, 6}, false},  {{4, 5, 3, 4, 6}, true},
        };
        for (const auto& test_case : cases) {
            expect_mixed(data, test_case.truncate_lengths, test_case.expected);
        }
    }
}
// Checks whether `CumulativeCompaction::handle_ordered_data_compaction` accepts a sequence of
// input rowsets whose key bounds were written with different truncation thresholds. The row data
// is the same in every scenario; only the per-rowset thresholds change.
TEST_F(SegmentsKeyBoundsTruncationTest, OrderedCompactionTest) {
    auto tablet_schema = create_schema(100);
    config::enable_ordered_data_compaction = true;
    config::ordered_data_compaction_min_segment_size = 1;

    const std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbcccccc"},
                                                      {"bbbbbddddddd", "dddddd"},
                                                      {"eeeeeee", "fffffffff"},
                                                      {"gggggggg", "hhhhhhh"},
                                                      {"iiiiiiii", "jjjjjjjjj"}};

    // Human-readable form of a per-rowset threshold list, used to label failures.
    auto describe = [](const std::vector<int64_t>& truncate_lengths) {
        std::string text = "truncate_lengths={";
        for (std::size_t i = 0; i < truncate_lengths.size(); ++i) {
            if (i != 0) {
                text += ", ";
            }
            text += std::to_string(truncate_lengths[i]);
        }
        text += "}";
        return text;
    };

    // Creates a fresh tablet and one rowset per entry of `data` (rowset i written with threshold
    // truncate_lengths[i]; an empty list keeps the currently configured threshold), feeds the
    // rowsets to a cumulative compaction and checks the outcome of the ordered-data path.
    // When the ordered path is expected to succeed, the input rowset count must be unchanged.
    auto run_case = [&](const std::vector<int64_t>& truncate_lengths, bool expect_ordered) {
        SCOPED_TRACE(describe(truncate_lengths));
        TabletSharedPtr tablet = create_tablet(*tablet_schema, false);
        EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet->tablet_path()).ok());
        auto input_rowsets = create_rowsets(tablet_schema, data, truncate_lengths);
        CumulativeCompaction cu_compaction(*engine_ref, tablet);
        cu_compaction._input_rowsets = std::move(input_rowsets);
        if (expect_ordered) {
            EXPECT_TRUE(cu_compaction.handle_ordered_data_compaction());
            EXPECT_EQ(cu_compaction._input_rowsets.size(), data.size());
        } else {
            EXPECT_FALSE(cu_compaction.handle_ordered_data_compaction());
        }
    };

    // Baseline: no truncation at all.
    disable_segments_key_bounds_truncation();
    run_case({}, true);

    // Only the thresholds of the first two rowsets vary; their boundary keys
    // ("bbbbbcccccc" / "bbbbbddddddd") share a 5-character prefix.
    struct OrderedCase {
        std::vector<int64_t> truncate_lengths;
        bool expect_ordered;
    };
    const std::vector<OrderedCase> cases {
            {{4, 4, 4, 4, 4}, false}, {{4, 8, 4, 4, 4}, false},  {{8, 4, 4, 4, 4}, false},
            {{8, 9, 4, 4, 4}, true},  {{8, -1, 4, 4, 4}, true},  {{-1, 9, 4, 4, 4}, true},
            {{-1, 4, 4, 4, 4}, false}, {{4, -1, 4, 4, 4}, false},
    };
    for (const auto& test_case : cases) {
        run_case(test_case.truncate_lengths, test_case.expect_ordered);
    }
}
} // namespace doris