blob: d637175f05272aee6fd6a14fe18d479c5809c7aa [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include "exprs/function/is_not_null.h"
#include "exprs/function/is_null.h"
#include "storage/index/index_file_reader.h"
#include "storage/rowset/rowset_factory.h"
#include "storage/storage_engine.h"
namespace doris {
using namespace doris;
constexpr static uint32_t MAX_PATH_LEN = 1024;
constexpr static std::string_view dest_dir = "./ut_dir/inverted_index_test";
constexpr static std::string_view tmp_dir = "./ut_dir/tmp";
static int64_t inc_id = 0;
class FunctionIsNullTest : public ::testing::Test {
protected:
void SetUp() override {
// absolute dir
char buffer[MAX_PATH_LEN];
EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
_curreent_dir = std::string(buffer);
_absolute_dir = _curreent_dir + std::string(dest_dir);
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());
// tmp dir
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
std::vector<StorePath> paths;
paths.emplace_back(std::string(tmp_dir), 1024000000);
auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
EXPECT_TRUE(tmp_file_dirs->init().ok());
ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
// storage engine
doris::EngineOptions options;
auto engine = std::make_unique<StorageEngine>(options);
_engine_ref = engine.get();
_data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir);
static_cast<void>(_data_dir->update_capacity());
EXPECT_TRUE(_data_dir->init(true).ok());
ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
// tablet_schema
_schema_pb.set_keys_type(KeysType::DUP_KEYS);
_schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
construct_column(_schema_pb.add_column(), _schema_pb.add_index(), 10000, "key_index", 0,
"INT", "key");
construct_column(_schema_pb.add_column(), _schema_pb.add_index(), 10001, "v1_index", 1,
"STRING", "v1");
// Initialize cache
int64_t inverted_index_cache_limit = 1024 * 1024 * 1024;
_inverted_index_searcher_cache = std::unique_ptr<segment_v2::InvertedIndexSearcherCache>(
InvertedIndexSearcherCache::create_global_instance(inverted_index_cache_limit, 1));
_inverted_index_query_cache = std::unique_ptr<segment_v2::InvertedIndexQueryCache>(
InvertedIndexQueryCache::create_global_cache(inverted_index_cache_limit, 1));
ExecEnv::GetInstance()->set_inverted_index_searcher_cache(
_inverted_index_searcher_cache.get());
ExecEnv::GetInstance()->_inverted_index_query_cache = _inverted_index_query_cache.get();
}
void TearDown() override {
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
_engine_ref = nullptr;
ExecEnv::GetInstance()->set_storage_engine(nullptr);
}
void construct_column(ColumnPB* column_pb, TabletIndexPB* tablet_index, int64_t index_id,
const std::string& index_name, int32_t col_unique_id,
const std::string& column_type, const std::string& column_name,
bool parser = false) {
column_pb->set_unique_id(col_unique_id);
column_pb->set_name(column_name);
column_pb->set_type(column_type);
column_pb->set_is_key(false);
column_pb->set_is_nullable(true);
tablet_index->set_index_id(index_id);
tablet_index->set_index_name(index_name);
tablet_index->set_index_type(IndexType::INVERTED);
tablet_index->add_col_unique_id(col_unique_id);
if (parser) {
auto* properties = tablet_index->mutable_properties();
(*properties)[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_UNICODE;
}
}
RowsetWriterContext rowset_writer_context() {
RowsetWriterContext context;
RowsetId rowset_id;
rowset_id.init(inc_id);
context.rowset_id = rowset_id;
context.rowset_type = BETA_ROWSET;
context.data_dir = _data_dir.get();
context.rowset_state = VISIBLE;
context.tablet_schema = _tablet_schema;
context.tablet_path = _tablet->tablet_path();
context.version = Version(inc_id, inc_id);
context.max_rows_per_segment = 200;
inc_id++;
return context;
}
FunctionIsNullTest() = default;
~FunctionIsNullTest() override = default;
private:
TabletSchemaSPtr _tablet_schema = nullptr;
StorageEngine* _engine_ref = nullptr;
std::unique_ptr<DataDir> _data_dir = nullptr;
TabletSharedPtr _tablet = nullptr;
std::string _absolute_dir;
std::string _curreent_dir;
TabletSchemaPB _schema_pb;
std::unique_ptr<InvertedIndexSearcherCache> _inverted_index_searcher_cache;
std::unique_ptr<InvertedIndexQueryCache> _inverted_index_query_cache;
};
TEST_F(FunctionIsNullTest, gc_binlogs_test) {
_schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
_tablet_schema.reset(new TabletSchema);
_tablet_schema->init_from_pb(_schema_pb);
TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
tablet_meta->set_tablet_uid(TabletUid(20, 20));
tablet_meta.get()->_tablet_id = 200;
_tablet.reset(new Tablet(*_engine_ref, tablet_meta, _data_dir.get()));
EXPECT_TRUE(_tablet->init().ok());
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
RowsetSharedPtr rowset;
const auto& res =
RowsetFactory::create_rowset_writer(*_engine_ref, rowset_writer_context(), false);
EXPECT_TRUE(res.has_value()) << res.error();
const auto& rowset_writer = res.value();
Block block = _tablet_schema->create_block();
auto columns = block.mutate_columns();
Field key = Field::create_field<TYPE_INT>(10);
Field v1 = Field::create_field<TYPE_STRING>("v1");
Field null_field = Field();
columns[0]->insert(key);
columns[0]->insert(null_field);
columns[0]->insert(key);
columns[1]->insert(v1);
columns[1]->insert(null_field);
columns[1]->insert(null_field);
EXPECT_TRUE(rowset_writer->add_block(&block).ok());
EXPECT_TRUE(rowset_writer->flush().ok());
EXPECT_TRUE(rowset_writer->build(rowset).ok());
auto check_result = [&](InvertedIndexReader* reader, bool is_null, int expected_result) {
OlapReaderStatistics stats;
RuntimeState runtime_state;
io::IOContext io_ctx;
auto context = std::make_shared<segment_v2::IndexQueryContext>();
context->io_ctx = &io_ctx;
context->stats = &stats;
context->runtime_state = &runtime_state;
std::unique_ptr<IndexIterator> iter;
EXPECT_TRUE(reader->new_iterator(&iter).ok());
EXPECT_TRUE(iter);
iter->set_context(context);
ColumnsWithTypeAndName arguments;
std::vector<IndexFieldNameAndTypePair> data_type_with_names;
std::vector<segment_v2::IndexIterator*> iterators;
iterators.push_back(iter.get());
segment_v2::InvertedIndexResultBitmap bitmap_result;
if (is_null) {
auto is_null_function = std::make_shared<FunctionIsNull>();
EXPECT_TRUE(is_null_function
->evaluate_inverted_index(arguments, data_type_with_names,
iterators, 3, nullptr, bitmap_result)
.ok());
EXPECT_TRUE(!bitmap_result.is_empty());
EXPECT_EQ(expected_result, bitmap_result.get_data_bitmap()->cardinality());
} else {
auto is_not_null_function = std::make_shared<FunctionIsNotNull>();
EXPECT_TRUE(is_not_null_function
->evaluate_inverted_index(arguments, data_type_with_names,
iterators, 3, nullptr, bitmap_result)
.ok());
EXPECT_TRUE(!bitmap_result.is_empty());
EXPECT_EQ(expected_result, bitmap_result.get_data_bitmap()->cardinality());
}
};
for (int i = 0; i < rowset->num_segments(); i++) {
auto segment_path = rowset->segment_path(i);
EXPECT_TRUE(segment_path.has_value());
std::string index_prefix = std::string(
InvertedIndexDescriptor::get_index_file_path_prefix(segment_path.value()));
auto index_file_reader = std::make_shared<IndexFileReader>(
io::global_local_filesystem(), index_prefix, InvertedIndexStorageFormatPB::V2);
EXPECT_TRUE(index_file_reader->init().ok());
auto index_metas = _tablet_schema->inverted_indexs(0);
EXPECT_FALSE(index_metas.empty());
auto index_meta = index_metas[0];
auto bkd_reader = BkdIndexReader::create_shared(index_meta, index_file_reader);
EXPECT_TRUE(bkd_reader);
check_result(bkd_reader.get(), true, 1);
check_result(bkd_reader.get(), false, 2);
auto index_metas2 = _tablet_schema->inverted_indexs(1);
EXPECT_FALSE(index_metas2.empty());
auto index_meta2 = index_metas2[0];
auto string_reader =
StringTypeInvertedIndexReader::create_shared(index_meta2, index_file_reader);
EXPECT_TRUE(string_reader);
check_result(string_reader.get(), true, 2);
check_result(string_reader.get(), false, 1);
}
}
// Test corner cases for evaluate_inverted_index function
TEST_F(FunctionIsNullTest, evaluate_inverted_index_corner_cases) {
OlapReaderStatistics stats;
RuntimeState runtime_state;
io::IOContext io_ctx;
auto context = std::make_shared<segment_v2::IndexQueryContext>();
context->io_ctx = &io_ctx;
context->stats = &stats;
context->runtime_state = &runtime_state;
ColumnsWithTypeAndName arguments;
std::vector<IndexFieldNameAndTypePair> data_type_with_names;
auto is_null_function = std::make_shared<FunctionIsNull>();
auto is_not_null_function = std::make_shared<FunctionIsNotNull>();
// Test case 1: empty iterators
{
std::vector<segment_v2::IndexIterator*> iterators;
segment_v2::InvertedIndexResultBitmap bitmap_result;
EXPECT_TRUE(is_null_function
->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
nullptr, bitmap_result)
.ok());
EXPECT_TRUE(bitmap_result.is_empty());
}
{
std::vector<segment_v2::IndexIterator*> iterators;
segment_v2::InvertedIndexResultBitmap bitmap_result;
EXPECT_TRUE(is_not_null_function
->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
nullptr, bitmap_result)
.ok());
EXPECT_TRUE(bitmap_result.is_empty());
}
// Test case 2: iterators[0] == nullptr
{
std::vector<segment_v2::IndexIterator*> iterators;
iterators.push_back(nullptr);
segment_v2::InvertedIndexResultBitmap bitmap_result;
EXPECT_TRUE(is_null_function
->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
nullptr, bitmap_result)
.ok());
EXPECT_TRUE(bitmap_result.is_empty());
}
{
std::vector<segment_v2::IndexIterator*> iterators;
iterators.push_back(nullptr);
segment_v2::InvertedIndexResultBitmap bitmap_result;
EXPECT_TRUE(is_not_null_function
->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
nullptr, bitmap_result)
.ok());
EXPECT_TRUE(bitmap_result.is_empty());
}
// Test case 3 & 4: iterator without null bitmap or null_bitmap is nullptr
// These cases require creating a mock iterator with has_null() returning false
// or read_null_bitmap() returning nullptr
// We'll create a simple test using real data but verifying the behavior
_schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
_tablet_schema.reset(new TabletSchema);
_tablet_schema->init_from_pb(_schema_pb);
TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
tablet_meta->set_tablet_uid(TabletUid(30, 30));
tablet_meta.get()->_tablet_id = 300;
_tablet.reset(new Tablet(*_engine_ref, tablet_meta, _data_dir.get()));
EXPECT_TRUE(_tablet->init().ok());
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
RowsetSharedPtr rowset;
const auto& res =
RowsetFactory::create_rowset_writer(*_engine_ref, rowset_writer_context(), false);
EXPECT_TRUE(res.has_value()) << res.error();
const auto& rowset_writer = res.value();
Block block = _tablet_schema->create_block();
auto columns = block.mutate_columns();
// Create block with NO null values to test the scenario where
// iterator might not have null bitmap or it's nullptr
Field key1 = Field::create_field<TYPE_INT>(10);
Field key2 = Field::create_field<TYPE_INT>(20);
Field key3 = Field::create_field<TYPE_INT>(30);
Field v1 = Field::create_field<TYPE_STRING>("value1");
Field v2 = Field::create_field<TYPE_STRING>("value2");
Field v3 = Field::create_field<TYPE_STRING>("value3");
columns[0]->insert(key1);
columns[0]->insert(key2);
columns[0]->insert(key3);
columns[1]->insert(v1);
columns[1]->insert(v2);
columns[1]->insert(v3);
EXPECT_TRUE(rowset_writer->add_block(&block).ok());
EXPECT_TRUE(rowset_writer->flush().ok());
EXPECT_TRUE(rowset_writer->build(rowset).ok());
// Test with data that has no nulls
// This will exercise the code path where has_null() might return false
// or null_bitmap might be nullptr
for (int i = 0; i < rowset->num_segments(); i++) {
auto segment_path = rowset->segment_path(i);
EXPECT_TRUE(segment_path.has_value());
std::string index_prefix = std::string(
InvertedIndexDescriptor::get_index_file_path_prefix(segment_path.value()));
auto index_file_reader = std::make_shared<IndexFileReader>(
io::global_local_filesystem(), index_prefix, InvertedIndexStorageFormatPB::V2);
EXPECT_TRUE(index_file_reader->init().ok());
auto index_metas = _tablet_schema->inverted_indexs(0);
EXPECT_FALSE(index_metas.empty());
auto index_meta = index_metas[0];
auto bkd_reader = BkdIndexReader::create_shared(index_meta, index_file_reader);
EXPECT_TRUE(bkd_reader);
std::unique_ptr<IndexIterator> iter;
EXPECT_TRUE(bkd_reader->new_iterator(&iter).ok());
EXPECT_TRUE(iter);
iter->set_context(context);
std::vector<segment_v2::IndexIterator*> iterators;
iterators.push_back(iter.get());
segment_v2::InvertedIndexResultBitmap bitmap_result1;
EXPECT_TRUE(is_null_function
->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
nullptr, bitmap_result1)
.ok());
// When there's no null data, the result should be empty or have 0 cardinality
// depending on whether has_null() returns false
segment_v2::InvertedIndexResultBitmap bitmap_result2;
EXPECT_TRUE(is_not_null_function
->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
nullptr, bitmap_result2)
.ok());
// Similar test for is_not_null
}
}
} // namespace doris