blob: 85ec430fd1a8ecb833f75c4e565661ca9163537d [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "olap/primary_key_index.h"
#include <gen_cpp/segment_v2.pb.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
#include <algorithm>
#include <string>
#include <vector>
#include "gtest/gtest_pred_impl.h"
#include "io/fs/file_writer.h"
#include "io/fs/local_file_system.h"
#include "olap/types.h"
#include "vec/columns/column.h"
#include "vec/common/string_ref.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_factory.hpp"
namespace doris {
using namespace ErrorCode;
class PrimaryKeyIndexTest : public testing::Test {
public:
void SetUp() override {
auto st = io::global_local_filesystem()->delete_directory(kTestDir);
ASSERT_TRUE(st.ok()) << st;
st = io::global_local_filesystem()->create_directory(kTestDir);
ASSERT_TRUE(st.ok()) << st;
}
void TearDown() override {
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok());
}
private:
const std::string kTestDir = "./ut_dir/primary_key_index_test";
};
TEST_F(PrimaryKeyIndexTest, builder) {
std::string filename = kTestDir + "/builder";
io::FileWriterPtr file_writer;
auto fs = io::global_local_filesystem();
EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
PrimaryKeyIndexBuilder builder(file_writer.get(), 0, 0);
static_cast<void>(builder.init());
size_t num_rows = 0;
std::vector<std::string> keys;
for (int i = 1000; i < 10000; i += 2) {
keys.push_back(std::to_string(i));
static_cast<void>(builder.add_item(std::to_string(i)));
num_rows++;
}
EXPECT_EQ("1000", builder.min_key().to_string());
EXPECT_EQ("9998", builder.max_key().to_string());
segment_v2::PrimaryKeyIndexMetaPB index_meta;
EXPECT_TRUE(builder.finalize(&index_meta));
EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
EXPECT_TRUE(file_writer->close().ok());
EXPECT_EQ(num_rows, builder.num_rows());
PrimaryKeyIndexReader index_reader;
io::FileReaderSPtr file_reader;
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta, nullptr).ok());
EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta, nullptr).ok());
EXPECT_EQ(num_rows, index_reader.num_rows());
std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
EXPECT_TRUE(index_reader.new_iterator(&index_iterator, nullptr).ok());
bool exact_match = false;
uint32_t row_id;
for (size_t i = 0; i < keys.size(); i++) {
bool exists = index_reader.check_present(keys[i]);
EXPECT_TRUE(exists);
auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_TRUE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i, row_id);
}
// find a non-existing key "8701"
{
std::string key("8701");
Slice slice(key);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_FALSE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(3851, row_id);
}
// find prefix "87"
{
std::string key("87");
Slice slice(key);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_FALSE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(3850, row_id);
}
// find prefix "9999"
{
std::string key("9999");
Slice slice(key);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_FALSE(exact_match);
EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
}
// read all key
{
int32_t remaining = num_rows;
std::string last_key;
int num_batch = 0;
int batch_size = 1024;
while (remaining > 0) {
std::unique_ptr<segment_v2::IndexedColumnIterator> iter;
EXPECT_TRUE(index_reader.new_iterator(&iter, nullptr).ok());
size_t num_to_read = std::min(batch_size, remaining);
auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
index_reader.type_info()->type(), 1, 0);
auto index_column = index_type->create_column();
Slice last_key_slice(last_key);
EXPECT_TRUE(iter->seek_at_or_after(&last_key_slice, &exact_match).ok());
size_t num_read = num_to_read;
EXPECT_TRUE(iter->next_batch(&num_read, index_column).ok());
EXPECT_EQ(num_to_read, num_read);
last_key = index_column->get_data_at(num_read - 1).to_string();
// exclude last_key, last_key will be read in next batch.
if (num_read == batch_size && num_read != remaining) {
num_read -= 1;
}
for (size_t i = 0; i < num_read; i++) {
Slice key =
Slice(index_column->get_data_at(i).data, index_column->get_data_at(i).size);
DCHECK_EQ(keys[i + (batch_size - 1) * num_batch], key.to_string());
}
num_batch++;
remaining -= num_read;
}
}
}
TEST_F(PrimaryKeyIndexTest, multiple_pages) {
std::string filename = kTestDir + "/multiple_pages";
io::FileWriterPtr file_writer;
auto fs = io::global_local_filesystem();
EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
config::primary_key_data_page_size = 5 * 5;
PrimaryKeyIndexBuilder builder(file_writer.get(), 0, 0);
static_cast<void>(builder.init());
size_t num_rows = 0;
std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
"00010", "00012", "00014", "00016", "00018"};
for (const std::string& key : keys) {
static_cast<void>(builder.add_item(key));
num_rows++;
}
EXPECT_EQ("00000", builder.min_key().to_string());
EXPECT_EQ("00018", builder.max_key().to_string());
EXPECT_EQ(builder.size(), 2 * 5 * 5);
EXPECT_GT(builder.data_page_num(), 1);
segment_v2::PrimaryKeyIndexMetaPB index_meta;
EXPECT_TRUE(builder.finalize(&index_meta));
EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
EXPECT_TRUE(file_writer->close().ok());
EXPECT_EQ(num_rows, builder.num_rows());
PrimaryKeyIndexReader index_reader;
io::FileReaderSPtr file_reader;
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta, nullptr).ok());
EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta, nullptr).ok());
EXPECT_EQ(num_rows, index_reader.num_rows());
std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
EXPECT_TRUE(index_reader.new_iterator(&index_iterator, nullptr).ok());
bool exact_match = false;
uint32_t row_id;
for (size_t i = 0; i < keys.size(); i++) {
bool exists = index_reader.check_present(keys[i]);
EXPECT_TRUE(exists);
auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_TRUE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i, row_id);
}
for (size_t i = 0; i < keys.size(); i++) {
bool exists = index_reader.check_present(keys[i]);
EXPECT_TRUE(exists);
auto status = index_iterator->seek_to_ordinal(i);
EXPECT_TRUE(status.ok());
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i, row_id);
}
{
auto status = index_iterator->seek_to_ordinal(10);
EXPECT_TRUE(status.ok());
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(10, row_id);
}
std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
"00011", "00013", "00015", "00017"};
for (size_t i = 0; i < non_exist_keys.size(); i++) {
Slice slice(non_exist_keys[i]);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_FALSE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i + 1, row_id);
}
{
std::string key("00019");
Slice slice(key);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_FALSE(exact_match);
EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
}
}
TEST_F(PrimaryKeyIndexTest, single_page) {
std::string filename = kTestDir + "/single_page";
io::FileWriterPtr file_writer;
auto fs = io::global_local_filesystem();
EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
config::primary_key_data_page_size = 32768;
PrimaryKeyIndexBuilder builder(file_writer.get(), 0, 0);
static_cast<void>(builder.init());
size_t num_rows = 0;
std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
"00010", "00012", "00014", "00016", "00018"};
for (const std::string& key : keys) {
static_cast<void>(builder.add_item(key));
num_rows++;
}
EXPECT_EQ("00000", builder.min_key().to_string());
EXPECT_EQ("00018", builder.max_key().to_string());
EXPECT_EQ(builder.size(), 2 * 5 * 5);
EXPECT_EQ(builder.data_page_num(), 1);
segment_v2::PrimaryKeyIndexMetaPB index_meta;
EXPECT_TRUE(builder.finalize(&index_meta));
EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
EXPECT_TRUE(file_writer->close().ok());
EXPECT_EQ(num_rows, builder.num_rows());
PrimaryKeyIndexReader index_reader;
io::FileReaderSPtr file_reader;
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta, nullptr).ok());
EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta, nullptr).ok());
EXPECT_EQ(num_rows, index_reader.num_rows());
std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
EXPECT_TRUE(index_reader.new_iterator(&index_iterator, nullptr).ok());
bool exact_match = false;
uint32_t row_id;
for (size_t i = 0; i < keys.size(); i++) {
bool exists = index_reader.check_present(keys[i]);
EXPECT_TRUE(exists);
auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_TRUE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i, row_id);
}
std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
"00011", "00013", "00015", "00017"};
for (size_t i = 0; i < non_exist_keys.size(); i++) {
Slice slice(non_exist_keys[i]);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_TRUE(status.ok());
EXPECT_FALSE(exact_match);
row_id = index_iterator->get_current_ordinal();
EXPECT_EQ(i + 1, row_id);
}
{
std::string key("00019");
Slice slice(key);
bool exists = index_reader.check_present(slice);
EXPECT_FALSE(exists);
auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
EXPECT_FALSE(exact_match);
EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
}
}
} // namespace doris