blob: f5b53d494390ac6293e903979c2aa383bd4607b6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "olap/tablet_schema.h"
#include <gtest/gtest.h>
#include <set>
#include "gen_cpp/Descriptors_types.h"
#include "gen_cpp/Types_types.h"
#include "gen_cpp/olap_file.pb.h"
#include "vec/json/path_in_data.h"
namespace doris {
// Fixture shared by all tests in this file. It holds no state: every test
// constructs the schema, columns and indexes it needs locally.
class TabletSchemaTest : public testing::Test {
protected:
    // Nothing to prepare before each test.
    void SetUp() override {}
};
// Fills a ColumnPB, loads it with TabletColumn::init_from_pb and checks that
// each accessor on the resulting column reports the value that was set.
TEST_F(TabletSchemaTest, test_tablet_column_init_from_pb) {
    ColumnPB pb;

    // Identity, type and numeric layout.
    pb.set_unique_id(1001);
    pb.set_name("test_column");
    pb.set_type("INT");
    pb.set_length(4);
    pb.set_precision(10);
    pb.set_frac(0);

    // Flags and aggregation.
    pb.set_is_key(true);
    pb.set_is_nullable(false);
    pb.set_is_bf_column(false);
    pb.set_visible(true);
    pb.set_aggregation("NONE");

    // Variant-related settings.
    pb.set_variant_max_subcolumns_count(100);
    pb.set_pattern_type(PatternTypePB::MATCH_NAME_GLOB);
    pb.set_variant_enable_typed_paths_to_sparse(true);

    TabletColumn column;
    column.init_from_pb(pb);

    EXPECT_EQ(1001, column.unique_id());
    EXPECT_EQ("test_column", column.name());
    EXPECT_EQ(FieldType::OLAP_FIELD_TYPE_INT, column.type());
    EXPECT_TRUE(column.is_key());
    EXPECT_FALSE(column.is_nullable());
    EXPECT_EQ(4, column.length());
    EXPECT_EQ(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, column.aggregation());
    EXPECT_EQ(10, column.precision());
    EXPECT_EQ(0, column.frac());
    EXPECT_FALSE(column.is_bf_column());
    EXPECT_TRUE(column.visible());
    EXPECT_EQ(100, column.variant_max_subcolumns_count());
    EXPECT_EQ(PatternTypePB::MATCH_NAME_GLOB, column.pattern_type());
    EXPECT_TRUE(column.variant_enable_typed_paths_to_sparse());
}
// Fills a TColumn, loads it with TabletColumn::init_from_thrift and checks the
// accessors of the resulting column.
TEST_F(TabletSchemaTest, test_tablet_column_init_from_thrift) {
    // Populate the column type completely before handing it to the TColumn.
    TColumnType string_type;
    string_type.__set_type(TPrimitiveType::STRING);
    string_type.__set_variant_max_subcolumns_count(100);
    string_type.__set_len(255);

    TColumn thrift_col;
    thrift_col.__set_column_name("thrift_column");
    thrift_col.__set_column_type(string_type);
    thrift_col.__set_is_key(false);
    thrift_col.__set_is_allow_null(true);
    thrift_col.__set_aggregation_type(TAggregationType::SUM);
    thrift_col.__set_is_bloom_filter_column(true);
    thrift_col.__set_visible(false);
    thrift_col.__set_default_value("default_test");
    thrift_col.__set_variant_enable_typed_paths_to_sparse(false);
    thrift_col.__set_pattern_type(TPatternType::MATCH_NAME_GLOB);

    TabletColumn column;
    column.init_from_thrift(thrift_col);

    EXPECT_EQ("thrift_column", column.name());
    EXPECT_EQ(FieldType::OLAP_FIELD_TYPE_STRING, column.type());
    EXPECT_FALSE(column.is_key());
    EXPECT_TRUE(column.is_nullable());
    // The declared len is 255 but the column is expected to report 259.
    // NOTE(review): presumably the extra bytes are a length prefix -- confirm
    // against init_from_thrift.
    EXPECT_EQ(259, column.length());
    EXPECT_EQ(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM, column.aggregation());
    EXPECT_TRUE(column.is_bf_column());
    EXPECT_FALSE(column.visible());
    EXPECT_TRUE(column.has_default_value());
    EXPECT_EQ("default_test", column.default_value());
    EXPECT_FALSE(column.variant_enable_typed_paths_to_sparse());
}
// Loads a TabletIndexPB (id, name, type, two column ids, three properties)
// with TabletIndex::init_from_pb and checks that every value is readable back
// through the TabletIndex accessors.
TEST_F(TabletSchemaTest, test_tablet_index_init_from_pb) {
    TabletIndexPB index_pb;
    index_pb.set_index_id(12345);
    index_pb.set_index_name("test_inverted_index");
    index_pb.set_index_type(IndexType::INVERTED);
    index_pb.add_col_unique_id(1001);
    index_pb.add_col_unique_id(1002);
    auto* properties = index_pb.mutable_properties();
    (*properties)["parser"] = "standard";
    (*properties)["support_phrase"] = "true";
    (*properties)["char_filter"] = "char_replace";

    TabletIndex tablet_index;
    tablet_index.init_from_pb(index_pb);

    EXPECT_EQ(12345, tablet_index.index_id());
    EXPECT_EQ("test_inverted_index", tablet_index.index_name());
    EXPECT_EQ(IndexType::INVERTED, tablet_index.index_type());
    EXPECT_TRUE(tablet_index.is_inverted_index());

    const auto& col_uids = tablet_index.col_unique_ids();
    // Fatal assertion: the element reads below must not run on a short vector.
    ASSERT_EQ(2, col_uids.size());
    EXPECT_EQ(1001, col_uids[0]);
    EXPECT_EQ(1002, col_uids[1]);

    const auto& props = tablet_index.properties();
    EXPECT_EQ("standard", props.at("parser"));
    EXPECT_EQ("true", props.at("support_phrase"));
    EXPECT_EQ("char_replace", props.at("char_filter"));
}
// Loads a TOlapTableIndex plus an explicit list of column unique ids with
// TabletIndex::init_from_thrift and checks the id, name, type, column ids,
// the parsed gram_size / bf_size values and the raw "parser" property.
TEST_F(TabletSchemaTest, test_tablet_index_init_from_thrift) {
    TOlapTableIndex tindex;
    tindex.__set_index_id(54321);
    tindex.__set_index_name("thrift_index");
    tindex.__set_index_type(TIndexType::INVERTED);
    tindex.__set_columns({"col1", "col2"});

    std::map<std::string, std::string> properties;
    properties["parser"] = "unicode";
    properties["gram_size"] = "2";
    properties["bf_size"] = "1024";
    tindex.__set_properties(properties);

    std::vector<int32_t> column_uids = {2001, 2002};

    TabletIndex tablet_index;
    tablet_index.init_from_thrift(tindex, column_uids);

    EXPECT_EQ(54321, tablet_index.index_id());
    EXPECT_EQ("thrift_index", tablet_index.index_name());
    EXPECT_EQ(IndexType::INVERTED, tablet_index.index_type());
    EXPECT_TRUE(tablet_index.is_inverted_index());

    const auto& col_uids = tablet_index.col_unique_ids();
    // Fatal assertion: the element reads below must not run on a short vector.
    ASSERT_EQ(2, col_uids.size());
    EXPECT_EQ(2001, col_uids[0]);
    EXPECT_EQ(2002, col_uids[1]);

    // Test gram size and bf size parsing
    EXPECT_EQ(2, tablet_index.get_gram_size());
    EXPECT_EQ(1024, tablet_index.get_gram_bf_size());

    const auto& props = tablet_index.properties();
    EXPECT_EQ("unicode", props.at("parser"));
}
// Builds a schema with two columns, each carrying one inverted index, and
// checks the lookups by column, by column unique id, and over the whole schema.
TEST_F(TabletSchemaTest, test_tablet_schema_inverted_indexs) {
    TabletSchema schema;

    TabletColumn col1;
    col1.set_unique_id(1001);
    col1.set_name("col1");
    col1.set_type(FieldType::OLAP_FIELD_TYPE_VARCHAR);
    schema.append_column(col1);

    TabletColumn col2;
    col2.set_unique_id(1002);
    col2.set_name("col2");
    col2.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    schema.append_column(col2);

    // idx1 covers col1 (uid 1001).
    TabletIndexPB index1_pb;
    index1_pb.set_index_id(1);
    index1_pb.set_index_name("idx1");
    index1_pb.set_index_type(IndexType::INVERTED);
    index1_pb.add_col_unique_id(1001);
    TabletIndex index1;
    index1.init_from_pb(index1_pb);

    // idx2 covers col2 (uid 1002).
    TabletIndexPB index2_pb;
    index2_pb.set_index_id(2);
    index2_pb.set_index_name("idx2");
    index2_pb.set_index_type(IndexType::INVERTED);
    index2_pb.add_col_unique_id(1002);
    TabletIndex index2;
    index2.init_from_pb(index2_pb);

    schema.append_index(std::move(index1));
    schema.append_index(std::move(index2));

    // Each size check is fatal so that element [0] is never read from an
    // empty result.
    auto inverted_indexes_col1 = schema.inverted_indexs(col1);
    ASSERT_EQ(1, inverted_indexes_col1.size());
    EXPECT_EQ("idx1", inverted_indexes_col1[0]->index_name());

    auto inverted_indexes_col2 = schema.inverted_indexs(col2);
    ASSERT_EQ(1, inverted_indexes_col2.size());
    EXPECT_EQ("idx2", inverted_indexes_col2[0]->index_name());

    auto inverted_indexes_by_uid = schema.inverted_indexs(1001);
    ASSERT_EQ(1, inverted_indexes_by_uid.size());
    EXPECT_EQ("idx1", inverted_indexes_by_uid[0]->index_name());

    EXPECT_TRUE(schema.has_inverted_index());
    auto all_inverted_indexes = schema.inverted_indexes();
    EXPECT_EQ(2, all_inverted_indexes.size());
}
// Feeds two thrift index descriptions (one per column, referenced by column
// name) to TabletSchema::update_indexes_from_thrift and checks that both show
// up as inverted indexes with the expected ids.
TEST_F(TabletSchemaTest, test_tablet_schema_update_indexes_from_thrift) {
    TabletSchema schema;

    TabletColumn text_col;
    text_col.set_unique_id(3001);
    text_col.set_name("text_col");
    text_col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    schema.append_column(text_col);

    TabletColumn varchar_col;
    varchar_col.set_unique_id(3002);
    varchar_col.set_name("varchar_col");
    varchar_col.set_type(FieldType::OLAP_FIELD_TYPE_VARCHAR);
    schema.append_column(varchar_col);

    // Builds an inverted-index description by assigning the thrift fields
    // directly (no __set_* helpers), exactly one column per index.
    auto make_thrift_index = [](int64_t id, const std::string& name, const std::string& column) {
        TOlapTableIndex described;
        described.index_id = id;
        described.index_name = name;
        described.index_type = TIndexType::INVERTED;
        described.columns = {column};
        return described;
    };

    std::vector<TOlapTableIndex> thrift_indexes;

    TOlapTableIndex text_index = make_thrift_index(101, "text_idx", "text_col");
    text_index.properties["parser"] = "standard";
    thrift_indexes.push_back(text_index);

    TOlapTableIndex varchar_index = make_thrift_index(102, "varchar_idx", "varchar_col");
    varchar_index.properties["support_phrase"] = "false";
    thrift_indexes.push_back(varchar_index);

    schema.update_indexes_from_thrift(thrift_indexes);

    EXPECT_TRUE(schema.has_inverted_index());
    auto all_inverted_indexes = schema.inverted_indexes();
    EXPECT_EQ(2, all_inverted_indexes.size());

    // The result order is not assumed; each index is located by name.
    bool saw_text_idx = false;
    bool saw_varchar_idx = false;
    for (const auto* entry : all_inverted_indexes) {
        if (entry->index_name() == "text_idx") {
            saw_text_idx = true;
            EXPECT_EQ(101, entry->index_id());
            EXPECT_EQ(IndexType::INVERTED, entry->index_type());
            continue;
        }
        if (entry->index_name() == "varchar_idx") {
            saw_varchar_idx = true;
            EXPECT_EQ(102, entry->index_id());
            EXPECT_EQ(IndexType::INVERTED, entry->index_type());
        }
    }
    EXPECT_TRUE(saw_text_idx);
    EXPECT_TRUE(saw_varchar_idx);
}
// Appends a single inverted index to a one-column schema and checks that it is
// returned by TabletSchema::inverted_indexes with the same name and id.
TEST_F(TabletSchemaTest, test_tablet_schema_append_index) {
    TabletSchema schema;

    TabletColumn col;
    col.set_unique_id(4001);
    col.set_name("test_col");
    col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    schema.append_column(col);

    TabletIndexPB index_pb;
    index_pb.set_index_id(201);
    index_pb.set_index_name("append_test_idx");
    index_pb.set_index_type(IndexType::INVERTED);
    index_pb.add_col_unique_id(4001);

    TabletIndex index;
    index.init_from_pb(index_pb);
    schema.append_index(std::move(index));

    EXPECT_TRUE(schema.has_inverted_index());
    auto indexes = schema.inverted_indexes();
    // Fatal assertion: indexes[0] below must not be read from an empty result.
    ASSERT_EQ(1, indexes.size());
    EXPECT_EQ("append_test_idx", indexes[0]->index_name());
    EXPECT_EQ(201, indexes[0]->index_id());
}
// Serialises a variant column with to_schema_pb, reloads it with init_from_pb
// and checks that the compared accessors agree between the two columns.
TEST_F(TabletSchemaTest, test_tablet_column_protobuf_roundtrip) {
    TabletColumn source;
    source.set_unique_id(6001);
    source.set_name("variant_col");
    source.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    source.set_is_nullable(true);
    source.set_variant_max_subcolumns_count(500);

    // Column -> protobuf -> column.
    ColumnPB serialized;
    source.to_schema_pb(&serialized);

    TabletColumn restored;
    restored.init_from_pb(serialized);

    EXPECT_EQ(source.unique_id(), restored.unique_id());
    EXPECT_EQ(source.name(), restored.name());
    EXPECT_EQ(source.type(), restored.type());
    EXPECT_EQ(source.is_nullable(), restored.is_nullable());
    EXPECT_EQ(source.variant_max_subcolumns_count(), restored.variant_max_subcolumns_count());
    // These two were never set explicitly on the source column.
    EXPECT_EQ(source.pattern_type(), restored.pattern_type());
    EXPECT_EQ(source.variant_enable_typed_paths_to_sparse(),
              restored.variant_enable_typed_paths_to_sparse());
}
// Adds three inverted indexes to one column, removes the middle one with
// remove_index, then drops the rest with clear_index, checking the set of
// remaining indexes after each step.
TEST_F(TabletSchemaTest, test_tablet_schema_remove_and_clear_index) {
    TabletSchema schema;

    TabletColumn string_col;
    string_col.set_unique_id(8001);
    string_col.set_name("test_col");
    string_col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    schema.append_column(string_col);

    // Index ids 400, 401 and 402, all on column 8001.
    for (int offset = 0; offset < 3; ++offset) {
        TabletIndexPB pb;
        pb.set_index_id(400 + offset);
        pb.set_index_name("remove_test_idx_" + std::to_string(offset));
        pb.set_index_type(IndexType::INVERTED);
        pb.add_col_unique_id(8001);

        TabletIndex built;
        built.init_from_pb(pb);
        schema.append_index(std::move(built));
    }

    EXPECT_TRUE(schema.has_inverted_index());
    auto initial_indexes = schema.inverted_indexes();
    EXPECT_EQ(3, initial_indexes.size());

    schema.remove_index(401); // Remove the middle one

    auto survivors = schema.inverted_indexes();
    EXPECT_EQ(2, survivors.size());

    bool kept_first = false;
    bool kept_last = false;
    for (const auto* survivor : survivors) {
        kept_first = kept_first || survivor->index_id() == 400;
        kept_last = kept_last || survivor->index_id() == 402;
    }
    EXPECT_TRUE(kept_first);
    EXPECT_TRUE(kept_last);

    schema.clear_index();

    EXPECT_FALSE(schema.has_inverted_index());
    auto remaining = schema.inverted_indexes();
    EXPECT_EQ(0, remaining.size());
}
// Registers per-path index information (typed paths and plain subcolumn paths)
// for a variant column through set_path_set_info, then checks that
// inverted_indexs() on extracted subcolumns returns the index registered for
// the matching path, and nothing for an unknown path or an unknown parent.
TEST_F(TabletSchemaTest, test_tablet_schema_path_set_info_inverted_indexs) {
    TabletSchema schema;

    TabletColumn variant_col;
    variant_col.set_unique_id(9001);
    variant_col.set_name("variant_col");
    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    schema.append_column(variant_col);

    // Builds a shared inverted index with the given id, name and column uid.
    auto create_index = [](int64_t id, const std::string& name, int32_t col_uid) {
        auto index = std::make_shared<TabletIndex>();
        TabletIndexPB index_pb;
        index_pb.set_index_id(id);
        index_pb.set_index_name(name);
        index_pb.set_index_type(IndexType::INVERTED);
        index_pb.add_col_unique_id(col_uid);
        index->init_from_pb(index_pb);
        return index;
    };
    auto typed_index1 = create_index(1001, "typed_path_idx1", 9001);
    auto typed_index2 = create_index(1002, "typed_path_idx2", 9001);
    auto subcolumn_index1 = create_index(2001, "subcolumn_idx1", 9001);
    auto subcolumn_index2 = create_index(2002, "subcolumn_idx2", 9001);

    TabletSchema::PathsSetInfo path_set_info;

    // Typed paths: keyed by the path relative to the variant column.
    TabletSchema::SubColumnInfo typed_sub_col1;
    typed_sub_col1.column = variant_col;
    typed_sub_col1.indexes.push_back(typed_index1);
    path_set_info.typed_path_set["user.name"] = typed_sub_col1;

    TabletSchema::SubColumnInfo typed_sub_col2;
    typed_sub_col2.column = variant_col;
    typed_sub_col2.indexes.push_back(typed_index2);
    path_set_info.typed_path_set["user.age"] = typed_sub_col2;

    // Plain subcolumn paths.
    TabletIndexes subcolumn_indexes1 = {subcolumn_index1};
    TabletIndexes subcolumn_indexes2 = {subcolumn_index2};
    path_set_info.subcolumn_indexes["product.id"] = subcolumn_indexes1;
    path_set_info.subcolumn_indexes["product.price"] = subcolumn_indexes2;

    std::unordered_map<int32_t, TabletSchema::PathsSetInfo> path_set_info_map;
    path_set_info_map[9001] = std::move(path_set_info);
    schema.set_path_set_info(std::move(path_set_info_map));

    // Typed path "user.name".
    TabletColumn typed_extracted_col1;
    typed_extracted_col1.set_unique_id(-1);
    typed_extracted_col1.set_name("variant_col.user.name");
    typed_extracted_col1.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    typed_extracted_col1.set_parent_unique_id(9001);
    vectorized::PathInData typed_path1("variant_col.user.name", true);
    typed_extracted_col1.set_path_info(typed_path1);
    auto typed_indexes = schema.inverted_indexs(typed_extracted_col1);
    // Fatal assertion: element [0] below must not be read from an empty result.
    ASSERT_EQ(1, typed_indexes.size());
    EXPECT_EQ("typed_path_idx1", typed_indexes[0]->index_name());
    EXPECT_EQ(1001, typed_indexes[0]->index_id());

    // Typed path "user.age".
    TabletColumn typed_extracted_col2;
    typed_extracted_col2.set_unique_id(-1);
    typed_extracted_col2.set_name("variant_col.user.age");
    typed_extracted_col2.set_type(FieldType::OLAP_FIELD_TYPE_INT);
    typed_extracted_col2.set_parent_unique_id(9001);
    vectorized::PathInData typed_path2("variant_col.user.age", true);
    typed_extracted_col2.set_path_info(typed_path2);
    auto typed_indexes2 = schema.inverted_indexs(typed_extracted_col2);
    ASSERT_EQ(1, typed_indexes2.size());
    EXPECT_EQ("typed_path_idx2", typed_indexes2[0]->index_name());
    EXPECT_EQ(1002, typed_indexes2[0]->index_id());

    // Test subcolumn path (non-typed)
    TabletColumn subcolumn_extracted_col1;
    subcolumn_extracted_col1.set_unique_id(-1);
    subcolumn_extracted_col1.set_name("variant_col.product.id");
    subcolumn_extracted_col1.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    subcolumn_extracted_col1.set_parent_unique_id(9001);
    vectorized::PathInData subcolumn_path1("variant_col.product.id");
    subcolumn_extracted_col1.set_path_info(subcolumn_path1);
    auto subcolumn_indexes = schema.inverted_indexs(subcolumn_extracted_col1);
    ASSERT_EQ(1, subcolumn_indexes.size());
    EXPECT_EQ("subcolumn_idx1", subcolumn_indexes[0]->index_name());
    EXPECT_EQ(2001, subcolumn_indexes[0]->index_id());

    // Known parent, but a path with no registered index.
    TabletColumn non_existing_col;
    non_existing_col.set_unique_id(-1);
    non_existing_col.set_name("non_existing");
    non_existing_col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    non_existing_col.set_parent_unique_id(9001);
    vectorized::PathInData non_existing_path("variant_col.non.existing");
    non_existing_col.set_path_info(non_existing_path);
    auto no_indexes = schema.inverted_indexs(non_existing_col);
    EXPECT_EQ(0, no_indexes.size());

    // Parent unique id that has no path-set info at all.
    TabletColumn wrong_parent_col;
    wrong_parent_col.set_unique_id(-1);
    wrong_parent_col.set_name("wrong_parent");
    wrong_parent_col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    wrong_parent_col.set_parent_unique_id(9999); // Non-existing parent
    vectorized::PathInData wrong_parent_path("wrong_variant.some.path");
    wrong_parent_col.set_path_info(wrong_parent_path);
    auto no_indexes_wrong_parent = schema.inverted_indexs(wrong_parent_col);
    EXPECT_EQ(0, no_indexes_wrong_parent.size());
}
// Stores a PathsSetInfo with two sub paths and two sparse paths under a
// variant column's unique id and checks that path_set_info() returns them.
TEST_F(TabletSchemaTest, test_tablet_schema_path_set_info_accessors) {
    TabletSchema schema;

    TabletColumn json_col;
    json_col.set_unique_id(10001);
    json_col.set_name("json_data");
    json_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    schema.append_column(json_col);

    TabletSchema::PathsSetInfo registered;
    registered.sub_path_set.insert("extracted_path1");
    registered.sub_path_set.insert("extracted_path2");
    registered.sparse_path_set.insert("sparse_path1");
    registered.sparse_path_set.insert("sparse_path2");

    // The map is keyed by the variant column's unique id.
    std::unordered_map<int32_t, TabletSchema::PathsSetInfo> info_by_uid;
    info_by_uid[10001] = std::move(registered);
    schema.set_path_set_info(std::move(info_by_uid));

    const auto& fetched = schema.path_set_info(10001);

    EXPECT_EQ(2, fetched.sub_path_set.size());
    EXPECT_EQ(2, fetched.sparse_path_set.size());

    EXPECT_TRUE(fetched.sub_path_set.count("extracted_path1") > 0);
    EXPECT_TRUE(fetched.sub_path_set.count("extracted_path2") > 0);
    EXPECT_TRUE(fetched.sparse_path_set.count("sparse_path1") > 0);
    EXPECT_TRUE(fetched.sparse_path_set.count("sparse_path2") > 0);
}
// Attaches five inverted indexes (four with a "field_pattern" property, one
// without) to two variant columns and checks that
// inverted_index_by_field_pattern() returns only the indexes whose column uid
// and pattern string both match the query.
TEST_F(TabletSchemaTest, test_tablet_schema_inverted_index_by_field_pattern) {
    TabletSchema schema;

    TabletColumn col1;
    col1.set_unique_id(11001);
    col1.set_name("variant_col1");
    col1.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    schema.append_column(col1);

    TabletColumn col2;
    col2.set_unique_id(11002);
    col2.set_name("variant_col2");
    col2.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    schema.append_column(col2);

    // Builds the protobuf for an inverted index on one column, without any
    // properties; callers add "field_pattern" / "parser" where needed.
    auto make_index_pb = [](int64_t index_id, const std::string& index_name, int32_t col_uid) {
        TabletIndexPB pb;
        pb.set_index_id(index_id);
        pb.set_index_name(index_name);
        pb.set_index_type(IndexType::INVERTED);
        pb.add_col_unique_id(col_uid);
        return pb;
    };
    // Builds an inverted index carrying a field pattern and a parser, and
    // appends it to the schema.
    auto append_pattern_index = [&](int64_t index_id, const std::string& index_name,
                                    int32_t col_uid, const std::string& field_pattern,
                                    const std::string& parser) {
        TabletIndexPB pb = make_index_pb(index_id, index_name, col_uid);
        auto* properties = pb.mutable_properties();
        (*properties)["field_pattern"] = field_pattern;
        (*properties)["parser"] = parser;
        TabletIndex index;
        index.init_from_pb(pb);
        schema.append_index(std::move(index));
    };

    append_pattern_index(3001, "pattern_idx1", 11001, "user.*", "standard");
    append_pattern_index(3002, "pattern_idx2", 11001, "product.*", "unicode");
    append_pattern_index(3003, "pattern_idx3", 11002, "user.*", "keyword");
    {
        // Index on col1 with no properties at all, hence no field pattern.
        TabletIndex index4;
        index4.init_from_pb(make_index_pb(3004, "no_pattern_idx", 11001));
        schema.append_index(std::move(index4));
    }
    append_pattern_index(3005, "pattern_idx5", 11001, "user.*", "english");

    // col1 / "user.*": two indexes, compared as a set so order does not matter.
    auto user_indexes_col1 = schema.inverted_index_by_field_pattern(11001, "user.*");
    EXPECT_EQ(2, user_indexes_col1.size());
    std::set<std::string> expected_names_user_col1 = {"pattern_idx1", "pattern_idx5"};
    std::set<std::string> actual_names_user_col1;
    for (const auto& index : user_indexes_col1) {
        actual_names_user_col1.insert(index->index_name());
    }
    EXPECT_EQ(expected_names_user_col1, actual_names_user_col1);

    // The size checks below are fatal so element [0] is never read from an
    // empty result.
    auto product_indexes_col1 = schema.inverted_index_by_field_pattern(11001, "product.*");
    ASSERT_EQ(1, product_indexes_col1.size());
    EXPECT_EQ("pattern_idx2", product_indexes_col1[0]->index_name());
    EXPECT_EQ(3002, product_indexes_col1[0]->index_id());

    auto user_indexes_col2 = schema.inverted_index_by_field_pattern(11002, "user.*");
    ASSERT_EQ(1, user_indexes_col2.size());
    EXPECT_EQ("pattern_idx3", user_indexes_col2[0]->index_name());
    EXPECT_EQ(3003, user_indexes_col2[0]->index_id());

    // Queries that are expected to match nothing.
    auto non_existing_pattern = schema.inverted_index_by_field_pattern(11001, "non.existing.*");
    EXPECT_EQ(0, non_existing_pattern.size());

    auto non_existing_column = schema.inverted_index_by_field_pattern(99999, "user.*");
    EXPECT_EQ(0, non_existing_column.size());

    auto empty_pattern = schema.inverted_index_by_field_pattern(11001, "");
    EXPECT_EQ(0, empty_pattern.size());

    // "user.name" is not the stored pattern string "user.*", so no index is
    // expected for it.
    auto different_pattern = schema.inverted_index_by_field_pattern(11001, "user.name");
    EXPECT_EQ(0, different_pattern.size());
}
// Checks TabletIndex::field_pattern() for three cases: the property is set,
// the property is absent, and the property is set to an empty string.
TEST_F(TabletSchemaTest, test_tablet_index_field_pattern_property) {
    // Common part of every index in this test: inverted, on column 12001.
    auto make_pb = [](int64_t id, const std::string& name) {
        TabletIndexPB pb;
        pb.set_index_id(id);
        pb.set_index_name(name);
        pb.set_index_type(IndexType::INVERTED);
        pb.add_col_unique_id(12001);
        return pb;
    };

    // Case 1: a non-empty pattern is reported verbatim.
    TabletIndexPB pattern_pb = make_pb(4001, "test_pattern_idx");
    (*pattern_pb.mutable_properties())["field_pattern"] = "data.*.value";
    (*pattern_pb.mutable_properties())["parser"] = "standard";
    TabletIndex with_pattern;
    with_pattern.init_from_pb(pattern_pb);
    EXPECT_EQ("data.*.value", with_pattern.field_pattern());

    // Case 2: no "field_pattern" property at all.
    TabletIndexPB plain_pb = make_pb(4002, "test_no_pattern_idx");
    (*plain_pb.mutable_properties())["parser"] = "unicode";
    TabletIndex without_pattern;
    without_pattern.init_from_pb(plain_pb);
    EXPECT_EQ("", without_pattern.field_pattern());

    // Case 3: the property exists but is empty.
    TabletIndexPB empty_pb = make_pb(4003, "test_empty_pattern_idx");
    (*empty_pb.mutable_properties())["field_pattern"] = "";
    (*empty_pb.mutable_properties())["parser"] = "keyword";
    TabletIndex empty_pattern;
    empty_pattern.init_from_pb(empty_pb);
    EXPECT_EQ("", empty_pattern.field_pattern());
}
// Tracks TabletSchema::variant_max_subcolumns_count() while columns are
// appended: 0 for an empty or variant-free schema, 100 once a variant column
// with limit 100 is added, and still 100 after a second variant with limit 200.
TEST_F(TabletSchemaTest, test_tablet_schema_variant_max_subcolumns_count) {
    // Builds a column with the given unique id, name and type.
    auto make_column = [](int32_t uid, const std::string& name, FieldType type) {
        TabletColumn column;
        column.set_unique_id(uid);
        column.set_name(name);
        column.set_type(type);
        return column;
    };

    TabletSchema schema;
    EXPECT_EQ(0, schema.variant_max_subcolumns_count());

    TabletColumn string_col = make_column(12001, "string_col", FieldType::OLAP_FIELD_TYPE_STRING);
    schema.append_column(string_col);
    EXPECT_EQ(0, schema.variant_max_subcolumns_count());

    TabletColumn first_variant =
            make_column(12002, "variant_col1", FieldType::OLAP_FIELD_TYPE_VARIANT);
    first_variant.set_variant_max_subcolumns_count(100);
    schema.append_column(first_variant);
    EXPECT_EQ(100, schema.variant_max_subcolumns_count());

    // A second variant column with a different limit leaves the value at 100.
    TabletColumn second_variant =
            make_column(12003, "variant_col2", FieldType::OLAP_FIELD_TYPE_VARIANT);
    second_variant.set_variant_max_subcolumns_count(200);
    schema.append_column(second_variant);
    EXPECT_EQ(100, schema.variant_max_subcolumns_count());

    // A separate schema whose only variant column has limit 0.
    TabletSchema zero_limit_schema;
    TabletColumn zero_variant =
            make_column(12004, "variant_col_zero", FieldType::OLAP_FIELD_TYPE_VARIANT);
    zero_variant.set_variant_max_subcolumns_count(0);
    zero_limit_schema.append_column(zero_variant);
    EXPECT_EQ(0, zero_limit_schema.variant_max_subcolumns_count());
}
// Checks need_record_variant_extended_schema() on six schemas. The expected
// result is true for an empty schema, a schema without variant columns, and a
// variant column with limit 0; it is false whenever the schema holds a variant
// column with a non-zero subcolumn limit.
TEST_F(TabletSchemaTest, test_tablet_schema_need_record_variant_extended_schema) {
    // Appends a non-variant column of the given type to `target`.
    auto add_plain = [](TabletSchema& target, int32_t uid, const std::string& name,
                        FieldType type) {
        TabletColumn column;
        column.set_unique_id(uid);
        column.set_name(name);
        column.set_type(type);
        target.append_column(column);
    };
    // Appends a variant column with the given subcolumn limit to `target`.
    auto add_variant = [](TabletSchema& target, int32_t uid, const std::string& name,
                          int32_t max_subcolumns) {
        TabletColumn column;
        column.set_unique_id(uid);
        column.set_name(name);
        column.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
        column.set_variant_max_subcolumns_count(max_subcolumns);
        target.append_column(column);
    };

    // No columns at all.
    TabletSchema schema_empty;
    EXPECT_TRUE(schema_empty.need_record_variant_extended_schema());

    // Only a non-variant column.
    TabletSchema schema_non_variant;
    add_plain(schema_non_variant, 13001, "int_col", FieldType::OLAP_FIELD_TYPE_INT);
    EXPECT_TRUE(schema_non_variant.need_record_variant_extended_schema());

    // Variant column with limit 0.
    TabletSchema schema_variant_zero;
    add_variant(schema_variant_zero, 13002, "variant_col_zero", 0);
    EXPECT_TRUE(schema_variant_zero.need_record_variant_extended_schema());

    // Variant column with a non-zero limit.
    TabletSchema schema_variant_non_zero;
    add_variant(schema_variant_non_zero, 13003, "variant_col_non_zero", 50);
    EXPECT_FALSE(schema_variant_non_zero.need_record_variant_extended_schema());

    // Regular column followed by a variant column with a non-zero limit.
    TabletSchema schema_mixed;
    add_plain(schema_mixed, 13004, "regular_col", FieldType::OLAP_FIELD_TYPE_VARCHAR);
    add_variant(schema_mixed, 13005, "variant_col_100", 100);
    EXPECT_FALSE(schema_mixed.need_record_variant_extended_schema());

    // Two variant columns with different non-zero limits.
    TabletSchema schema_multiple_variants;
    add_variant(schema_multiple_variants, 13006, "variant_col1", 150);
    add_variant(schema_multiple_variants, 13007, "variant_col2", 300);
    EXPECT_FALSE(schema_multiple_variants.need_record_variant_extended_schema());
}
// Registers an INVERTED index on one column and ANN + NGRAM_BF indexes on
// another, then checks TabletSchema::get_index() lookups by (column uid,
// index type, suffix), including the not-found cases.
TEST_F(TabletSchemaTest, test_tablet_schema_get_index) {
    TabletSchema schema;

    TabletColumn col1;
    col1.set_unique_id(14001);
    col1.set_name("test_col1");
    col1.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
    schema.append_column(col1);

    TabletColumn col2;
    col2.set_unique_id(14002);
    col2.set_name("test_col2");
    col2.set_type(FieldType::OLAP_FIELD_TYPE_VARCHAR);
    schema.append_column(col2);

    TabletIndex inverted_index;
    TabletIndexPB inverted_index_pb;
    inverted_index_pb.set_index_id(5001);
    inverted_index_pb.set_index_name("inverted_idx");
    inverted_index_pb.set_index_type(IndexType::INVERTED);
    inverted_index_pb.add_col_unique_id(14001);
    inverted_index.init_from_pb(inverted_index_pb);

    TabletIndex ann_index;
    TabletIndexPB ann_index_pb;
    ann_index_pb.set_index_id(5003);
    ann_index_pb.set_index_name("ann_idx");
    ann_index_pb.set_index_type(IndexType::ANN);
    ann_index_pb.add_col_unique_id(14002);
    ann_index.init_from_pb(ann_index_pb);

    TabletIndex ngram_bf_index;
    TabletIndexPB ngram_bf_index_pb;
    ngram_bf_index_pb.set_index_id(5004);
    ngram_bf_index_pb.set_index_name("ngram_bf_idx");
    ngram_bf_index_pb.set_index_type(IndexType::NGRAM_BF);
    ngram_bf_index_pb.add_col_unique_id(14002);
    ngram_bf_index.init_from_pb(ngram_bf_index_pb);

    schema.append_index(std::move(inverted_index));
    schema.append_index(std::move(ann_index));
    schema.append_index(std::move(ngram_bf_index));

    // Every successful lookup is guarded by a fatal null check, because the
    // returned pointer is dereferenced on the following lines.
    const TabletIndex* found_inverted = schema.get_index(14001, IndexType::INVERTED, "");
    ASSERT_NE(nullptr, found_inverted);
    EXPECT_EQ("inverted_idx", found_inverted->index_name());
    EXPECT_EQ(5001, found_inverted->index_id());

    const TabletIndex* found_ann = schema.get_index(14002, IndexType::ANN, "");
    ASSERT_NE(nullptr, found_ann);
    EXPECT_EQ("ann_idx", found_ann->index_name());
    EXPECT_EQ(5003, found_ann->index_id());

    const TabletIndex* found_ngram_bf = schema.get_index(14002, IndexType::NGRAM_BF, "");
    ASSERT_NE(nullptr, found_ngram_bf);
    EXPECT_EQ("ngram_bf_idx", found_ngram_bf->index_name());
    EXPECT_EQ(5004, found_ngram_bf->index_id());

    // Unknown column uid.
    const TabletIndex* not_found = schema.get_index(99999, IndexType::INVERTED, "");
    EXPECT_EQ(nullptr, not_found);

    // Empty suffix finds the index; a non-empty suffix does not.
    const TabletIndex* empty_suffix = schema.get_index(14001, IndexType::INVERTED, "");
    ASSERT_NE(nullptr, empty_suffix);
    EXPECT_EQ("inverted_idx", empty_suffix->index_name());

    const TabletIndex* with_suffix = schema.get_index(14001, IndexType::INVERTED, "test_suffix");
    EXPECT_EQ(nullptr, with_suffix);

    EXPECT_TRUE(found_inverted->is_inverted_index());
    EXPECT_EQ(IndexType::INVERTED, found_inverted->index_type());
    EXPECT_EQ(IndexType::ANN, found_ann->index_type());
    EXPECT_EQ(IndexType::NGRAM_BF, found_ngram_bf->index_type());

    // Size checks are fatal so element [0] is never read from an empty vector.
    const auto& inverted_col_ids = found_inverted->col_unique_ids();
    ASSERT_EQ(1, inverted_col_ids.size());
    EXPECT_EQ(14001, inverted_col_ids[0]);

    const auto& ann_col_ids = found_ann->col_unique_ids();
    ASSERT_EQ(1, ann_col_ids.size());
    EXPECT_EQ(14002, ann_col_ids[0]);
}
} // namespace doris