|  | // Licensed to the Apache Software Foundation (ASF) under one | 
|  | // or more contributor license agreements.  See the NOTICE file | 
|  | // distributed with this work for additional information | 
|  | // regarding copyright ownership.  The ASF licenses this file | 
|  | // to you under the Apache License, Version 2.0 (the | 
|  | // "License"); you may not use this file except in compliance | 
|  | // with the License.  You may obtain a copy of the License at | 
|  | // | 
|  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, | 
|  | // software distributed under the License is distributed on an | 
|  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | // KIND, either express or implied.  See the License for the | 
|  | // specific language governing permissions and limitations | 
|  | // under the License. | 
|  |  | 
|  | #include "olap/task/index_builder.h" | 
|  |  | 
|  | #include <gmock/gmock.h> | 
|  | #include <gtest/gtest.h> | 
|  |  | 
|  | #include "olap/olap_common.h" | 
|  | #include "olap/rowset/beta_rowset.h" | 
|  | #include "olap/rowset/rowset_factory.h" | 
|  | #include "olap/rowset/rowset_writer_context.h" | 
|  | #include "olap/storage_engine.h" | 
|  | #include "olap/tablet_fwd.h" | 
|  | #include "olap/tablet_schema.h" | 
|  |  | 
|  | namespace doris { | 
|  | using namespace testing; | 
|  |  | 
|  | class IndexBuilderTest : public ::testing::Test { | 
|  | protected: | 
|  | void SetUp() override { | 
|  | char buffer[MAX_PATH_LEN]; | 
|  | ASSERT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); | 
|  | _current_dir = std::string(buffer); | 
|  | _absolute_dir = _current_dir + "/" + std::string(dest_dir); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok()); | 
|  |  | 
|  | std::vector<StorePath> paths; | 
|  | paths.emplace_back(config::storage_root_path, -1); | 
|  |  | 
|  | // tmp dir | 
|  | EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok()); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok()); | 
|  | paths.emplace_back(std::string(tmp_dir), 1024000000); | 
|  | auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths); | 
|  | EXPECT_TRUE(tmp_file_dirs->init().ok()); | 
|  | ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs)); | 
|  |  | 
|  | // use memory limit | 
|  | int64_t inverted_index_cache_limit = 0; | 
|  | _inverted_index_searcher_cache = std::unique_ptr<segment_v2::InvertedIndexSearcherCache>( | 
|  | InvertedIndexSearcherCache::create_global_instance(inverted_index_cache_limit, | 
|  | 256)); | 
|  |  | 
|  | ExecEnv::GetInstance()->set_inverted_index_searcher_cache( | 
|  | _inverted_index_searcher_cache.get()); | 
|  | doris::EngineOptions options; | 
|  | options.store_paths = paths; | 
|  |  | 
|  | auto engine = std::make_unique<StorageEngine>(options); | 
|  | _engine_ref = engine.get(); | 
|  | _data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir); | 
|  | ASSERT_TRUE(_data_dir->update_capacity().ok()); | 
|  | ExecEnv::GetInstance()->set_storage_engine(std::move(engine)); | 
|  |  | 
|  | _tablet_meta = create_tablet_meta(); | 
|  |  | 
|  | // Create tablet meta | 
|  | // auto* tablet_schema = _tablet_meta->mutable_tablet_schema(); | 
|  | _tablet_schema = std::make_shared<TabletSchema>(); | 
|  | create_tablet_schema(_tablet_schema, KeysType::DUP_KEYS); | 
|  | // Initialize tablet | 
|  | _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta, _data_dir.get()); | 
|  | ASSERT_TRUE(_tablet->init().ok()); | 
|  | } | 
|  |  | 
|  | void TearDown() override { | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok()); | 
|  | ExecEnv::GetInstance()->set_storage_engine(nullptr); | 
|  | _tablet.reset(); | 
|  | } | 
|  |  | 
|  | void create_tablet_schema(TabletSchemaSPtr tablet_schema, KeysType keystype, | 
|  | int num_value_col = 1) { | 
|  | // Set basic properties of TabletSchema directly | 
|  | tablet_schema->_keys_type = keystype; | 
|  | tablet_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V2; | 
|  |  | 
|  | // Create the first key column | 
|  | TabletColumn column_1; | 
|  | column_1.set_type(FieldType::OLAP_FIELD_TYPE_INT); | 
|  | column_1.set_unique_id(1); | 
|  | column_1.set_name("k1"); | 
|  | column_1.set_is_key(true); | 
|  | column_1.set_index_length(4); | 
|  | tablet_schema->append_column(column_1); | 
|  |  | 
|  | // Create the second key column | 
|  | TabletColumn column_2; | 
|  | column_2.set_type(FieldType::OLAP_FIELD_TYPE_INT); | 
|  | column_2.set_unique_id(2); | 
|  | column_2.set_name("k2"); | 
|  | column_2.set_is_key(false); | 
|  | tablet_schema->append_column(column_2); | 
|  | } | 
|  |  | 
|  | TabletMetaSharedPtr create_tablet_meta() { | 
|  | TabletMetaPB tablet_meta_pb; | 
|  | tablet_meta_pb.set_table_id(1); | 
|  | tablet_meta_pb.set_tablet_id(15673); | 
|  | tablet_meta_pb.set_schema_hash(567997577); | 
|  | tablet_meta_pb.set_shard_id(0); | 
|  | tablet_meta_pb.set_creation_time(1575351212); | 
|  |  | 
|  | TabletMetaSharedPtr tablet_meta(new TabletMeta()); | 
|  | tablet_meta->init_from_pb(tablet_meta_pb); | 
|  | return tablet_meta; | 
|  | } | 
|  |  | 
|  | // Helper to create rowset meta | 
|  | void init_rs_meta(RowsetMetaSharedPtr& rs_meta, TabletSchemaSPtr tablet_schema, int64_t start, | 
|  | int64_t end) { | 
|  | RowsetMetaPB rowset_meta_pb; | 
|  | rowset_meta_pb.set_rowset_id(540081); | 
|  | rowset_meta_pb.set_tablet_id(15673); | 
|  | rowset_meta_pb.set_tablet_schema_hash(567997577); | 
|  | rowset_meta_pb.set_rowset_type(RowsetTypePB::BETA_ROWSET); | 
|  | rowset_meta_pb.set_rowset_state(RowsetStatePB::VISIBLE); | 
|  | rowset_meta_pb.set_start_version(start); | 
|  | rowset_meta_pb.set_end_version(end); | 
|  | rowset_meta_pb.set_num_rows(3929); | 
|  | rowset_meta_pb.set_total_disk_size(84699); | 
|  | rowset_meta_pb.set_data_disk_size(84464); | 
|  | rowset_meta_pb.set_index_disk_size(235); | 
|  | rowset_meta_pb.set_num_segments(2); | 
|  |  | 
|  | rs_meta->init_from_pb(rowset_meta_pb); | 
|  | rs_meta->set_tablet_schema(tablet_schema); | 
|  | } | 
|  |  | 
|  | StorageEngine* _engine_ref = nullptr; | 
|  | TabletSharedPtr _tablet; | 
|  | TabletMetaSharedPtr _tablet_meta; | 
|  | TabletSchemaSPtr _tablet_schema; | 
|  | std::vector<TColumn> _columns; | 
|  | std::vector<doris::TOlapTableIndex> _alter_indexes; | 
|  | std::unique_ptr<DataDir> _data_dir = nullptr; | 
|  | std::string _current_dir; | 
|  | std::string _absolute_dir; | 
|  | std::unique_ptr<InvertedIndexSearcherCache> _inverted_index_searcher_cache; | 
|  |  | 
|  | constexpr static uint32_t MAX_PATH_LEN = 1024; | 
|  | constexpr static std::string_view dest_dir = "./ut_dir/index_builder_test"; | 
|  | constexpr static std::string_view tmp_dir = "./ut_dir/index_builder_test"; | 
|  | }; | 
|  |  | 
|  | TEST_F(IndexBuilderTest, BasicBuildTest) { | 
|  | // 1. Prepare test data | 
|  | TOlapTableIndex index; | 
|  | index.index_id = 1; | 
|  | index.columns.emplace_back("col1"); | 
|  | _alter_indexes.push_back(index); | 
|  |  | 
|  | // 2. Create IndexBuilder | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 3. Verify initialization | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, DropIndexTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(15676); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 2. First add an initial index to the schema (for k1 column) | 
|  | TabletIndex initial_index; | 
|  | initial_index._index_id = 1; | 
|  | initial_index._index_name = "k1_index"; | 
|  | initial_index._index_type = IndexType::INVERTED; | 
|  | initial_index._col_unique_ids.push_back(1); // unique_id for k1 | 
|  | _tablet_schema->append_index(std::move(initial_index)); | 
|  |  | 
|  | // 3. Create a rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15676); | 
|  | writer_context.tablet_id = 15676; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = tablet_path; | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = _tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 4. Create a rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 5. Write data to the rowset | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 6. Verify index exists before dropping | 
|  | EXPECT_TRUE(_tablet_schema->has_inverted_index()); | 
|  | EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1)); | 
|  |  | 
|  | // 7. Prepare index for dropping | 
|  | TOlapTableIndex drop_index; | 
|  | drop_index.index_id = 1; | 
|  | drop_index.columns.emplace_back("k1"); | 
|  | _alter_indexes.push_back(drop_index); | 
|  |  | 
|  | // 8. Create IndexBuilder with drop operation | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, true); | 
|  |  | 
|  | // 9. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); | 
|  |  | 
|  | // 10. Execute drop operation | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 11. Verify the index has been removed | 
|  | // check old tablet path and new tablet path | 
|  | bool exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); | 
|  | EXPECT_TRUE(exists); | 
|  |  | 
|  | // Check files in old and new directories | 
|  | std::vector<io::FileInfo> files; | 
|  | bool dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files, &dir_exists).ok()); | 
|  | EXPECT_TRUE(dir_exists); | 
|  | int new_idx_file_count = 0; | 
|  | int new_dat_file_count = 0; | 
|  | int old_idx_file_count = 0; | 
|  | int old_dat_file_count = 0; | 
|  | for (const auto& file : files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find("15676_0.idx") != std::string::npos) { | 
|  | old_idx_file_count++; | 
|  | } | 
|  | if (filename.find("15676_0.dat") != std::string::npos) { | 
|  | old_dat_file_count++; | 
|  | } | 
|  | if (filename.find("020000000000000100000000000000000000000000000000_0.idx") != | 
|  | std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | if (filename.find("020000000000000100000000000000000000000000000000_0.dat") != | 
|  | std::string::npos) { | 
|  | new_dat_file_count++; | 
|  | } | 
|  | } | 
|  | // The index should have been removed | 
|  | EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file before drop"; | 
|  | EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file before drop"; | 
|  | EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file after drop"; | 
|  | EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop"; | 
|  |  | 
|  | //auto tablet_schema = _tablet->tablet_schema(); | 
|  | //EXPECT_FALSE(tablet_schema->has_inverted_index_with_index_id(1)); | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, BuildIndexAfterWritingDataTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14673); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 2. Create a rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15673); | 
|  | writer_context.tablet_id = 15673; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15673); | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = _tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 3. Create a rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 4. Write data to the rowset | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns according to the schema | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 5. Prepare index for building | 
|  | TOlapTableIndex index1; | 
|  | index1.index_id = 1; | 
|  | index1.columns.emplace_back("k1"); | 
|  | index1.index_name = "k1_index"; | 
|  | index1.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(index1); | 
|  |  | 
|  | TOlapTableIndex index2; | 
|  | index2.index_id = 2; | 
|  | index2.columns.emplace_back("k2"); | 
|  | index2.index_name = "k2_index"; | 
|  | index2.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(index2); | 
|  |  | 
|  | // 6. Create IndexBuilder | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 7. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 2); | 
|  |  | 
|  | // 8. Build index | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // check old tablet path and new tablet path | 
|  | auto old_tablet_path = _absolute_dir + "/" + std::to_string(15673); | 
|  | auto new_tablet_path = _absolute_dir + "/" + std::to_string(14673); | 
|  | bool old_exists = false; | 
|  | bool new_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); | 
|  | EXPECT_TRUE(old_exists); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); | 
|  | EXPECT_TRUE(new_exists); | 
|  |  | 
|  | // Check files in old and new directories | 
|  | std::vector<io::FileInfo> old_files; | 
|  | bool old_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(old_tablet_path, true, &old_files, &old_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(old_dir_exists); | 
|  | int idx_file_count = 0; | 
|  | int dat_file_count = 0; | 
|  | for (const auto& file : old_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(idx_file_count, 0) << "Old directory should contain exactly 0 .idx file"; | 
|  | EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | std::vector<io::FileInfo> new_files; | 
|  | bool new_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(new_tablet_path, true, &new_files, &new_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(new_dir_exists); | 
|  | int new_idx_file_count = 0; | 
|  | int new_dat_file_count = 0; | 
|  | for (const auto& file : new_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | new_dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files"; | 
|  | EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | // 9. Verify the result (indexes should be built successfully) | 
|  | //auto tablet_schema = _tablet->tablet_schema(); | 
|  | //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(1)); | 
|  | //EXPECT_TRUE(tablet_schema->has_inverted_index_with_index_id(2)); | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14675); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 2. First add an initial index to the schema (for k1 column) | 
|  | TabletIndex initial_index; | 
|  | initial_index._index_id = 1; | 
|  | initial_index._index_name = "k1_index"; | 
|  | initial_index._index_type = IndexType::INVERTED; | 
|  | initial_index._col_unique_ids.push_back(1); // unique_id for k1 | 
|  | _tablet_schema->append_index(std::move(initial_index)); | 
|  |  | 
|  | // 3. Create rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15675); | 
|  | writer_context.tablet_id = 15675; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15675); | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = _tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 4. Create rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 5. Write data to rowset | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add block to rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add rowset to tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 6. Prepare new index information (only add for k2 column) | 
|  | TOlapTableIndex new_index; | 
|  | new_index.index_id = 2; // New index ID is 2 | 
|  | new_index.columns.emplace_back("k2"); | 
|  | new_index.index_name = "k2_index"; | 
|  | new_index.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(new_index); | 
|  |  | 
|  | // 7. Create IndexBuilder | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 8. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only one new index needs to be built | 
|  |  | 
|  | // 9. Build index | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // check old tablet path and new tablet path | 
|  | auto old_tablet_path = _absolute_dir + "/" + std::to_string(15675); | 
|  | auto new_tablet_path = _absolute_dir + "/" + std::to_string(14675); | 
|  | bool old_exists = false; | 
|  | bool new_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); | 
|  | EXPECT_TRUE(old_exists); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); | 
|  | EXPECT_TRUE(new_exists); | 
|  |  | 
|  | // Check files in old and new directories | 
|  | std::vector<io::FileInfo> old_files; | 
|  | bool old_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(old_tablet_path, true, &old_files, &old_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(old_dir_exists); | 
|  | int idx_file_count = 0; | 
|  | int dat_file_count = 0; | 
|  | for (const auto& file : old_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(idx_file_count, 1) << "Old directory should contain exactly 1 .idx file"; | 
|  | EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | std::vector<io::FileInfo> new_files; | 
|  | bool new_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(new_tablet_path, true, &new_files, &new_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(new_dir_exists); | 
|  | int new_idx_file_count = 0; | 
|  | int new_dat_file_count = 0; | 
|  | for (const auto& file : new_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | new_dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(new_idx_file_count, 1) << "New directory should contain exactly 1 .idx files"; | 
|  | EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | // 10. Verify results (both indexes should exist) | 
|  | // Verify initial index (k1) still exists | 
|  | //EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(1)); | 
|  | // Verify newly added index (k2) is successfully built | 
|  | //EXPECT_TRUE(_tablet_schema->has_inverted_index_with_index_id(2)); | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTestV1) { | 
|  | // 1. Create new schema using V1 format | 
|  | auto v1_schema = std::make_shared<TabletSchema>(); | 
|  | create_tablet_schema(v1_schema, KeysType::DUP_KEYS); | 
|  |  | 
|  | // 2. Modify to V1 format | 
|  | v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; | 
|  |  | 
|  | // 3. First add an initial index to the schema (for k1 column) | 
|  | TabletIndex initial_index; | 
|  | initial_index._index_id = 1; | 
|  | initial_index._index_name = "k1_index"; | 
|  | initial_index._index_type = IndexType::INVERTED; | 
|  | initial_index._col_unique_ids.push_back(1); // unique_id for k1 | 
|  | v1_schema->append_index(std::move(initial_index)); | 
|  |  | 
|  | // 4. Update schema in tablet | 
|  | TabletMetaPB tablet_meta_pb; | 
|  | _tablet_meta->to_meta_pb(&tablet_meta_pb, false); | 
|  |  | 
|  | TabletSchemaPB v1_schema_pb; | 
|  | v1_schema->to_schema_pb(&v1_schema_pb); | 
|  | tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); | 
|  |  | 
|  | _tablet_meta->init_from_pb(tablet_meta_pb); | 
|  |  | 
|  | // Reinitialize tablet to use new schema | 
|  | _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta, _data_dir.get()); | 
|  | ASSERT_TRUE(_tablet->init().ok()); | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14674); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 5. Prepare data | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 6. Create rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15674); | 
|  | writer_context.tablet_id = 15674; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15674); | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = v1_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 7. Create rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 8. Write data to rowset | 
|  | { | 
|  | vectorized::Block block = v1_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add block to rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add rowset to tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 9. Clear existing index list, prepare new index | 
|  | _alter_indexes.clear(); | 
|  |  | 
|  | // 10. Prepare new index information (only add for k2 column) | 
|  | TOlapTableIndex new_index; | 
|  | new_index.index_id = 2; // New index ID is 2 | 
|  | new_index.columns.emplace_back("k2"); | 
|  | new_index.index_name = "k2_index"; | 
|  | new_index.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(new_index); | 
|  |  | 
|  | // 11. Create IndexBuilder | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 12. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only one new index needs to be built | 
|  |  | 
|  | // 13. Build index | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // check old tablet path and new tablet path | 
|  | auto old_tablet_path = _absolute_dir + "/" + std::to_string(15674); | 
|  | auto new_tablet_path = _absolute_dir + "/" + std::to_string(14674); | 
|  | bool old_exists = false; | 
|  | bool new_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); | 
|  | EXPECT_TRUE(old_exists); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); | 
|  | EXPECT_TRUE(new_exists); | 
|  |  | 
|  | // Check files in old and new directories | 
|  | std::vector<io::FileInfo> old_files; | 
|  | bool old_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(old_tablet_path, true, &old_files, &old_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(old_dir_exists); | 
|  | int idx_file_count = 0; | 
|  | int dat_file_count = 0; | 
|  | for (const auto& file : old_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(idx_file_count, 1) << "Old directory should contain exactly 1 .idx file"; | 
|  | EXPECT_EQ(dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | std::vector<io::FileInfo> new_files; | 
|  | bool new_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(new_tablet_path, true, &new_files, &new_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(new_dir_exists); | 
|  | int new_idx_file_count = 0; | 
|  | int new_dat_file_count = 0; | 
|  | for (const auto& file : new_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | new_dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(new_idx_file_count, 2) << "New directory should contain exactly 2 .idx files"; | 
|  | EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; | 
|  | // 14. Verify results (both indexes should exist) | 
|  | // Verify initial index (k1) still exists | 
|  | //EXPECT_TRUE(v1_schema->has_inverted_index_with_index_id(1)); | 
|  | // Verify newly added index (k2) is successfully built | 
|  | //EXPECT_TRUE(_tablet->tablet_schema()->has_inverted_index_with_index_id(2)); | 
|  |  | 
|  | // 15. Confirm storage format is still V1 | 
|  | //EXPECT_EQ(v1_schema->_inverted_index_storage_format, InvertedIndexStorageFormatPB::V1); | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, MultiSegmentBuildIndexTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14677); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int rows_per_segment = 500; | 
|  | const int num_segments = 3; | 
|  |  | 
|  | // 2. Create a rowset writer context with segment size set to trigger multiple segments | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15677); | 
|  | writer_context.tablet_id = 15677; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15677); | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = _tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  | // Set small segment size to ensure we create multiple segments | 
|  | writer_context.max_rows_per_segment = rows_per_segment; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 3. Create a rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 4. Write data to the rowset in multiple batches to ensure we get multiple segments | 
|  | for (int segment = 0; segment < num_segments; segment++) { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < rows_per_segment; ++i) { | 
|  | // k1 column (int) - make values different across segments | 
|  | int32_t k1 = (segment * rows_per_segment + i) * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = (segment * rows_per_segment + i) % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush to ensure we create a new segment | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  | } | 
|  |  | 
|  | // 5. Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Verify we have the expected number of segments | 
|  | ASSERT_EQ(rowset->num_segments(), num_segments) | 
|  | << "Rowset should have " << num_segments << " segments but has " | 
|  | << rowset->num_segments(); | 
|  |  | 
|  | // 6. Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  |  | 
|  | // 7. Prepare indexes for building | 
|  | TOlapTableIndex index1; | 
|  | index1.index_id = 1; | 
|  | index1.columns.emplace_back("k1"); | 
|  | index1.index_name = "k1_index"; | 
|  | index1.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(index1); | 
|  |  | 
|  | TOlapTableIndex index2; | 
|  | index2.index_id = 2; | 
|  | index2.columns.emplace_back("k2"); | 
|  | index2.index_name = "k2_index"; | 
|  | index2.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(index2); | 
|  |  | 
|  | // 8. Create IndexBuilder | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 9. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 2); | 
|  |  | 
|  | // 10. Build indexes | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 11. Check paths and files | 
|  | auto old_tablet_path = _absolute_dir + "/" + std::to_string(15677); | 
|  | auto new_tablet_path = _absolute_dir + "/" + std::to_string(14677); | 
|  | bool old_exists = false; | 
|  | bool new_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); | 
|  | EXPECT_TRUE(old_exists); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); | 
|  | EXPECT_TRUE(new_exists); | 
|  |  | 
|  | // 12. Check files in old and new directories | 
|  | std::vector<io::FileInfo> old_files; | 
|  | bool old_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(old_tablet_path, true, &old_files, &old_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(old_dir_exists); | 
|  | int old_idx_file_count = 0; | 
|  | int old_dat_file_count = 0; | 
|  | for (const auto& file : old_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | old_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | old_dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(old_idx_file_count, 0) << "Old directory should contain exactly 0 .idx files"; | 
|  | EXPECT_EQ(old_dat_file_count, num_segments) | 
|  | << "Old directory should contain exactly " << num_segments << " .dat files"; | 
|  |  | 
|  | std::vector<io::FileInfo> new_files; | 
|  | bool new_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(new_tablet_path, true, &new_files, &new_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(new_dir_exists); | 
|  | int new_idx_file_count = 0; | 
|  | int new_dat_file_count = 0; | 
|  | for (const auto& file : new_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | new_dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(new_idx_file_count, num_segments) | 
|  | << "New directory should contain exactly " << num_segments << " .idx files"; | 
|  | EXPECT_EQ(new_dat_file_count, num_segments) | 
|  | << "New directory should contain exactly " << num_segments << " .dat files"; | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14678); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 2. Create a rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15678); | 
|  | writer_context.tablet_id = 15678; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15678); | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = _tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 3. Create a rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 4. Write data to the rowset | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 5. Prepare indexes for building - including one for a non-existent column | 
|  | _alter_indexes.clear(); | 
|  |  | 
|  | // Index for non-existent column "k3" | 
|  | TOlapTableIndex index2; | 
|  | index2.index_id = 2; | 
|  | index2.columns.emplace_back("k3"); // This column doesn't exist in the schema | 
|  | index2.index_name = "k3_index"; | 
|  | index2.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(index2); | 
|  |  | 
|  | // 6. Create IndexBuilder | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 7. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | // The init should succeed, as we'll skip non-existent columns later | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 8. Build indexes - should only build for existing columns | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 9. Check paths and files | 
|  | auto old_tablet_path = _absolute_dir + "/" + std::to_string(15678); | 
|  | auto new_tablet_path = _absolute_dir + "/" + std::to_string(14678); | 
|  | bool old_exists = false; | 
|  | bool new_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); | 
|  | EXPECT_TRUE(old_exists); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); | 
|  | EXPECT_TRUE(new_exists); | 
|  |  | 
|  | // 10. Check files in old and new directories | 
|  | std::vector<io::FileInfo> old_files; | 
|  | bool old_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(old_tablet_path, true, &old_files, &old_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(old_dir_exists); | 
|  | int old_idx_file_count = 0; | 
|  | int old_dat_file_count = 0; | 
|  | for (const auto& file : old_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | old_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | old_dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(old_idx_file_count, 0) << "Old directory should contain exactly 0 .idx files"; | 
|  | EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | std::vector<io::FileInfo> new_files; | 
|  | bool new_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(new_tablet_path, true, &new_files, &new_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(new_dir_exists); | 
|  | int new_idx_file_count = 0; | 
|  | int new_dat_file_count = 0; | 
|  | for (const auto& file : new_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | new_dat_file_count++; | 
|  | } | 
|  | } | 
|  | // Should only have index for k1, not for non-existent k3 | 
|  | EXPECT_EQ(new_idx_file_count, 0) | 
|  | << "New directory should contain exactly 0 .idx file for the existing column"; | 
|  | EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | // 11. Verify logs in the output to confirm k3 index was skipped | 
|  | // This would require examining the log output which isn't easily done in unit tests, | 
|  | // but the file count verification above should be sufficient to confirm behavior | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, RenameColumnIndexTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14679); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  | auto schema = std::make_shared<TabletSchema>(); | 
|  |  | 
|  | schema->_keys_type = KeysType::UNIQUE_KEYS; | 
|  | schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V2; | 
|  |  | 
|  | // Create the first key column | 
|  | TabletColumn column_1; | 
|  | column_1.set_type(FieldType::OLAP_FIELD_TYPE_INT); | 
|  | column_1.set_unique_id(1); | 
|  | column_1.set_name("k1"); | 
|  | column_1.set_is_key(true); | 
|  | column_1.set_index_length(4); | 
|  | schema->append_column(column_1); | 
|  |  | 
|  | // Create the second key column | 
|  | TabletColumn column_2; | 
|  | column_2.set_type(FieldType::OLAP_FIELD_TYPE_INT); | 
|  | // not sequential unique_id | 
|  | column_2.set_unique_id(3); | 
|  | column_2.set_name("k2"); | 
|  | column_2.set_is_key(false); | 
|  | schema->append_column(column_2); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 2. First add an initial index to the schema (for k1 column) | 
|  | TabletIndex initial_index; | 
|  | initial_index._index_id = 1; | 
|  | initial_index._index_name = "k1_index"; | 
|  | initial_index._index_type = IndexType::INVERTED; | 
|  | initial_index._col_unique_ids.push_back(1); // unique_id for k1 | 
|  | schema->append_index(std::move(initial_index)); | 
|  |  | 
|  | // 3. Create a rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15679); | 
|  | writer_context.tablet_id = 15679; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15679); | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 4. Create a rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 5. Write data to the rowset | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 6. Prepare indexes for building - valid k2 and non-existent k3 | 
|  | _alter_indexes.clear(); | 
|  |  | 
|  | // Index for rename column "k2" to "k3" | 
|  | TOlapTableIndex index2; | 
|  | index2.index_id = 3; | 
|  | index2.columns.emplace_back("k3"); // This column doesn't exist in the schema | 
|  | index2.index_name = "k3_index"; | 
|  | index2.index_type = TIndexType::INVERTED; | 
|  | index2.column_unique_ids.push_back(3); | 
|  | index2.__isset.column_unique_ids = true; | 
|  | _alter_indexes.push_back(index2); | 
|  |  | 
|  | // 7. Create IndexBuilder | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 8. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only k1 is considered for building | 
|  |  | 
|  | // 9. Build indexes - should only build for existing columns | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 10. Check paths and files | 
|  | auto old_tablet_path = _absolute_dir + "/" + std::to_string(15679); | 
|  | auto new_tablet_path = _absolute_dir + "/" + std::to_string(14679); | 
|  | bool old_exists = false; | 
|  | bool new_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); | 
|  | EXPECT_TRUE(old_exists); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); | 
|  | EXPECT_TRUE(new_exists); | 
|  |  | 
|  | // 11. Check files in old and new directories | 
|  | std::vector<io::FileInfo> old_files; | 
|  | bool old_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(old_tablet_path, true, &old_files, &old_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(old_dir_exists); | 
|  | int old_idx_file_count = 0; | 
|  | int old_dat_file_count = 0; | 
|  | for (const auto& file : old_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | old_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | old_dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(old_idx_file_count, 1) | 
|  | << "Old directory should contain exactly 1 .idx file for the original k1 index"; | 
|  | EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | std::vector<io::FileInfo> new_files; | 
|  | bool new_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(new_tablet_path, true, &new_files, &new_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(new_dir_exists); | 
|  | int new_idx_file_count = 0; | 
|  | int new_dat_file_count = 0; | 
|  | for (const auto& file : new_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | new_dat_file_count++; | 
|  | } | 
|  | } | 
|  | // Should have 2 index files: original k1 index and new k2 index (k3 should be skipped) | 
|  | EXPECT_EQ(new_idx_file_count, 1) | 
|  | << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; | 
|  | EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | // 12. Verify the tablet schema - would need to examine tablet_schema here | 
|  | // k1 and k2 indexes should exist, k3 index should not | 
|  | // Note: In production code, additional verification of schema would be done here | 
|  | } | 
|  | TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14679); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 2. First add an initial index to the schema (for k1 column) | 
|  | TabletIndex initial_index; | 
|  | initial_index._index_id = 1; | 
|  | initial_index._index_name = "k1_index"; | 
|  | initial_index._index_type = IndexType::INVERTED; | 
|  | initial_index._col_unique_ids.push_back(1); // unique_id for k1 | 
|  | _tablet_schema->append_index(std::move(initial_index)); | 
|  |  | 
|  | // 3. Create a rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15679); | 
|  | writer_context.tablet_id = 15679; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15679); | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = _tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 4. Create a rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 5. Write data to the rowset | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 6. Prepare indexes for building - valid k2 and non-existent k3 | 
|  | _alter_indexes.clear(); | 
|  |  | 
|  | // Index for non-existent column "k3" | 
|  | TOlapTableIndex index2; | 
|  | index2.index_id = 3; | 
|  | index2.columns.emplace_back("k3"); // This column doesn't exist in the schema | 
|  | index2.index_name = "k3_index"; | 
|  | index2.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(index2); | 
|  |  | 
|  | // 7. Create IndexBuilder | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 8. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); // Only k1 is considered for building | 
|  |  | 
|  | // 9. Build indexes - should only build for existing columns | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 10. Check paths and files | 
|  | auto old_tablet_path = _absolute_dir + "/" + std::to_string(15679); | 
|  | auto new_tablet_path = _absolute_dir + "/" + std::to_string(14679); | 
|  | bool old_exists = false; | 
|  | bool new_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); | 
|  | EXPECT_TRUE(old_exists); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); | 
|  | EXPECT_TRUE(new_exists); | 
|  |  | 
|  | // 11. Check files in old and new directories | 
|  | std::vector<io::FileInfo> old_files; | 
|  | bool old_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(old_tablet_path, true, &old_files, &old_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(old_dir_exists); | 
|  | int old_idx_file_count = 0; | 
|  | int old_dat_file_count = 0; | 
|  | for (const auto& file : old_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | old_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | old_dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(old_idx_file_count, 1) | 
|  | << "Old directory should contain exactly 1 .idx file for the original k1 index"; | 
|  | EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | std::vector<io::FileInfo> new_files; | 
|  | bool new_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(new_tablet_path, true, &new_files, &new_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(new_dir_exists); | 
|  | int new_idx_file_count = 0; | 
|  | int new_dat_file_count = 0; | 
|  | for (const auto& file : new_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | new_dat_file_count++; | 
|  | } | 
|  | } | 
|  | // Should have 2 index files: original k1 index and new k2 index (k3 should be skipped) | 
|  | EXPECT_EQ(new_idx_file_count, 1) | 
|  | << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; | 
|  | EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | // 12. Verify the tablet schema - would need to examine tablet_schema here | 
|  | // k1 and k2 indexes should exist, k3 index should not | 
|  | // Note: In production code, additional verification of schema would be done here | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTestV1) { | 
|  | // 1. Create new schema using V1 format | 
|  | auto v1_schema = std::make_shared<TabletSchema>(); | 
|  | create_tablet_schema(v1_schema, KeysType::DUP_KEYS); | 
|  |  | 
|  | // 2. Modify to V1 format | 
|  | v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; | 
|  |  | 
|  | // 3. First add an initial index to the schema (for k1 column) | 
|  | TabletIndex initial_index; | 
|  | initial_index._index_id = 1; | 
|  | initial_index._index_name = "k1_index"; | 
|  | initial_index._index_type = IndexType::INVERTED; | 
|  | initial_index._col_unique_ids.push_back(1); // unique_id for k1 | 
|  | v1_schema->append_index(std::move(initial_index)); | 
|  |  | 
|  | // 4. Update schema in tablet | 
|  | TabletMetaPB tablet_meta_pb; | 
|  | _tablet_meta->to_meta_pb(&tablet_meta_pb, false); | 
|  |  | 
|  | TabletSchemaPB v1_schema_pb; | 
|  | v1_schema->to_schema_pb(&v1_schema_pb); | 
|  | tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); | 
|  |  | 
|  | _tablet_meta->init_from_pb(tablet_meta_pb); | 
|  |  | 
|  | // 5. Reinitialize tablet to use new schema | 
|  | _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta, _data_dir.get()); | 
|  | ASSERT_TRUE(_tablet->init().ok()); | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14680); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 6. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 7. Create rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15680); | 
|  | writer_context.tablet_id = 15680; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15680); | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = v1_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 8. Create rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 9. Write data to rowset | 
|  | { | 
|  | vectorized::Block block = v1_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add block to rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add rowset to tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 10. Prepare indexes for building - valid k2 and non-existent k3 | 
|  | _alter_indexes.clear(); | 
|  |  | 
|  | // Index for non-existent column "k3" | 
|  | TOlapTableIndex index2; | 
|  | index2.index_id = 3; | 
|  | index2.columns.emplace_back("k3"); // This column doesn't exist in the schema | 
|  | index2.index_name = "k3_index"; | 
|  | index2.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(index2); | 
|  |  | 
|  | // Add column information for the non-existent column | 
|  | _columns.clear(); // Clear previous column info | 
|  | TColumn non_existent_column; | 
|  | non_existent_column.column_name = "k3"; | 
|  | non_existent_column.column_type.type = TPrimitiveType::INT; | 
|  | _columns.push_back(non_existent_column); | 
|  |  | 
|  | // 11. Create IndexBuilder | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 12. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); | 
|  | // 13. Build indexes - should only build for existing columns | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 14. Check paths and files | 
|  | auto old_tablet_path = _absolute_dir + "/" + std::to_string(15680); | 
|  | auto new_tablet_path = _absolute_dir + "/" + std::to_string(14680); | 
|  | bool old_exists = false; | 
|  | bool new_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); | 
|  | EXPECT_TRUE(old_exists); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); | 
|  | EXPECT_TRUE(new_exists); | 
|  |  | 
|  | // 15. Check files in old and new directories | 
|  | std::vector<io::FileInfo> old_files; | 
|  | bool old_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(old_tablet_path, true, &old_files, &old_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(old_dir_exists); | 
|  | int old_idx_file_count = 0; | 
|  | int old_dat_file_count = 0; | 
|  | for (const auto& file : old_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | old_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | old_dat_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(old_idx_file_count, 1) | 
|  | << "Old directory should contain exactly 1 .idx file for the original k1 index"; | 
|  | EXPECT_EQ(old_dat_file_count, 1) << "Old directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | std::vector<io::FileInfo> new_files; | 
|  | bool new_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(new_tablet_path, true, &new_files, &new_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(new_dir_exists); | 
|  | int new_idx_file_count = 0; | 
|  | int new_dat_file_count = 0; | 
|  | for (const auto& file : new_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | if (filename.find(".dat") != std::string::npos) { | 
|  | new_dat_file_count++; | 
|  | } | 
|  | } | 
|  | // Should have 2 index files: original k1 index and new k2 index (k3 should be skipped) | 
|  | EXPECT_EQ(new_idx_file_count, 1) | 
|  | << "New directory should contain exactly 1 .idx files (for k1 and k2, not k3)"; | 
|  | EXPECT_EQ(new_dat_file_count, 1) << "New directory should contain exactly 1 .dat file"; | 
|  |  | 
|  | // 16. Confirm storage format is still V1 | 
|  | EXPECT_EQ(v1_schema->_inverted_index_storage_format, InvertedIndexStorageFormatPB::V1); | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, NonNullIndexDataTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14681); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 2. Create a rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15681); | 
|  | writer_context.tablet_id = 15681; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15681); | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = _tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 3. Create a rowset writer with non-null values | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 4. Write non-null data to the rowset | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns with no null values | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 5. Prepare indexes for building - only use non-nullable fields | 
|  | TOlapTableIndex index1; | 
|  | index1.index_id = 1; | 
|  | index1.columns.emplace_back("k1"); | 
|  | index1.index_name = "k1_index"; | 
|  | index1.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(index1); | 
|  |  | 
|  | // 6. Force columns to be treated as non-null by modifying schema | 
|  | TabletSchemaSPtr non_null_schema = std::make_shared<TabletSchema>(); | 
|  | create_tablet_schema(non_null_schema, KeysType::DUP_KEYS); | 
|  | // Set the second column to be non-nullable explicitly | 
|  | TabletColumn& k2_column = non_null_schema->mutable_column(1); | 
|  | k2_column.set_is_nullable(false); | 
|  |  | 
|  | // 7. Create IndexBuilder with the modified schema | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 8. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); | 
|  |  | 
|  | // 9. Build index - should trigger _add_data rather than _add_nullable | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 10. Verify results | 
|  | auto old_tablet_path = _absolute_dir + "/" + std::to_string(15681); | 
|  | auto new_tablet_path = _absolute_dir + "/" + std::to_string(14681); | 
|  | bool old_exists = false; | 
|  | bool new_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); | 
|  | EXPECT_TRUE(old_exists); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); | 
|  | EXPECT_TRUE(new_exists); | 
|  |  | 
|  | // 11. Check files in old and new directories | 
|  | std::vector<io::FileInfo> old_files; | 
|  | bool old_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(old_tablet_path, true, &old_files, &old_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(old_dir_exists); | 
|  |  | 
|  | std::vector<io::FileInfo> new_files; | 
|  | bool new_dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem() | 
|  | ->list(new_tablet_path, true, &new_files, &new_dir_exists) | 
|  | .ok()); | 
|  | EXPECT_TRUE(new_dir_exists); | 
|  | int new_idx_file_count = 0; | 
|  | for (const auto& file : new_files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find(".idx") != std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | } | 
|  | EXPECT_EQ(new_idx_file_count, 1) << "Should have created 1 index file"; | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, NonExistentColumnUniqueIdTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14682); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 2. Create a rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15682); | 
|  | writer_context.tablet_id = 15682; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = _absolute_dir + "/" + std::to_string(15682); | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = _tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 3. Create a rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 4. Write data to the rowset | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 5. First add an initial index to the schema (for k1 column) | 
|  | TabletIndex initial_index; | 
|  | initial_index._index_id = 1; | 
|  | initial_index._index_name = "k1_index"; | 
|  | initial_index._index_type = IndexType::INVERTED; | 
|  | initial_index._col_unique_ids.push_back(1); // unique_id for k1 | 
|  | _tablet_schema->append_index(std::move(initial_index)); | 
|  |  | 
|  | // 6. Prepare indexes for building - specifying column by unique_id that doesn't exist | 
|  | _alter_indexes.clear(); | 
|  |  | 
|  | // Use drop operation to test column_unique_ids path | 
|  | TOlapTableIndex drop_index; | 
|  | drop_index.index_id = 1; | 
|  | drop_index.columns.emplace_back("non_existent_column"); | 
|  | drop_index.column_unique_ids.push_back(999); // This unique ID doesn't exist | 
|  | _alter_indexes.push_back(drop_index); | 
|  |  | 
|  | // 7. Create IndexBuilder with drop operation | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, true); | 
|  |  | 
|  | // 8. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); | 
|  |  | 
|  | // 9. Execute drop operation - should handle non-existent column gracefully | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 10. Verify paths exists - operations should complete without errors | 
|  | auto old_tablet_path = _absolute_dir + "/" + std::to_string(15682); | 
|  | auto new_tablet_path = _absolute_dir + "/" + std::to_string(14682); | 
|  | bool old_exists = false; | 
|  | bool new_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(old_tablet_path, &old_exists).ok()); | 
|  | EXPECT_TRUE(old_exists); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(new_tablet_path, &new_exists).ok()); | 
|  | EXPECT_TRUE(new_exists); | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, DropIndexV1FormatTest) { | 
|  | // 1. Create new schema using V1 format | 
|  | auto v1_schema = std::make_shared<TabletSchema>(); | 
|  | create_tablet_schema(v1_schema, KeysType::DUP_KEYS); | 
|  |  | 
|  | // 2. Modify to V1 format | 
|  | v1_schema->_inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; | 
|  |  | 
|  | // 3. Add an initial index to the schema (for k1 column) | 
|  | TabletIndex initial_index; | 
|  | initial_index._index_id = 1; | 
|  | initial_index._index_name = "k1_index"; | 
|  | initial_index._index_type = IndexType::INVERTED; | 
|  | initial_index._col_unique_ids.push_back(1); // unique_id for k1 | 
|  | v1_schema->append_index(std::move(initial_index)); | 
|  |  | 
|  | // 4. Update schema in tablet | 
|  | TabletMetaPB tablet_meta_pb; | 
|  | _tablet_meta->to_meta_pb(&tablet_meta_pb, false); | 
|  |  | 
|  | TabletSchemaPB v1_schema_pb; | 
|  | v1_schema->to_schema_pb(&v1_schema_pb); | 
|  | tablet_meta_pb.mutable_schema()->CopyFrom(v1_schema_pb); | 
|  |  | 
|  | _tablet_meta->init_from_pb(tablet_meta_pb); | 
|  |  | 
|  | // 5. Reinitialize tablet to use new schema | 
|  | _tablet = std::make_shared<Tablet>(*_engine_ref, _tablet_meta, _data_dir.get()); | 
|  | ASSERT_TRUE(_tablet->init().ok()); | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(15683); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 6. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 7. Create a rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15683); | 
|  | writer_context.tablet_id = 15683; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = tablet_path; | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = v1_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 8. Create a rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 9. Write data to the rowset | 
|  | { | 
|  | vectorized::Block block = v1_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 10. Prepare to drop the k1 index | 
|  | _alter_indexes.clear(); | 
|  | TOlapTableIndex drop_index; | 
|  | drop_index.index_id = 1; | 
|  | drop_index.columns.emplace_back("k1"); | 
|  | drop_index.index_name = "k1_index"; | 
|  | drop_index.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(drop_index); | 
|  |  | 
|  | // 11. Create IndexBuilder with drop operation | 
|  | IndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, true); | 
|  |  | 
|  | // 12. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); | 
|  |  | 
|  | // 13. Execute drop operation | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 14. Verify paths exists | 
|  | bool exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); | 
|  | EXPECT_TRUE(exists); | 
|  |  | 
|  | // 15. Verify the index has been removed | 
|  | std::vector<io::FileInfo> files; | 
|  | bool dir_exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->list(tablet_path, true, &files, &dir_exists).ok()); | 
|  | EXPECT_TRUE(dir_exists); | 
|  |  | 
|  | // Verify no index files in the new directory | 
|  | int new_idx_file_count = 0; | 
|  | int new_dat_file_count = 0; | 
|  | int old_idx_file_count = 0; | 
|  | int old_dat_file_count = 0; | 
|  | for (const auto& file : files) { | 
|  | std::string filename = file.file_name; | 
|  | if (filename.find("15683_0_1.idx") != std::string::npos) { | 
|  | old_idx_file_count++; | 
|  | } | 
|  | if (filename.find("15683_0.dat") != std::string::npos) { | 
|  | old_dat_file_count++; | 
|  | } | 
|  | if (filename.find("020000000000000100000000000000000000000000000000_0_1.idx") != | 
|  | std::string::npos) { | 
|  | new_idx_file_count++; | 
|  | } | 
|  | if (filename.find("020000000000000100000000000000000000000000000000_0.dat") != | 
|  | std::string::npos) { | 
|  | new_dat_file_count++; | 
|  | } | 
|  | } | 
|  | // The index should have been removed | 
|  | EXPECT_EQ(old_idx_file_count, 1) << "Tablet path should have 1 .idx file before drop"; | 
|  | EXPECT_EQ(old_dat_file_count, 1) << "Tablet path should have 1 .dat file before drop"; | 
|  | EXPECT_EQ(new_idx_file_count, 0) << "Tablet path should have no .idx file after drop"; | 
|  | EXPECT_EQ(new_dat_file_count, 1) << "Tablet path should have 1 .dat file after drop"; | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, ResourceCleanupTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(15684); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int num_rows = 1000; | 
|  |  | 
|  | // 2. Create a rowset writer context | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15684); | 
|  | writer_context.tablet_id = 15684; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = tablet_path; | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = _tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 3. Create a rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 4. Write data to the rowset | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < num_rows; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 5. Prepare indexes for building | 
|  | TOlapTableIndex index1; | 
|  | index1.index_id = 1; | 
|  | index1.columns.emplace_back("k1"); | 
|  | index1.index_name = "k1_index"; | 
|  | index1.index_type = TIndexType::INVERTED; | 
|  | _alter_indexes.push_back(index1); | 
|  |  | 
|  | // Create a custom IndexBuilder with a spy function to test resource cleanup | 
|  | class TestIndexBuilder : public IndexBuilder { | 
|  | public: | 
|  | TestIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, | 
|  | const std::vector<TColumn>& columns, | 
|  | const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, | 
|  | bool is_drop_op) | 
|  | : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op) {} | 
|  |  | 
|  | ~TestIndexBuilder() override = default; | 
|  | // Override update_inverted_index_info to inject failure | 
|  | Status update_inverted_index_info() override { | 
|  | RETURN_IF_ERROR(IndexBuilder::update_inverted_index_info()); | 
|  | // Create a fake error to trigger cleanup | 
|  | return Status::Error<ErrorCode::INTERNAL_ERROR>("Simulated error for testing cleanup"); | 
|  | } | 
|  | }; | 
|  |  | 
|  | // 6. Create our test builder | 
|  | TestIndexBuilder builder(ExecEnv::GetInstance()->storage_engine().to_local(), _tablet, _columns, | 
|  | _alter_indexes, false); | 
|  |  | 
|  | // 7. Initialize and verify | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  | EXPECT_EQ(builder._alter_index_ids.size(), 1); | 
|  |  | 
|  | // 8. Build index - should fail with our simulated error | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_FALSE(status.ok()) << "Expected failure, but got success"; | 
|  | EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>()) << "Expected internal error"; | 
|  | EXPECT_EQ(status.to_string(), "[INTERNAL_ERROR]Simulated error for testing cleanup") | 
|  | << "Error message doesn't match expected"; | 
|  |  | 
|  | // Verify the paths haven't been modified since the operation failed | 
|  | bool exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(tablet_path, &exists).ok()); | 
|  | EXPECT_TRUE(exists); | 
|  |  | 
|  | auto rowset_id = extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat"); | 
|  | EXPECT_TRUE(_engine_ref->check_rowset_id_in_unused_rowsets(rowset_id)) | 
|  | << "Rowset id should be in unused rowsets"; | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, ArrayTypeIndexTest) { | 
|  | // 1. Prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14685); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 2. Create tablet schema with array type | 
|  | auto tablet_schema = std::make_shared<TabletSchema>(); | 
|  | TabletColumn column_1(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, | 
|  | FieldType::OLAP_FIELD_TYPE_INT, true); | 
|  | column_1.set_unique_id(1); | 
|  | column_1.set_is_key(true); | 
|  | column_1.set_name("k1"); | 
|  | column_1.set_index_length(4); | 
|  | tablet_schema->append_column(column_1); | 
|  |  | 
|  | // Array type column | 
|  | TabletColumn column_2; | 
|  | column_2.set_unique_id(2); | 
|  | column_2.set_is_key(false); | 
|  | column_2.set_name("array_col"); | 
|  | column_2.set_type(FieldType::OLAP_FIELD_TYPE_ARRAY); | 
|  | column_2.set_is_nullable(false); | 
|  | // Add a primitive type for array items | 
|  | TabletColumn array_item_column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, | 
|  | FieldType::OLAP_FIELD_TYPE_VARCHAR, true); | 
|  | array_item_column.set_is_key(false); | 
|  | array_item_column.set_length(64); | 
|  | column_2.add_sub_column(array_item_column); | 
|  | tablet_schema->append_column(column_2); | 
|  |  | 
|  | // 3. Create tablet | 
|  | auto tablet_meta = create_tablet_meta(); | 
|  | auto tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get()); | 
|  | tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(tablet->init().ok()); | 
|  |  | 
|  | // 4. Add inverted index for array column | 
|  | _columns.clear(); | 
|  | TColumn tc1; | 
|  | tc1.column_name = "array_col"; | 
|  | _columns.push_back(tc1); | 
|  |  | 
|  | _alter_indexes.clear(); | 
|  | TOlapTableIndex tt_index; | 
|  | tt_index.index_id = 1; | 
|  | tt_index.index_name = "array_index"; | 
|  | tt_index.columns.emplace_back("array_col"); | 
|  | tt_index.column_unique_ids.push_back(2); | 
|  | tt_index.index_type = TIndexType::type::INVERTED; | 
|  | _alter_indexes.push_back(tt_index); | 
|  |  | 
|  | // 5. Create a rowset writer | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(14685); | 
|  | writer_context.tablet_id = 14685; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = tablet_path; | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | // 6. Create rowset writer | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | // 7. Create data block and write data | 
|  | { | 
|  | vectorized::Block block = tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Prepare columns for k1 and array_col | 
|  | for (int i = 0; i < 1000; i++) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // array_col column | 
|  | // Create array data with 1-5 elements | 
|  | int array_size = i % 5 + 1; | 
|  |  | 
|  | // For array type, we need to create a complex nested column structure | 
|  | auto& array_col = static_cast<vectorized::ColumnArray&>(*columns[1]); | 
|  | vectorized::Array arr; | 
|  | // Add string elements to the array | 
|  | for (int j = 0; j < array_size; j++) { | 
|  | std::string val = "item_" + std::to_string(i) + "_" + std::to_string(j); | 
|  | arr.push_back(vectorized::Field::create_field<TYPE_STRING>(val)); | 
|  | } | 
|  | array_col.insert(vectorized::Field::create_field<TYPE_ARRAY>(arr)); | 
|  | } | 
|  |  | 
|  | // Add block to rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  | } | 
|  |  | 
|  | // 8. Build rowset | 
|  | RowsetSharedPtr rowset; | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  | ASSERT_TRUE(rowset != nullptr); | 
|  | ASSERT_TRUE(tablet->add_rowset(rowset).ok()); | 
|  |  | 
|  | // 9. Initialize and build inverted index | 
|  | IndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false); | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 10. Verify that the index has been created | 
|  | std::string segment_path = local_segment_path( | 
|  | tablet->tablet_path(), | 
|  | extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat").to_string(), | 
|  | 0); | 
|  |  | 
|  | if (tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { | 
|  | // V1 format | 
|  | auto index_path = InvertedIndexDescriptor::get_index_file_path_v1( | 
|  | InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), 1, ""); | 
|  | bool exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); | 
|  | EXPECT_TRUE(exists) << "Index file not found: " << index_path; | 
|  | } else { | 
|  | // V2+ format | 
|  | auto index_path = InvertedIndexDescriptor::get_index_file_path_v2( | 
|  | InvertedIndexDescriptor::get_index_file_path_prefix(segment_path)); | 
|  | bool exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); | 
|  | EXPECT_TRUE(exists) << "Index file not found: " << index_path; | 
|  | } | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, UniqueKeysTableIndexTest) { | 
|  | // 0. prepare tablet path | 
|  | auto tablet_path = _absolute_dir + "/" + std::to_string(14688); | 
|  | _tablet->_tablet_path = tablet_path; | 
|  | _tablet->_tablet_meta->_schema = _tablet_schema; | 
|  | _tablet->_tablet_meta->_schema->_keys_type = KeysType::UNIQUE_KEYS; | 
|  | _tablet->_tablet_meta->_enable_unique_key_merge_on_write = true; | 
|  | ASSERT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 1. Prepare data for writing | 
|  | RowsetSharedPtr rowset; | 
|  | const int rows_per_segment = 500; | 
|  |  | 
|  | // 2. Create a rowset writer context with segment size set to trigger multiple segments | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id.init(15677); | 
|  | writer_context.tablet_id = 15677; | 
|  | writer_context.tablet_schema_hash = 567997577; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.tablet_path = tablet_path; | 
|  | writer_context.rowset_state = VISIBLE; | 
|  | writer_context.tablet_schema = _tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  | // Set small segment size to ensure we create multiple segments | 
|  | writer_context.max_rows_per_segment = rows_per_segment; | 
|  |  | 
|  | ASSERT_TRUE(io::global_local_filesystem()->create_directory(writer_context.tablet_path).ok()); | 
|  |  | 
|  | auto res = RowsetFactory::create_rowset_writer(*_engine_ref, writer_context, false); | 
|  | ASSERT_TRUE(res.has_value()) << res.error(); | 
|  | auto rowset_writer = std::move(res).value(); | 
|  |  | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < 1000; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(_tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 6. Create test class that overrides methods to simulate unique key table behavior | 
|  | class TestIndexBuilder : public IndexBuilder { | 
|  | public: | 
|  | TestIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, | 
|  | const std::vector<TColumn>& columns, | 
|  | const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, | 
|  | bool is_drop_op) | 
|  | : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op) {} | 
|  |  | 
|  | ~TestIndexBuilder() override = default; | 
|  |  | 
|  | // Override to make sure modify_rowsets with UNIQUE_KEYS path is called | 
|  | Status modify_rowsets(const Merger::Statistics* stats = nullptr) override { | 
|  | // Call parent method which should use the UNIQUE_KEYS path | 
|  | return IndexBuilder::modify_rowsets(stats); | 
|  | } | 
|  | }; | 
|  |  | 
|  | _alter_indexes.clear(); | 
|  | TOlapTableIndex tt_index; | 
|  | tt_index.index_id = 1; | 
|  | tt_index.index_name = "k1_index"; | 
|  | tt_index.columns.emplace_back("k1"); | 
|  | tt_index.column_unique_ids.push_back(1); | 
|  | tt_index.index_type = TIndexType::type::INVERTED; | 
|  | _alter_indexes.push_back(tt_index); | 
|  | // 7. Initialize and build inverted index | 
|  | TestIndexBuilder builder(*_engine_ref, _tablet, _columns, _alter_indexes, false); | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 8. Execute build index, which should go through UNIQUE_KEYS path in modify_rowsets | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // 9. Verify that the index was created successfully | 
|  | std::string segment_path = local_segment_path( | 
|  | _tablet->tablet_path(), | 
|  | extract_rowset_id("020000000000000100000000000000000000000000000000_0.dat").to_string(), | 
|  | 0); | 
|  |  | 
|  | if (_tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { | 
|  | auto index_path = InvertedIndexDescriptor::get_index_file_path_v1( | 
|  | InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), 1, ""); | 
|  | bool exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); | 
|  | EXPECT_TRUE(exists) << "Index file not found: " << index_path; | 
|  | } else { | 
|  | auto index_path = InvertedIndexDescriptor::get_index_file_path_v2( | 
|  | InvertedIndexDescriptor::get_index_file_path_prefix(segment_path)); | 
|  | bool exists = false; | 
|  | EXPECT_TRUE(io::global_local_filesystem()->exists(index_path, &exists).ok()); | 
|  | EXPECT_TRUE(exists) << "Index file not found: " << index_path; | 
|  | } | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, HandleSingleRowsetErrorTest) { | 
|  | // 1. Create a test class that overrides handle_single_rowset to simulate error scenarios | 
|  | class TestIndexBuilder : public IndexBuilder { | 
|  | public: | 
|  | TestIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, | 
|  | const std::vector<TColumn>& columns, | 
|  | const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, | 
|  | bool is_drop_op, bool simulate_non_local_rowset_error = false) | 
|  | : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op), | 
|  | _simulate_non_local_rowset_error(simulate_non_local_rowset_error) {} | 
|  |  | 
|  | ~TestIndexBuilder() override = default; | 
|  |  | 
|  | // Override to simulate error conditions | 
|  | Status handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta, | 
|  | std::vector<segment_v2::SegmentSharedPtr>& segments) override { | 
|  | if (_simulate_non_local_rowset_error) { | 
|  | // Simulate the condition where is_local_rowset is false | 
|  | return Status::InternalError("should be local rowset. tablet_id={} rowset_id={}", | 
|  | 123, "test_rowset_id"); | 
|  | } | 
|  |  | 
|  | // Call parent method for normal processing | 
|  | return IndexBuilder::handle_single_rowset(output_rowset_meta, segments); | 
|  | } | 
|  |  | 
|  | private: | 
|  | bool _simulate_non_local_rowset_error; | 
|  | }; | 
|  |  | 
|  | // 2. Prepare tablet path | 
|  | std::string tablet_path = _absolute_dir + "/" + std::to_string(14687); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 3. Set up tablet schema and tablet | 
|  | TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); | 
|  | create_tablet_schema(tablet_schema, KeysType::DUP_KEYS, 2); | 
|  |  | 
|  | auto tablet_meta = create_tablet_meta(); | 
|  | tablet_meta->_schema = tablet_schema; | 
|  | auto tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get()); | 
|  | tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(tablet->init().ok()); | 
|  |  | 
|  | // 4. Create inverted index definition | 
|  | _alter_indexes.clear(); | 
|  | TOlapTableIndex tt_index; | 
|  | tt_index.index_id = 1; | 
|  | tt_index.index_name = "k1_index"; | 
|  | tt_index.columns.emplace_back("k1"); | 
|  | tt_index.column_unique_ids.push_back(1); | 
|  | tt_index.index_type = TIndexType::type::INVERTED; | 
|  | _alter_indexes.push_back(tt_index); | 
|  |  | 
|  | // 5. Create a rowset | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id = _engine_ref->next_rowset_id(); | 
|  | writer_context.tablet_id = 14687; | 
|  | writer_context.tablet_path = tablet_path; | 
|  | writer_context.tablet_schema_hash = 1111; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.segments_overlap = NONOVERLAPPING; | 
|  | writer_context.tablet_schema = tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | auto result = tablet->create_rowset_writer(writer_context, false); | 
|  | EXPECT_TRUE(result.has_value()) << result.error(); | 
|  | auto rowset_writer = std::move(result).value(); | 
|  |  | 
|  | { | 
|  | vectorized::Block block = _tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < 1000; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | RowsetSharedPtr rowset; | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  |  | 
|  | // 6. Test error scenario with non-local rowset | 
|  | TestIndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false, true); | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // Execute build_index, which should fail due to simulated error | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_FALSE(status.ok()) << "Expected failure but got success"; | 
|  | EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>()) | 
|  | << "Expected internal error but got: " << status.to_string(); | 
|  | EXPECT_TRUE(status.to_string().find("should be local rowset") != std::string::npos) | 
|  | << "Error message doesn't match expected: " << status.to_string(); | 
|  | } | 
|  |  | 
|  | TEST_F(IndexBuilderTest, UpdateInvertedIndexInfoErrorTest) { | 
|  | // 1. Create a test class that overrides update_inverted_index_info to simulate error scenarios | 
|  | class TestIndexBuilder : public IndexBuilder { | 
|  | public: | 
|  | TestIndexBuilder(StorageEngine& engine, TabletSharedPtr tablet, | 
|  | const std::vector<TColumn>& columns, | 
|  | const std::vector<doris::TOlapTableIndex>& alter_inverted_indexes, | 
|  | bool is_drop_op, int error_type = 0) | 
|  | : IndexBuilder(engine, tablet, columns, alter_inverted_indexes, is_drop_op), | 
|  | _error_type(error_type) {} | 
|  |  | 
|  | ~TestIndexBuilder() override = default; | 
|  |  | 
|  | // Override update_inverted_index_info to inject errors | 
|  | Status update_inverted_index_info() override { | 
|  | if (_error_type == 1) { | 
|  | // Simulate non-local rowset error in update_inverted_index_info | 
|  | return Status::InternalError("should be local rowset. tablet_id={} rowset_id={}", | 
|  | 123, "test_rowset_id"); | 
|  | } else if (_error_type == 2) { | 
|  | // Simulate size retrieval error | 
|  | return Status::Error<ErrorCode::INIT_FAILED>("debug point: get fs failed"); | 
|  | } | 
|  |  | 
|  | // Call parent method for normal processing | 
|  | return IndexBuilder::update_inverted_index_info(); | 
|  | } | 
|  |  | 
|  | private: | 
|  | int _error_type; // 0: no error, 1: non-local rowset error, 2: size retrieval error | 
|  | }; | 
|  |  | 
|  | // 2. Prepare tablet path | 
|  | std::string tablet_path = _absolute_dir + "/" + std::to_string(14688); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tablet_path).ok()); | 
|  | EXPECT_TRUE(io::global_local_filesystem()->create_directory(tablet_path).ok()); | 
|  |  | 
|  | // 3. Set up tablet schema and tablet | 
|  | TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); | 
|  | create_tablet_schema(tablet_schema, KeysType::DUP_KEYS, 2); | 
|  |  | 
|  | auto tablet_meta = create_tablet_meta(); | 
|  | tablet_meta->_schema = tablet_schema; | 
|  | auto tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get()); | 
|  | tablet->_tablet_path = tablet_path; | 
|  | ASSERT_TRUE(tablet->init().ok()); | 
|  |  | 
|  | // 4. Create inverted index definition | 
|  | _alter_indexes.clear(); | 
|  | TOlapTableIndex tt_index; | 
|  | tt_index.index_id = 1; | 
|  | tt_index.index_name = "k1_index"; | 
|  | tt_index.columns.emplace_back("k1"); | 
|  | tt_index.column_unique_ids.push_back(1); | 
|  | tt_index.index_type = TIndexType::type::INVERTED; | 
|  | _alter_indexes.push_back(tt_index); | 
|  |  | 
|  | // 5. Create a rowset | 
|  | RowsetWriterContext writer_context; | 
|  | writer_context.rowset_id = _engine_ref->next_rowset_id(); | 
|  | writer_context.tablet_id = 14688; | 
|  | writer_context.tablet_path = tablet_path; | 
|  | writer_context.tablet_schema_hash = 1111; | 
|  | writer_context.partition_id = 10; | 
|  | writer_context.rowset_type = BETA_ROWSET; | 
|  | writer_context.segments_overlap = NONOVERLAPPING; | 
|  | writer_context.tablet_schema = tablet_schema; | 
|  | writer_context.version.first = 10; | 
|  | writer_context.version.second = 10; | 
|  |  | 
|  | auto result = tablet->create_rowset_writer(writer_context, false); | 
|  | EXPECT_TRUE(result.has_value()) << result.error(); | 
|  | auto rowset_writer = std::move(result).value(); | 
|  |  | 
|  | // Write data | 
|  | { | 
|  | vectorized::Block block = tablet_schema->create_block(); | 
|  | auto columns = block.mutate_columns(); | 
|  |  | 
|  | // Add data for k1 and k2 columns | 
|  | for (int i = 0; i < 1000; ++i) { | 
|  | // k1 column (int) | 
|  | int32_t k1 = i * 10; | 
|  | columns[0]->insert_data((const char*)&k1, sizeof(k1)); | 
|  |  | 
|  | // k2 column (int) | 
|  | int32_t k2 = i % 100; | 
|  | columns[1]->insert_data((const char*)&k2, sizeof(k2)); | 
|  | } | 
|  |  | 
|  | // Add the block to the rowset | 
|  | Status s = rowset_writer->add_block(&block); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Flush the writer | 
|  | s = rowset_writer->flush(); | 
|  | ASSERT_TRUE(s.ok()) << s.to_string(); | 
|  |  | 
|  | // Build the rowset | 
|  | RowsetSharedPtr rowset; | 
|  | ASSERT_TRUE(rowset_writer->build(rowset).ok()); | 
|  |  | 
|  | // Add the rowset to the tablet | 
|  | ASSERT_TRUE(tablet->add_rowset(rowset).ok()); | 
|  | } | 
|  | // 6. Test error scenarios | 
|  |  | 
|  | // 6.1 Test non-local rowset error | 
|  | { | 
|  | TestIndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false, 1); | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // Execute build_index, which should fail due to simulated error | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_FALSE(status.ok()) << "Expected failure but got success"; | 
|  | EXPECT_TRUE(status.is<ErrorCode::INTERNAL_ERROR>()) | 
|  | << "Expected internal error but got: " << status.to_string(); | 
|  | EXPECT_TRUE(status.to_string().find("should be local rowset") != std::string::npos) | 
|  | << "Error message doesn't match expected: " << status.to_string(); | 
|  | } | 
|  |  | 
|  | // 6.2 Test size retrieval error | 
|  | { | 
|  | TestIndexBuilder builder(*_engine_ref, tablet, _columns, _alter_indexes, false, 2); | 
|  | auto status = builder.init(); | 
|  | EXPECT_TRUE(status.ok()) << status.to_string(); | 
|  |  | 
|  | // Execute build_index, which should fail due to simulated error | 
|  | status = builder.do_build_inverted_index(); | 
|  | EXPECT_FALSE(status.ok()) << "Expected failure but got success"; | 
|  | EXPECT_TRUE(status.is<ErrorCode::INIT_FAILED>()) | 
|  | << "Expected INIT_FAILED but got: " << status.to_string(); | 
|  | EXPECT_TRUE(status.to_string().find("debug point: get fs failed") != std::string::npos) | 
|  | << "Error message doesn't match expected: " << status.to_string(); | 
|  | } | 
|  | } | 
|  |  | 
|  | } // namespace doris |