| // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
| // This source code is licensed under both the GPLv2 (found in the |
| // COPYING file in the root directory) and Apache 2.0 License |
| // (found in the LICENSE.Apache file in the root directory). |
| // |
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. See the AUTHORS file for names of contributors. |
| |
| #include "table/index_builder.h" |
| #include <assert.h> |
| #include <inttypes.h> |
| |
| #include <list> |
| #include <string> |
| |
| #include "rocksdb/comparator.h" |
| #include "rocksdb/flush_block_policy.h" |
| #include "table/format.h" |
| #include "table/partitioned_filter_block.h" |
| |
| // Without anonymous namespace here, we fail the warning -Wmissing-prototypes |
| namespace rocksdb { |
| // using namespace rocksdb; |
| // Create a index builder based on its type. |
| IndexBuilder* IndexBuilder::CreateIndexBuilder( |
| BlockBasedTableOptions::IndexType index_type, |
| const InternalKeyComparator* comparator, |
| const InternalKeySliceTransform* int_key_slice_transform, |
| const BlockBasedTableOptions& table_opt) { |
| switch (index_type) { |
| case BlockBasedTableOptions::kBinarySearch: { |
| return new ShortenedIndexBuilder(comparator, |
| table_opt.index_block_restart_interval); |
| } |
| case BlockBasedTableOptions::kHashSearch: { |
| return new HashIndexBuilder(comparator, int_key_slice_transform, |
| table_opt.index_block_restart_interval); |
| } |
| case BlockBasedTableOptions::kTwoLevelIndexSearch: { |
| return PartitionedIndexBuilder::CreateIndexBuilder(comparator, table_opt); |
| } |
| default: { |
| assert(!"Do not recognize the index type "); |
| return nullptr; |
| } |
| } |
| // impossible. |
| assert(false); |
| return nullptr; |
| } |
| |
| PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder( |
| const InternalKeyComparator* comparator, |
| const BlockBasedTableOptions& table_opt) { |
| return new PartitionedIndexBuilder(comparator, table_opt); |
| } |
| |
| PartitionedIndexBuilder::PartitionedIndexBuilder( |
| const InternalKeyComparator* comparator, |
| const BlockBasedTableOptions& table_opt) |
| : IndexBuilder(comparator), |
| index_block_builder_(table_opt.index_block_restart_interval), |
| sub_index_builder_(nullptr), |
| table_opt_(table_opt) {} |
| |
| PartitionedIndexBuilder::~PartitionedIndexBuilder() { |
| delete sub_index_builder_; |
| } |
| |
| void PartitionedIndexBuilder::MakeNewSubIndexBuilder() { |
| assert(sub_index_builder_ == nullptr); |
| sub_index_builder_ = new ShortenedIndexBuilder( |
| comparator_, table_opt_.index_block_restart_interval); |
| flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( |
| table_opt_.metadata_block_size, table_opt_.block_size_deviation, |
| sub_index_builder_->index_block_builder_)); |
| partition_cut_requested_ = false; |
| } |
| |
| void PartitionedIndexBuilder::RequestPartitionCut() { |
| partition_cut_requested_ = true; |
| } |
| |
| void PartitionedIndexBuilder::AddIndexEntry( |
| std::string* last_key_in_current_block, |
| const Slice* first_key_in_next_block, const BlockHandle& block_handle) { |
| // Note: to avoid two consecuitive flush in the same method call, we do not |
| // check flush policy when adding the last key |
| if (UNLIKELY(first_key_in_next_block == nullptr)) { // no more keys |
| if (sub_index_builder_ == nullptr) { |
| MakeNewSubIndexBuilder(); |
| } |
| sub_index_builder_->AddIndexEntry(last_key_in_current_block, |
| first_key_in_next_block, block_handle); |
| sub_index_last_key_ = std::string(*last_key_in_current_block); |
| entries_.push_back( |
| {sub_index_last_key_, |
| std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)}); |
| sub_index_builder_ = nullptr; |
| cut_filter_block = true; |
| } else { |
| // apply flush policy only to non-empty sub_index_builder_ |
| if (sub_index_builder_ != nullptr) { |
| std::string handle_encoding; |
| block_handle.EncodeTo(&handle_encoding); |
| bool do_flush = |
| partition_cut_requested_ || |
| flush_policy_->Update(*last_key_in_current_block, handle_encoding); |
| if (do_flush) { |
| entries_.push_back( |
| {sub_index_last_key_, |
| std::unique_ptr<ShortenedIndexBuilder>(sub_index_builder_)}); |
| cut_filter_block = true; |
| sub_index_builder_ = nullptr; |
| } |
| } |
| if (sub_index_builder_ == nullptr) { |
| MakeNewSubIndexBuilder(); |
| } |
| sub_index_builder_->AddIndexEntry(last_key_in_current_block, |
| first_key_in_next_block, block_handle); |
| sub_index_last_key_ = std::string(*last_key_in_current_block); |
| } |
| } |
| |
| Status PartitionedIndexBuilder::Finish( |
| IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) { |
| assert(!entries_.empty()); |
| // It must be set to null after last key is added |
| assert(sub_index_builder_ == nullptr); |
| if (finishing_indexes == true) { |
| Entry& last_entry = entries_.front(); |
| std::string handle_encoding; |
| last_partition_block_handle.EncodeTo(&handle_encoding); |
| index_block_builder_.Add(last_entry.key, handle_encoding); |
| entries_.pop_front(); |
| } |
| // If there is no sub_index left, then return the 2nd level index. |
| if (UNLIKELY(entries_.empty())) { |
| index_blocks->index_block_contents = index_block_builder_.Finish(); |
| return Status::OK(); |
| } else { |
| // Finish the next partition index in line and Incomplete() to indicate we |
| // expect more calls to Finish |
| Entry& entry = entries_.front(); |
| auto s = entry.value->Finish(index_blocks); |
| finishing_indexes = true; |
| return s.ok() ? Status::Incomplete() : s; |
| } |
| } |
| |
| // Estimate size excluding the top-level index |
| // It is assumed that this method is called before writing index partition |
| // starts |
| size_t PartitionedIndexBuilder::EstimatedSize() const { |
| size_t total = 0; |
| for (auto it = entries_.begin(); it != entries_.end(); ++it) { |
| total += it->value->EstimatedSize(); |
| } |
| total += |
| sub_index_builder_ == nullptr ? 0 : sub_index_builder_->EstimatedSize(); |
| return total; |
| } |
| |
| // Since when this method is called we do not know the index block offsets yet, |
| // the top-level index does not exist. Hence we estimate the block offsets and |
| // create a temporary top-level index. |
| size_t PartitionedIndexBuilder::EstimateTopLevelIndexSize( |
| uint64_t offset) const { |
| BlockBuilder tmp_builder( |
| table_opt_.index_block_restart_interval); // tmp top-level index builder |
| for (auto it = entries_.begin(); it != entries_.end(); ++it) { |
| std::string tmp_handle_encoding; |
| uint64_t size = it->value->EstimatedSize(); |
| BlockHandle tmp_block_handle(offset, size); |
| tmp_block_handle.EncodeTo(&tmp_handle_encoding); |
| tmp_builder.Add(it->key, tmp_handle_encoding); |
| offset += size; |
| } |
| return tmp_builder.CurrentSizeEstimate(); |
| } |
| |
| size_t PartitionedIndexBuilder::NumPartitions() const { |
| return entries_.size(); |
| } |
| } // namespace rocksdb |