thirdparty/rocksdb/table/plain_table_index.cc - nifi-minifi-cpp - Git at Google

 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
 //  This source code is licensed under both the GPLv2 (found in the
 //  COPYING file in the root directory) and Apache 2.0 License
 //  (found in the LICENSE.Apache file in the root directory).

 #ifndef ROCKSDB_LITE

 #ifndef __STDC_FORMAT_MACROS
 #define __STDC_FORMAT_MACROS
 #endif

 #include <inttypes.h>

 #include "table/plain_table_index.h"
 #include "util/coding.h"
 #include "util/hash.h"

 namespace rocksdb {

 namespace {
 inline uint32_t GetBucketIdFromHash(uint32_t hash, uint32_t num_buckets) {
   assert(num_buckets > 0);
   return hash % num_buckets;
 }
 }

 Status PlainTableIndex::InitFromRawData(Slice data) {
   if (!GetVarint32(&data, &index_size_)) {
     return Status::Corruption("Couldn't read the index size!");
   }
   assert(index_size_ > 0);
   if (!GetVarint32(&data, &num_prefixes_)) {
     return Status::Corruption("Couldn't read the index size!");
   }
   sub_index_size_ =
       static_cast<uint32_t>(data.size()) - index_size_ * kOffsetLen;

   char* index_data_begin = const_cast<char*>(data.data());
   index_ = reinterpret_cast<uint32_t*>(index_data_begin);
   sub_index_ = reinterpret_cast<char*>(index_ + index_size_);
   return Status::OK();
 }

 PlainTableIndex::IndexSearchResult PlainTableIndex::GetOffset(
     uint32_t prefix_hash, uint32_t* bucket_value) const {
   int bucket = GetBucketIdFromHash(prefix_hash, index_size_);
   GetUnaligned(index_ + bucket, bucket_value);
   if ((*bucket_value & kSubIndexMask) == kSubIndexMask) {
     *bucket_value ^= kSubIndexMask;
     return kSubindex;
   }
   if (*bucket_value >= kMaxFileSize) {
     return kNoPrefixForBucket;
   } else {
     // point directly to the file
     return kDirectToFile;
   }
 }

 void PlainTableIndexBuilder::IndexRecordList::AddRecord(uint32_t hash,
                                                         uint32_t offset) {
   if (num_records_in_current_group_ == kNumRecordsPerGroup) {
     current_group_ = AllocateNewGroup();
     num_records_in_current_group_ = 0;
   }
   auto& new_record = current_group_[num_records_in_current_group_++];
   new_record.hash = hash;
   new_record.offset = offset;
   new_record.next = nullptr;
 }

 void PlainTableIndexBuilder::AddKeyPrefix(Slice key_prefix_slice,
                                           uint32_t key_offset) {
   if (is_first_record_ || prev_key_prefix_ != key_prefix_slice.ToString()) {
     ++num_prefixes_;
     if (!is_first_record_) {
       keys_per_prefix_hist_.Add(num_keys_per_prefix_);
     }
     num_keys_per_prefix_ = 0;
     prev_key_prefix_ = key_prefix_slice.ToString();
     prev_key_prefix_hash_ = GetSliceHash(key_prefix_slice);
     due_index_ = true;
   }

   if (due_index_) {
     // Add an index key for every kIndexIntervalForSamePrefixKeys keys
     record_list_.AddRecord(prev_key_prefix_hash_, key_offset);
     due_index_ = false;
   }

   num_keys_per_prefix_++;
   if (index_sparseness_ == 0 || num_keys_per_prefix_ % index_sparseness_ == 0) {
     due_index_ = true;
   }
   is_first_record_ = false;
 }

 Slice PlainTableIndexBuilder::Finish() {
   AllocateIndex();
   std::vector<IndexRecord*> hash_to_offsets(index_size_, nullptr);
   std::vector<uint32_t> entries_per_bucket(index_size_, 0);
   BucketizeIndexes(&hash_to_offsets, &entries_per_bucket);

   keys_per_prefix_hist_.Add(num_keys_per_prefix_);
   ROCKS_LOG_INFO(ioptions_.info_log, "Number of Keys per prefix Histogram: %s",
                  keys_per_prefix_hist_.ToString().c_str());

   // From the temp data structure, populate indexes.
   return FillIndexes(hash_to_offsets, entries_per_bucket);
 }

 void PlainTableIndexBuilder::AllocateIndex() {
   if (prefix_extractor_ == nullptr || hash_table_ratio_ <= 0) {
     // Fall back to pure binary search if the user fails to specify a prefix
     // extractor.
     index_size_ = 1;
   } else {
     double hash_table_size_multipier = 1.0 / hash_table_ratio_;
     index_size_ =
       static_cast<uint32_t>(num_prefixes_ * hash_table_size_multipier) + 1;
     assert(index_size_ > 0);
   }
 }

 void PlainTableIndexBuilder::BucketizeIndexes(
     std::vector<IndexRecord*>* hash_to_offsets,
     std::vector<uint32_t>* entries_per_bucket) {
   bool first = true;
   uint32_t prev_hash = 0;
   size_t num_records = record_list_.GetNumRecords();
   for (size_t i = 0; i < num_records; i++) {
     IndexRecord* index_record = record_list_.At(i);
     uint32_t cur_hash = index_record->hash;
     if (first || prev_hash != cur_hash) {
       prev_hash = cur_hash;
       first = false;
     }
     uint32_t bucket = GetBucketIdFromHash(cur_hash, index_size_);
     IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket];
     index_record->next = prev_bucket_head;
     (*hash_to_offsets)[bucket] = index_record;
     (*entries_per_bucket)[bucket]++;
   }

   sub_index_size_ = 0;
   for (auto entry_count : *entries_per_bucket) {
     if (entry_count <= 1) {
       continue;
     }
     // Only buckets with more than 1 entry will have subindex.
     sub_index_size_ += VarintLength(entry_count);
     // total bytes needed to store these entries' in-file offsets.
     sub_index_size_ += entry_count * PlainTableIndex::kOffsetLen;
   }
 }

 Slice PlainTableIndexBuilder::FillIndexes(
     const std::vector<IndexRecord*>& hash_to_offsets,
     const std::vector<uint32_t>& entries_per_bucket) {
   ROCKS_LOG_DEBUG(ioptions_.info_log,
                   "Reserving %" PRIu32 " bytes for plain table's sub_index",
                   sub_index_size_);
   auto total_allocate_size = GetTotalSize();
   char* allocated = arena_->AllocateAligned(
       total_allocate_size, huge_page_tlb_size_, ioptions_.info_log);

   auto temp_ptr = EncodeVarint32(allocated, index_size_);
   uint32_t* index =
       reinterpret_cast<uint32_t*>(EncodeVarint32(temp_ptr, num_prefixes_));
   char* sub_index = reinterpret_cast<char*>(index + index_size_);

   uint32_t sub_index_offset = 0;
   for (uint32_t i = 0; i < index_size_; i++) {
     uint32_t num_keys_for_bucket = entries_per_bucket[i];
     switch (num_keys_for_bucket) {
       case 0:
         // No key for bucket
         PutUnaligned(index + i, (uint32_t)PlainTableIndex::kMaxFileSize);
         break;
       case 1:
         // point directly to the file offset
         PutUnaligned(index + i, hash_to_offsets[i]->offset);
         break;
       default:
         // point to second level indexes.
         PutUnaligned(index + i, sub_index_offset | PlainTableIndex::kSubIndexMask);
         char* prev_ptr = &sub_index[sub_index_offset];
         char* cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket);
         sub_index_offset += static_cast<uint32_t>(cur_ptr - prev_ptr);
         char* sub_index_pos = &sub_index[sub_index_offset];
         IndexRecord* record = hash_to_offsets[i];
         int j;
         for (j = num_keys_for_bucket - 1; j >= 0 && record;
              j--, record = record->next) {
           EncodeFixed32(sub_index_pos + j * sizeof(uint32_t), record->offset);
         }
         assert(j == -1 && record == nullptr);
         sub_index_offset += PlainTableIndex::kOffsetLen * num_keys_for_bucket;
         assert(sub_index_offset <= sub_index_size_);
         break;
     }
   }
   assert(sub_index_offset == sub_index_size_);

   ROCKS_LOG_DEBUG(ioptions_.info_log,
                   "hash table size: %d, suffix_map length %" ROCKSDB_PRIszt,
                   index_size_, sub_index_size_);
   return Slice(allocated, GetTotalSize());
 }

 const std::string PlainTableIndexBuilder::kPlainTableIndexBlock =
     "PlainTableIndexBlock";
 };  // namespace rocksdb

 #endif  // ROCKSDB_LITE
	// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
	// This source code is licensed under both the GPLv2 (found in the
	// COPYING file in the root directory) and Apache 2.0 License
	// (found in the LICENSE.Apache file in the root directory).

	#ifndef ROCKSDB_LITE

	#ifndef __STDC_FORMAT_MACROS
	#define __STDC_FORMAT_MACROS
	#endif

	#include <inttypes.h>

	#include "table/plain_table_index.h"
	#include "util/coding.h"
	#include "util/hash.h"

	namespace rocksdb {

	namespace {
	inline uint32_t GetBucketIdFromHash(uint32_t hash, uint32_t num_buckets) {
	assert(num_buckets > 0);
	return hash % num_buckets;
	}
	}

	Status PlainTableIndex::InitFromRawData(Slice data) {
	if (!GetVarint32(&data, &index_size_)) {
	return Status::Corruption("Couldn't read the index size!");
	}
	assert(index_size_ > 0);
	if (!GetVarint32(&data, &num_prefixes_)) {
	return Status::Corruption("Couldn't read the index size!");
	}
	sub_index_size_ =
	static_cast<uint32_t>(data.size()) - index_size_ * kOffsetLen;

	char* index_data_begin = const_cast<char*>(data.data());
	index_ = reinterpret_cast<uint32_t*>(index_data_begin);
	sub_index_ = reinterpret_cast<char*>(index_ + index_size_);
	return Status::OK();
	}

	PlainTableIndex::IndexSearchResult PlainTableIndex::GetOffset(
	uint32_t prefix_hash, uint32_t* bucket_value) const {
	int bucket = GetBucketIdFromHash(prefix_hash, index_size_);
	GetUnaligned(index_ + bucket, bucket_value);
	if ((*bucket_value & kSubIndexMask) == kSubIndexMask) {
	*bucket_value ^= kSubIndexMask;
	return kSubindex;
	}
	if (*bucket_value >= kMaxFileSize) {
	return kNoPrefixForBucket;
	} else {
	// point directly to the file
	return kDirectToFile;
	}
	}

	void PlainTableIndexBuilder::IndexRecordList::AddRecord(uint32_t hash,
	uint32_t offset) {
	if (num_records_in_current_group_ == kNumRecordsPerGroup) {
	current_group_ = AllocateNewGroup();
	num_records_in_current_group_ = 0;
	}
	auto& new_record = current_group_[num_records_in_current_group_++];
	new_record.hash = hash;
	new_record.offset = offset;
	new_record.next = nullptr;
	}

	void PlainTableIndexBuilder::AddKeyPrefix(Slice key_prefix_slice,
	uint32_t key_offset) {
	if (is_first_record_ \|\| prev_key_prefix_ != key_prefix_slice.ToString()) {
	++num_prefixes_;
	if (!is_first_record_) {
	keys_per_prefix_hist_.Add(num_keys_per_prefix_);
	}
	num_keys_per_prefix_ = 0;
	prev_key_prefix_ = key_prefix_slice.ToString();
	prev_key_prefix_hash_ = GetSliceHash(key_prefix_slice);
	due_index_ = true;
	}

	if (due_index_) {
	// Add an index key for every kIndexIntervalForSamePrefixKeys keys
	record_list_.AddRecord(prev_key_prefix_hash_, key_offset);
	due_index_ = false;
	}

	num_keys_per_prefix_++;
	if (index_sparseness_ == 0 \|\| num_keys_per_prefix_ % index_sparseness_ == 0) {
	due_index_ = true;
	}
	is_first_record_ = false;
	}

	Slice PlainTableIndexBuilder::Finish() {
	AllocateIndex();
	std::vector<IndexRecord*> hash_to_offsets(index_size_, nullptr);
	std::vector<uint32_t> entries_per_bucket(index_size_, 0);
	BucketizeIndexes(&hash_to_offsets, &entries_per_bucket);

	keys_per_prefix_hist_.Add(num_keys_per_prefix_);
	ROCKS_LOG_INFO(ioptions_.info_log, "Number of Keys per prefix Histogram: %s",
	keys_per_prefix_hist_.ToString().c_str());

	// From the temp data structure, populate indexes.
	return FillIndexes(hash_to_offsets, entries_per_bucket);
	}

	void PlainTableIndexBuilder::AllocateIndex() {
	if (prefix_extractor_ == nullptr \|\| hash_table_ratio_ <= 0) {
	// Fall back to pure binary search if the user fails to specify a prefix
	// extractor.
	index_size_ = 1;
	} else {
	double hash_table_size_multipier = 1.0 / hash_table_ratio_;
	index_size_ =
	static_cast<uint32_t>(num_prefixes_ * hash_table_size_multipier) + 1;
	assert(index_size_ > 0);
	}
	}

	void PlainTableIndexBuilder::BucketizeIndexes(
	std::vector<IndexRecord> hash_to_offsets,
	std::vector<uint32_t>* entries_per_bucket) {
	bool first = true;
	uint32_t prev_hash = 0;
	size_t num_records = record_list_.GetNumRecords();
	for (size_t i = 0; i < num_records; i++) {
	IndexRecord* index_record = record_list_.At(i);
	uint32_t cur_hash = index_record->hash;
	if (first \|\| prev_hash != cur_hash) {
	prev_hash = cur_hash;
	first = false;
	}
	uint32_t bucket = GetBucketIdFromHash(cur_hash, index_size_);
	IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket];
	index_record->next = prev_bucket_head;
	(*hash_to_offsets)[bucket] = index_record;
	(*entries_per_bucket)[bucket]++;
	}

	sub_index_size_ = 0;
	for (auto entry_count : *entries_per_bucket) {
	if (entry_count <= 1) {
	continue;
	}
	// Only buckets with more than 1 entry will have subindex.
	sub_index_size_ += VarintLength(entry_count);
	// total bytes needed to store these entries' in-file offsets.
	sub_index_size_ += entry_count * PlainTableIndex::kOffsetLen;
	}
	}

	Slice PlainTableIndexBuilder::FillIndexes(
	const std::vector<IndexRecord*>& hash_to_offsets,
	const std::vector<uint32_t>& entries_per_bucket) {
	ROCKS_LOG_DEBUG(ioptions_.info_log,
	"Reserving %" PRIu32 " bytes for plain table's sub_index",
	sub_index_size_);
	auto total_allocate_size = GetTotalSize();
	char* allocated = arena_->AllocateAligned(
	total_allocate_size, huge_page_tlb_size_, ioptions_.info_log);

	auto temp_ptr = EncodeVarint32(allocated, index_size_);
	uint32_t* index =
	reinterpret_cast<uint32_t*>(EncodeVarint32(temp_ptr, num_prefixes_));
	char* sub_index = reinterpret_cast<char*>(index + index_size_);

	uint32_t sub_index_offset = 0;
	for (uint32_t i = 0; i < index_size_; i++) {
	uint32_t num_keys_for_bucket = entries_per_bucket[i];
	switch (num_keys_for_bucket) {
	case 0:
	// No key for bucket
	PutUnaligned(index + i, (uint32_t)PlainTableIndex::kMaxFileSize);
	break;
	case 1:
	// point directly to the file offset
	PutUnaligned(index + i, hash_to_offsets[i]->offset);
	break;
	default:
	// point to second level indexes.
	PutUnaligned(index + i, sub_index_offset \| PlainTableIndex::kSubIndexMask);
	char* prev_ptr = &sub_index[sub_index_offset];
	char* cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket);
	sub_index_offset += static_cast<uint32_t>(cur_ptr - prev_ptr);
	char* sub_index_pos = &sub_index[sub_index_offset];
	IndexRecord* record = hash_to_offsets[i];
	int j;
	for (j = num_keys_for_bucket - 1; j >= 0 && record;
	j--, record = record->next) {
	EncodeFixed32(sub_index_pos + j * sizeof(uint32_t), record->offset);
	}
	assert(j == -1 && record == nullptr);
	sub_index_offset += PlainTableIndex::kOffsetLen * num_keys_for_bucket;
	assert(sub_index_offset <= sub_index_size_);
	break;
	}
	}
	assert(sub_index_offset == sub_index_size_);

	ROCKS_LOG_DEBUG(ioptions_.info_log,
	"hash table size: %d, suffix_map length %" ROCKSDB_PRIszt,
	index_size_, sub_index_size_);
	return Slice(allocated, GetTotalSize());
	}

	const std::string PlainTableIndexBuilder::kPlainTableIndexBlock =
	"PlainTableIndexBlock";
	}; // namespace rocksdb

	#endif // ROCKSDB_LITE