thirdparty/rocksdb/utilities/persistent_cache/hash_table.h - nifi-minifi-cpp - Git at Google

 //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 //  This source code is licensed under both the GPLv2 (found in the
 //  COPYING file in the root directory) and Apache 2.0 License
 //  (found in the LICENSE.Apache file in the root directory).
 //
 #pragma once

 #ifndef ROCKSDB_LITE

 #include <assert.h>
 #include <list>
 #include <vector>

 #ifdef OS_LINUX
 #include <sys/mman.h>
 #endif

 #include "include/rocksdb/env.h"
 #include "util/mutexlock.h"

 namespace rocksdb {

 // HashTable<T, Hash, Equal>
 //
 // Traditional hash table implementations with synchronization built on top
 // don't perform very well in multi-core scenarios. This is an implementation
 // designed for multi-core scenarios with high lock contention.
 //
 //                         |<-------- alpha ------------->|
 //               Buckets   Collision list
 //          ---- +----+    +---+---+--- ...... ---+---+---+
 //         /     |    |--->|   |   |              |   |   |
 //        /      +----+    +---+---+--- ...... ---+---+---+
 //       /       |    |
 // Locks/        +----+
 // +--+/         .    .
 // |  |          .    .
 // +--+          .    .
 // |  |          .    .
 // +--+          .    .
 // |  |          .    .
 // +--+          .    .
 //     \         +----+
 //      \        |    |
 //       \       +----+
 //        \      |    |
 //         \---- +----+
 //
 // The lock contention is spread over an array of locks. This helps improve
 // concurrent access. The spine is designed for a certain capacity and load
 // factor. When the capacity planning is done correctly we can expect
 // O(load_factor = 1) insert, access and remove time.
 //
 // Micro benchmark on debug build gives about .5 Million/sec rate of insert,
 // erase and lookup in parallel (total of about 1.5 Million ops/sec). If the
 // blocks were of 4K, the hash table can support  a virtual throughput of
 // 6 GB/s.
 //
 // T      Object type (contains both key and value)
 // Hash   Function that returns a hash from type T
 // Equal  Returns whether two objects are equal
 //        (We need explicit equal for pointer type)
 //
 template <class T, class Hash, class Equal>
 class HashTable {
  public:
   explicit HashTable(const size_t capacity = 1024 * 1024,
                      const float load_factor = 2.0, const uint32_t nlocks = 256)
       : nbuckets_(
             static_cast<uint32_t>(load_factor ? capacity / load_factor : 0)),
         nlocks_(nlocks) {
     // pre-conditions
     assert(capacity);
     assert(load_factor);
     assert(nbuckets_);
     assert(nlocks_);

     buckets_.reset(new Bucket[nbuckets_]);
 #ifdef OS_LINUX
     mlock(buckets_.get(), nbuckets_ * sizeof(Bucket));
 #endif

     // initialize locks
     locks_.reset(new port::RWMutex[nlocks_]);
 #ifdef OS_LINUX
     mlock(locks_.get(), nlocks_ * sizeof(port::RWMutex));
 #endif

     // post-conditions
     assert(buckets_);
     assert(locks_);
   }

   virtual ~HashTable() { AssertEmptyBuckets(); }

   //
   // Insert given record to hash table
   //
   bool Insert(const T& t) {
     const uint64_t h = Hash()(t);
     const uint32_t bucket_idx = h % nbuckets_;
     const uint32_t lock_idx = bucket_idx % nlocks_;

     WriteLock _(&locks_[lock_idx]);
     auto& bucket = buckets_[bucket_idx];
     return Insert(&bucket, t);
   }

   // Lookup hash table
   //
   // Please note that read lock should be held by the caller. This is because
   // the caller owns the data, and should hold the read lock as long as he
   // operates on the data.
   bool Find(const T& t, T* ret, port::RWMutex** ret_lock) {
     const uint64_t h = Hash()(t);
     const uint32_t bucket_idx = h % nbuckets_;
     const uint32_t lock_idx = bucket_idx % nlocks_;

     port::RWMutex& lock = locks_[lock_idx];
     lock.ReadLock();

     auto& bucket = buckets_[bucket_idx];
     if (Find(&bucket, t, ret)) {
       *ret_lock = &lock;
       return true;
     }

     lock.ReadUnlock();
     return false;
   }

   //
   // Erase a given key from the hash table
   //
   bool Erase(const T& t, T* ret) {
     const uint64_t h = Hash()(t);
     const uint32_t bucket_idx = h % nbuckets_;
     const uint32_t lock_idx = bucket_idx % nlocks_;

     WriteLock _(&locks_[lock_idx]);

     auto& bucket = buckets_[bucket_idx];
     return Erase(&bucket, t, ret);
   }

   // Fetch the mutex associated with a key
   // This call is used to hold the lock for a given data for extended period of
   // time.
   port::RWMutex* GetMutex(const T& t) {
     const uint64_t h = Hash()(t);
     const uint32_t bucket_idx = h % nbuckets_;
     const uint32_t lock_idx = bucket_idx % nlocks_;

     return &locks_[lock_idx];
   }

   void Clear(void (*fn)(T)) {
     for (uint32_t i = 0; i < nbuckets_; ++i) {
       const uint32_t lock_idx = i % nlocks_;
       WriteLock _(&locks_[lock_idx]);
       for (auto& t : buckets_[i].list_) {
         (*fn)(t);
       }
       buckets_[i].list_.clear();
     }
   }

  protected:
   // Models bucket of keys that hash to the same bucket number
   struct Bucket {
     std::list<T> list_;
   };

   // Substitute for std::find with custom comparator operator
   typename std::list<T>::iterator Find(std::list<T>* list, const T& t) {
     for (auto it = list->begin(); it != list->end(); ++it) {
       if (Equal()(*it, t)) {
         return it;
       }
     }
     return list->end();
   }

   bool Insert(Bucket* bucket, const T& t) {
     // Check if the key already exists
     auto it = Find(&bucket->list_, t);
     if (it != bucket->list_.end()) {
       return false;
     }

     // insert to bucket
     bucket->list_.push_back(t);
     return true;
   }

   bool Find(Bucket* bucket, const T& t, T* ret) {
     auto it = Find(&bucket->list_, t);
     if (it != bucket->list_.end()) {
       if (ret) {
         *ret = *it;
       }
       return true;
     }
     return false;
   }

   bool Erase(Bucket* bucket, const T& t, T* ret) {
     auto it = Find(&bucket->list_, t);
     if (it != bucket->list_.end()) {
       if (ret) {
         *ret = *it;
       }

       bucket->list_.erase(it);
       return true;
     }
     return false;
   }

   // assert that all buckets are empty
   void AssertEmptyBuckets() {
 #ifndef NDEBUG
     for (size_t i = 0; i < nbuckets_; ++i) {
       WriteLock _(&locks_[i % nlocks_]);
       assert(buckets_[i].list_.empty());
     }
 #endif
   }

   const uint32_t nbuckets_;                 // No. of buckets in the spine
   std::unique_ptr<Bucket[]> buckets_;       // Spine of the hash buckets
   const uint32_t nlocks_;                   // No. of locks
   std::unique_ptr<port::RWMutex[]> locks_;  // Granular locks
 };

 }  // namespace rocksdb

 #endif
	// Copyright (c) 2013, Facebook, Inc. All rights reserved.
	// This source code is licensed under both the GPLv2 (found in the
	// COPYING file in the root directory) and Apache 2.0 License
	// (found in the LICENSE.Apache file in the root directory).
	//
	#pragma once

	#ifndef ROCKSDB_LITE

	#include <assert.h>
	#include <list>
	#include <vector>

	#ifdef OS_LINUX
	#include <sys/mman.h>
	#endif

	#include "include/rocksdb/env.h"
	#include "util/mutexlock.h"

	namespace rocksdb {

	// HashTable<T, Hash, Equal>
	//
	// Traditional hash table implementations with synchronization built on top
	// don't perform very well in multi-core scenarios. This is an implementation
	// designed for multi-core scenarios with high lock contention.
	//
	//                         |<-------- alpha ------------->|
	//               Buckets   Collision list
	//          ---- +----+    +---+---+--- ...... ---+---+---+
	//         /     |    |--->|   |   |              |   |   |
	//        /      +----+    +---+---+--- ...... ---+---+---+
	//       /       |    |
	// Locks/        +----+
	// +--+/         .    .
	// |  |          .    .
	// +--+          .    .
	// |  |          .    .
	// +--+          .    .
	// |  |          .    .
	// +--+          .    .
	//     \         +----+
	//      \        |    |
	//       \       +----+
	//        \      |    |
	//         \---- +----+
	//
	// The lock contention is spread over an array of locks. This helps improve
	// concurrent access. The spine is designed for a certain capacity and load
	// factor. When the capacity planning is done correctly we can expect
	// O(load_factor = 1) insert, access and remove time.
	//
	// Micro benchmark on debug build gives about .5 Million/sec rate of insert,
	// erase and lookup in parallel (total of about 1.5 Million ops/sec). If the
	// blocks were of 4K, the hash table can support a virtual throughput of
	// 6 GB/s.
	//
	// T      Object type (contains both key and value)
	// Hash   Function that returns a hash from type T
	// Equal  Returns whether two objects are equal
	//        (We need explicit equal for pointer type)
	//
	template <class T, class Hash, class Equal>
	class HashTable {
	 public:
	  // Creates a table with capacity / load_factor buckets whose accesses are
	  // serialized by an array of nlocks reader-writer mutexes.
	  //
	  // capacity, load_factor, nlocks and the resulting bucket count must all be
	  // non-zero; this is only checked through assert().
	  explicit HashTable(const size_t capacity = 1024 * 1024,
	                     const float load_factor = 2.0, const uint32_t nlocks = 256)
	      : nbuckets_(
	            static_cast<uint32_t>(load_factor ? capacity / load_factor : 0)),
	        nlocks_(nlocks) {
	    // pre-conditions
	    assert(capacity);
	    assert(load_factor);
	    assert(nbuckets_);
	    assert(nlocks_);

	    // allocate the spine; on Linux mlock() is requested for it (the result
	    // of the call is not checked)
	    buckets_.reset(new Bucket[nbuckets_]);
	#ifdef OS_LINUX
	    mlock(buckets_.get(), nbuckets_ * sizeof(Bucket));
	#endif

	    // initialize locks
	    locks_.reset(new port::RWMutex[nlocks_]);
	#ifdef OS_LINUX
	    mlock(locks_.get(), nlocks_ * sizeof(port::RWMutex));
	#endif

	    // post-conditions
	    assert(buckets_);
	    assert(locks_);
	  }

	  // Debug builds assert that all buckets are empty at destruction time.
	  virtual ~HashTable() { AssertEmptyBuckets(); }

	  //
	  // Insert given record to hash table
	  //
	  // Returns false (nothing is stored) if an element comparing Equal to t is
	  // already present, true otherwise.
	  bool Insert(const T& t) {
	    const uint64_t h = Hash()(t);
	    const uint32_t bucket_idx = h % nbuckets_;
	    const uint32_t lock_idx = bucket_idx % nlocks_;

	    WriteLock _(&locks_[lock_idx]);
	    auto& bucket = buckets_[bucket_idx];
	    return Insert(&bucket, t);
	  }

	  // Lookup hash table
	  //
	  // On a hit the element is copied to *ret (when ret is non-null), the
	  // bucket's mutex is returned through *ret_lock still read-locked, and true
	  // is returned. The caller owns the data and must keep the read lock for as
	  // long as it operates on the data, then release it.
	  //
	  // On a miss the read lock is released here and false is returned.
	  bool Find(const T& t, T* ret, port::RWMutex** ret_lock) {
	    const uint64_t h = Hash()(t);
	    const uint32_t bucket_idx = h % nbuckets_;
	    const uint32_t lock_idx = bucket_idx % nlocks_;

	    port::RWMutex& lock = locks_[lock_idx];
	    lock.ReadLock();

	    auto& bucket = buckets_[bucket_idx];
	    if (Find(&bucket, t, ret)) {
	      *ret_lock = &lock;
	      return true;
	    }

	    lock.ReadUnlock();
	    return false;
	  }

	  //
	  // Erase a given key from the hash table
	  //
	  // When an element is removed it is copied to *ret first (if ret is
	  // non-null) and true is returned; false means nothing matched.
	  bool Erase(const T& t, T* ret) {
	    const uint64_t h = Hash()(t);
	    const uint32_t bucket_idx = h % nbuckets_;
	    const uint32_t lock_idx = bucket_idx % nlocks_;

	    WriteLock _(&locks_[lock_idx]);

	    auto& bucket = buckets_[bucket_idx];
	    return Erase(&bucket, t, ret);
	  }

	  // Fetch the mutex associated with a key
	  // This call is used to hold the lock for a given data for extended period of
	  // time. The mutex is returned as-is; it is not locked here.
	  port::RWMutex* GetMutex(const T& t) {
	    const uint64_t h = Hash()(t);
	    const uint32_t bucket_idx = h % nbuckets_;
	    const uint32_t lock_idx = bucket_idx % nlocks_;

	    return &locks_[lock_idx];
	  }

	  // Empties the table: every stored element is passed (by value) to fn, then
	  // its bucket is cleared. Buckets are processed one at a time.
	  void Clear(void (*fn)(T)) {
	    for (uint32_t i = 0; i < nbuckets_; ++i) {
	      const uint32_t lock_idx = i % nlocks_;
	      WriteLock _(&locks_[lock_idx]);
	      for (auto& t : buckets_[i].list_) {
	        (*fn)(t);
	      }
	      buckets_[i].list_.clear();
	    }
	  }

	 protected:
	  // Models bucket of keys that hash to the same bucket number
	  struct Bucket {
	    std::list<T> list_;
	  };

	  // Substitute for std::find with custom comparator operator.
	  // Returns an iterator to the first match or list->end().
	  typename std::list<T>::iterator Find(std::list<T>* list, const T& t) {
	    for (auto it = list->begin(); it != list->end(); ++it) {
	      if (Equal()(*it, t)) {
	        return it;
	      }
	    }
	    return list->end();
	  }

	  // Appends t to the bucket unless an Equal element already exists.
	  // Performs no locking itself.
	  bool Insert(Bucket* bucket, const T& t) {
	    // Check if the key already exists
	    auto it = Find(&bucket->list_, t);
	    if (it != bucket->list_.end()) {
	      return false;
	    }

	    // insert to bucket
	    bucket->list_.push_back(t);
	    return true;
	  }

	  // Searches one bucket; ret is an optional out-parameter that receives a
	  // copy of the match. Performs no locking itself.
	  bool Find(Bucket* bucket, const T& t, T* ret) {
	    auto it = Find(&bucket->list_, t);
	    if (it != bucket->list_.end()) {
	      if (ret) {
	        // write the element through the pointer so the caller sees it
	        *ret = *it;
	      }
	      return true;
	    }
	    return false;
	  }

	  // Removes the match from one bucket; ret is an optional out-parameter that
	  // receives a copy of the removed element. Performs no locking itself.
	  bool Erase(Bucket* bucket, const T& t, T* ret) {
	    auto it = Find(&bucket->list_, t);
	    if (it != bucket->list_.end()) {
	      if (ret) {
	        // copy out before erase() invalidates the iterator
	        *ret = *it;
	      }

	      bucket->list_.erase(it);
	      return true;
	    }
	    return false;
	  }

	  // assert that all buckets are empty (no-op when NDEBUG is defined)
	  void AssertEmptyBuckets() {
	#ifndef NDEBUG
	    for (size_t i = 0; i < nbuckets_; ++i) {
	      WriteLock _(&locks_[i % nlocks_]);
	      assert(buckets_[i].list_.empty());
	    }
	#endif
	  }

	  const uint32_t nbuckets_;                 // No. of buckets in the spine
	  std::unique_ptr<Bucket[]> buckets_;       // Spine of the hash buckets
	  const uint32_t nlocks_;                   // No. of locks
	  std::unique_ptr<port::RWMutex[]> locks_;  // Granular locks
	};

	} // namespace rocksdb

	#endif