blob: 669dd5e37dd72bce8ebaf2ea62aa0206840538a1 [file] [log] [blame]
#ifndef RDESTL_HASH_MAP_H
#define RDESTL_HASH_MAP_H
#include "algorithm.h"
#include "allocator.h"
#include "functional.h"
#include "hash.h"
#include "pair.h"
namespace rde
{
// TLoadFactor4 - controls hash map load. 4 means 100% load, ie. hashmap will grow
// when number of items == capacity. Default value of 6 means it grows when
// number of items == capacity * 3/2 (6/4). Higher load == tighter maps, but bigger
// risk of collisions.
template<typename TKey, typename TValue,
class THashFunc = rde::hash<TKey>,
int TLoadFactor4 = 6,
class TKeyEqualFunc = rde::equal_to<TKey>,
class TAllocator = rde::allocator>
class hash_map
{
public:
typedef rde::pair<TKey, TValue> value_type;
private:
struct node
{
static const hash_value_t kUnusedHash = 0xFFFFFFFF;
static const hash_value_t kDeletedHash = 0xFFFFFFFE;
node(): hash(kUnusedHash) {}
RDE_FORCEINLINE bool is_unused() const { return hash == kUnusedHash; }
RDE_FORCEINLINE bool is_deleted() const { return hash == kDeletedHash; }
RDE_FORCEINLINE bool is_occupied() const { return hash < kDeletedHash; }
hash_value_t hash;
value_type data;
};
template<typename TNodePtr, typename TPtr, typename TRef>
class node_iterator
{
friend class hash_map;
public:
typedef forward_iterator_tag iterator_category;
explicit node_iterator(TNodePtr node, const hash_map* map)
: m_node(node),
m_map(map)
{/**/}
template<typename UNodePtr, typename UPtr, typename URef>
node_iterator(const node_iterator<UNodePtr, UPtr, URef>& rhs)
: m_node(rhs.node()),
m_map(rhs.get_map())
{/**/}
TRef operator*() const
{
RDE_ASSERT(m_node != 0);
return m_node->data;
}
TPtr operator->() const
{
return &m_node->data;
}
RDE_FORCEINLINE TNodePtr node() const
{
return m_node;
}
node_iterator& operator++()
{
RDE_ASSERT(m_node != 0);
++m_node;
move_to_next_occupied_node();
return *this;
}
node_iterator operator++(int)
{
node_iterator copy(*this);
++(*this);
return copy;
}
RDE_FORCEINLINE bool operator==(const node_iterator& rhs) const
{
return rhs.m_node == m_node;
}
bool operator!=(const node_iterator& rhs) const
{
return !(rhs == *this);
}
const hash_map* get_map() const { return m_map; }
private:
void move_to_next_occupied_node()
{
// @todo: save nodeEnd in constructor?
TNodePtr nodeEnd = m_map->m_nodes + m_map->bucket_count();
for (/**/; m_node < nodeEnd; ++m_node)
{
if (m_node->is_occupied())
break;
}
}
TNodePtr m_node;
const hash_map* m_map;
};
public:
typedef TKey key_type;
typedef TValue mapped_type;
typedef TAllocator allocator_type;
typedef node_iterator<node*, value_type*, value_type&> iterator;
typedef node_iterator<const node*, const value_type*, const value_type&> const_iterator;
typedef int size_type;
static const size_type kNodeSize = sizeof(node);
static const size_type kInitialCapacity = 64;
hash_map()
: m_nodes(&ms_emptyNode),
m_size(0),
m_capacity(0),
m_capacityMask(0),
m_numUsed(0)
{
RDE_ASSERT((kInitialCapacity & (kInitialCapacity - 1)) == 0); // Must be power-of-two
}
explicit hash_map(const allocator_type& allocator)
: m_nodes(&ms_emptyNode),
m_size(0),
m_capacity(0),
m_capacityMask(0),
m_numUsed(0),
m_allocator(allocator)
{
/**/
}
explicit hash_map(size_type initial_bucket_count,
const allocator_type& allocator = allocator_type())
: m_nodes(&ms_emptyNode),
m_size(0),
m_capacity(0),
m_capacityMask(0),
m_numUsed(0),
m_allocator(allocator)
{
reserve(initial_bucket_count);
}
hash_map(size_type initial_bucket_count,
const THashFunc& hashFunc,
const allocator_type& allocator = allocator_type())
: m_nodes(&ms_emptyNode),
m_size(0),
m_capacity(0),
m_capacityMask(0),
m_numUsed(0),
m_hashFunc(hashFunc),
m_allocator(allocator)
{
reserve(initial_bucket_count);
}
hash_map(const hash_map& rhs, const allocator_type& allocator = allocator_type())
: m_nodes(&ms_emptyNode),
m_size(0),
m_capacity(0),
m_capacityMask(0),
m_numUsed(0),
m_allocator(allocator)
{
*this = rhs;
}
explicit hash_map(e_noinitialize)
{
/**/
}
~hash_map()
{
delete_nodes();
}
iterator begin()
{
iterator it(m_nodes, this);
it.move_to_next_occupied_node();
return it;
}
const_iterator begin() const
{
const_iterator it(m_nodes, this);
it.move_to_next_occupied_node();
return it;
}
iterator end() { return iterator(m_nodes + m_capacity, this); }
const_iterator end() const { return const_iterator(m_nodes + m_capacity, this); }
// @note: Added for compatiblity sake.
// Personally, I consider it "risky". Use find/insert for more
// explicit operations.
mapped_type& operator[](const key_type& key)
{
hash_value_t hash;
node* n = find_for_insert(key, &hash);
if (n == 0 || !n->is_occupied())
{
return insert_at(value_type(key, TValue()), n, hash).first->second;
}
return n->data.second;
}
// @note: Doesn't copy allocator.
hash_map& operator=(const hash_map& rhs)
{
RDE_ASSERT(invariant());
if (&rhs != this)
{
clear();
if (m_capacity < rhs.bucket_count())
{
delete_nodes();
m_nodes = allocate_nodes(rhs.bucket_count());
m_capacity = rhs.bucket_count();
m_capacityMask = m_capacity - 1;
}
rehash(m_capacity, m_nodes, rhs.m_capacity, rhs.m_nodes, false);
m_size = rhs.size();
m_numUsed = rhs.m_numUsed;
}
RDE_ASSERT(invariant());
return *this;
}
void swap(hash_map& rhs)
{
if (&rhs != this)
{
RDE_ASSERT(invariant());
RDE_ASSERT(m_allocator == rhs.m_allocator);
rde::swap(m_nodes, rhs.m_nodes);
rde::swap(m_size, rhs.m_size);
rde::swap(m_capacity, rhs.m_capacity);
rde::swap(m_capacityMask, rhs.m_capacityMask);
rde::swap(m_numUsed, rhs.m_numUsed);
rde::swap(m_hashFunc, rhs.m_hashFunc);
rde::swap(m_keyEqualFunc, rhs.m_keyEqualFunc);
RDE_ASSERT(invariant());
}
}
rde::pair<iterator, bool> insert(const value_type& v)
{
typedef rde::pair<iterator, bool> ret_type_t;
RDE_ASSERT(invariant());
if (m_numUsed * TLoadFactor4 >= m_capacity * 4)
grow();
hash_value_t hash;
node* n = find_for_insert(v.first, &hash);
if (n->is_occupied())
{
RDE_ASSERT(hash == n->hash && m_keyEqualFunc(v.first, n->data.first));
return ret_type_t(iterator(n, this), false);
}
if (n->is_unused())
++m_numUsed;
rde::copy_construct(&n->data, v);
n->hash = hash;
++m_size;
RDE_ASSERT(invariant());
return ret_type_t(iterator(n, this), true);
}
size_type erase(const key_type& key)
{
node* n = lookup(key);
if (n != (m_nodes + m_capacity) && n->is_occupied())
{
erase_node(n);
return 1;
}
return 0;
}
void erase(iterator it)
{
RDE_ASSERT(it.get_map() == this);
if (it != end())
{
RDE_ASSERT(!empty());
erase_node(it.node());
}
}
void erase(iterator from, iterator to)
{
for (/**/; from != to; ++from)
{
node* n = from.node();
if (n->is_occupied())
erase_node(n);
}
}
iterator find(const key_type& key)
{
node* n = lookup(key);
return iterator(n, this);
}
const_iterator find(const key_type& key) const
{
const node* n = lookup(key);
return const_iterator(n, this);
}
void clear()
{
node* endNode = m_nodes + m_capacity;
for (node* iter = m_nodes; iter != endNode; ++iter)
{
if( iter )
{
if (iter->is_occupied())
{
rde::destruct(&iter->data);
}
// We can make them unused, because we clear whole hash_map,
// so we can guarantee there'll be no holes.
iter->hash = node::kUnusedHash;
}
}
m_size = 0;
m_numUsed = 0;
}
void reserve(size_type min_size)
{
size_type newCapacity = (m_capacity == 0 ? kInitialCapacity : m_capacity);
while (newCapacity < min_size)
newCapacity *= 2;
if (newCapacity > m_capacity)
grow(newCapacity);
}
size_type bucket_count() const { return m_capacity; }
size_type size() const { return m_size; }
size_type empty() const { return size() == 0; }
size_type nonempty_bucket_count() const { return m_numUsed; }
size_type used_memory() const
{
return bucket_count() * kNodeSize;
}
const allocator_type& get_allocator() const { return m_allocator; }
void set_allocator(const allocator_type& allocator)
{
m_allocator = allocator;
}
private:
void grow()
{
const int newCapacity = (m_capacity == 0 ? kInitialCapacity : m_capacity * 2);
grow(newCapacity);
}
void grow(int new_capacity)
{
RDE_ASSERT((new_capacity & (new_capacity - 1)) == 0); // Must be power-of-two
node* newNodes = allocate_nodes(new_capacity);
rehash(new_capacity, newNodes, m_capacity, m_nodes, true);
if (m_nodes != &ms_emptyNode)
m_allocator.deallocate(m_nodes, sizeof(node) * m_capacity);
m_capacity = new_capacity;
m_capacityMask = new_capacity - 1;
m_nodes = newNodes;
m_numUsed = m_size;
RDE_ASSERT(m_numUsed < m_capacity);
}
rde::pair<iterator, bool> insert_at(const value_type& v, node* n,
hash_value_t hash)
{
RDE_ASSERT(invariant());
if (n == 0 || m_numUsed * TLoadFactor4 >= m_capacity * 4)
return insert(v);
RDE_ASSERT(!n->is_occupied());
if (n->is_unused())
++m_numUsed;
rde::copy_construct(&n->data, v);
n->hash = hash;
++m_size;
RDE_ASSERT(invariant());
return rde::pair<iterator, bool>(iterator(n, this), true);
}
node* find_for_insert(const key_type& key, hash_value_t* out_hash)
{
if (m_capacity == 0)
return 0;
const hash_value_t hash = hash_func(key);
*out_hash = hash;
uint32 i = hash & m_capacityMask;
node* n = m_nodes + i;
if (n->hash == hash && m_keyEqualFunc(key, n->data.first))
return n;
node* freeNode(0);
if (n->is_deleted())
freeNode = n;
uint32 numProbes(0);
// Guarantees loop termination.
RDE_ASSERT(m_numUsed < m_capacity);
while (!n->is_unused())
{
++numProbes;
i = (i + numProbes) & m_capacityMask;
n = m_nodes + i;
if (compare_key(n, key, hash))
return n;
if (n->is_deleted() && freeNode == 0)
freeNode = n;
}
return freeNode ? freeNode : n;
}
node* lookup(const key_type& key) const
{
const hash_value_t hash = hash_func(key);
uint32 i = hash & m_capacityMask;
node* n = m_nodes + i;
if (n->hash == hash && m_keyEqualFunc(key, n->data.first))
return n;
uint32 numProbes(0);
// Guarantees loop termination.
RDE_ASSERT(m_capacity == 0 || m_numUsed < m_capacity);
while (!n->is_unused())
{
++numProbes;
i = (i + numProbes) & m_capacityMask;
n = m_nodes + i;
if (compare_key(n, key, hash))
return n;
}
return m_nodes + m_capacity;
}
static void rehash(int new_capacity, node* new_nodes,
int capacity, const node* nodes, bool destruct_original)
{
//if (nodes == &ms_emptyNode || new_nodes == &ms_emptyNode)
// return;
const node* it = nodes;
const node* itEnd = nodes + capacity;
const uint32 mask = new_capacity - 1;
while (it != itEnd)
{
if (it->is_occupied())
{
const hash_value_t hash = it->hash;
uint32 i = hash & mask;
node* n = new_nodes + i;
uint32 numProbes(0);
while (!n->is_unused())
{
++numProbes;
i = (i + numProbes) & mask;
n = new_nodes + i;
}
rde::copy_construct(&n->data, it->data);
n->hash = hash;
if (destruct_original)
rde::destruct(&it->data);
}
++it;
}
}
node* allocate_nodes(int n)
{
node* buckets = static_cast<node*>(m_allocator.allocate(n * sizeof(node)));
node* iterBuckets(buckets);
node* end = iterBuckets + n;
for (/**/; iterBuckets != end; ++iterBuckets)
iterBuckets->hash = node::kUnusedHash;
return buckets;
}
void delete_nodes()
{
node* it = m_nodes;
node* itEnd = it + m_capacity;
while (it != itEnd)
{
if (it && it->is_occupied())
rde::destruct(&it->data);
++it;
}
if (m_nodes != &ms_emptyNode)
m_allocator.deallocate(m_nodes, sizeof(node) * m_capacity);
m_capacity = 0;
m_capacityMask = 0;
m_size = 0;
}
void erase_node(node* n)
{
RDE_ASSERT(!empty());
RDE_ASSERT(n->is_occupied());
rde::destruct(&n->data);
n->hash = node::kDeletedHash;
--m_size;
}
RDE_FORCEINLINE hash_value_t hash_func(const key_type& key) const
{
const hash_value_t h = m_hashFunc(key) & 0xFFFFFFFD;
//RDE_ASSERT(h < node::kDeletedHash);
return h;
}
bool invariant() const
{
RDE_ASSERT((m_capacity & (m_capacity - 1)) == 0);
RDE_ASSERT(m_numUsed >= m_size);
return true;
}
RDE_FORCEINLINE bool compare_key(const node* n, const key_type& key, hash_value_t hash) const
{
return (n->hash == hash && m_keyEqualFunc(key, n->data.first));
}
node* m_nodes;
int m_size;
int m_capacity;
uint32 m_capacityMask;
int m_numUsed;
THashFunc m_hashFunc;
TKeyEqualFunc m_keyEqualFunc;
TAllocator m_allocator;
static node ms_emptyNode;
};
// Holy ...
template<typename TKey, typename TValue,
class THashFunc,
int TLoadFactor4,
class TKeyEqualFunc,
class TAllocator>
typename hash_map<TKey, TValue, THashFunc, TLoadFactor4, TKeyEqualFunc, TAllocator>::node hash_map<TKey, TValue, THashFunc, TLoadFactor4, TKeyEqualFunc, TAllocator>::ms_emptyNode;
}
#endif