blob: 65830368fbb798e20803fc08b27774f237ed576c [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Date: Wed Nov 27 12:59:20 CST 2013
#ifndef BUTIL_FLAT_MAP_INL_H
#define BUTIL_FLAT_MAP_INL_H
namespace butil {
// Return the smallest precomputed prime >= nbucket. The primes roughly
// double at each step and end at 4294967291, the largest 32-bit prime.
// When nbucket exceeds every entry, nbucket itself is returned unchanged.
inline uint32_t find_next_prime(uint32_t nbucket) {
    static const unsigned long prime_list[] = {
        29ul,
        53ul, 97ul, 193ul, 389ul, 769ul,
        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
        49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
        1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
        50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };
    const unsigned long* const list_end =
        prime_list + sizeof(prime_list) / sizeof(prime_list[0]);
    for (const unsigned long* p = prime_list; p != list_end; ++p) {
        if (nbucket <= *p) {
            return *p;
        }
    }
    // Larger than the largest precomputed prime: keep the caller's value.
    return nbucket;
}
// Round b up to the next power of two by smearing the highest set bit of
// (b - 1) into every lower position, then adding one.
// NOTE: find_power2(0) = 0 (0-1 wraps to all-ones, +1 wraps back to 0).
inline uint64_t find_power2(uint64_t b) {
    b -= 1;
    // Shifts of 1,2,4,...,32 cover all 64 bits.
    for (unsigned shift = 1; shift < 64; shift <<= 1) {
        b |= (b >> shift);
    }
    return b + 1;
}
// Using next prime is slower for 10ns on average (due to %). If quality of
// the hash code is good enough, primeness of nbucket is not important. We
// choose to trust the hash code (or user should use a better hash algorithm
// when the collisions are significant) and still stick to round-to-power-2
// solution right now.
inline size_t flatmap_round(size_t nbucket) {
#ifdef FLAT_MAP_ROUND_BUCKET_BY_USE_NEXT_PRIME
    return find_next_prime(nbucket);
#else
    // Clamp to at least 8 buckets: nbucket=0 would otherwise crash while
    // seeking the map.
    if (nbucket <= 8) {
        return 8;
    }
    return find_power2(nbucket);
#endif
}
// Map a hash code into [0, nbucket).
inline size_t flatmap_mod(size_t hash_code, size_t nbucket) {
#ifdef FLAT_MAP_ROUND_BUCKET_BY_USE_NEXT_PRIME
    // Prime-sized tables need a true modulo.
    return hash_code % nbucket;
#else
    // nbucket is a power of two (see flatmap_round), so a mask replaces
    // the costlier '%'.
    const size_t mask = nbucket - 1;
    return hash_code & mask;
#endif
}
// Iterate FlatMap.
// Forward iterator over the bucket array of a (dense) FlatMap. _entry points
// at the current slot of Map::_buckets; _node points at the current node,
// which is either the slot itself or a chained collision node (Bucket::next).
template <typename Map, typename Value> class FlatMapIterator {
public:
    typedef Value value_type;
    typedef Value& reference;
    typedef Value* pointer;
    typedef typename add_const<Value>::type ConstValue;
    typedef ConstValue& const_reference;
    typedef ConstValue* const_pointer;
    typedef std::forward_iterator_tag iterator_category;
    typedef ptrdiff_t difference_type;
    typedef typename remove_const<Value>::type NonConstValue;

    FlatMapIterator() : _node(NULL), _entry(NULL) {}
    // Position at the first valid node at or after bucket |pos|. An
    // uninitialized map yields a NULL-node iterator, which compares equal
    // to end() (equality is on _node only).
    FlatMapIterator(const Map* map, size_t pos) {
        if (map->initialized()) {
            _entry = map->_buckets + pos;
            find_and_set_valid_node();
        } else {
            _node = NULL;
            _entry = NULL;
        }
    }
    // Implicit conversion from the non-const iterator.
    FlatMapIterator(const FlatMapIterator<Map, NonConstValue>& rhs)
        : _node(rhs._node), _entry(rhs._entry) {}
    ~FlatMapIterator() {}  // required by style-checker

    // *this == rhs
    bool operator==(const FlatMapIterator& rhs) const
    { return _node == rhs._node; }
    // *this != rhs
    bool operator!=(const FlatMapIterator& rhs) const
    { return _node != rhs._node; }

    // ++ it
    // Exhaust the current bucket's collision chain first, then move to the
    // next valid bucket slot.
    FlatMapIterator& operator++() {
        if (NULL == _node->next) {
            ++_entry;
            find_and_set_valid_node();
        } else {
            _node = _node->next;
        }
        return *this;
    }
    // it ++
    FlatMapIterator operator++(int) {
        FlatMapIterator tmp = *this;
        this->operator++();
        return tmp;
    }

    reference operator*() { return _node->element().value_ref(); }
    pointer operator->() { return &_node->element().value_ref(); }
    const_reference operator*() const { return _node->element().value_ref(); }
    const_pointer operator->() const { return &_node->element().value_ref(); }

private:
friend class FlatMapIterator<Map, ConstValue>;
friend class FlatMap<typename Map::key_type, typename Map::mapped_type,
                     typename Map::hasher, typename Map::key_equal,
                     false, typename Map::allocator_type>;

    // Scan forward from _entry until a valid bucket is found.
    // NOTE(review): the scan has no explicit bound; it appears to rely on
    // the extra sentinel bucket at _buckets[_nbucket] (see the "extra
    // bucket" note in FlatMap::operator=/init) to terminate -- confirm.
    void find_and_set_valid_node() {
        for (; !_entry->is_valid(); ++_entry);
        _node = _entry;
    }

    typename Map::Bucket* _node;   // current node (slot or chained node)
    typename Map::Bucket* _entry;  // current slot in Map::_buckets
};
// Iterate SparseFlatMap.
// Same contract as FlatMapIterator, but instead of scanning the bucket
// array linearly it skips empty buckets via the map's _thumbnail bitmap
// (bit_array_first1), which makes iteration cheap on sparse tables.
template <typename Map, typename Value> class SparseFlatMapIterator {
public:
    typedef Value value_type;
    typedef Value& reference;
    typedef Value* pointer;
    typedef typename add_const<Value>::type ConstValue;
    typedef ConstValue& const_reference;
    typedef ConstValue* const_pointer;
    typedef std::forward_iterator_tag iterator_category;
    typedef ptrdiff_t difference_type;
    typedef typename remove_const<Value>::type NonConstValue;

    SparseFlatMapIterator() : _node(NULL), _pos(0), _map(NULL) {}
    // Position at the first valid node at or after bucket |pos|. An
    // uninitialized map yields a NULL-node iterator, equal to end().
    SparseFlatMapIterator(const Map* map, size_t pos) {
        if (map->initialized()) {
            _map = map;
            _pos = pos;
            find_and_set_valid_node();
        } else {
            _node = NULL;
            _map = NULL;
            _pos = 0;
        }
    }
    // Implicit conversion from the non-const iterator.
    SparseFlatMapIterator(const SparseFlatMapIterator<Map, NonConstValue>& rhs)
        : _node(rhs._node), _pos(rhs._pos), _map(rhs._map)
    {}
    ~SparseFlatMapIterator() {}  // required by style-checker

    // *this == rhs
    bool operator==(const SparseFlatMapIterator& rhs) const
    { return _node == rhs._node; }
    // *this != rhs
    bool operator!=(const SparseFlatMapIterator& rhs) const
    { return _node != rhs._node; }

    // ++ it
    // Exhaust the current bucket's collision chain first, then jump to the
    // next occupied bucket through the bitmap.
    SparseFlatMapIterator& operator++() {
        if (NULL == _node->next) {
            ++_pos;
            find_and_set_valid_node();
        } else {
            _node = _node->next;
        }
        return *this;
    }
    // it ++
    SparseFlatMapIterator operator++(int) {
        SparseFlatMapIterator tmp = *this;
        this->operator++();
        return tmp;
    }

    reference operator*() { return _node->element().value_ref(); }
    pointer operator->() { return &_node->element().value_ref(); }
    const_reference operator*() const { return _node->element().value_ref(); }
    const_pointer operator->() const { return &_node->element().value_ref(); }

private:
friend class SparseFlatMapIterator<Map, ConstValue>;

    // Move _pos to the first valid bucket at or after the current _pos,
    // consulting the thumbnail bitmap when the current slot is empty.
    void find_and_set_valid_node() {
        if (!_map->_buckets[_pos].is_valid()) {
            _pos = bit_array_first1(_map->_thumbnail, _pos + 1, _map->_nbucket);
        }
        _node = _map->_buckets + _pos;
    }

    typename Map::Bucket* _node;  // current node (slot or chained node)
    size_t _pos;                  // index of the current bucket
    const Map* _map;              // owning map, for _buckets/_thumbnail
};
// Construct an uninitialized map: no bucket array is allocated until init()
// is called. The hash functor, key-equality functor and allocator are
// stored for later use; the allocator is handed to the node pool.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
FlatMap<_K, _T, _H, _E, _S, _A>::FlatMap(const hasher& hashfn, const key_equal& eql, const allocator_type& alloc)
    : _size(0)
    , _nbucket(0)
    , _buckets(NULL)
    , _thumbnail(NULL)
    , _load_factor(0)
    , _hashfn(hashfn)
    , _eql(eql)
    , _pool(alloc)
{}
// Destroy all elements, then release the bucket array through the allocator
// and the thumbnail bitmap with plain free() (it is obtained from
// bit_array_malloc in init()/operator=).
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
FlatMap<_K, _T, _H, _E, _S, _A>::~FlatMap() {
    clear();
    get_allocator().Free(_buckets);
    _buckets = NULL;
    free(_thumbnail);
    _thumbnail = NULL;
    _nbucket = 0;
    _load_factor = 0;
}
// Copy-construct by starting from the uninitialized state and delegating to
// operator=, which takes its allocation path.
// NOTE(review): unlike the main constructor, rhs's allocator is not passed
// to _pool here (it is default-constructed) -- confirm this is intended.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
FlatMap<_K, _T, _H, _E, _S, _A>::FlatMap(const FlatMap& rhs)
    : _size(0)
    , _nbucket(0)
    , _buckets(NULL)
    , _thumbnail(NULL)
    , _load_factor(rhs._load_factor)
    , _hashfn(rhs._hashfn)
    , _eql(rhs._eql) {
    operator=(rhs);
}
// Deep-copy assignment. Reuses the existing bucket array when it can hold
// rhs's elements within the load factor; otherwise reallocates to rhs's
// bucket count. _hashfn/_eql are never touched (see NOTE below).
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
FlatMap<_K, _T, _H, _E, _S, _A>&
FlatMap<_K, _T, _H, _E, _S, _A>::operator=(const FlatMap<_K, _T, _H, _E, _S, _A>& rhs) {
    if (this == &rhs) {
        return *this;
    }
    // NOTE: assignment does not change _load_factor/_hashfn/_eql if |this| is
    // initialized
    clear();
    if (!rhs.initialized()) {
        return *this;
    }
    bool need_copy = !rhs.empty();
    _load_factor = rhs._load_factor;
    if (_buckets == NULL || is_too_crowded(rhs._size)) {
        // (Re)allocate: either never initialized, or the current capacity
        // could not hold rhs._size within _load_factor.
        get_allocator().Free(_buckets);
        _nbucket = rhs._nbucket;
        // note: need an extra bucket to let iterator know where buckets end
        _buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (_nbucket + 1/*note*/));
        if (NULL == _buckets) {
            LOG(ERROR) << "Fail to new _buckets";
            return *this;
        }
        // If no need to copy, set buckets invalid.
        if (!need_copy) {
            for (size_t i = 0; i < _nbucket; ++i) {
                _buckets[i].set_invalid();
            }
            _buckets[_nbucket].next = NULL;
        }
        if (_S) {
            // Sparse maps keep a bitmap ("thumbnail") marking occupied slots.
            free(_thumbnail);
            _thumbnail = bit_array_malloc(_nbucket);
            if (NULL == _thumbnail) {
                LOG(ERROR) << "Fail to new _thumbnail";
                return *this;
            }
            bit_array_clear(_thumbnail, _nbucket);
        }
    }
    if (!need_copy) {
        return *this;
    }
    if (_nbucket == rhs._nbucket) {
        // For equivalent _nbucket, walking through _buckets instead of using
        // iterators is more efficient.
        for (size_t i = 0; i < rhs._nbucket; ++i) {
            if (!rhs._buckets[i].is_valid()) {
                _buckets[i].set_invalid();
            } else {
                if (_S) {
                    bit_array_set(_thumbnail, i);
                }
                // Copy the in-place head node, then deep-copy the chained
                // collision nodes out of the memory pool.
                new (&_buckets[i]) Bucket(rhs._buckets[i]);
                Bucket* p1 = &_buckets[i];
                Bucket* p2 = rhs._buckets[i].next;
                while (p2) {
                    p1->next = new (_pool.get()) Bucket(*p2);
                    p1 = p1->next;
                    p2 = p2->next;
                }
            }
        }
        // Terminate the sentinel bucket so iterators know where to stop.
        _buckets[rhs._nbucket].next = NULL;
        _size = rhs._size;
    } else {
        // Bucket counts differ: rehash every element through operator[].
        for (const_iterator it = rhs.begin(); it != rhs.end(); ++it) {
            operator[](Element::first_ref_from_value(*it)) =
                Element::second_ref_from_value(*it);
        }
    }
    return *this;
}
// Allocate the bucket array -- flatmap_round(nbucket) buckets plus one
// sentinel -- and, for sparse maps, the thumbnail bitmap.
// load_factor is a percentage and must lie in [10, 100].
// Returns 0 on success, -1 on double-init, invalid arguments or OOM.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
int FlatMap<_K, _T, _H, _E, _S, _A>::init(size_t nbucket, u_int load_factor) {
    if (initialized()) {
        LOG(ERROR) << "Already initialized";
        return -1;
    }
    if (nbucket == 0) {
        LOG(WARNING) << "Fail to init FlatMap, nbucket=" << nbucket;
        return -1;
    }
    if (load_factor < 10 || load_factor > 100) {
        LOG(ERROR) << "Invalid load_factor=" << load_factor;
        return -1;
    }
    _size = 0;
    _nbucket = flatmap_round(nbucket);
    _load_factor = load_factor;
    // The extra bucket is a sentinel that tells iterators where buckets end.
    _buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (_nbucket + 1));
    if (NULL == _buckets) {
        LOG(ERROR) << "Fail to new _buckets";
        return -1;
    }
    for (size_t i = 0; i < _nbucket; ++i) {
        _buckets[i].set_invalid();
    }
    _buckets[_nbucket].next = NULL;
    if (_S) {
        // Bitmap marking occupied buckets, used by SparseFlatMapIterator.
        _thumbnail = bit_array_malloc(_nbucket);
        if (NULL == _thumbnail) {
            LOG(ERROR) << "Fail to new _thumbnail";
            return -1;
        }
        bit_array_clear(_thumbnail, _nbucket);
    }
    return 0;
}
// Swap the entire state (buckets, bitmap, functors and the node pool) with
// rhs in O(1); no elements are copied.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
void FlatMap<_K, _T, _H, _E, _S, _A>::swap(FlatMap<_K, _T, _H, _E, _S, _A> & rhs) {
    std::swap(rhs._size, _size);
    std::swap(rhs._nbucket, _nbucket);
    std::swap(rhs._buckets, _buckets);
    std::swap(rhs._thumbnail, _thumbnail);
    std::swap(rhs._load_factor, _load_factor);
    std::swap(rhs._hashfn, _hashfn);
    std::swap(rhs._eql, _eql);
    rhs._pool.swap(_pool);
}
// Insert-or-assign: operator[] creates the slot when the key is absent,
// then the value is copied in. Returns a pointer to the stored value.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
_T* FlatMap<_K, _T, _H, _E, _S, _A>::insert(const key_type& key,
                                            const mapped_type& value) {
    mapped_type& slot = operator[](key);
    slot = value;
    return &slot;
}
// Convenience overload: insert (or overwrite) kv.first -> kv.second and
// return a pointer to the stored value.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
_T* FlatMap<_K, _T, _H, _E, _S, _A>::insert(const std::pair<key_type, mapped_type>& kv) {
    return insert(kv.first, kv.second);
}
// Remove the element matching |key|, if any. When |old_value| is non-NULL,
// the removed value is moved into it first. Returns the number of erased
// elements (0 or 1).
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
template <typename K2>
size_t FlatMap<_K, _T, _H, _E, _S, _A>::erase(const K2& key, _T* old_value) {
    if (!initialized()) {
        return 0;
    }
    // TODO: Do we need auto collapsing here?
    const size_t index = flatmap_mod(_hashfn(key), _nbucket);
    Bucket& first_node = _buckets[index];
    if (!first_node.is_valid()) {
        return 0;
    }
    if (_eql(first_node.element().first_ref(), key)) {
        // The match is the in-place head node of the bucket.
        if (old_value) {
            *old_value = first_node.element().second_movable_ref();
        }
        if (first_node.next == NULL) {
            // No chain behind the head: just invalidate the slot.
            first_node.element().~Element();
            first_node.set_invalid();
            if (_S) {
                bit_array_unset(_thumbnail, index);
            }
        } else {
            // Promote the first chained node into the head slot.
            // A seemingly correct solution is to copy the memory of *p to
            // first_node directly like this:
            //     first_node.element().~Element();
            //     first_node = *p;
            // It works at most of the time, but is wrong generally.
            // If _T references self inside like this:
            //     Value {
            //         Value() : num(0), num_ptr(&num) {}
            //         int num;
            //         int* num_ptr;
            //     };
            // After copying, num_ptr will be invalid.
            // Calling operator= is the price that we have to pay.
            Bucket* p = first_node.next;
            first_node.next = p->next;
            const_cast<_K&>(first_node.element().first_ref()) =
                p->element().first_ref();
            first_node.element().second_ref() = p->element().second_movable_ref();
            p->element().~Element();
            _pool.back(p);
        }
        --_size;
        return 1UL;
    }
    // Search the collision chain behind the head node.
    Bucket *p = first_node.next;
    Bucket *last_p = &first_node;
    while (p) {
        if (_eql(p->element().first_ref(), key)) {
            if (old_value) {
                *old_value = p->element().second_movable_ref();
            }
            // Unlink the node and return it to the pool.
            last_p->next = p->next;
            p->element().~Element();
            _pool.back(p);
            --_size;
            return 1UL;
        }
        last_p = p;
        p = p->next;
    }
    return 0;
}
// Destroy all elements but keep the bucket array and thumbnail bitmap for
// reuse; chained nodes go back to the pool, no memory is released.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
void FlatMap<_K, _T, _H, _E, _S, _A>::clear() {
    if (0 == _size) {
        return;
    }
    _size = 0;
    if (NULL != _buckets) {
        for (size_t i = 0; i < _nbucket; ++i) {
            Bucket& first_node = _buckets[i];
            if (first_node.is_valid()) {
                first_node.element().~Element();
                // Destroy the collision chain and recycle its nodes.
                Bucket* p = first_node.next;
                while (p) {
                    Bucket* next_p = p->next;
                    p->element().~Element();
                    _pool.back(p);
                    p = next_p;
                }
                first_node.set_invalid();
            }
        }
    }
    if (NULL != _thumbnail) {
        bit_array_clear(_thumbnail, _nbucket);
    }
}
// Like clear(), but additionally releases the memory held by the node pool.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
void FlatMap<_K, _T, _H, _E, _S, _A>::clear_and_reset_pool() {
    clear();
    _pool.reset();
}
// Look up |key| and return a pointer to its value, or NULL when the key is
// absent or the map was never initialized. K2 only needs to be hashable by
// _hashfn and comparable to the stored keys by _eql.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
template <typename K2>
_T* FlatMap<_K, _T, _H, _E, _S, _A>::seek(const K2& key) const {
    if (!initialized()) {
        return NULL;
    }
    Bucket& first_node = _buckets[flatmap_mod(_hashfn(key), _nbucket)];
    if (!first_node.is_valid()) {
        return NULL;
    }
    // Walk the in-place head node and its collision chain uniformly.
    for (Bucket* node = &first_node; node != NULL; node = node->next) {
        if (_eql(node->element().first_ref(), key)) {
            return &node->element().second_ref();
        }
    }
    return NULL;
}
// Return a reference to the value mapped to |key|, creating the element
// (via Bucket(key)) if absent. When appending would make the map too
// crowded, a resize is attempted first; failure to resize is tolerated and
// insertion proceeds in the current table.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
_T& FlatMap<_K, _T, _H, _E, _S, _A>::operator[](const key_type& key) {
    const size_t index = flatmap_mod(_hashfn(key), _nbucket);
    Bucket& first_node = _buckets[index];
    if (!first_node.is_valid()) {
        // Empty slot: construct the element in place as the head node.
        ++_size;
        if (_S) {
            bit_array_set(_thumbnail, index);
        }
        new (&first_node) Bucket(key);
        return first_node.element().second_ref();
    }
    Bucket *p = &first_node;
    while (1) {
        if (_eql(p->element().first_ref(), key)) {
            return p->element().second_ref();
        }
        if (NULL == p->next) {
            // Key absent; grow first if the load factor would be exceeded.
            if (is_too_crowded(_size)) {
                if (resize(_nbucket + 1)) {
                    // Table rebuilt; redo the lookup in the new table.
                    return operator[](key);
                }
                // fail to resize is OK
            }
            // Append a fresh pooled node to the collision chain.
            ++_size;
            Bucket* newp = new (_pool.get()) Bucket(key);
            p->next = newp;
            return newp->element().second_ref();
        }
        p = p->next;
    }
}
// Serialize |it|'s position into |hint| so that iteration can be resumed
// with restore_iterator() even after the map has been modified.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
void FlatMap<_K, _T, _H, _E, _S, _A>::save_iterator(
    const const_iterator& it, PositionHint* hint) const {
    hint->nbucket = _nbucket;             // lets restore detect a resize
    hint->offset = it._entry - _buckets;  // index of the current bucket
    if (it != end()) {
        // Whether the iterator sits on the in-place head node (as opposed
        // to a chained collision node), plus the key to re-find it by.
        hint->at_entry = (it._entry == it._node);
        hint->key = it->first;
    } else {
        hint->at_entry = false;
        hint->key = key_type();
    }
}
// Rebuild a const_iterator from a hint saved by save_iterator(). Falls back
// to begin() when the map was resized in between, and to end() for an
// out-of-range offset. Elements may be revisited, but the iteration never
// crashes on a stale hint.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
typename FlatMap<_K, _T, _H, _E, _S, _A>::const_iterator
FlatMap<_K, _T, _H, _E, _S, _A>::restore_iterator(const PositionHint& hint) const {
    if (hint.nbucket != _nbucket)  // resized
        return begin();            // restart
    if (hint.offset >= _nbucket)   // invalid hint, stop the iteration
        return end();
    Bucket& first_node = _buckets[hint.offset];
    if (hint.at_entry) {
        return const_iterator(this, hint.offset);
    }
    if (!first_node.is_valid()) {
        // All elements hashed to the entry were removed, try next entry.
        return const_iterator(this, hint.offset + 1);
    }
    // The saved element was a chained node: re-find it by its key.
    Bucket *p = &first_node;
    do {
        if (_eql(p->element().first_ref(), hint.key)) {
            const_iterator it;
            it._node = p;
            it._entry = &first_node;
            return it;
        }
        p = p->next;
    } while (p);
    // Last element that we iterated (and saved in PositionHint) was removed,
    // don't know which element to start, just restart at the beginning of
    // the entry. Some elements in the entry may be revisited, which
    // shouldn't be often.
    return const_iterator(this, hint.offset);
}
// Rebuild the table with flatmap_round(nbucket2) buckets by moving every
// element into a freshly-initialized map and swapping it in. Returns true
// iff the bucket count actually changed. Iterators are invalidated.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
bool FlatMap<_K, _T, _H, _E, _S, _A>::resize(size_t nbucket2) {
    nbucket2 = flatmap_round(nbucket2);
    if (_nbucket == nbucket2) {
        return false;
    }
    // NOTE: following functors must be kept after resizing otherwise the
    // internal state is lost.
    FlatMap new_map(_hashfn, _eql, get_allocator());
    if (new_map.init(nbucket2, _load_factor) != 0) {
        LOG(ERROR) << "Fail to init new_map, nbucket=" << nbucket2;
        return false;
    }
    // Move (not copy) every element into the new table, then steal its state.
    for (iterator it = begin(); it != end(); ++it) {
        new_map[Element::first_ref_from_value(*it)] =
            Element::second_movable_ref_from_value(*it);
    }
    new_map.swap(*this);
    return true;
}
// Collect debugging statistics: the length of the longest collision chain
// and the average chain length over non-empty buckets.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
BucketInfo FlatMap<_K, _T, _H, _E, _S, _A>::bucket_info() const {
    size_t max_n = 0;
    size_t nentry = 0;
    for (size_t i = 0; i < _nbucket; ++i) {
        if (_buckets[i].is_valid()) {
            // The in-place head node counts as 1; walk the chain for the rest.
            size_t n = 1;
            for (Bucket* p = _buckets[i].next; p; p = p->next, ++n);
            max_n = std::max(max_n, n);
            ++nentry;
        }
    }
    // Guard nentry == 0 (empty map): 0.0/0 would yield NaN in average_length.
    const double avg = (nentry != 0) ? size() / (double)nentry : 0;
    const BucketInfo info = { max_n, avg };
    return info;
}
// Pretty-print BucketInfo as "{maxb=<longest_length> avgb=<average_length>}".
inline std::ostream& operator<<(std::ostream& os, const BucketInfo& info) {
    os << "{maxb=" << info.longest_length;
    os << " avgb=" << info.average_length << '}';
    return os;
}
// Iterator to the first element; for an uninitialized map the constructed
// iterator has a NULL node and thus equals end().
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
typename FlatMap<_K, _T, _H, _E, _S, _A>::iterator FlatMap<_K, _T, _H, _E, _S, _A>::begin() {
    return iterator(this, 0);
}
// Past-the-end iterator, positioned at bucket index _nbucket.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
typename FlatMap<_K, _T, _H, _E, _S, _A>::iterator FlatMap<_K, _T, _H, _E, _S, _A>::end() {
    return iterator(this, _nbucket);
}
// Const iterator to the first element; equals end() for an uninitialized map.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
typename FlatMap<_K, _T, _H, _E, _S, _A>::const_iterator FlatMap<_K, _T, _H, _E, _S, _A>::begin() const {
    return const_iterator(this, 0);
}
// Const past-the-end iterator, positioned at bucket index _nbucket.
template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A>
typename FlatMap<_K, _T, _H, _E, _S, _A>::const_iterator FlatMap<_K, _T, _H, _E, _S, _A>::end() const {
    return const_iterator(this, _nbucket);
}
} // namespace butil
#endif //BUTIL_FLAT_MAP_INL_H