Merge pull request #181 from apache/fi_allocator
support passing an allocator instance
diff --git a/fi/include/frequent_items_sketch.hpp b/fi/include/frequent_items_sketch.hpp
index b2a891f..6efe2b9 100644
--- a/fi/include/frequent_items_sketch.hpp
+++ b/fi/include/frequent_items_sketch.hpp
@@ -40,15 +40,20 @@
enum frequent_items_error_type { NO_FALSE_POSITIVES, NO_FALSE_NEGATIVES };
-// for serialization as raw bytes
-template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
-template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
-
// type W for weight must be an arithmetic type (integral or floating point)
-template<typename T, typename W = uint64_t, typename H = std::hash<T>, typename E = std::equal_to<T>, typename S = serde<T>, typename A = std::allocator<T>>
+template<
+ typename T,
+ typename W = uint64_t,
+ typename H = std::hash<T>,
+ typename E = std::equal_to<T>,
+ typename S = serde<T>,
+ typename A = std::allocator<T>
+>
class frequent_items_sketch {
public:
+ static const uint8_t LG_MIN_MAP_SIZE = 3;
+
/**
* Construct this sketch with parameters lg_max_map_size and lg_start_map_size.
*
@@ -59,7 +64,7 @@
* @param lg_start_map_size Log2 of the starting physical size of the internal hash
* map managed by this sketch.
*/
- explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE);
+ explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A());
/**
* Update this sketch with an item and a positive weight (frequency count).
@@ -232,7 +237,8 @@
// This is a convenience alias for users
// The type returned by the following serialize method
- typedef vector_u8<A> vector_bytes;
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+
/**
* This method serializes the sketch as a vector of bytes.
@@ -249,7 +255,7 @@
* @param is input stream
* @return an instance of the sketch
*/
- static frequent_items_sketch deserialize(std::istream& is);
+ static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());
/**
* This method deserializes a sketch from a given array of bytes.
@@ -257,7 +263,7 @@
* @param size the size of the array
* @return an instance of the sketch
*/
- static frequent_items_sketch deserialize(const void* bytes, size_t size);
+ static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
/**
* Returns a human readable summary of this sketch
@@ -266,7 +272,6 @@
string<A> to_string(bool print_items = false) const;
private:
- static const uint8_t LG_MIN_MAP_SIZE = 3;
static const uint8_t SERIAL_VERSION = 1;
static const uint8_t FAMILY_ID = 10;
static const uint8_t PREAMBLE_LONGS_EMPTY = 1;
diff --git a/fi/include/frequent_items_sketch_impl.hpp b/fi/include/frequent_items_sketch_impl.hpp
index bb05aa9..df4352e 100644
--- a/fi/include/frequent_items_sketch_impl.hpp
+++ b/fi/include/frequent_items_sketch_impl.hpp
@@ -33,10 +33,14 @@
const uint8_t frequent_items_sketch<T, W, H, E, S, A>::LG_MIN_MAP_SIZE;
template<typename T, typename W, typename H, typename E, typename S, typename A>
-frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size):
+frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size, const A& allocator):
total_weight(0),
offset(0),
-map(std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE))
+map(
+ std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE),
+ std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE),
+ allocator
+)
{
if (lg_start_map_size > lg_max_map_size) throw std::invalid_argument("starting size must not be greater than maximum size");
}
@@ -142,7 +146,7 @@
template<typename T, typename W, typename H, typename E, typename S, typename A>
typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type, W threshold) const {
- vector_row items;
+ vector_row items(map.get_allocator());
for (auto &it: map) {
const W lb = it.second;
const W ub = it.second + offset;
@@ -182,19 +186,21 @@
os.write((char*)&offset, sizeof(offset));
// copy active items and their weights to use batch serialization
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
- W* weights = AllocW().allocate(num_items);
- T* items = A().allocate(num_items);
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
+ AllocW aw(map.get_allocator());
+ W* weights = aw.allocate(num_items);
+ A alloc(map.get_allocator());
+ T* items = alloc.allocate(num_items);
uint32_t i = 0;
for (auto &it: map) {
new (&items[i]) T(it.first);
weights[i++] = it.second;
}
os.write((char*)weights, sizeof(W) * num_items);
- AllocW().deallocate(weights, num_items);
+ aw.deallocate(weights, num_items);
S().serialize(os, items, num_items);
for (unsigned i = 0; i < num_items; i++) items[i].~T();
- A().deallocate(items, num_items);
+ alloc.deallocate(items, num_items);
}
}
@@ -207,9 +213,9 @@
}
template<typename T, typename W, typename H, typename E, typename S, typename A>
-vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const {
+auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
const size_t size = header_size_bytes + get_serialized_size_bytes();
- vector_u8<A> bytes(size);
+ vector_bytes bytes(size, 0, map.get_allocator());
uint8_t* ptr = bytes.data() + header_size_bytes;
uint8_t* end_ptr = ptr + size;
@@ -238,20 +244,22 @@
ptr += copy_to_mem(&offset, ptr, sizeof(offset));
// copy active items and their weights to use batch serialization
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
- W* weights = AllocW().allocate(num_items);
- T* items = A().allocate(num_items);
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
+ AllocW aw(map.get_allocator());
+ W* weights = aw.allocate(num_items);
+ A alloc(map.get_allocator());
+ T* items = alloc.allocate(num_items);
uint32_t i = 0;
for (auto &it: map) {
new (&items[i]) T(it.first);
weights[i++] = it.second;
}
ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
- AllocW().deallocate(weights, num_items);
+ aw.deallocate(weights, num_items);
const size_t bytes_remaining = end_ptr - ptr;
ptr += S().serialize(ptr, bytes_remaining, items, num_items);
for (unsigned i = 0; i < num_items; i++) items[i].~T();
- A().deallocate(items, num_items);
+ alloc.deallocate(items, num_items);
}
return bytes;
}
@@ -259,23 +267,25 @@
template<typename T, typename W, typename H, typename E, typename S, typename A>
class frequent_items_sketch<T, W, H, E, S, A>::items_deleter {
public:
- items_deleter(uint32_t num, bool destroy): num(num), destroy(destroy) {}
+ items_deleter(uint32_t num, bool destroy, const A& allocator):
+ allocator(allocator), num(num), destroy(destroy) {}
void set_destroy(bool destroy) { this->destroy = destroy; }
- void operator() (T* ptr) const {
+ void operator() (T* ptr) {
if (ptr != nullptr) {
if (destroy) {
for (uint32_t i = 0; i < num; ++i) ptr[i].~T();
}
- A().deallocate(ptr, num);
+ allocator.deallocate(ptr, num);
}
}
private:
+ A allocator;
uint32_t num;
bool destroy;
};
template<typename T, typename W, typename H, typename E, typename S, typename A>
-frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is) {
+frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const A& allocator) {
uint8_t preamble_longs;
is.read((char*)&preamble_longs, sizeof(preamble_longs));
uint8_t serial_version;
@@ -298,7 +308,7 @@
check_family_id(family_id);
check_size(lg_cur_size, lg_max_size);
- frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
+ frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
if (!is_empty) {
uint32_t num_items;
is.read((char*)&num_items, sizeof(num_items));
@@ -310,10 +320,11 @@
is.read((char*)&offset, sizeof(offset));
// batch deserialization with intermediate array of items and weights
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
- std::vector<W, AllocW> weights(num_items);
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
+ std::vector<W, AllocW> weights(num_items, 0, allocator);
is.read((char*)weights.data(), sizeof(W) * num_items);
- std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
+ A alloc(allocator);
+ std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
S().deserialize(is, items.get(), num_items);
items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
for (uint32_t i = 0; i < num_items; i++) {
@@ -328,7 +339,7 @@
}
template<typename T, typename W, typename H, typename E, typename S, typename A>
-frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size) {
+frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
ensure_minimum_memory(size, 8);
const char* ptr = static_cast<const char*>(bytes);
const char* base = static_cast<const char*>(bytes);
@@ -355,7 +366,7 @@
check_size(lg_cur_size, lg_max_size);
ensure_minimum_memory(size, 1 << preamble_longs);
- frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
+ frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
if (!is_empty) {
uint32_t num_items;
ptr += copy_from_mem(ptr, &num_items, sizeof(uint32_t));
@@ -368,10 +379,11 @@
ensure_minimum_memory(size, ptr - base + (sizeof(W) * num_items));
// batch deserialization with intermediate array of items and weights
- typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
- std::vector<W, AllocW> weights(num_items);
+ using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
+ std::vector<W, AllocW> weights(num_items, 0, allocator);
ptr += copy_from_mem(ptr, weights.data(), sizeof(W) * num_items);
- std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
+ A alloc(allocator);
+ std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
const size_t bytes_remaining = size - (ptr - base);
ptr += S().deserialize(ptr, bytes_remaining, items.get(), num_items);
items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
diff --git a/fi/include/reverse_purge_hash_map.hpp b/fi/include/reverse_purge_hash_map.hpp
index e3e7bfb..fc4cd83 100644
--- a/fi/include/reverse_purge_hash_map.hpp
+++ b/fi/include/reverse_purge_hash_map.hpp
@@ -39,33 +39,39 @@
using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>;
using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
- reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size);
+ reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size, const A& allocator);
reverse_purge_hash_map(const reverse_purge_hash_map& other);
reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept;
~reverse_purge_hash_map();
reverse_purge_hash_map& operator=(reverse_purge_hash_map other);
reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other);
- V adjust_or_insert(const K& key, V value);
- V adjust_or_insert(K&& key, V value);
+
+ template<typename FwdK>
+ V adjust_or_insert(FwdK&& key, V value);
+
V get(const K& key) const;
uint8_t get_lg_cur_size() const;
uint8_t get_lg_max_size() const;
uint32_t get_capacity() const;
uint32_t get_num_active() const;
+ const A& get_allocator() const;
+
class iterator;
iterator begin() const;
iterator end() const;
+
private:
static constexpr double LOAD_FACTOR = 0.75;
static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing
static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge
- uint8_t lg_cur_size;
- uint8_t lg_max_size;
- uint32_t num_active;
- K* keys;
- V* values;
- uint16_t* states;
+ A allocator_;
+ uint8_t lg_cur_size_;
+ uint8_t lg_max_size_;
+ uint32_t num_active_;
+ K* keys_;
+ V* values_;
+ uint16_t* states_;
inline bool is_active(uint32_t probe) const;
void subtract_and_keep_positive_only(V amount);
@@ -83,8 +89,8 @@
friend class reverse_purge_hash_map<K, V, H, E, A>;
iterator& operator++() {
++count;
- if (count < map->num_active) {
- const uint32_t mask = (1 << map->lg_cur_size) - 1;
+ if (count < map->num_active_) {
+ const uint32_t mask = (1 << map->lg_cur_size_) - 1;
do {
index = (index + stride) & mask;
} while (!map->is_active(index));
@@ -95,7 +101,7 @@
bool operator==(const iterator& rhs) const { return count == rhs.count; }
bool operator!=(const iterator& rhs) const { return count != rhs.count; }
const std::pair<K&, V> operator*() const {
- return std::pair<K&, V>(map->keys[index], map->values[index]);
+ return std::pair<K&, V>(map->keys_[index], map->values_[index]);
}
private:
static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
@@ -104,7 +110,7 @@
uint32_t count;
uint32_t stride;
iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count):
- map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
+ map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size_) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
};
} /* namespace datasketches */
diff --git a/fi/include/reverse_purge_hash_map_impl.hpp b/fi/include/reverse_purge_hash_map_impl.hpp
index 732d923..beccea4 100644
--- a/fi/include/reverse_purge_hash_map_impl.hpp
+++ b/fi/include/reverse_purge_hash_map_impl.hpp
@@ -34,113 +34,121 @@
constexpr uint32_t reverse_purge_hash_map<K, V, H, E, A>::MAX_SAMPLE_SIZE;
template<typename K, typename V, typename H, typename E, typename A>
-reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size):
-lg_cur_size(lg_cur_size),
-lg_max_size(lg_max_size),
-num_active(0),
-keys(A().allocate(1 << lg_cur_size)),
-values(AllocV().allocate(1 << lg_cur_size)),
-states(AllocU16().allocate(1 << lg_cur_size))
+reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size, const A& allocator):
+allocator_(allocator),
+lg_cur_size_(lg_cur_size),
+lg_max_size_(lg_max_size),
+num_active_(0),
+keys_(allocator_.allocate(1 << lg_cur_size)),
+values_(nullptr),
+states_(nullptr)
{
- std::fill(states, &states[1 << lg_cur_size], 0);
+ AllocV av(allocator_);
+ values_ = av.allocate(1 << lg_cur_size);
+ AllocU16 au16(allocator_);
+ states_ = au16.allocate(1 << lg_cur_size);
+ std::fill(states_, states_ + (1 << lg_cur_size), 0);
}
template<typename K, typename V, typename H, typename E, typename A>
reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(const reverse_purge_hash_map<K, V, H, E, A>& other):
-lg_cur_size(other.lg_cur_size),
-lg_max_size(other.lg_max_size),
-num_active(other.num_active),
-keys(A().allocate(1 << lg_cur_size)),
-values(AllocV().allocate(1 << lg_cur_size)),
-states(AllocU16().allocate(1 << lg_cur_size))
+allocator_(other.allocator_),
+lg_cur_size_(other.lg_cur_size_),
+lg_max_size_(other.lg_max_size_),
+num_active_(other.num_active_),
+keys_(allocator_.allocate(1 << lg_cur_size_)),
+values_(nullptr),
+states_(nullptr)
{
- const uint32_t size = 1 << lg_cur_size;
- if (num_active > 0) {
- auto num = num_active;
+ AllocV av(allocator_);
+ values_ = av.allocate(1 << lg_cur_size_);
+ AllocU16 au16(allocator_);
+ states_ = au16.allocate(1 << lg_cur_size_);
+ const uint32_t size = 1 << lg_cur_size_;
+ if (num_active_ > 0) {
+ auto num = num_active_;
for (uint32_t i = 0; i < size; i++) {
- if (other.states[i] > 0) {
- new (&keys[i]) K(other.keys[i]);
- values[i] = other.values[i];
+ if (other.states_[i] > 0) {
+ new (&keys_[i]) K(other.keys_[i]);
+ values_[i] = other.values_[i];
}
if (--num == 0) break;
}
}
- std::copy(&other.states[0], &other.states[size], states);
+ std::copy(other.states_, other.states_ + size, states_);
}
template<typename K, typename V, typename H, typename E, typename A>
reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(reverse_purge_hash_map<K, V, H, E, A>&& other) noexcept:
-lg_cur_size(other.lg_cur_size),
-lg_max_size(other.lg_max_size),
-num_active(other.num_active),
-keys(nullptr),
-values(nullptr),
-states(nullptr)
+allocator_(std::move(other.allocator_)),
+lg_cur_size_(other.lg_cur_size_),
+lg_max_size_(other.lg_max_size_),
+num_active_(other.num_active_),
+keys_(nullptr),
+values_(nullptr),
+states_(nullptr)
{
- std::swap(keys, other.keys);
- std::swap(values, other.values);
- std::swap(states, other.states);
- other.num_active = 0;
+ std::swap(keys_, other.keys_);
+ std::swap(values_, other.values_);
+ std::swap(states_, other.states_);
+ other.num_active_ = 0;
}
template<typename K, typename V, typename H, typename E, typename A>
reverse_purge_hash_map<K, V, H, E, A>::~reverse_purge_hash_map() {
- const uint32_t size = 1 << lg_cur_size;
- if (num_active > 0) {
+ const uint32_t size = 1 << lg_cur_size_;
+ if (num_active_ > 0) {
for (uint32_t i = 0; i < size; i++) {
if (is_active(i)) {
- keys[i].~K();
- if (--num_active == 0) break;
+ keys_[i].~K();
+ if (--num_active_ == 0) break;
}
}
}
- if (keys != nullptr)
- A().deallocate(keys, size);
- if (values != nullptr)
- AllocV().deallocate(values, size);
- if (states != nullptr)
- AllocU16().deallocate(states, size);
+ if (keys_ != nullptr) {
+ allocator_.deallocate(keys_, size);
+ }
+ if (values_ != nullptr) {
+ AllocV av(allocator_);
+ av.deallocate(values_, size);
+ }
+ if (states_ != nullptr) {
+ AllocU16 au16(allocator_);
+ au16.deallocate(states_, size);
+ }
}
template<typename K, typename V, typename H, typename E, typename A>
reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A> other) {
- std::swap(lg_cur_size, other.lg_cur_size);
- std::swap(lg_max_size, other.lg_max_size);
- std::swap(num_active, other.num_active);
- std::swap(keys, other.keys);
- std::swap(values, other.values);
- std::swap(states, other.states);
+ std::swap(allocator_, other.allocator_);
+ std::swap(lg_cur_size_, other.lg_cur_size_);
+ std::swap(lg_max_size_, other.lg_max_size_);
+ std::swap(num_active_, other.num_active_);
+ std::swap(keys_, other.keys_);
+ std::swap(values_, other.values_);
+ std::swap(states_, other.states_);
return *this;
}
template<typename K, typename V, typename H, typename E, typename A>
reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A>&& other) {
- std::swap(lg_cur_size, other.lg_cur_size);
- std::swap(lg_max_size, other.lg_max_size);
- std::swap(num_active, other.num_active);
- std::swap(keys, other.keys);
- std::swap(values, other.values);
- std::swap(states, other.states);
+ std::swap(allocator_, other.allocator_);
+ std::swap(lg_cur_size_, other.lg_cur_size_);
+ std::swap(lg_max_size_, other.lg_max_size_);
+ std::swap(num_active_, other.num_active_);
+ std::swap(keys_, other.keys_);
+ std::swap(values_, other.values_);
+ std::swap(states_, other.states_);
return *this;
}
template<typename K, typename V, typename H, typename E, typename A>
-V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(const K& key, V value) {
- const uint32_t num_active_before = num_active;
+template<typename FwdK>
+V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(FwdK&& key, V value) {
+ const uint32_t num_active_before = num_active_;
const uint32_t index = internal_adjust_or_insert(key, value);
- if (num_active > num_active_before) {
- new (&keys[index]) K(key);
- return resize_or_purge_if_needed();
- }
- return 0;
-}
-
-template<typename K, typename V, typename H, typename E, typename A>
-V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) {
- const uint32_t num_active_before = num_active;
- const uint32_t index = internal_adjust_or_insert(key, value);
- if (num_active > num_active_before) {
- new (&keys[index]) K(std::move(key));
+ if (num_active_ > num_active_before) {
+ new (&keys_[index]) K(std::forward<FwdK>(key));
return resize_or_purge_if_needed();
}
return 0;
@@ -148,10 +156,10 @@
template<typename K, typename V, typename H, typename E, typename A>
V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const {
- const uint32_t mask = (1 << lg_cur_size) - 1;
+ const uint32_t mask = (1 << lg_cur_size_) - 1;
uint32_t probe = fmix64(H()(key)) & mask;
while (is_active(probe)) {
- if (E()(keys[probe], key)) return values[probe];
+ if (E()(keys_[probe], key)) return values_[probe];
probe = (probe + 1) & mask;
}
return 0;
@@ -159,27 +167,32 @@
template<typename K, typename V, typename H, typename E, typename A>
uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_cur_size() const {
- return lg_cur_size;
+ return lg_cur_size_;
}
template<typename K, typename V, typename H, typename E, typename A>
uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size() const {
- return lg_max_size;
+ return lg_max_size_;
}
template<typename K, typename V, typename H, typename E, typename A>
uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const {
- return (1 << lg_cur_size) * LOAD_FACTOR;
+ return (1 << lg_cur_size_) * LOAD_FACTOR;
}
template<typename K, typename V, typename H, typename E, typename A>
uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_num_active() const {
- return num_active;
+ return num_active_;
+}
+
+template<typename K, typename V, typename H, typename E, typename A>
+const A& reverse_purge_hash_map<K, V, H, E, A>::get_allocator() const {
+ return allocator_;
}
template<typename K, typename V, typename H, typename E, typename A>
typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::begin() const {
- const uint32_t size = 1 << lg_cur_size;
+ const uint32_t size = 1 << lg_cur_size_;
uint32_t i = 0;
while (i < size && !is_active(i)) i++;
return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, i, 0);
@@ -187,40 +200,40 @@
template<typename K, typename V, typename H, typename E, typename A>
typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::end() const {
- return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size, num_active);
+ return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size_, num_active_);
}
template<typename K, typename V, typename H, typename E, typename A>
bool reverse_purge_hash_map<K, V, H, E, A>::is_active(uint32_t index) const {
- return states[index] > 0;
+ return states_[index] > 0;
}
template<typename K, typename V, typename H, typename E, typename A>
void reverse_purge_hash_map<K, V, H, E, A>::subtract_and_keep_positive_only(V amount) {
// starting from the back, find the first empty cell,
// which establishes the high end of a cluster.
- uint32_t first_probe = (1 << lg_cur_size) - 1;
+ uint32_t first_probe = (1 << lg_cur_size_) - 1;
while (is_active(first_probe)) first_probe--;
// when we find the next non-empty cell, we know we are at the high end of a cluster
// work towards the front, delete any non-positive entries.
for (uint32_t probe = first_probe; probe-- > 0;) {
if (is_active(probe)) {
- if (values[probe] <= amount) {
+ if (values_[probe] <= amount) {
hash_delete(probe); // does the work of deletion and moving higher items towards the front
- num_active--;
+ num_active_--;
} else {
- values[probe] -= amount;
+ values_[probe] -= amount;
}
}
}
// now work on the first cluster that was skipped
- for (uint32_t probe = (1 << lg_cur_size); probe-- > first_probe;) {
+ for (uint32_t probe = (1 << lg_cur_size_); probe-- > first_probe;) {
if (is_active(probe)) {
- if (values[probe] <= amount) {
+ if (values_[probe] <= amount) {
hash_delete(probe);
- num_active--;
+ num_active_--;
} else {
- values[probe] -= amount;
+ values_[probe] -= amount;
}
}
}
@@ -231,20 +244,20 @@
// Looks ahead in the table to search for another
// item to move to this location
// if none are found, the status is changed
- states[delete_index] = 0; // mark as empty
- keys[delete_index].~K();
+ states_[delete_index] = 0; // mark as empty
+ keys_[delete_index].~K();
uint32_t drift = 1;
- const uint32_t mask = (1 << lg_cur_size) - 1;
+ const uint32_t mask = (1 << lg_cur_size_) - 1;
uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2
// advance until we find a free location replacing locations as needed
while (is_active(probe)) {
- if (states[probe] > drift) {
+ if (states_[probe] > drift) {
// move current element
- new (&keys[delete_index]) K(std::move(keys[probe]));
- values[delete_index] = values[probe];
- states[delete_index] = states[probe] - drift;
- states[probe] = 0; // mark as empty
- keys[probe].~K();
+ new (&keys_[delete_index]) K(std::move(keys_[probe]));
+ values_[delete_index] = values_[probe];
+ states_[delete_index] = states_[probe] - drift;
+ states_[probe] = 0; // mark as empty
+ keys_[probe].~K();
drift = 0;
delete_index = probe;
}
@@ -257,13 +270,13 @@
template<typename K, typename V, typename H, typename E, typename A>
uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const K& key, V value) {
- const uint32_t mask = (1 << lg_cur_size) - 1;
+ const uint32_t mask = (1 << lg_cur_size_) - 1;
uint32_t index = fmix64(H()(key)) & mask;
uint16_t drift = 1;
while (is_active(index)) {
- if (E()(keys[index], key)) {
+ if (E()(keys_[index], key)) {
// adjusting the value of an existing key
- values[index] += value;
+ values_[index] += value;
return index;
}
index = (index + 1) & mask;
@@ -272,23 +285,23 @@
if (drift >= DRIFT_LIMIT) throw std::logic_error("drift limit reached");
}
// adding the key and value to the table
- if (num_active > get_capacity()) {
- throw std::logic_error("num_active " + std::to_string(num_active) + " > capacity " + std::to_string(get_capacity()));
+ if (num_active_ > get_capacity()) {
+ throw std::logic_error("num_active " + std::to_string(num_active_) + " > capacity " + std::to_string(get_capacity()));
}
- values[index] = value;
- states[index] = drift;
- num_active++;
+ values_[index] = value;
+ states_[index] = drift;
+ num_active_++;
return index;
}
template<typename K, typename V, typename H, typename E, typename A>
V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() {
- if (num_active > get_capacity()) {
- if (lg_cur_size < lg_max_size) { // can grow
- resize(lg_cur_size + 1);
+ if (num_active_ > get_capacity()) {
+ if (lg_cur_size_ < lg_max_size_) { // can grow
+ resize(lg_cur_size_ + 1);
} else { // at target size, must purge
const V offset = purge();
- if (num_active > get_capacity()) {
+ if (num_active_ > get_capacity()) {
throw std::logic_error("purge did not reduce number of active items");
}
return offset;
@@ -299,43 +312,46 @@
template<typename K, typename V, typename H, typename E, typename A>
void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) {
- const uint32_t old_size = 1 << lg_cur_size;
- K* old_keys = keys;
- V* old_values = values;
- uint16_t* old_states = states;
+ const uint32_t old_size = 1 << lg_cur_size_;
+ K* old_keys = keys_;
+ V* old_values = values_;
+ uint16_t* old_states = states_;
const uint32_t new_size = 1 << lg_new_size;
- keys = A().allocate(new_size);
- values = AllocV().allocate(new_size);
- states = AllocU16().allocate(new_size);
- std::fill(states, &states[new_size], 0);
- num_active = 0;
- lg_cur_size = lg_new_size;
+ keys_ = allocator_.allocate(new_size);
+ AllocV av(allocator_);
+ values_ = av.allocate(new_size);
+ AllocU16 au16(allocator_);
+ states_ = au16.allocate(new_size);
+ std::fill(states_, states_ + new_size, 0);
+ num_active_ = 0;
+ lg_cur_size_ = lg_new_size;
for (uint32_t i = 0; i < old_size; i++) {
if (old_states[i] > 0) {
adjust_or_insert(std::move(old_keys[i]), old_values[i]);
old_keys[i].~K();
}
}
- A().deallocate(old_keys, old_size);
- AllocV().deallocate(old_values, old_size);
- AllocU16().deallocate(old_states, old_size);
+ allocator_.deallocate(old_keys, old_size);
+ av.deallocate(old_values, old_size);
+ au16.deallocate(old_states, old_size);
}
template<typename K, typename V, typename H, typename E, typename A>
V reverse_purge_hash_map<K, V, H, E, A>::purge() {
- const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active);
+ const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active_);
uint32_t num_samples = 0;
uint32_t i = 0;
- V* samples = AllocV().allocate(limit);
+ AllocV av(allocator_);
+ V* samples = av.allocate(limit);
while (num_samples < limit) {
if (is_active(i)) {
- samples[num_samples++] = values[i];
+ samples[num_samples++] = values_[i];
}
i++;
}
- std::nth_element(&samples[0], &samples[num_samples / 2], &samples[num_samples]);
+ std::nth_element(samples, samples+ (num_samples / 2), samples + num_samples);
const V median = samples[num_samples / 2];
- AllocV().deallocate(samples, limit);
+ av.deallocate(samples, limit);
subtract_and_keep_positive_only(median);
return median;
}
diff --git a/fi/test/reverse_purge_hash_map_test.cpp b/fi/test/reverse_purge_hash_map_test.cpp
index e372171..a74345c 100644
--- a/fi/test/reverse_purge_hash_map_test.cpp
+++ b/fi/test/reverse_purge_hash_map_test.cpp
@@ -24,20 +24,20 @@
namespace datasketches {
TEST_CASE("reverse purge hash map: empty", "[frequent_items_sketch]") {
- reverse_purge_hash_map<int> map(3, 3);
+ reverse_purge_hash_map<int> map(3, 3, std::allocator<int>());
REQUIRE(map.get_num_active() == 0);
REQUIRE(map.get_lg_cur_size() == 3); // static_cast<uint8_t>(3)
}
TEST_CASE("reverse purge hash map: one item", "[frequent_items_sketch]") {
- reverse_purge_hash_map<int> map(3, 3);
+ reverse_purge_hash_map<int> map(3, 3, std::allocator<int>());
map.adjust_or_insert(1, 1);
REQUIRE(map.get_num_active() == 1);
REQUIRE(map.get(1) == 1);
}
TEST_CASE("reverse purge hash map: iterator", "[frequent_items_sketch]") {
- reverse_purge_hash_map<int> map(3, 4);
+ reverse_purge_hash_map<int> map(3, 4, std::allocator<int>());
for (int i = 0; i < 11; i++) map.adjust_or_insert(i, 1); // this should fit with no purge
int sum = 0;
for (auto &it: map) sum += it.second;