support passing an allocator instance
diff --git a/fi/include/frequent_items_sketch.hpp b/fi/include/frequent_items_sketch.hpp
index b2a891f..6efe2b9 100644
--- a/fi/include/frequent_items_sketch.hpp
+++ b/fi/include/frequent_items_sketch.hpp
@@ -40,15 +40,20 @@
 
 enum frequent_items_error_type { NO_FALSE_POSITIVES, NO_FALSE_NEGATIVES };
 
-// for serialization as raw bytes
-template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
-template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
-
 // type W for weight must be an arithmetic type (integral or floating point)
-template<typename T, typename W = uint64_t, typename H = std::hash<T>, typename E = std::equal_to<T>, typename S = serde<T>, typename A = std::allocator<T>>
+template<
+  typename T,
+  typename W = uint64_t,
+  typename H = std::hash<T>,
+  typename E = std::equal_to<T>,
+  typename S = serde<T>,
+  typename A = std::allocator<T>
+>
 class frequent_items_sketch {
 public:
 
+  static const uint8_t LG_MIN_MAP_SIZE = 3;
+
   /**
    * Construct this sketch with parameters lg_max_map_size and lg_start_map_size.
    *
@@ -59,7 +64,7 @@
    * @param lg_start_map_size Log2 of the starting physical size of the internal hash
    * map managed by this sketch.
    */
-  explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE);
+  explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A());
 
   /**
    * Update this sketch with an item and a positive weight (frequency count).
@@ -232,7 +237,8 @@
 
   // This is a convenience alias for users
   // The type returned by the following serialize method
-  typedef vector_u8<A> vector_bytes;
+  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+
 
   /**
    * This method serializes the sketch as a vector of bytes.
@@ -249,7 +255,7 @@
    * @param is input stream
    * @return an instance of the sketch
    */
-  static frequent_items_sketch deserialize(std::istream& is);
+  static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());
 
   /**
    * This method deserializes a sketch from a given array of bytes.
@@ -257,7 +263,7 @@
    * @param size the size of the array
    * @return an instance of the sketch
    */
-  static frequent_items_sketch deserialize(const void* bytes, size_t size);
+  static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
 
   /**
    * Returns a human readable summary of this sketch
@@ -266,7 +272,6 @@
   string<A> to_string(bool print_items = false) const;
 
 private:
-  static const uint8_t LG_MIN_MAP_SIZE = 3;
   static const uint8_t SERIAL_VERSION = 1;
   static const uint8_t FAMILY_ID = 10;
   static const uint8_t PREAMBLE_LONGS_EMPTY = 1;
diff --git a/fi/include/frequent_items_sketch_impl.hpp b/fi/include/frequent_items_sketch_impl.hpp
index bb05aa9..df4352e 100644
--- a/fi/include/frequent_items_sketch_impl.hpp
+++ b/fi/include/frequent_items_sketch_impl.hpp
@@ -33,10 +33,14 @@
 const uint8_t frequent_items_sketch<T, W, H, E, S, A>::LG_MIN_MAP_SIZE;
 
 template<typename T, typename W, typename H, typename E, typename S, typename A>
-frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size):
+frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size, const A& allocator):
 total_weight(0),
 offset(0),
-map(std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE))
+map(
+  std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE),
+  std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE),
+  allocator
+)
 {
   if (lg_start_map_size > lg_max_map_size) throw std::invalid_argument("starting size must not be greater than maximum size");
 }
@@ -142,7 +146,7 @@
 template<typename T, typename W, typename H, typename E, typename S, typename A>
 typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
 frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type, W threshold) const {
-  vector_row items;
+  vector_row items(map.get_allocator());
   for (auto &it: map) {
     const W lb = it.second;
     const W ub = it.second + offset;
@@ -182,19 +186,21 @@
     os.write((char*)&offset, sizeof(offset));
 
     // copy active items and their weights to use batch serialization
-    typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
-    W* weights = AllocW().allocate(num_items);
-    T* items = A().allocate(num_items);
+    using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
+    AllocW aw(map.get_allocator());
+    W* weights = aw.allocate(num_items);
+    A alloc(map.get_allocator());
+    T* items = alloc.allocate(num_items);
     uint32_t i = 0;
     for (auto &it: map) {
       new (&items[i]) T(it.first);
       weights[i++] = it.second;
     }
     os.write((char*)weights, sizeof(W) * num_items);
-    AllocW().deallocate(weights, num_items);
+    aw.deallocate(weights, num_items);
     S().serialize(os, items, num_items);
     for (unsigned i = 0; i < num_items; i++) items[i].~T();
-    A().deallocate(items, num_items);
+    alloc.deallocate(items, num_items);
   }
 }
 
@@ -207,9 +213,9 @@
 }
 
 template<typename T, typename W, typename H, typename E, typename S, typename A>
-vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const {
+auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
   const size_t size = header_size_bytes + get_serialized_size_bytes();
-  vector_u8<A> bytes(size);
+  vector_bytes bytes(size, 0, map.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
   uint8_t* end_ptr = ptr + size;
 
@@ -238,20 +244,22 @@
     ptr += copy_to_mem(&offset, ptr, sizeof(offset));
 
     // copy active items and their weights to use batch serialization
-    typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
-    W* weights = AllocW().allocate(num_items);
-    T* items = A().allocate(num_items);
+    using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
+    AllocW aw(map.get_allocator());
+    W* weights = aw.allocate(num_items);
+    A alloc(map.get_allocator());
+    T* items = alloc.allocate(num_items);
     uint32_t i = 0;
     for (auto &it: map) {
       new (&items[i]) T(it.first);
       weights[i++] = it.second;
     }
     ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
-    AllocW().deallocate(weights, num_items);
+    aw.deallocate(weights, num_items);
     const size_t bytes_remaining = end_ptr - ptr;
     ptr += S().serialize(ptr, bytes_remaining, items, num_items);
     for (unsigned i = 0; i < num_items; i++) items[i].~T();
-    A().deallocate(items, num_items);
+    alloc.deallocate(items, num_items);
   }
   return bytes;
 }
@@ -259,23 +267,25 @@
 template<typename T, typename W, typename H, typename E, typename S, typename A>
 class frequent_items_sketch<T, W, H, E, S, A>::items_deleter {
 public:
-  items_deleter(uint32_t num, bool destroy): num(num), destroy(destroy) {}
+  items_deleter(uint32_t num, bool destroy, const A& allocator):
+    allocator(allocator), num(num), destroy(destroy) {}
   void set_destroy(bool destroy) { this->destroy = destroy; }
-  void operator() (T* ptr) const {
+  void operator() (T* ptr) {
     if (ptr != nullptr) {
       if (destroy) {
         for (uint32_t i = 0; i < num; ++i) ptr[i].~T();
       }
-      A().deallocate(ptr, num);
+      allocator.deallocate(ptr, num);
     }
   }
 private:
+  A allocator;
   uint32_t num;
   bool destroy;
 };
 
 template<typename T, typename W, typename H, typename E, typename S, typename A>
-frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is) {
+frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const A& allocator) {
   uint8_t preamble_longs;
   is.read((char*)&preamble_longs, sizeof(preamble_longs));
   uint8_t serial_version;
@@ -298,7 +308,7 @@
   check_family_id(family_id);
   check_size(lg_cur_size, lg_max_size);
 
-  frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
+  frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
   if (!is_empty) {
     uint32_t num_items;
     is.read((char*)&num_items, sizeof(num_items));
@@ -310,10 +320,11 @@
     is.read((char*)&offset, sizeof(offset));
 
     // batch deserialization with intermediate array of items and weights
-    typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
-    std::vector<W, AllocW> weights(num_items);
+    using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
+    std::vector<W, AllocW> weights(num_items, 0, allocator);
     is.read((char*)weights.data(), sizeof(W) * num_items);
-    std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
+    A alloc(allocator);
+    std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
     S().deserialize(is, items.get(), num_items);
     items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
     for (uint32_t i = 0; i < num_items; i++) {
@@ -328,7 +339,7 @@
 }
 
 template<typename T, typename W, typename H, typename E, typename S, typename A>
-frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size) {
+frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
   const char* base = static_cast<const char*>(bytes);
@@ -355,7 +366,7 @@
   check_size(lg_cur_size, lg_max_size);
   ensure_minimum_memory(size, 1 << preamble_longs);
 
-  frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
+  frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
   if (!is_empty) {
     uint32_t num_items;
     ptr += copy_from_mem(ptr, &num_items, sizeof(uint32_t));
@@ -368,10 +379,11 @@
 
     ensure_minimum_memory(size, ptr - base + (sizeof(W) * num_items));
     // batch deserialization with intermediate array of items and weights
-    typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
-    std::vector<W, AllocW> weights(num_items);
+    using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
+    std::vector<W, AllocW> weights(num_items, 0, allocator);
     ptr += copy_from_mem(ptr, weights.data(), sizeof(W) * num_items);
-    std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
+    A alloc(allocator);
+    std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
     const size_t bytes_remaining = size - (ptr - base);
     ptr += S().deserialize(ptr, bytes_remaining, items.get(), num_items);
     items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
diff --git a/fi/include/reverse_purge_hash_map.hpp b/fi/include/reverse_purge_hash_map.hpp
index e3e7bfb..fc4cd83 100644
--- a/fi/include/reverse_purge_hash_map.hpp
+++ b/fi/include/reverse_purge_hash_map.hpp
@@ -39,33 +39,39 @@
   using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>;
   using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
 
-  reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size);
+  reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size, const A& allocator);
   reverse_purge_hash_map(const reverse_purge_hash_map& other);
   reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept;
   ~reverse_purge_hash_map();
   reverse_purge_hash_map& operator=(reverse_purge_hash_map other);
   reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other);
-  V adjust_or_insert(const K& key, V value);
-  V adjust_or_insert(K&& key, V value);
+
+  template<typename FwdK>
+  V adjust_or_insert(FwdK&& key, V value);
+
   V get(const K& key) const;
   uint8_t get_lg_cur_size() const;
   uint8_t get_lg_max_size() const;
   uint32_t get_capacity() const;
   uint32_t get_num_active() const;
+  const A& get_allocator() const;
+
   class iterator;
   iterator begin() const;
   iterator end() const;
+
 private:
   static constexpr double LOAD_FACTOR = 0.75;
   static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing
   static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge
 
-  uint8_t lg_cur_size;
-  uint8_t lg_max_size;
-  uint32_t num_active;
-  K* keys;
-  V* values;
-  uint16_t* states;
+  A allocator_;
+  uint8_t lg_cur_size_;
+  uint8_t lg_max_size_;
+  uint32_t num_active_;
+  K* keys_;
+  V* values_;
+  uint16_t* states_;
 
   inline bool is_active(uint32_t probe) const;
   void subtract_and_keep_positive_only(V amount);
@@ -83,8 +89,8 @@
   friend class reverse_purge_hash_map<K, V, H, E, A>;
   iterator& operator++() {
     ++count;
-    if (count < map->num_active) {
-      const uint32_t mask = (1 << map->lg_cur_size) - 1;
+    if (count < map->num_active_) {
+      const uint32_t mask = (1 << map->lg_cur_size_) - 1;
       do {
         index = (index + stride) & mask;
       } while (!map->is_active(index));
@@ -95,7 +101,7 @@
   bool operator==(const iterator& rhs) const { return count == rhs.count; }
   bool operator!=(const iterator& rhs) const { return count != rhs.count; }
   const std::pair<K&, V> operator*() const {
-    return std::pair<K&, V>(map->keys[index], map->values[index]);
+    return std::pair<K&, V>(map->keys_[index], map->values_[index]);
   }
 private:
   static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
@@ -104,7 +110,7 @@
   uint32_t count;
   uint32_t stride;
   iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count):
-    map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
+    map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size_) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
 };
 
 } /* namespace datasketches */
diff --git a/fi/include/reverse_purge_hash_map_impl.hpp b/fi/include/reverse_purge_hash_map_impl.hpp
index 732d923..beccea4 100644
--- a/fi/include/reverse_purge_hash_map_impl.hpp
+++ b/fi/include/reverse_purge_hash_map_impl.hpp
@@ -34,113 +34,121 @@
 constexpr uint32_t reverse_purge_hash_map<K, V, H, E, A>::MAX_SAMPLE_SIZE;
 
 template<typename K, typename V, typename H, typename E, typename A>
-reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size):
-lg_cur_size(lg_cur_size),
-lg_max_size(lg_max_size),
-num_active(0),
-keys(A().allocate(1 << lg_cur_size)),
-values(AllocV().allocate(1 << lg_cur_size)),
-states(AllocU16().allocate(1 << lg_cur_size))
+reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size, const A& allocator):
+allocator_(allocator),
+lg_cur_size_(lg_cur_size),
+lg_max_size_(lg_max_size),
+num_active_(0),
+keys_(allocator_.allocate(1 << lg_cur_size)),
+values_(nullptr),
+states_(nullptr)
 {
-  std::fill(states, &states[1 << lg_cur_size], 0);
+  AllocV av(allocator_);
+  values_ = av.allocate(1 << lg_cur_size);
+  AllocU16 au16(allocator_);
+  states_ = au16.allocate(1 << lg_cur_size);
+  std::fill(states_, states_ + (1 << lg_cur_size), 0);
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(const reverse_purge_hash_map<K, V, H, E, A>& other):
-lg_cur_size(other.lg_cur_size),
-lg_max_size(other.lg_max_size),
-num_active(other.num_active),
-keys(A().allocate(1 << lg_cur_size)),
-values(AllocV().allocate(1 << lg_cur_size)),
-states(AllocU16().allocate(1 << lg_cur_size))
+allocator_(other.allocator_),
+lg_cur_size_(other.lg_cur_size_),
+lg_max_size_(other.lg_max_size_),
+num_active_(other.num_active_),
+keys_(allocator_.allocate(1 << lg_cur_size_)),
+values_(nullptr),
+states_(nullptr)
 {
-  const uint32_t size = 1 << lg_cur_size;
-  if (num_active > 0) {
-    auto num = num_active;
+  AllocV av(allocator_);
+  values_ = av.allocate(1 << lg_cur_size_);
+  AllocU16 au16(allocator_);
+  states_ = au16.allocate(1 << lg_cur_size_);
+  const uint32_t size = 1 << lg_cur_size_;
+  if (num_active_ > 0) {
+    auto num = num_active_;
     for (uint32_t i = 0; i < size; i++) {
-      if (other.states[i] > 0) {
-        new (&keys[i]) K(other.keys[i]);
-        values[i] = other.values[i];
+      if (other.states_[i] > 0) {
+        new (&keys_[i]) K(other.keys_[i]);
+        values_[i] = other.values_[i];
       }
       if (--num == 0) break;
     }
   }
-  std::copy(&other.states[0], &other.states[size], states);
+  std::copy(other.states_, other.states_ + size, states_);
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(reverse_purge_hash_map<K, V, H, E, A>&& other) noexcept:
-lg_cur_size(other.lg_cur_size),
-lg_max_size(other.lg_max_size),
-num_active(other.num_active),
-keys(nullptr),
-values(nullptr),
-states(nullptr)
+allocator_(std::move(other.allocator_)),
+lg_cur_size_(other.lg_cur_size_),
+lg_max_size_(other.lg_max_size_),
+num_active_(other.num_active_),
+keys_(nullptr),
+values_(nullptr),
+states_(nullptr)
 {
-  std::swap(keys, other.keys);
-  std::swap(values, other.values);
-  std::swap(states, other.states);
-  other.num_active = 0;
+  std::swap(keys_, other.keys_);
+  std::swap(values_, other.values_);
+  std::swap(states_, other.states_);
+  other.num_active_ = 0;
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 reverse_purge_hash_map<K, V, H, E, A>::~reverse_purge_hash_map() {
-  const uint32_t size = 1 << lg_cur_size;
-  if (num_active > 0) {
+  const uint32_t size = 1 << lg_cur_size_;
+  if (num_active_ > 0) {
     for (uint32_t i = 0; i < size; i++) {
       if (is_active(i)) {
-        keys[i].~K();
-        if (--num_active == 0) break;
+        keys_[i].~K();
+        if (--num_active_ == 0) break;
       }
     }
   }
-  if (keys != nullptr)
-    A().deallocate(keys, size);
-  if (values != nullptr)
-    AllocV().deallocate(values, size);
-  if (states != nullptr)
-    AllocU16().deallocate(states, size);
+  if (keys_ != nullptr) {
+    allocator_.deallocate(keys_, size);
+  }
+  if (values_ != nullptr) {
+    AllocV av(allocator_);
+    av.deallocate(values_, size);
+  }
+  if (states_ != nullptr) {
+    AllocU16 au16(allocator_);
+    au16.deallocate(states_, size);
+  }
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A> other) {
-  std::swap(lg_cur_size, other.lg_cur_size);
-  std::swap(lg_max_size, other.lg_max_size);
-  std::swap(num_active, other.num_active);
-  std::swap(keys, other.keys);
-  std::swap(values, other.values);
-  std::swap(states, other.states);
+  std::swap(allocator_, other.allocator_);
+  std::swap(lg_cur_size_, other.lg_cur_size_);
+  std::swap(lg_max_size_, other.lg_max_size_);
+  std::swap(num_active_, other.num_active_);
+  std::swap(keys_, other.keys_);
+  std::swap(values_, other.values_);
+  std::swap(states_, other.states_);
   return *this;
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A>&& other) {
-  std::swap(lg_cur_size, other.lg_cur_size);
-  std::swap(lg_max_size, other.lg_max_size);
-  std::swap(num_active, other.num_active);
-  std::swap(keys, other.keys);
-  std::swap(values, other.values);
-  std::swap(states, other.states);
+  std::swap(allocator_, other.allocator_);
+  std::swap(lg_cur_size_, other.lg_cur_size_);
+  std::swap(lg_max_size_, other.lg_max_size_);
+  std::swap(num_active_, other.num_active_);
+  std::swap(keys_, other.keys_);
+  std::swap(values_, other.values_);
+  std::swap(states_, other.states_);
   return *this;
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
-V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(const K& key, V value) {
-  const uint32_t num_active_before = num_active;
+template<typename FwdK>
+V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(FwdK&& key, V value) {
+  const uint32_t num_active_before = num_active_;
   const uint32_t index = internal_adjust_or_insert(key, value);
-  if (num_active > num_active_before) {
-    new (&keys[index]) K(key);
-    return resize_or_purge_if_needed();
-  }
-  return 0;
-}
-
-template<typename K, typename V, typename H, typename E, typename A>
-V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) {
-  const uint32_t num_active_before = num_active;
-  const uint32_t index = internal_adjust_or_insert(key, value);
-  if (num_active > num_active_before) {
-    new (&keys[index]) K(std::move(key));
+  if (num_active_ > num_active_before) {
+    new (&keys_[index]) K(std::forward<FwdK>(key));
     return resize_or_purge_if_needed();
   }
   return 0;
@@ -148,10 +156,10 @@
 
 template<typename K, typename V, typename H, typename E, typename A>
 V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const {
-  const uint32_t mask = (1 << lg_cur_size) - 1;
+  const uint32_t mask = (1 << lg_cur_size_) - 1;
   uint32_t probe = fmix64(H()(key)) & mask;
   while (is_active(probe)) {
-    if (E()(keys[probe], key)) return values[probe];
+    if (E()(keys_[probe], key)) return values_[probe];
     probe = (probe + 1) & mask;
   }
   return 0;
@@ -159,27 +167,32 @@
 
 template<typename K, typename V, typename H, typename E, typename A>
 uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_cur_size() const {
-  return lg_cur_size;
+  return lg_cur_size_;
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size() const {
-  return lg_max_size;
+  return lg_max_size_;
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const {
-  return (1 << lg_cur_size) * LOAD_FACTOR;
+  return (1 << lg_cur_size_) * LOAD_FACTOR;
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_num_active() const {
-  return num_active;
+  return num_active_;
+}
+
+template<typename K, typename V, typename H, typename E, typename A>
+const A& reverse_purge_hash_map<K, V, H, E, A>::get_allocator() const {
+  return allocator_;
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::begin() const {
-  const uint32_t size = 1 << lg_cur_size;
+  const uint32_t size = 1 << lg_cur_size_;
   uint32_t i = 0;
   while (i < size && !is_active(i)) i++;
   return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, i, 0);
@@ -187,40 +200,40 @@
 
 template<typename K, typename V, typename H, typename E, typename A>
 typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::end() const {
-  return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size, num_active);
+  return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size_, num_active_);
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 bool reverse_purge_hash_map<K, V, H, E, A>::is_active(uint32_t index) const {
-  return states[index] > 0;
+  return states_[index] > 0;
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 void reverse_purge_hash_map<K, V, H, E, A>::subtract_and_keep_positive_only(V amount) {
   // starting from the back, find the first empty cell,
   // which establishes the high end of a cluster.
-  uint32_t first_probe = (1 << lg_cur_size) - 1;
+  uint32_t first_probe = (1 << lg_cur_size_) - 1;
   while (is_active(first_probe)) first_probe--;
   // when we find the next non-empty cell, we know we are at the high end of a cluster
   // work towards the front, delete any non-positive entries.
   for (uint32_t probe = first_probe; probe-- > 0;) {
     if (is_active(probe)) {
-      if (values[probe] <= amount) {
+      if (values_[probe] <= amount) {
         hash_delete(probe); // does the work of deletion and moving higher items towards the front
-        num_active--;
+        num_active_--;
       } else {
-        values[probe] -= amount;
+        values_[probe] -= amount;
       }
     }
   }
   // now work on the first cluster that was skipped
-  for (uint32_t probe = (1 << lg_cur_size); probe-- > first_probe;) {
+  for (uint32_t probe = (1 << lg_cur_size_); probe-- > first_probe;) {
     if (is_active(probe)) {
-      if (values[probe] <= amount) {
+      if (values_[probe] <= amount) {
         hash_delete(probe);
-        num_active--;
+        num_active_--;
       } else {
-        values[probe] -= amount;
+        values_[probe] -= amount;
       }
     }
   }
@@ -231,20 +244,20 @@
   // Looks ahead in the table to search for another
   // item to move to this location
   // if none are found, the status is changed
-  states[delete_index] = 0; // mark as empty
-  keys[delete_index].~K();
+  states_[delete_index] = 0; // mark as empty
+  keys_[delete_index].~K();
   uint32_t drift = 1;
-  const uint32_t mask = (1 << lg_cur_size) - 1;
+  const uint32_t mask = (1 << lg_cur_size_) - 1;
   uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2
   // advance until we find a free location replacing locations as needed
   while (is_active(probe)) {
-    if (states[probe] > drift) {
+    if (states_[probe] > drift) {
       // move current element
-      new (&keys[delete_index]) K(std::move(keys[probe]));
-      values[delete_index] = values[probe];
-      states[delete_index] = states[probe] - drift;
-      states[probe] = 0; // mark as empty
-      keys[probe].~K();
+      new (&keys_[delete_index]) K(std::move(keys_[probe]));
+      values_[delete_index] = values_[probe];
+      states_[delete_index] = states_[probe] - drift;
+      states_[probe] = 0; // mark as empty
+      keys_[probe].~K();
       drift = 0;
       delete_index = probe;
     }
@@ -257,13 +270,13 @@
 
 template<typename K, typename V, typename H, typename E, typename A>
 uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const K& key, V value) {
-  const uint32_t mask = (1 << lg_cur_size) - 1;
+  const uint32_t mask = (1 << lg_cur_size_) - 1;
   uint32_t index = fmix64(H()(key)) & mask;
   uint16_t drift = 1;
   while (is_active(index)) {
-    if (E()(keys[index], key)) {
+    if (E()(keys_[index], key)) {
       // adjusting the value of an existing key
-      values[index] += value;
+      values_[index] += value;
       return index;
     }
     index = (index + 1) & mask;
@@ -272,23 +285,23 @@
     if (drift >= DRIFT_LIMIT) throw std::logic_error("drift limit reached");
   }
   // adding the key and value to the table
-  if (num_active > get_capacity()) {
-    throw std::logic_error("num_active " + std::to_string(num_active) + " > capacity " + std::to_string(get_capacity()));
+  if (num_active_ > get_capacity()) {
+    throw std::logic_error("num_active " + std::to_string(num_active_) + " > capacity " + std::to_string(get_capacity()));
   }
-  values[index] = value;
-  states[index] = drift;
-  num_active++;
+  values_[index] = value;
+  states_[index] = drift;
+  num_active_++;
   return index;
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() {
-  if (num_active > get_capacity()) {
-    if (lg_cur_size < lg_max_size) { // can grow
-      resize(lg_cur_size + 1);
+  if (num_active_ > get_capacity()) {
+    if (lg_cur_size_ < lg_max_size_) { // can grow
+      resize(lg_cur_size_ + 1);
     } else { // at target size, must purge
       const V offset = purge();
-      if (num_active > get_capacity()) {
+      if (num_active_ > get_capacity()) {
         throw std::logic_error("purge did not reduce number of active items");
       }
       return offset;
@@ -299,43 +312,46 @@
 
 template<typename K, typename V, typename H, typename E, typename A>
 void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) {
-  const uint32_t old_size = 1 << lg_cur_size;
-  K* old_keys = keys;
-  V* old_values = values;
-  uint16_t* old_states = states;
+  const uint32_t old_size = 1 << lg_cur_size_;
+  K* old_keys = keys_;
+  V* old_values = values_;
+  uint16_t* old_states = states_;
   const uint32_t new_size = 1 << lg_new_size;
-  keys = A().allocate(new_size);
-  values = AllocV().allocate(new_size);
-  states = AllocU16().allocate(new_size);
-  std::fill(states, &states[new_size], 0);
-  num_active = 0;
-  lg_cur_size = lg_new_size;
+  keys_ = allocator_.allocate(new_size);
+  AllocV av(allocator_);
+  values_ = av.allocate(new_size);
+  AllocU16 au16(allocator_);
+  states_ = au16.allocate(new_size);
+  std::fill(states_, states_ + new_size, 0);
+  num_active_ = 0;
+  lg_cur_size_ = lg_new_size;
   for (uint32_t i = 0; i < old_size; i++) {
     if (old_states[i] > 0) {
       adjust_or_insert(std::move(old_keys[i]), old_values[i]);
       old_keys[i].~K();
     }
   }
-  A().deallocate(old_keys, old_size);
-  AllocV().deallocate(old_values, old_size);
-  AllocU16().deallocate(old_states, old_size);
+  allocator_.deallocate(old_keys, old_size);
+  av.deallocate(old_values, old_size);
+  au16.deallocate(old_states, old_size);
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
 V reverse_purge_hash_map<K, V, H, E, A>::purge() {
-  const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active);
+  const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active_);
   uint32_t num_samples = 0;
   uint32_t i = 0;
-  V* samples = AllocV().allocate(limit);
+  AllocV av(allocator_);
+  V* samples = av.allocate(limit);
   while (num_samples < limit) {
     if (is_active(i)) {
-      samples[num_samples++] = values[i];
+      samples[num_samples++] = values_[i];
     }
     i++;
   }
-  std::nth_element(&samples[0], &samples[num_samples / 2], &samples[num_samples]);
+  std::nth_element(samples, samples+ (num_samples / 2), samples + num_samples);
   const V median = samples[num_samples / 2];
-  AllocV().deallocate(samples, limit);
+  av.deallocate(samples, limit);
   subtract_and_keep_positive_only(median);
   return median;
 }