Merge pull request #183 from apache/cpc_allocator

support allocator instance
diff --git a/cpc/include/cpc_common.hpp b/cpc/include/cpc_common.hpp
index 9a766b8..cde110f 100644
--- a/cpc/include/cpc_common.hpp
+++ b/cpc/include/cpc_common.hpp
@@ -44,6 +44,8 @@
 
 template<typename A>
 struct compressed_state {
+  explicit compressed_state(const A& allocator): table_data(allocator), table_data_words(0), table_num_entries(0),
+      window_data(allocator), window_data_words(0) {}
   vector_u32<A> table_data;
   uint32_t table_data_words;
   uint32_t table_num_entries; // can be different from the number of entries in the sketch in hybrid mode
@@ -53,6 +55,7 @@
 
 template<typename A>
 struct uncompressed_state {
+  explicit uncompressed_state(const A& allocator): table(allocator), window(allocator) {}
   u32_table<A> table;
   vector_u8<A> window;
 };
diff --git a/cpc/include/cpc_compressor.hpp b/cpc/include/cpc_compressor.hpp
index 55fa3b8..73db797 100644
--- a/cpc/include/cpc_compressor.hpp
+++ b/cpc/include/cpc_compressor.hpp
@@ -129,14 +129,14 @@
   void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const;
   void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const;
 
-  vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const;
+  vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k, const A& allocator) const;
   void uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
 
   static size_t safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits);
   static size_t safe_length_for_compressed_window_buf(uint64_t k);
   static uint8_t determine_pseudo_phase(uint8_t lg_k, uint64_t c);
 
-  static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space);
+  static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
   static inline uint64_t golomb_choose_number_of_base_bits(uint64_t k, uint64_t count);
 };
 
diff --git a/cpc/include/cpc_compressor_impl.hpp b/cpc/include/cpc_compressor_impl.hpp
index b951b05..e3398c8 100644
--- a/cpc/include/cpc_compressor_impl.hpp
+++ b/cpc/include/cpc_compressor_impl.hpp
@@ -160,7 +160,7 @@
 void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const {
   switch (cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons)) {
     case cpc_sketch_alloc<A>::flavor::EMPTY:
-      target.table = u32_table<A>(2, 6 + lg_k);
+      target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
       break;
     case cpc_sketch_alloc<A>::flavor::SPARSE:
       uncompress_sparse_flavor(source, target, lg_k);
@@ -191,8 +191,9 @@
 void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window");
   if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k);
-  target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k);
+  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
+      lg_k, source.table_data.get_allocator());
+  target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k, pairs.get_allocator());
 }
 
 // This is complicated because it effectively builds a Sparse version
@@ -206,7 +207,7 @@
   if (pairs_from_table.size() > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, pairs_from_table.size());
   const size_t num_pairs_from_window = source.get_num_coupons() - pairs_from_table.size(); // because the window offset is zero
 
-  vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size());
+  vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size(), source.get_allocator());
 
   u32_table<A>::merge(
       pairs_from_table.data(), 0, pairs_from_table.size(),
@@ -221,7 +222,8 @@
 void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data.size() > 0) throw std::logic_error("window is not expected");
   if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k);
+  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
+      lg_k, source.table_data.get_allocator());
 
   // In the hybrid flavor, some of these pairs actually
   // belong in the window, so we will separate them out,
@@ -240,7 +242,7 @@
       pairs[next_true_pair++] = row_col; // move true pair down
     }
   }
-  target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k);
+  target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k, pairs.get_allocator());
 }
 
 template<typename A>
@@ -264,21 +266,23 @@
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
+void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target,
+    uint8_t lg_k, uint32_t num_coupons) const {
   if (source.window_data.size() == 0) throw std::logic_error("window is expected");
   uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
   const size_t num_pairs = source.table_num_entries;
   if (num_pairs == 0) {
-    target.table = u32_table<A>(2, 6 + lg_k);
+    target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
     if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k);
+    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
+        lg_k, source.table_data.get_allocator());
     // undo the compressor's 8-column shift
     for (size_t i = 0; i < num_pairs; i++) {
       if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56");
       pairs[i] += 8;
     }
-    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
+    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
   }
 }
 
@@ -314,15 +318,17 @@
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
+void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target,
+    uint8_t lg_k, uint32_t num_coupons) const {
   if (source.window_data.size() == 0) throw std::logic_error("window is expected");
   uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
   const size_t num_pairs = source.table_num_entries;
   if (num_pairs == 0) {
-    target.table = u32_table<A>(2, 6 + lg_k);
+    target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
     if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k);
+    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
+        lg_k, source.table_data.get_allocator());
 
     const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
     if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16");
@@ -342,7 +348,7 @@
       pairs[i] = (row << 6) | col;
     }
 
-    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
+    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
   }
 }
 
@@ -364,9 +370,10 @@
 }
 
 template<typename A>
-vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const {
+vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs,
+    uint8_t lg_k, const A& allocator) const {
   const size_t k = 1 << lg_k;
-  vector_u32<A> pairs(num_pairs);
+  vector_u32<A> pairs(num_pairs, 0, allocator);
   const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
   low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
   return pairs;
@@ -388,7 +395,8 @@
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const {
+void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window,
+    uint8_t lg_k, uint32_t num_coupons) const {
   const size_t k = 1 << lg_k;
   window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
   const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
@@ -710,9 +718,10 @@
 // The empty space that this leaves at the beginning of the output array
 // will be filled in later by the caller.
 template<typename A>
-vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space) {
+vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
+    uint32_t empty_space, const A& allocator) {
   const size_t output_length = empty_space + num_pairs_to_get;
-  vector_u32<A> pairs(output_length);
+  vector_u32<A> pairs(output_length, 0, allocator);
   size_t pair_index = empty_space;
   for (unsigned row_index = 0; row_index < k; row_index++) {
     uint8_t byte = window[row_index];
diff --git a/cpc/include/cpc_sketch.hpp b/cpc/include/cpc_sketch.hpp
index 9aba16f..a4bf8f6 100644
--- a/cpc/include/cpc_sketch.hpp
+++ b/cpc/include/cpc_sketch.hpp
@@ -49,7 +49,7 @@
 template<typename A> class cpc_union_alloc;
 
 // alias with default allocator for convenience
-typedef cpc_sketch_alloc<std::allocator<void>> cpc_sketch;
+using cpc_sketch = cpc_sketch_alloc<std::allocator<uint8_t>>;
 
 // allocation and initialization of global decompression (decoding) tables
 // call this before anything else if you want to control the initialization time
@@ -67,7 +67,10 @@
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
    */
-  explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED);
+  explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
+
+  using allocator_type = A;
+  A get_allocator() const;
 
   /**
    * @return configured lg_k of this sketch
@@ -204,7 +207,7 @@
 
   // This is a convenience alias for users
   // The type returned by the following serialize method
-  typedef vector_u8<A> vector_bytes;
+  using vector_bytes = vector_u8<A>;
 
   /**
    * This method serializes the sketch as a vector of bytes.
@@ -221,7 +224,7 @@
    * @param seed the seed for the hash function that was used to create the sketch
    * @return an instance of a sketch
    */
-  static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+  static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   /**
    * This method deserializes a sketch from a given array of bytes.
@@ -230,7 +233,7 @@
    * @param seed the seed for the hash function that was used to create the sketch
    * @return an instance of the sketch
    */
-  static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+  static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   // for internal use
   uint32_t get_num_coupons() const;
diff --git a/cpc/include/cpc_sketch_impl.hpp b/cpc/include/cpc_sketch_impl.hpp
index e6bc010..a314de8 100644
--- a/cpc/include/cpc_sketch_impl.hpp
+++ b/cpc/include/cpc_sketch_impl.hpp
@@ -41,13 +41,13 @@
 }
 
 template<typename A>
-cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed):
+cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
 lg_k(lg_k),
 seed(seed),
 was_merged(false),
 num_coupons(0),
-surprising_value_table(2, 6 + lg_k),
-sliding_window(),
+surprising_value_table(2, 6 + lg_k, allocator),
+sliding_window(allocator),
 window_offset(0),
 first_interesting_column(0),
 kxp(1 << lg_k),
@@ -59,6 +59,11 @@
 }
 
 template<typename A>
+A cpc_sketch_alloc<A>::get_allocator() const {
+  return sliding_window.get_allocator();
+}
+
+template<typename A>
 uint8_t cpc_sketch_alloc<A>::get_lg_k() const {
   return lg_k;
 }
@@ -277,7 +282,7 @@
 
   sliding_window.resize(k, 0); // zero the memory (because we will be OR'ing into it)
 
-  u32_table<A> new_table(2, 6 + lg_k);
+  u32_table<A> new_table(2, 6 + lg_k, sliding_window.get_allocator());
 
   const uint32_t* old_slots = surprising_value_table.get_slots();
   const size_t old_num_slots = 1 << surprising_value_table.get_lg_size();
@@ -401,7 +406,7 @@
 
 template<typename A>
 void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(A(sliding_window.get_allocator()));
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -454,7 +459,7 @@
 
 template<typename A>
 vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(sliding_window.get_allocator());
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -464,7 +469,7 @@
   const bool has_window = compressed.window_data.size() > 0;
   const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
   const size_t size = header_size_bytes + (preamble_ints + compressed.table_data_words + compressed.window_data_words) * sizeof(uint32_t);
-  vector_u8<A> bytes(size);
+  vector_u8<A> bytes(size, 0, sliding_window.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
   ptr += copy_to_mem(&preamble_ints, ptr, sizeof(preamble_ints));
   const uint8_t serial_version = SERIAL_VERSION;
@@ -511,7 +516,7 @@
 }
 
 template<typename A>
-cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
+cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
   uint8_t preamble_ints;
   is.read((char*)&preamble_ints, sizeof(preamble_ints));
   uint8_t serial_version;
@@ -529,7 +534,7 @@
   const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
   const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
   const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(allocator);
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -583,7 +588,7 @@
     throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
         + std::to_string(compute_seed_hash(seed)));
   }
-  uncompressed_state<A> uncompressed;
+  uncompressed_state<A> uncompressed(allocator);
   get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
   if (!is.good())
     throw std::runtime_error("error reading from std::istream"); 
@@ -592,7 +597,7 @@
 }
 
 template<typename A>
-cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
+cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
   const char* base = static_cast<const char*>(bytes);
@@ -614,7 +619,7 @@
   const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
   const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
   ensure_minimum_memory(size, preamble_ints << 2);
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(allocator);
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -677,7 +682,7 @@
     throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
         + std::to_string(compute_seed_hash(seed)));
   }
-  uncompressed_state<A> uncompressed;
+  uncompressed_state<A> uncompressed(allocator);
   get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
   return cpc_sketch_alloc(lg_k, num_coupons, first_interesting_column, std::move(uncompressed.table),
       std::move(uncompressed.window), has_hip, kxp, hip_est_accum, seed);
@@ -766,7 +771,7 @@
   // Fill the matrix with default rows in which the "early zone" is filled with ones.
   // This is essential for the routine's O(k) time cost (as opposed to O(C)).
   const uint64_t default_row = (static_cast<uint64_t>(1) << window_offset) - 1;
-  vector_u64<A> matrix(k, default_row);
+  vector_u64<A> matrix(k, default_row, sliding_window.get_allocator());
 
   if (num_coupons == 0) return matrix;
 
diff --git a/cpc/include/cpc_union.hpp b/cpc/include/cpc_union.hpp
index e56aa72..dd59abc 100644
--- a/cpc/include/cpc_union.hpp
+++ b/cpc/include/cpc_union.hpp
@@ -35,7 +35,7 @@
  */
 
 // alias with default allocator for convenience
-typedef cpc_union_alloc<std::allocator<void>> cpc_union;
+using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
 
 template<typename A>
 class cpc_union_alloc {
@@ -45,7 +45,7 @@
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
    */
-  explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED);
+  explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   cpc_union_alloc(const cpc_union_alloc<A>& other);
   cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
diff --git a/cpc/include/cpc_union_impl.hpp b/cpc/include/cpc_union_impl.hpp
index 65d933c..5acfe5f 100644
--- a/cpc/include/cpc_union_impl.hpp
+++ b/cpc/include/cpc_union_impl.hpp
@@ -25,16 +25,16 @@
 namespace datasketches {
 
 template<typename A>
-cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed):
+cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
 lg_k(lg_k),
 seed(seed),
 accumulator(nullptr),
-bit_matrix()
+bit_matrix(allocator)
 {
   if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
     throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
   }
-  accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed);
+  accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
 }
 
 template<typename A>
@@ -200,13 +200,13 @@
 
   const uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
 
-  vector_u8<A> sliding_window(k);
+  vector_u8<A> sliding_window(k, 0, bit_matrix.get_allocator());
   // don't need to zero the window's memory
 
   // dynamically growing caused snowplow effect
   uint8_t table_lg_size = lg_k - 4; // K/16; in some cases this will end up being oversized
   if (table_lg_size < 2) table_lg_size = 2;
-  u32_table<A> table(table_lg_size, 6 + lg_k);
+  u32_table<A> table(table_lg_size, 6 + lg_k, bit_matrix.get_allocator());
 
   // the following should work even when the offset is zero
   const uint64_t mask_for_clearing_window = (static_cast<uint64_t>(0xff) << offset) ^ UINT64_MAX;
@@ -314,7 +314,7 @@
     vector_u64<A> old_matrix = std::move(bit_matrix);
     const uint8_t old_lg_k = lg_k;
     const size_t new_k = 1 << new_lg_k;
-    bit_matrix = vector_u64<A>(new_k, 0);
+    bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
     lg_k = new_lg_k;
     or_matrix_into_matrix(old_matrix, old_lg_k);
     return;
diff --git a/cpc/include/u32_table.hpp b/cpc/include/u32_table.hpp
index 2316fc1..fe228a5 100644
--- a/cpc/include/u32_table.hpp
+++ b/cpc/include/u32_table.hpp
@@ -39,8 +39,8 @@
 class u32_table {
 public:
 
-  u32_table();
-  u32_table(uint8_t lg_size, uint8_t num_valid_bits);
+  u32_table(const A& allocator);
+  u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
 
   inline size_t get_num_items() const;
   inline const uint32_t* get_slots() const;
@@ -52,7 +52,7 @@
   // returns true iff the item was present and was therefore removed from the table
   inline bool maybe_delete(uint32_t item);
 
-  static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k);
+  static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator);
 
   vector_u32<A> unwrapping_get_items() const;
 
diff --git a/cpc/include/u32_table_impl.hpp b/cpc/include/u32_table_impl.hpp
index aa44ba2..bf8ece9 100644
--- a/cpc/include/u32_table_impl.hpp
+++ b/cpc/include/u32_table_impl.hpp
@@ -29,19 +29,19 @@
 namespace datasketches {
 
 template<typename A>
-u32_table<A>::u32_table():
+u32_table<A>::u32_table(const A& allocator):
 lg_size(0),
 num_valid_bits(0),
 num_items(0),
-slots()
+slots(allocator)
 {}
 
 template<typename A>
-u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits):
+u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator):
 lg_size(lg_size),
 num_valid_bits(num_valid_bits),
 num_items(0),
-slots(1 << lg_size, UINT32_MAX)
+slots(1 << lg_size, UINT32_MAX, allocator)
 {
   if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
   if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
@@ -110,10 +110,10 @@
 
 // this one is specifically tailored to be a part of fm85 decompression scheme
 template<typename A>
-u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k) {
+u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator) {
   uint8_t lg_num_slots = 2;
   while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
-  u32_table<A> table(lg_num_slots, 6 + lg_k);
+  u32_table<A> table(lg_num_slots, 6 + lg_k, allocator);
   // Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array
   // However, we are starting out with the correct final table size, so the problem might not occur
   for (size_t i = 0; i < num_pairs; i++) {
@@ -152,7 +152,7 @@
   const size_t new_size = 1 << new_lg_size;
   if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
   vector_u32<A> old_slots = std::move(slots);
-  slots = vector_u32<A>(new_size, UINT32_MAX);
+  slots = vector_u32<A>(new_size, UINT32_MAX, old_slots.get_allocator());
   lg_size = new_lg_size;
   for (size_t i = 0; i < old_size; i++) {
     if (old_slots[i] != UINT32_MAX) {
@@ -169,9 +169,9 @@
 // The result is nearly sorted, so make sure to use an efficient sort for that case
 template<typename A>
 vector_u32<A> u32_table<A>::unwrapping_get_items() const {
-  if (num_items == 0) return vector_u32<A>();
+  if (num_items == 0) return vector_u32<A>(slots.get_allocator());
   const size_t table_size = 1 << lg_size;
-  vector_u32<A> result(num_items);
+  vector_u32<A> result(num_items, 0, slots.get_allocator());
   size_t i = 0;
   size_t l = 0;
   size_t r = num_items - 1;