Merge remote-tracking branch 'origin/master' into cleanup_warnings
diff --git a/common/include/MurmurHash3.h b/common/include/MurmurHash3.h
index c1cbeab..2ca72a6 100644
--- a/common/include/MurmurHash3.h
+++ b/common/include/MurmurHash3.h
@@ -76,7 +76,7 @@
 // Block read - if your platform needs to do endian-swapping or can only
 // handle aligned reads, do the conversion here
 
-FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
+FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
 {
   return p[i];
 }
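
Note: the comment above marks this function as the hook for endian/alignment conversion. A minimal sketch of a portable variant (assumptions: availability of <cstring> and the GCC/Clang __builtin_bswap64 intrinsic; the big-endian detection macro is illustrative only):

    #include <cstring>

    // Sketch only: memcpy tolerates unaligned pointers; byte-swap on
    // big-endian targets so blocks are always read little-endian.
    FORCE_INLINE uint64_t getblock64_portable(const uint64_t* p, size_t i) {
      uint64_t block;
      std::memcpy(&block, reinterpret_cast<const uint8_t*>(p) + i * sizeof(block), sizeof(block));
    #if defined(__BIG_ENDIAN__) // assumption: platform-specific detection goes here
      block = __builtin_bswap64(block);
    #endif
      return block;
    }
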
@@ -95,7 +95,7 @@
   return k;
 }
 
-FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t seed, HashState& out) {
+FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t seed, HashState& out) {
   static const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
   static const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
 
@@ -106,13 +106,13 @@
 
   // Number of full 128-bit blocks of 16 bytes.
   // Possible exclusion of a remainder of up to 15 bytes.
-  const int nblocks = lenBytes >> 4; // bytes / 16 
+  const size_t nblocks = lenBytes >> 4; // bytes / 16 
 
   // Process the 128-bit blocks (the body) into the hash
   const uint64_t* blocks = (const uint64_t*)(data);
-  for (int i = 0; i < nblocks; ++i) { // 16 bytes per block
-    uint64_t k1 = getblock64(blocks,i*2+0);
-    uint64_t k2 = getblock64(blocks,i*2+1);
+  for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block
+    uint64_t k1 = getblock64(blocks, i * 2 + 0);
+    uint64_t k2 = getblock64(blocks, i * 2 + 1);
 
     k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
     out.h1 = ROTL64(out.h1,27);
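
For context, the int-to-size_t changes in this hunk remove the classic -Wsign-compare pattern, where a signed loop index is compared against an unsigned length. A minimal reproduction, not from this codebase (assuming -Wall):

    #include <cstddef>

    // 'for (int i = 0; i < len; ++i)' here would warn:
    // comparison of integer expressions of different signedness
    size_t sum_bytes(const unsigned char* data, size_t len) {
      size_t total = 0;
      for (size_t i = 0; i < len; ++i) total += data[i];
      return total;
    }
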
diff --git a/common/include/binomial_bounds.hpp b/common/include/binomial_bounds.hpp
index 0f0222a..c1243e5 100644
--- a/common/include/binomial_bounds.hpp
+++ b/common/include/binomial_bounds.hpp
@@ -381,7 +381,7 @@
   // The following computes an approximation to the lower bound of a Frequentist
   // confidence interval based on the tails of the Binomial distribution.
   static double compute_approx_binomial_lower_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
-    if (theta == 1) return num_samples;
+    if (theta == 1) return static_cast<double>(num_samples);
     if (num_samples == 0) return 0;
     if (num_samples == 1) {
       const double delta = delta_of_num_std_devs[num_std_devs];
@@ -395,24 +395,24 @@
     }
     // at this point we know 2 <= num_samples <= 120
     if (theta > (1 - 1e-5)) { // empirically-determined threshold
-      return num_samples;
+      return static_cast<double>(num_samples);
     }
     if (theta < (num_samples / 360.0)) { // empirically-determined threshold
       // here we use the Gaussian approximation, but with a modified num_std_devs
-      const unsigned index = 3 * num_samples + (num_std_devs - 1);
+      const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
       const double raw_lb = cont_classic_lb(num_samples, theta, lb_equiv_table[index]);
       return raw_lb - 0.5; // fake round down
     }
     // This is the most difficult range to approximate; we will compute an "exact" LB.
     // We know that est <= 360, so specialNStar() shouldn't be ridiculously slow.
     const double delta = delta_of_num_std_devs[num_std_devs];
-    return special_n_star(num_samples, theta, delta); // no need to round
+    return static_cast<double>(special_n_star(num_samples, theta, delta)); // no need to round
   }
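
The casts in this hunk address implicit integer-to-floating conversions: a uint64_t converts to double exactly only up to 2^53, so the explicit cast documents that the conversion is intentional. A minimal illustration (assuming -Wconversion or a similarly strict warning level):

    // Returning an unsigned long long from a function declared to return
    // double converts implicitly; the cast silences the warning and makes
    // the potential precision loss above 2^53 visible at the call site.
    double as_double(unsigned long long n) {
      return static_cast<double>(n);
    }
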
 
   // The following computes an approximation to the upper bound of a Frequentist
   // confidence interval based on the tails of the Binomial distribution.
   static double compute_approx_binomial_upper_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
-    if (theta == 1) return num_samples;
+    if (theta == 1) return static_cast<double>(num_samples);
     if (num_samples == 0) {
       const double delta = delta_of_num_std_devs[num_std_devs];
       const double raw_ub = std::log(delta) / std::log(1 - theta);
@@ -425,18 +425,18 @@
     }
     // at this point we know 2 <= num_samples <= 120
     if (theta > (1 - 1e-5)) { // empirically-determined threshold
-      return num_samples + 1;
+      return static_cast<double>(num_samples + 1);
     }
     if (theta < (num_samples / 360.0)) { // empirically-determined threshold
       // here we use the Gaussian approximation, but with a modified num_std_devs
-      const unsigned index = 3 * num_samples + (num_std_devs - 1);
+      const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
       const double raw_ub = cont_classic_ub(num_samples, theta, ub_equiv_table[index]);
       return raw_ub + 0.5; // fake round up
     }
     // This is the most difficult range to approximate; we will compute an "exact" UB.
     // We know that est <= 360, so specialNPrimeF() shouldn't be ridiculously slow.
     const double delta = delta_of_num_std_devs[num_std_devs];
-    return special_n_prime_f(num_samples, theta, delta); // no need to round
+    return static_cast<double>(special_n_prime_f(num_samples, theta, delta)); // no need to round
   }
 
   static void check_theta(double theta) {
diff --git a/common/include/bounds_binomial_proportions.hpp b/common/include/bounds_binomial_proportions.hpp
index 06ab484..abfe8db 100644
--- a/common/include/bounds_binomial_proportions.hpp
+++ b/common/include/bounds_binomial_proportions.hpp
@@ -110,14 +110,14 @@
    * @return the lower bound of the approximate Clopper-Pearson confidence interval for the
    * unknown success probability.
    */
-  static inline double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
+  static inline double approximate_lower_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
     check_inputs(n, k);
     if (n == 0) { return 0.0; } // the coin was never flipped, so we know nothing
     else if (k == 0) { return 0.0; }
     else if (k == 1) { return (exact_lower_bound_on_p_k_eq_1(n, delta_of_num_stdevs(num_std_devs))); }
     else if (k == n) { return (exact_lower_bound_on_p_k_eq_n(n, delta_of_num_stdevs(num_std_devs))); }
     else {
-      double x = abramowitz_stegun_formula_26p5p22((n - k) + 1, k, (-1.0 * num_std_devs));
+      double x = abramowitz_stegun_formula_26p5p22((n - k) + 1.0, static_cast<double>(k), (-1.0 * num_std_devs));
       return (1.0 - x); // which is p
     }
   }
@@ -145,18 +145,18 @@
    * @return the upper bound of the approximate Clopper-Pearson confidence interval for the
    * unknown success probability.
    */
-  static inline double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
+  static inline double approximate_upper_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
     check_inputs(n, k);
     if (n == 0) { return 1.0; } // the coin was never flipped, so we know nothing
     else if (k == n) { return 1.0; }
     else if (k == (n - 1)) {
-      return (exactU_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
+      return (exact_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
     }
     else if (k == 0) {
       return (exact_upper_bound_on_p_k_eq_zero(n, delta_of_num_stdevs(num_std_devs)));
     }
     else {
-      double x = abramowitz_stegun_formula_26p5p22(n - k, k + 1, num_std_devs);
+      double x = abramowitz_stegun_formula_26p5p22(static_cast<double>(n - k), k + 1.0, num_std_devs);
       return (1.0 - x); // which is p
     }
   }
@@ -167,7 +167,7 @@
    * @param k is the number of successes. Must be non-negative, and cannot exceed n.
    * @return the estimate of the unknown binomial proportion.
    */
-  static inline double estimate_unknown_p(long n, long k) {
+  static inline double estimate_unknown_p(uint64_t n, uint64_t k) {
     check_inputs(n, k);
     if (n == 0) { return 0.5; } // the coin was never flipped, so we know nothing
     else { return ((double) k / (double) n); }
@@ -193,9 +193,7 @@
   }
 
 private:
-  static inline void check_inputs(long n, long k) {
-    if (n < 0) { throw std::invalid_argument("N must be non-negative"); }
-    if (k < 0) { throw std::invalid_argument("K must be non-negative"); }
+  static inline void check_inputs(uint64_t n, uint64_t k) {
     if (k > n) { throw std::invalid_argument("K cannot exceed N"); }
   }
 
@@ -251,8 +249,7 @@
   // and it is worth keeping it that way so that it will always be easy to verify
   // that the formula was typed in correctly.
 
-  static inline double abramowitz_stegun_formula_26p5p22(double a, double b,
-      double yp) {
+  static inline double abramowitz_stegun_formula_26p5p22(double a, double b, double yp) {
     const double b2m1 = (2.0 * b) - 1.0;
     const double a2m1 = (2.0 * a) - 1.0;
     const double lambda = ((yp * yp) - 3.0) / 6.0;
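
For readers checking this against the reference: Abramowitz & Stegun 26.5.22 (reproduced here from memory; verify against the handbook before relying on it) approximates the beta quantile as

    \lambda = \frac{y_p^2 - 3}{6}, \qquad
    h = \frac{2}{\frac{1}{2a-1} + \frac{1}{2b-1}}, \qquad
    w = \frac{y_p\sqrt{h+\lambda}}{h}
        - \Bigl(\frac{1}{2b-1} - \frac{1}{2a-1}\Bigr)
          \Bigl(\lambda + \frac{5}{6} - \frac{2}{3h}\Bigr), \qquad
    x_p \approx \frac{a}{a + b\,e^{2w}},

which is why the code starts from b2m1 = 2b-1, a2m1 = 2a-1, and lambda.
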
@@ -268,19 +265,19 @@
 
   // Formulas for some special cases.
 
-  static inline double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
+  static inline double exact_upper_bound_on_p_k_eq_zero(uint64_t n, double delta) {
     return (1.0 - pow(delta, (1.0 / n)));
   }
 
-  static inline double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
+  static inline double exact_lower_bound_on_p_k_eq_n(uint64_t n, double delta) {
     return (pow(delta, (1.0 / n)));
   }
 
-  static inline double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
+  static inline double exact_lower_bound_on_p_k_eq_1(uint64_t n, double delta) {
     return (1.0 - pow((1.0 - delta), (1.0 / n)));
   }
 
-  static inline double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
+  static inline double exact_upper_bound_on_p_k_eq_minusone(uint64_t n, double delta) {
     return (pow((1.0 - delta), (1.0 / n)));
   }
 
diff --git a/common/include/count_zeros.hpp b/common/include/count_zeros.hpp
index 0c9f6b4..cdd9940 100644
--- a/common/include/count_zeros.hpp
+++ b/common/include/count_zeros.hpp
@@ -94,7 +94,7 @@
 static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
   for (int i = 0; i < 4; i++) {
     const int byte = input & 0xff;
-    if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
+    if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
     input >>= 8;
   }
   return 32;
@@ -103,7 +103,7 @@
 static inline uint8_t count_trailing_zeros_in_u64(uint64_t input) {
   for (int i = 0; i < 8; i++) {
     const int byte = input & 0xff;
-    if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
+    if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
     input >>= 8;
   }
   return 64;
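
Aside: where C++20 is available, these table-driven loops have a standard equivalent, and the same narrowing cast applies since std::countr_zero returns int. A sketch (assuming <bit> and a C++20 toolchain):

    #include <bit>
    #include <cstdint>

    // std::countr_zero(uint64_t{0}) is 64, matching the fallback above.
    static inline uint8_t count_trailing_zeros_std(uint64_t input) {
      return static_cast<uint8_t>(std::countr_zero(input));
    }
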
diff --git a/common/include/serde.hpp b/common/include/serde.hpp
index 73e0901..9b3349b 100644
--- a/common/include/serde.hpp
+++ b/common/include/serde.hpp
@@ -51,7 +51,7 @@
     bool failure = false;
     try {
       os.write(reinterpret_cast<const char*>(items), sizeof(T) * num);
-    } catch (std::ostream::failure& e) {
+    } catch (std::ostream::failure&) {
       failure = true;
     }
     if (failure || !os.good()) {
@@ -62,7 +62,7 @@
     bool failure = false;
     try {
       is.read((char*)items, sizeof(T) * num);
-    } catch (std::istream::failure& e) {
+    } catch (std::istream::failure&) {
       failure = true;
     }
     if (failure || !is.good()) {
@@ -99,11 +99,11 @@
     bool failure = false;
     try {
       for (; i < num && os.good(); i++) {
-        uint32_t length = items[i].size();
+        uint32_t length = static_cast<uint32_t>(items[i].size());
         os.write((char*)&length, sizeof(length));
         os.write(items[i].c_str(), length);
       }
-    } catch (std::ostream::failure& e) {
+    } catch (std::ostream::failure&) {
       failure = true;
     }
     if (failure || !os.good()) {
@@ -121,12 +121,12 @@
         std::string str;
         str.reserve(length);
         for (uint32_t j = 0; j < length; j++) {
-          str.push_back(is.get());
+          str.push_back(static_cast<char>(is.get()));
         }
         if (!is.good()) { break; }
         new (&items[i]) std::string(std::move(str));
       }
-    } catch (std::istream::failure& e) {
+    } catch (std::istream::failure&) {
       failure = true;
     }
     if (failure || !is.good()) {
@@ -143,7 +143,7 @@
   size_t serialize(void* ptr, size_t capacity, const std::string* items, unsigned num) const {
     size_t bytes_written = 0;
     for (unsigned i = 0; i < num; ++i) {
-      const uint32_t length = items[i].size();
+      const uint32_t length = static_cast<uint32_t>(items[i].size());
       const size_t new_bytes = length + sizeof(length);
       check_memory_size(bytes_written + new_bytes, capacity);
       memcpy(ptr, &length, sizeof(length));
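
The layout these length casts guard is a uint32_t length prefix followed by the raw bytes of each string. A standalone sketch of the same framing for a single string (hypothetical helper, mirroring the memcpy-based serialize above):

    #include <cstdint>
    #include <cstring>
    #include <string>
    #include <vector>

    // [uint32 length][bytes], with the same size_t -> uint32_t cast as above.
    std::vector<uint8_t> frame_string(const std::string& s) {
      const uint32_t length = static_cast<uint32_t>(s.size());
      std::vector<uint8_t> out(sizeof(length) + s.size());
      std::memcpy(out.data(), &length, sizeof(length));
      std::memcpy(out.data() + sizeof(length), s.data(), s.size());
      return out;
    }
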
diff --git a/cpc/include/cpc_compressor.hpp b/cpc/include/cpc_compressor.hpp
index 73db797..a8f426f 100644
--- a/cpc/include/cpc_compressor.hpp
+++ b/cpc/include/cpc_compressor.hpp
@@ -48,44 +48,44 @@
 class cpc_compressor {
 public:
   void compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
-  void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const;
+  void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
 
   // methods below are public for testing
 
   // This returns the number of compressed words that were actually used. It is the caller's
   // responsibility to ensure that the compressed_words array is long enough to prevent over-run.
-  size_t low_level_compress_bytes(
+  uint32_t low_level_compress_bytes(
       const uint8_t* byte_array, // input
-      size_t num_bytes_to_encode,
+      uint32_t num_bytes_to_encode,
       const uint16_t* encoding_table,
       uint32_t* compressed_words  // output
   ) const;
 
   void low_level_uncompress_bytes(
       uint8_t* byte_array, // output
-      size_t num_bytes_to_decode,
+      uint32_t num_bytes_to_decode,
       const uint16_t* decoding_table,
       const uint32_t* compressed_words,
-      size_t num_compressed_words // input
+      uint32_t num_compressed_words // input
   ) const;
 
   // Here "pairs" refers to row-column pairs that specify
   // the positions of surprising values in the bit matrix.
 
   // returns the number of compressedWords actually used
-  size_t low_level_compress_pairs(
+  uint32_t low_level_compress_pairs(
       const uint32_t* pair_array, // input
-      size_t num_pairs_to_encode,
-      size_t num_base_bits,
+      uint32_t num_pairs_to_encode,
+      uint8_t num_base_bits,
       uint32_t* compressed_words // output
   ) const;
 
   void low_level_uncompress_pairs(
       uint32_t* pair_array, // output
-      size_t num_pairs_to_decode,
-      size_t num_base_bits,
+      uint32_t num_pairs_to_decode,
+      uint8_t num_base_bits,
       const uint32_t* compressed_words, // input
-      size_t num_compressed_words // input
+      uint32_t num_compressed_words // input
   ) const;
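
As used throughout the implementation, a "pair" packs a row index and a 6-bit column into a single uint32_t (row = row_col >> 6, col = row_col & 63). Sketch helpers (names are illustrative, not part of the API):

    #include <cstdint>

    static inline uint32_t make_row_col(uint32_t row, uint8_t col) {
      return (row << 6) | (col & 63);
    }
    static inline uint32_t pair_row(uint32_t row_col) { return row_col >> 6; }
    static inline uint8_t pair_col(uint32_t row_col) { return row_col & 63; }
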
 
 private:
@@ -122,22 +122,22 @@
   void uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
   void uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
 
-  uint8_t* make_inverse_permutation(const uint8_t* permu, int length);
-  uint16_t* make_decoding_table(const uint16_t* encoding_table, int num_byte_values);
+  uint8_t* make_inverse_permutation(const uint8_t* permu, unsigned length);
+  uint16_t* make_decoding_table(const uint16_t* encoding_table, unsigned num_byte_values);
   void validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const;
 
   void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const;
   void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const;
 
-  vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k, const A& allocator) const;
-  void uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
+  vector_u32<A> uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
+  void uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
 
-  static size_t safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits);
-  static size_t safe_length_for_compressed_window_buf(uint64_t k);
-  static uint8_t determine_pseudo_phase(uint8_t lg_k, uint64_t c);
+  static size_t safe_length_for_compressed_pair_buf(uint32_t k, uint32_t num_pairs, uint8_t num_base_bits);
+  static size_t safe_length_for_compressed_window_buf(uint32_t k);
+  static uint8_t determine_pseudo_phase(uint8_t lg_k, uint32_t c);
 
   static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
-  static inline uint64_t golomb_choose_number_of_base_bits(uint64_t k, uint64_t count);
+  static inline uint8_t golomb_choose_number_of_base_bits(uint32_t k, uint64_t count);
 };
 
 } /* namespace datasketches */
diff --git a/cpc/include/cpc_compressor_impl.hpp b/cpc/include/cpc_compressor_impl.hpp
index e3398c8..f163db8 100644
--- a/cpc/include/cpc_compressor_impl.hpp
+++ b/cpc/include/cpc_compressor_impl.hpp
@@ -49,12 +49,12 @@
 }
 
 template<typename A>
-uint8_t* cpc_compressor<A>::make_inverse_permutation(const uint8_t* permu, int length) {
+uint8_t* cpc_compressor<A>::make_inverse_permutation(const uint8_t* permu, unsigned length) {
   uint8_t* inverse = new uint8_t[length]; // use new for global initialization
-  for (int i = 0; i < length; i++) {
-    inverse[permu[i]] = i;
+  for (unsigned i = 0; i < length; i++) {
+    inverse[permu[i]] = static_cast<uint8_t>(i);
   }
-  for (int i = 0; i < length; i++) {
+  for (unsigned i = 0; i < length; i++) {
     if (permu[inverse[i]] != i) throw std::logic_error("inverse permutation error");
   }
   return inverse;
@@ -64,17 +64,17 @@
    of length at most 12, this builds a size-4096 decoding table */
 // The second argument is typically 256, but can be other values such as 65.
 template<typename A>
-uint16_t* cpc_compressor<A>::make_decoding_table(const uint16_t* encoding_table, int num_byte_values) {
+uint16_t* cpc_compressor<A>::make_decoding_table(const uint16_t* encoding_table, unsigned num_byte_values) {
   uint16_t* decoding_table = new uint16_t[4096]; // use new for global initialization
-  for (int byte_value = 0; byte_value < num_byte_values; byte_value++) {
-    const int encoding_entry = encoding_table[byte_value];
-    const int code_value = encoding_entry & 0xfff;
-    const int code_length = encoding_entry >> 12;
-    const int decoding_entry = (code_length << 8) | byte_value;
-    const int garbage_length = 12 - code_length;
-    const int num_copies = 1 << garbage_length;
-    for (int garbage_bits = 0; garbage_bits < num_copies; garbage_bits++) {
-      const int extended_code_value = code_value | (garbage_bits << code_length);
+  for (unsigned byte_value = 0; byte_value < num_byte_values; byte_value++) {
+    const uint16_t encoding_entry = encoding_table[byte_value];
+    const uint16_t code_value = encoding_entry & 0xfff;
+    const uint8_t code_length = encoding_entry >> 12;
+    const uint16_t decoding_entry = static_cast<uint16_t>((code_length << 8) | byte_value);
+    const uint8_t garbage_length = 12 - code_length;
+    const unsigned num_copies = 1 << garbage_length;
+    for (unsigned garbage_bits = 0; garbage_bits < num_copies; garbage_bits++) {
+      const uint16_t extended_code_value = static_cast<uint16_t>(code_value | (garbage_bits << code_length));
       decoding_table[extended_code_value & 0xfff] = decoding_entry;
     }
   }
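
To see how the size-4096 table is consumed: the decoder peeks 12 bits (enough to contain any codeword), and each entry packs (code_length << 8) | byte_value. A sketch of one decode step, matching low_level_uncompress_bytes further down (helper name is illustrative):

    // 'bitbuf' must hold at least 12 valid bits; see maybe_fill_bitbuf below.
    static inline uint8_t decode_one(const uint16_t* decoding_table,
                                     uint64_t& bitbuf, uint8_t& bufbits) {
      const uint16_t entry = decoding_table[bitbuf & 0xfff]; // 12-bit lookahead
      const uint8_t code_len = entry >> 8; // bits actually consumed
      bitbuf >>= code_len;
      bufbits -= code_len;
      return entry & 0xff; // the decoded byte value
    }
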
@@ -157,7 +157,7 @@
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const {
+void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
   switch (cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons)) {
     case cpc_sketch_alloc<A>::flavor::EMPTY:
       target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
@@ -202,16 +202,17 @@
 void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
   if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
   if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
-  const size_t k = 1 << source.get_lg_k();
+  const uint32_t k = 1 << source.get_lg_k();
   vector_u32<A> pairs_from_table = source.surprising_value_table.unwrapping_get_items();
-  if (pairs_from_table.size() > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, pairs_from_table.size());
-  const size_t num_pairs_from_window = source.get_num_coupons() - pairs_from_table.size(); // because the window offset is zero
+  const uint32_t num_pairs_from_table = static_cast<uint32_t>(pairs_from_table.size());
+  if (num_pairs_from_table > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, num_pairs_from_table);
+  const uint32_t num_pairs_from_window = source.get_num_coupons() - num_pairs_from_table; // because the window offset is zero
 
-  vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size(), source.get_allocator());
+  vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table, source.get_allocator());
 
   u32_table<A>::merge(
       pairs_from_table.data(), 0, pairs_from_table.size(),
-      all_pairs.data(), pairs_from_table.size(), num_pairs_from_window,
+      all_pairs.data(), num_pairs_from_table, num_pairs_from_window,
       all_pairs.data(), 0
   );  // note the overlapping subarray trick
 
@@ -228,15 +229,15 @@
   // In the hybrid flavor, some of these pairs actually
   // belong in the window, so we will separate them out,
   // moving the "true" pairs to the bottom of the array.
-  const size_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   target.window.resize(k, 0); // important: zero the memory
-  size_t next_true_pair = 0;
-  for (size_t i = 0; i < source.table_num_entries; i++) {
+  uint32_t next_true_pair = 0;
+  for (uint32_t i = 0; i < source.table_num_entries; i++) {
     const uint32_t row_col = pairs[i];
     if (row_col == UINT32_MAX) throw std::logic_error("empty marker is not expected");
     const uint8_t col = row_col & 63;
     if (col < 8) {
-      const size_t row = row_col >> 6;
+      const uint32_t row = row_col >> 6;
       target.window[row] |= 1 << col; // set the window bit
     } else {
       pairs[next_true_pair++] = row_col; // move true pair down
@@ -270,7 +271,7 @@
     uint8_t lg_k, uint32_t num_coupons) const {
   if (source.window_data.size() == 0) throw std::logic_error("window is expected");
   uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
-  const size_t num_pairs = source.table_num_entries;
+  const uint32_t num_pairs = source.table_num_entries;
   if (num_pairs == 0) {
     target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
@@ -278,7 +279,7 @@
     vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
         lg_k, source.table_data.get_allocator());
     // undo the compressor's 8-column shift
-    for (size_t i = 0; i < num_pairs; i++) {
+    for (uint32_t i = 0; i < num_pairs; i++) {
       if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56");
       pairs[i] += 8;
     }
@@ -302,7 +303,7 @@
 
     for (size_t i = 0; i < pairs.size(); i++) {
       const uint32_t row_col = pairs[i];
-      const size_t row = row_col >> 6;
+      const uint32_t row = row_col >> 6;
       uint8_t col = row_col & 63;
       // first rotate the columns into a canonical configuration: new = ((old - (offset+8)) + 64) mod 64
       col = (col + 56 - offset) & 63;
@@ -322,7 +323,7 @@
     uint8_t lg_k, uint32_t num_coupons) const {
   if (source.window_data.size() == 0) throw std::logic_error("window is expected");
   uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
-  const size_t num_pairs = source.table_num_entries;
+  const uint32_t num_pairs = source.table_num_entries;
   if (num_pairs == 0) {
     target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
@@ -337,9 +338,9 @@
     uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
     if (offset > 56) throw std::out_of_range("offset out of range");
 
-    for (size_t i = 0; i < num_pairs; i++) {
+    for (uint32_t i = 0; i < num_pairs; i++) {
       const uint32_t row_col = pairs[i];
-      const size_t row = row_col >> 6;
+      const uint32_t row = row_col >> 6;
       uint8_t col = row_col & 63;
       // first undo the permutation
       col = permutation[col];
@@ -354,25 +355,26 @@
 
 template<typename A>
 void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const {
-  const size_t k = 1 << lg_k;
-  const uint64_t num_base_bits = golomb_choose_number_of_base_bits(k + pairs.size(), pairs.size());
-  const uint64_t table_len = safe_length_for_compressed_pair_buf(k, pairs.size(), num_base_bits);
+  const uint32_t k = 1 << lg_k;
+  const uint32_t num_pairs = static_cast<uint32_t>(pairs.size());
+  const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
+  const uint64_t table_len = safe_length_for_compressed_pair_buf(k, num_pairs, num_base_bits);
   result.table_data.resize(table_len);
 
-  size_t csv_length = low_level_compress_pairs(pairs.data(), pairs.size(), num_base_bits, result.table_data.data());
+  uint32_t csv_length = low_level_compress_pairs(pairs.data(), static_cast<uint32_t>(pairs.size()), num_base_bits, result.table_data.data());
 
   // At this point we could free the unused portion of the compression output buffer,
   // but it is not necessary if it is temporary
   // Note: realloc caused strange timing spikes for lgK = 11 and 12.
 
   result.table_data_words = csv_length;
-  result.table_num_entries = pairs.size();
+  result.table_num_entries = num_pairs;
 }
 
 template<typename A>
-vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs,
+vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs,
     uint8_t lg_k, const A& allocator) const {
-  const size_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   vector_u32<A> pairs(num_pairs, 0, allocator);
   const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
   low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
@@ -381,7 +383,7 @@
 
 template<typename A>
 void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const {
-  const size_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   const size_t window_buf_len = safe_length_for_compressed_window_buf(k);
   target.window_data.resize(window_buf_len);
   const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
@@ -391,20 +393,20 @@
   // but it is not necessary if it is temporary
   // Note: realloc caused strange timing spikes for lgK = 11 and 12.
 
-  target.window_data_words = data_words;
+  target.window_data_words = static_cast<uint32_t>(data_words);
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window,
+void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window,
     uint8_t lg_k, uint32_t num_coupons) const {
-  const size_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
   const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
   low_level_uncompress_bytes(window.data(), k, decoding_tables_for_high_entropy_byte[pseudo_phase], data, data_words);
 }
 
 template<typename A>
-size_t cpc_compressor<A>::safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits) {
+size_t cpc_compressor<A>::safe_length_for_compressed_pair_buf(uint32_t k, uint32_t num_pairs, uint8_t num_base_bits) {
   // Long ybits = k + numPairs; // simpler and safer UB
   // The following tighter UB on ybits is based on page 198
   // of the textbook "Managing Gigabytes" by Witten, Moffat, and Bell.
@@ -422,14 +424,14 @@
 // So the 12-bit lookahead is the tight constraint, but there are at least (2 + B) bits emitted,
 // so we would be safe with max (0, 10 - B) bits of padding at the end of the bitstream.
 template<typename A>
-size_t cpc_compressor<A>::safe_length_for_compressed_window_buf(uint64_t k) { // measured in 32-bit words
+size_t cpc_compressor<A>::safe_length_for_compressed_window_buf(uint32_t k) { // measured in 32-bit words
   const size_t bits = 12 * k + 11; // 11 bits of padding, due to 12-bit lookahead, with 1 bit certainly present.
   return divide_longs_rounding_up(bits, 32);
 }
 
 template<typename A>
-uint8_t cpc_compressor<A>::determine_pseudo_phase(uint8_t lg_k, uint64_t c) {
-  const size_t k = 1 << lg_k;
+uint8_t cpc_compressor<A>::determine_pseudo_phase(uint8_t lg_k, uint32_t c) {
+  const uint32_t k = 1 << lg_k;
   // This mid-range logic produces pseudo-phases. They are used to select encoding tables.
   // The thresholds were chosen by hand after looking at plots of measured compression.
   if (1000 * c < 2375 * k) {
@@ -450,7 +452,7 @@
   }
 }
 
-static inline void maybe_flush_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, uint32_t* wordarr, size_t& wordindex) {
+static inline void maybe_flush_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, uint32_t* wordarr, uint32_t& wordindex) {
   if (bufbits >= 32) {
     wordarr[wordindex++] = bitbuf & 0xffffffff;
     bitbuf = bitbuf >> 32;
@@ -458,7 +460,7 @@
   }
 }
 
-static inline void maybe_fill_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, const uint32_t* wordarr, size_t& wordindex, uint8_t minbits) {
+static inline void maybe_fill_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, const uint32_t* wordarr, uint32_t& wordindex, uint8_t minbits) {
   if (bufbits < minbits) {
     bitbuf |= static_cast<uint64_t>(wordarr[wordindex++]) << bufbits;
     bufbits += 32;
@@ -468,20 +470,20 @@
 // This returns the number of compressed words that were actually used.
 // It is the caller's responsibility to ensure that the compressed_words array is long enough.
 template<typename A>
-size_t cpc_compressor<A>::low_level_compress_bytes(
+uint32_t cpc_compressor<A>::low_level_compress_bytes(
     const uint8_t* byte_array, // input
-    size_t num_bytes_to_encode,
+    uint32_t num_bytes_to_encode,
     const uint16_t* encoding_table,
     uint32_t* compressed_words // output
 ) const {
   uint64_t bitbuf = 0; // bits are packed into this first, then are flushed to compressed_words
   uint8_t bufbits = 0; // number of bits currently in bitbuf; must be between 0 and 31
-  size_t next_word_index = 0;
+  uint32_t next_word_index = 0;
 
-  for (size_t byte_index = 0; byte_index < num_bytes_to_encode; byte_index++) {
-    const uint64_t code_info = encoding_table[byte_array[byte_index]];
+  for (uint32_t byte_index = 0; byte_index < num_bytes_to_encode; byte_index++) {
+    const uint16_t code_info = encoding_table[byte_array[byte_index]];
     const uint64_t code_val = code_info & 0xfff;
-    const int code_len = code_info >> 12;
+    const uint8_t code_len = code_info >> 12;
     bitbuf |= (code_val << bufbits);
     bufbits += code_len;
     maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
@@ -502,12 +504,12 @@
 template<typename A>
 void cpc_compressor<A>::low_level_uncompress_bytes(
     uint8_t* byte_array, // output
-    size_t num_bytes_to_decode,
+    uint32_t num_bytes_to_decode,
     const uint16_t* decoding_table,
     const uint32_t* compressed_words, // input
-    size_t num_compressed_words
+    uint32_t num_compressed_words
 ) const {
-  size_t word_index = 0;
+  uint32_t word_index = 0;
   uint64_t bitbuf = 0;
   uint8_t bufbits = 0;
 
@@ -515,7 +517,7 @@
   if (decoding_table == nullptr) throw std::logic_error("decoding_table == NULL");
   if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
 
-  for (size_t byte_index = 0; byte_index < num_bytes_to_decode; byte_index++) {
+  for (uint32_t byte_index = 0; byte_index < num_bytes_to_decode; byte_index++) {
     maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12); // ensure 12 bits in bit buffer
 
     const size_t peek12 = bitbuf & 0xfff; // These 12 bits will include an entire Huffman codeword.
@@ -533,14 +535,14 @@
 
 static inline uint64_t read_unary(
     const uint32_t* compressed_words,
-    size_t& next_word_index,
+    uint32_t& next_word_index,
     uint64_t& bitbuf,
     uint8_t& bufbits
 );
 
 static inline void write_unary(
     uint32_t* compressed_words,
-    size_t& next_word_index_ptr,
+    uint32_t& next_word_index_ptr,
     uint64_t& bit_buf_ptr,
     uint8_t& buf_bits_ptr,
     uint64_t value
@@ -551,38 +553,38 @@
 
 // returns the number of compressed_words actually used
 template<typename A>
-size_t cpc_compressor<A>::low_level_compress_pairs(
+uint32_t cpc_compressor<A>::low_level_compress_pairs(
     const uint32_t* pair_array,  // input
-    size_t num_pairs_to_encode,
-    size_t num_base_bits,
+    uint32_t num_pairs_to_encode,
+    uint8_t num_base_bits,
     uint32_t* compressed_words // output
 ) const {
   uint64_t bitbuf = 0;
   uint8_t bufbits = 0;
-  size_t next_word_index = 0;
+  uint32_t next_word_index = 0;
   const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
-  uint64_t predicted_row_index = 0;
-  uint16_t predicted_col_index = 0;
+  uint32_t predicted_row_index = 0;
+  uint8_t predicted_col_index = 0;
 
-  for (size_t pair_index = 0; pair_index < num_pairs_to_encode; pair_index++) {
+  for (uint32_t pair_index = 0; pair_index < num_pairs_to_encode; pair_index++) {
     const uint32_t row_col = pair_array[pair_index];
-    const uint64_t row_index = row_col >> 6;
-    const uint16_t col_index = row_col & 63;
+    const uint32_t row_index = row_col >> 6;
+    const uint8_t col_index = row_col & 63;
 
     if (row_index != predicted_row_index) predicted_col_index = 0;
 
     if (row_index < predicted_row_index) throw std::logic_error("row_index < predicted_row_index");
     if (col_index < predicted_col_index) throw std::logic_error("col_index < predicted_col_index");
 
-    const uint64_t y_delta = row_index - predicted_row_index;
-    const uint16_t x_delta = col_index - predicted_col_index;
+    const uint32_t y_delta = row_index - predicted_row_index;
+    const uint8_t x_delta = col_index - predicted_col_index;
 
     predicted_row_index = row_index;
     predicted_col_index = col_index + 1;
 
-    const uint64_t code_info = length_limited_unary_encoding_table65[x_delta];
+    const uint16_t code_info = length_limited_unary_encoding_table65[x_delta];
     const uint64_t code_val = code_info & 0xfff;
-    const uint8_t code_len = code_info >> 12;
+    const uint8_t code_len = static_cast<uint8_t>(code_info >> 12);
     bitbuf |= code_val << bufbits;
     bufbits += code_len;
     maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
@@ -614,29 +616,29 @@
 template<typename A>
 void cpc_compressor<A>::low_level_uncompress_pairs(
     uint32_t* pair_array, // output
-    size_t num_pairs_to_decode,
-    size_t num_base_bits,
+    uint32_t num_pairs_to_decode,
+    uint8_t num_base_bits,
     const uint32_t* compressed_words, // input
-    size_t num_compressed_words
+    uint32_t num_compressed_words
 ) const {
-  size_t word_index = 0;
+  uint32_t word_index = 0;
   uint64_t bitbuf = 0;
   uint8_t bufbits = 0;
   const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
-  uint64_t predicted_row_index = 0;
-  uint16_t predicted_col_index = 0;
+  uint32_t predicted_row_index = 0;
+  uint8_t predicted_col_index = 0;
 
   // for each pair we need to read:
   // x_delta (12-bit length-limited unary)
   // y_delta_hi (unary)
   // y_delta_lo (basebits)
 
-  for (size_t pair_index = 0; pair_index < num_pairs_to_decode; pair_index++) {
+  for (uint32_t pair_index = 0; pair_index < num_pairs_to_decode; pair_index++) {
     maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12); // ensure 12 bits in bit buffer
     const size_t peek12 = bitbuf & 0xfff;
     const uint16_t lookup = length_limited_unary_decoding_table65[peek12];
-    const int code_word_length = lookup >> 8;
-    const int16_t x_delta = lookup & 0xff;
+    const uint8_t code_word_length = lookup >> 8;
+    const uint8_t x_delta = lookup & 0xff;
     bitbuf >>= code_word_length;
     bufbits -= code_word_length;
 
@@ -650,8 +652,8 @@
 
     // Now that we have x_delta and y_delta, we can compute the pair's row and column
     if (y_delta > 0) predicted_col_index = 0;
-    const uint64_t row_index = predicted_row_index + y_delta;
-    const uint16_t col_index = predicted_col_index + x_delta;
+    const uint32_t row_index = static_cast<uint32_t>(predicted_row_index + y_delta);
+    const uint8_t col_index = predicted_col_index + x_delta;
     const uint32_t row_col = (row_index << 6) | col_index;
     pair_array[pair_index] = row_col;
     predicted_row_index = row_index;
@@ -662,7 +664,7 @@
 
 uint64_t read_unary(
     const uint32_t* compressed_words,
-    size_t& next_word_index,
+    uint32_t& next_word_index,
     uint64_t& bitbuf,
     uint8_t& bufbits
 ) {
@@ -689,7 +691,7 @@
 
 void write_unary(
     uint32_t* compressed_words,
-    size_t& next_word_index,
+    uint32_t& next_word_index,
     uint64_t& bitbuf,
     uint8_t& bufbits,
     uint64_t value
@@ -709,9 +711,9 @@
 
   if (remaining > 15) throw std::out_of_range("remaining out of range");
 
-  const uint64_t the_unary_code = 1 << remaining;
+  const uint64_t the_unary_code = 1ULL << remaining;
   bitbuf |= the_unary_code << bufbits;
-  bufbits += 1 + remaining;
+  bufbits += static_cast<uint8_t>(remaining + 1);
   maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
 }
 
@@ -738,12 +740,12 @@
 // returns an integer that is between
 // zero and ceiling(log_2(k)) - 1, inclusive
 template<typename A>
-uint64_t cpc_compressor<A>::golomb_choose_number_of_base_bits(uint64_t k, uint64_t count) {
+uint8_t cpc_compressor<A>::golomb_choose_number_of_base_bits(uint32_t k, uint64_t count) {
   if (k < 1) throw std::invalid_argument("golomb_choose_number_of_base_bits: k < 1");
   if (count < 1) throw std::invalid_argument("golomb_choose_number_of_base_bits: count < 1");
   const uint64_t quotient = (k - count) / count; // integer division
   if (quotient == 0) return 0;
-  else return long_floor_log2_of_long(quotient);
+  else return floor_log2_of_long(quotient);
 }
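
A quick worked example of the base-bit choice: with k = 1024 and count = 100, quotient = (1024 - 100) / 100 = 9 by integer division, and floor(log2(9)) = 3, so three base bits are used for the Golomb-Rice low parts.
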
 
 } /* namespace datasketches */
diff --git a/cpc/include/cpc_sketch.hpp b/cpc/include/cpc_sketch.hpp
index a4bf8f6..651c254 100644
--- a/cpc/include/cpc_sketch.hpp
+++ b/cpc/include/cpc_sketch.hpp
@@ -192,7 +192,7 @@
   * @param value pointer to the data
   * @param size length of the data in bytes
    */
-  void update(const void* value, int size);
+  void update(const void* value, size_t size);
 
   /**
    * Returns a human-readable summary of this sketch
diff --git a/cpc/include/cpc_sketch_impl.hpp b/cpc/include/cpc_sketch_impl.hpp
index 60429b6..1bb1be1 100644
--- a/cpc/include/cpc_sketch_impl.hpp
+++ b/cpc/include/cpc_sketch_impl.hpp
@@ -176,7 +176,7 @@
 
 static inline uint32_t row_col_from_two_hashes(uint64_t hash0, uint64_t hash1, uint8_t lg_k) {
   if (lg_k > 26) throw std::logic_error("lg_k > 26");
-  const uint64_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   uint8_t col = count_leading_zeros_in_u64(hash1); // 0 <= col <= 64
   if (col > 63) col = 63; // clip so that 0 <= col <= 63
   const uint32_t row = hash0 & (k - 1);
@@ -188,7 +188,7 @@
 }
 
 template<typename A>
-void cpc_sketch_alloc<A>::update(const void* value, int size) {
+void cpc_sketch_alloc<A>::update(const void* value, size_t size) {
   HashState hashes;
   MurmurHash3_x64_128(value, size, seed, hashes);
   row_col_update(row_col_from_two_hashes(hashes.h1, hashes.h2, lg_k));
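
A minimal usage sketch of the widened signature (assuming the library's default cpc_sketch alias for cpc_sketch_alloc with the standard allocator):

    #include <string>

    // std::string::size() is a size_t, so it now flows into update()
    // without a narrowing conversion.
    void update_with_string(datasketches::cpc_sketch& sketch, const std::string& s) {
      sketch.update(s.data(), s.size());
    }
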
@@ -208,7 +208,7 @@
 
 template<typename A>
 void cpc_sketch_alloc<A>::update_sparse(uint32_t row_col) {
-  const uint64_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   const uint64_t c32pre = static_cast<uint64_t>(num_coupons) << 5;
   if (c32pre >= 3 * k) throw std::logic_error("c32pre >= 3 * k"); // C < 3K/32, in other words flavor == SPARSE
   bool is_novel = surprising_value_table.maybe_insert(row_col);
@@ -224,7 +224,7 @@
 template<typename A>
 void cpc_sketch_alloc<A>::update_windowed(uint32_t row_col) {
   if (window_offset > 56) throw std::logic_error("wrong window offset");
-  const uint64_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   const uint64_t c32pre = static_cast<uint64_t>(num_coupons) << 5;
   if (c32pre < 3 * k) throw std::logic_error("c32pre < 3 * k"); // C >= 3K/32, in other words flavor >= HYBRID
   const uint64_t c8pre = static_cast<uint64_t>(num_coupons) << 3;
@@ -266,7 +266,7 @@
 // Call this whenever a new coupon has been collected.
 template<typename A>
 void cpc_sketch_alloc<A>::update_hip(uint32_t row_col) {
-  const uint64_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   const uint8_t col = row_col & 63;
   const double one_over_p = static_cast<double>(k) / kxp;
   hip_est_accum += one_over_p;
@@ -276,7 +276,7 @@
 // In terms of flavor, this promotes SPARSE to HYBRID
 template<typename A>
 void cpc_sketch_alloc<A>::promote_sparse_to_windowed() {
-  const uint64_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   const uint64_t c32 = static_cast<uint64_t>(num_coupons) << 5;
   if (!(c32 == 3 * k || (lg_k == 4 && c32 > 3 * k))) throw std::logic_error("wrong c32");
 
@@ -285,16 +285,16 @@
   u32_table<A> new_table(2, 6 + lg_k, sliding_window.get_allocator());
 
   const uint32_t* old_slots = surprising_value_table.get_slots();
-  const size_t old_num_slots = 1 << surprising_value_table.get_lg_size();
+  const uint32_t old_num_slots = 1 << surprising_value_table.get_lg_size();
 
   if (window_offset != 0) throw std::logic_error("window_offset != 0");
 
-  for (size_t i = 0; i < old_num_slots; i++) {
+  for (uint32_t i = 0; i < old_num_slots; i++) {
     const uint32_t row_col = old_slots[i];
     if (row_col != UINT32_MAX) {
       const uint8_t col = row_col & 63;
       if (col < 8) {
-        const size_t row = row_col >> 6;
+        const uint32_t row = row_col >> 6;
         sliding_window[row] |= 1 << col;
       } else {
         // cannot use u32_table::must_insert(), because it doesn't provide for growth
@@ -314,7 +314,7 @@
   if (new_offset != determine_correct_offset(lg_k, num_coupons)) throw std::logic_error("new_offset is wrong");
 
   if (sliding_window.size() == 0) throw std::logic_error("no sliding window");
-  const uint64_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
 
   // Construct the full-sized bit matrix that corresponds to the sketch
   vector_u64<A> bit_matrix = build_bit_matrix();
@@ -328,7 +328,7 @@
   const uint64_t mask_for_flipping_early_zone = (static_cast<uint64_t>(1) << new_offset) - 1;
   uint64_t all_surprises_ored = 0;
 
-  for (size_t i = 0; i < k; i++) {
+  for (uint32_t i = 0; i < k; i++) {
     uint64_t pattern = bit_matrix[i];
     sliding_window[i] = (pattern >> new_offset) & 0xff;
     pattern &= mask_for_clearing_window;
@@ -357,7 +357,7 @@
 // so that it will reflect changes that were previously outside the mantissa.
 template<typename A>
 void cpc_sketch_alloc<A>::refresh_kxp(const uint64_t* bit_matrix) {
-  const uint64_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
 
   // for improved numerical accuracy, we separately sum the bytes of the U64's
   double byte_sums[8]; // allocating on the stack
@@ -689,7 +689,7 @@
 template<typename A>
 bool cpc_sketch_alloc<A>::validate() const {
   vector_u64<A> bit_matrix = build_bit_matrix();
-  const uint64_t num_bits_set = count_bits_set_in_matrix(bit_matrix.data(), 1 << lg_k);
+  const uint64_t num_bits_set = count_bits_set_in_matrix(bit_matrix.data(), 1ULL << lg_k);
   return num_bits_set == num_coupons;
 }
 
@@ -737,7 +737,7 @@
 
 template<typename A>
 typename cpc_sketch_alloc<A>::flavor cpc_sketch_alloc<A>::determine_flavor(uint8_t lg_k, uint64_t c) {
-  const uint64_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   const uint64_t c2 = c << 1;
   const uint64_t c8 = c << 3;
   const uint64_t c32 = c << 5;
@@ -750,15 +750,15 @@
 
 template<typename A>
 uint8_t cpc_sketch_alloc<A>::determine_correct_offset(uint8_t lg_k, uint64_t c) {
-  const uint64_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   const int64_t tmp = static_cast<int64_t>(c << 3) - static_cast<int64_t>(19 * k); // 8C - 19K
   if (tmp < 0) return 0;
-  return tmp >> (lg_k + 3); // tmp / 8K
+  return static_cast<uint8_t>(tmp >> (lg_k + 3)); // tmp / 8K
 }
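
Worked example of the offset formula: with lg_k = 10 (K = 1024) and C = 4096, tmp = 8*4096 - 19*1024 = 13312, and 13312 >> 13 = 1 (i.e. 13312 / 8192 rounded down), so the correct offset is 1.
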
 
 template<typename A>
 vector_u64<A> cpc_sketch_alloc<A>::build_bit_matrix() const {
-  const size_t k = 1 << lg_k;
+  const uint32_t k = 1 << lg_k;
   if (window_offset > 56) throw std::logic_error("offset > 56");
 
   // Fill the matrix with default rows in which the "early zone" is filled with ones.
@@ -775,12 +775,12 @@
   }
 
   const uint32_t* slots = surprising_value_table.get_slots();
-  const size_t num_slots = 1 << surprising_value_table.get_lg_size();
+  const uint32_t num_slots = 1 << surprising_value_table.get_lg_size();
   for (size_t i = 0; i < num_slots; i++) {
     const uint32_t row_col = slots[i];
     if (row_col != UINT32_MAX) {
       const uint8_t col = row_col & 63;
-      const size_t row = row_col >> 6;
+      const uint32_t row = row_col >> 6;
       // Flip the specified matrix bit from its default value.
       // In the "early" zone the bit changes from 1 to 0.
       // In the "late" zone the bit changes from 0 to 1.
diff --git a/cpc/include/cpc_union_impl.hpp b/cpc/include/cpc_union_impl.hpp
index 5acfe5f..e5a1e5d 100644
--- a/cpc/include/cpc_union_impl.hpp
+++ b/cpc/include/cpc_union_impl.hpp
@@ -191,8 +191,8 @@
 
 template<typename A>
 cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
-  const uint64_t k = 1 << lg_k;
-  const uint64_t num_coupons = count_bits_set_in_matrix(bit_matrix.data(), k);
+  const uint32_t k = 1 << lg_k;
+  const uint32_t num_coupons = count_bits_set_in_matrix(bit_matrix.data(), k);
 
   const auto flavor = cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons);
   if (flavor != cpc_sketch_alloc<A>::flavor::HYBRID && flavor != cpc_sketch_alloc<A>::flavor::PINNED
@@ -215,7 +215,7 @@
 
   // The snowplow effect was caused by processing the rows in order,
   // but we have fixed it by using a sufficiently large hash table.
-  for (unsigned i = 0; i < k; i++) {
+  for (uint32_t i = 0; i < k; i++) {
     uint64_t pattern = bit_matrix[i];
     sliding_window[i] = (pattern >> offset) & 0xff;
     pattern &= mask_for_clearing_window;
@@ -250,17 +250,17 @@
 template<typename A>
 void cpc_union_alloc<A>::walk_table_updating_sketch(const u32_table<A>& table) {
   const uint32_t* slots = table.get_slots();
-  const size_t num_slots = 1 << table.get_lg_size();
+  const uint32_t num_slots = 1 << table.get_lg_size();
   const uint64_t dst_mask = (((1 << accumulator->get_lg_k()) - 1) << 6) | 63; // downsamples when dst lgK < src LgK
 
   // Using a golden ratio stride fixes the snowplow effect.
   const double golden = 0.6180339887498949025;
-  size_t stride = static_cast<size_t>(golden * static_cast<double>(num_slots));
+  uint32_t stride = static_cast<uint32_t>(golden * static_cast<double>(num_slots));
   if (stride < 2) throw std::logic_error("stride < 2");
   if (stride == ((stride >> 1) << 1)) stride += 1; // force the stride to be odd
   if (stride < 3 || stride >= num_slots) throw std::out_of_range("stride out of range");
 
-  for (size_t i = 0, j = 0; i < num_slots; i++, j += stride) {
+  for (uint32_t i = 0, j = 0; i < num_slots; i++, j += stride) {
     j &= num_slots - 1;
     const uint32_t row_col = slots[j];
     if (row_col != UINT32_MAX) {
@@ -272,13 +272,13 @@
 template<typename A>
 void cpc_union_alloc<A>::or_table_into_matrix(const u32_table<A>& table) {
   const uint32_t* slots = table.get_slots();
-  const size_t num_slots = 1 << table.get_lg_size();
+  const uint32_t num_slots = 1 << table.get_lg_size();
   const uint64_t dest_mask = (1 << lg_k) - 1;  // downsamples when dst lgK < src lgK
-  for (size_t i = 0; i < num_slots; i++) {
+  for (uint32_t i = 0; i < num_slots; i++) {
     const uint32_t row_col = slots[i];
     if (row_col != UINT32_MAX) {
       const uint8_t col = row_col & 63;
-      const size_t row = row_col >> 6;
+      const uint32_t row = row_col >> 6;
       bit_matrix[row & dest_mask] |= static_cast<uint64_t>(1) << col; // set the bit
     }
   }
@@ -288,8 +288,8 @@
 void cpc_union_alloc<A>::or_window_into_matrix(const vector_u8<A>& sliding_window, uint8_t offset, uint8_t src_lg_k) {
   if (lg_k > src_lg_k) throw std::logic_error("dst LgK > src LgK");
   const uint64_t dst_mask = (1 << lg_k) - 1; // downsamples when dst lgK < src LgK
-  const size_t src_k = 1 << src_lg_k;
-  for (size_t src_row = 0; src_row < src_k; src_row++) {
+  const uint32_t src_k = 1 << src_lg_k;
+  for (uint32_t src_row = 0; src_row < src_k; src_row++) {
     bit_matrix[src_row & dst_mask] |= static_cast<uint64_t>(sliding_window[src_row]) << offset;
   }
 }
@@ -298,8 +298,8 @@
 void cpc_union_alloc<A>::or_matrix_into_matrix(const vector_u64<A>& src_matrix, uint8_t src_lg_k) {
   if (lg_k > src_lg_k) throw std::logic_error("dst LgK > src LgK");
   const uint64_t dst_mask = (1 << lg_k) - 1; // downsamples when dst lgK < src LgK
-  const size_t src_k = 1 << src_lg_k;
-  for (size_t src_row = 0; src_row < src_k; src_row++) {
+  const uint32_t src_k = 1 << src_lg_k;
+  for (uint32_t src_row = 0; src_row < src_k; src_row++) {
     bit_matrix[src_row & dst_mask] |= src_matrix[src_row];
   }
 }
@@ -313,7 +313,7 @@
     if (accumulator != nullptr) throw std::logic_error("accumulator is not null");
     vector_u64<A> old_matrix = std::move(bit_matrix);
     const uint8_t old_lg_k = lg_k;
-    const size_t new_k = 1 << new_lg_k;
+    const uint32_t new_k = 1 << new_lg_k;
     bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
     lg_k = new_lg_k;
     or_matrix_into_matrix(old_matrix, old_lg_k);
diff --git a/cpc/include/cpc_util.hpp b/cpc/include/cpc_util.hpp
index 1a33b3a..9bf8aa3 100644
--- a/cpc/include/cpc_util.hpp
+++ b/cpc/include/cpc_util.hpp
@@ -31,9 +31,9 @@
   else return quotient + 1;
 }
 
-static inline uint64_t long_floor_log2_of_long(uint64_t x) {
-  if (x < 1) throw std::invalid_argument("long_floor_log2_of_long: bad argument");
-  uint64_t p = 0;
+static inline uint8_t floor_log2_of_long(uint64_t x) {
+  if (x < 1) throw std::invalid_argument("floor_log2_of_long: bad argument");
+  uint8_t p = 0;
   uint64_t y = 1;
   while (true) {
     if (y == x) return p;
@@ -69,7 +69,7 @@
 // Note: this is an adaptation of the Java code,
 // which is apparently a variation of Figure 5-2 in "Hacker's Delight"
 // by Henry S. Warren.
-static inline uint64_t warren_bit_count(uint64_t i) {
+static inline uint32_t warren_bit_count(uint64_t i) {
   i = i - ((i >> 1) & 0x5555555555555555ULL);
   i = (i & 0x3333333333333333ULL) + ((i >> 2) & 0x3333333333333333ULL);
   i = (i + (i >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
@@ -79,9 +79,9 @@
   return i & 0x7f;
 }
 
-static inline uint64_t warren_count_bits_set_in_matrix(const uint64_t* array, size_t length) {
-  uint64_t count = 0;
-  for (size_t i = 0; i < length; i++) {
+static inline uint32_t warren_count_bits_set_in_matrix(const uint64_t* array, uint32_t length) {
+  uint32_t count = 0;
+  for (uint32_t i = 0; i < length; i++) {
     count += warren_bit_count(array[i]);
   }
   return count;
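
Aside: the Warren popcount has compiler-intrinsic equivalents; a sketch of the same matrix count using the GCC/Clang builtin (or std::popcount in C++20):

    #include <cstdint>

    static inline uint32_t builtin_count_bits_set(const uint64_t* array, uint32_t length) {
      uint32_t count = 0;
      for (uint32_t i = 0; i < length; i++) {
        count += static_cast<uint32_t>(__builtin_popcountll(array[i]));
      }
      return count;
    }
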
@@ -91,13 +91,13 @@
 
 #define CSA(h,l,a,b,c) {uint64_t u = a ^ b; uint64_t v = c; h = (a & b) | (u & v); l = u ^ v;}
 
-static inline uint64_t count_bits_set_in_matrix(const uint64_t* a, size_t length) {
+static inline uint32_t count_bits_set_in_matrix(const uint64_t* a, uint32_t length) {
   if ((length & 0x7) != 0) throw std::invalid_argument("the length of the array must be a multiple of 8");
-  uint64_t total = 0;
+  uint32_t total = 0;
   uint64_t ones, twos, twos_a, twos_b, fours, fours_a, fours_b, eights;
   fours = twos = ones = 0;
 
-  for (size_t i = 0; i <= length - 8; i = i + 8) {
+  for (uint32_t i = 0; i <= length - 8; i += 8) {
     CSA(twos_a, ones, ones, a[i+0], a[i+1]);
     CSA(twos_b, ones, ones, a[i+2], a[i+3]);
     CSA(fours_a, twos, twos, twos_a, twos_b);
diff --git a/cpc/include/icon_estimator.hpp b/cpc/include/icon_estimator.hpp
index 4a9daea..fb3c0c6 100644
--- a/cpc/include/icon_estimator.hpp
+++ b/cpc/include/icon_estimator.hpp
@@ -245,12 +245,12 @@
   return (0.7940236163830469 * k * pow(2.0, c / k));
 }
 
-static inline double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
+static inline double compute_icon_estimate(uint8_t lg_k, uint32_t c) {
   if (lg_k < ICON_MIN_LOG_K || lg_k > ICON_MAX_LOG_K) throw std::out_of_range("lg_k out of range");
   if (c < 2) return ((c == 0) ? 0.0 : 1.0);
-  const size_t k = 1 << lg_k;
-  const double double_k = k;
-  const double double_c = c;
+  const uint32_t k = 1 << lg_k;
+  const double double_k = static_cast<double>(k);
+  const double double_c = static_cast<double>(c);
   // Differing thresholds ensure that the approximated estimator is monotonically increasing.
   const double threshold_factor = ((lg_k < 14) ? 5.7 : 5.6);
   if (double_c > (threshold_factor * double_k)) return icon_exponential_approximation(double_k, double_c);
diff --git a/cpc/include/u32_table.hpp b/cpc/include/u32_table.hpp
index fe228a5..a344a17 100644
--- a/cpc/include/u32_table.hpp
+++ b/cpc/include/u32_table.hpp
@@ -29,11 +29,11 @@
 
 namespace datasketches {
 
-static const uint64_t U32_TABLE_UPSIZE_NUMER = 3LL;
-static const uint64_t U32_TABLE_UPSIZE_DENOM = 4LL;
+static const uint32_t U32_TABLE_UPSIZE_NUMER = 3LL;
+static const uint32_t U32_TABLE_UPSIZE_DENOM = 4LL;
 
-static const uint64_t U32_TABLE_DOWNSIZE_NUMER = 1LL;
-static const uint64_t U32_TABLE_DOWNSIZE_DENOM = 4LL;
+static const uint32_t U32_TABLE_DOWNSIZE_NUMER = 1LL;
+static const uint32_t U32_TABLE_DOWNSIZE_DENOM = 4LL;
 
 template<typename A>
 class u32_table {
@@ -42,7 +42,7 @@
   u32_table(const A& allocator);
   u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
 
-  inline size_t get_num_items() const;
+  inline uint32_t get_num_items() const;
   inline const uint32_t* get_slots() const;
   inline uint8_t get_lg_size() const;
   inline void clear();
@@ -52,7 +52,7 @@
   // returns true iff the item was present and was therefore removed from the table
   inline bool maybe_delete(uint32_t item);
 
-  static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator);
+  static u32_table make_from_pairs(const uint32_t* pairs, uint32_t num_pairs, uint8_t lg_k, const A& allocator);
 
   vector_u32<A> unwrapping_get_items() const;
 
@@ -69,10 +69,10 @@
 
   uint8_t lg_size; // log2 of number of slots
   uint8_t num_valid_bits;
-  size_t num_items;
+  uint32_t num_items;
   vector_u32<A> slots;
 
-  inline size_t lookup(uint32_t item) const;
+  inline uint32_t lookup(uint32_t item) const;
   inline void must_insert(uint32_t item);
   inline void rebuild(uint8_t new_lg_size);
 };
diff --git a/cpc/include/u32_table_impl.hpp b/cpc/include/u32_table_impl.hpp
index bf8ece9..a82e7de 100644
--- a/cpc/include/u32_table_impl.hpp
+++ b/cpc/include/u32_table_impl.hpp
@@ -41,14 +41,14 @@
 lg_size(lg_size),
 num_valid_bits(num_valid_bits),
 num_items(0),
-slots(1 << lg_size, UINT32_MAX, allocator)
+slots(1ULL << lg_size, UINT32_MAX, allocator)
 {
   if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
   if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
 }
 
 template<typename A>
-size_t u32_table<A>::get_num_items() const {
+uint32_t u32_table<A>::get_num_items() const {
   return num_items;
 }
 
@@ -70,7 +70,7 @@
 
 template<typename A>
 bool u32_table<A>::maybe_insert(uint32_t item) {
-  const size_t index = lookup(item);
+  const uint32_t index = lookup(item);
   if (slots[index] == item) return false;
   if (slots[index] != UINT32_MAX) throw std::logic_error("could not insert");
   slots[index] = item;
@@ -83,7 +83,7 @@
 
 template<typename A>
 bool u32_table<A>::maybe_delete(uint32_t item) {
-  const size_t index = lookup(item);
+  const uint32_t index = lookup(item);
   if (slots[index] == UINT32_MAX) return false;
   if (slots[index] != item) throw std::logic_error("item does not exist");
   if (num_items == 0) throw std::logic_error("delete error");
@@ -110,7 +110,7 @@
 
 // this one is specifically tailored to be part of the fm85 decompression scheme
 template<typename A>
-u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator) {
+u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, uint32_t num_pairs, uint8_t lg_k, const A& allocator) {
   uint8_t lg_num_slots = 2;
   while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
   u32_table<A> table(lg_num_slots, 6 + lg_k, allocator);
@@ -124,11 +124,11 @@
 }
 
 template<typename A>
-size_t u32_table<A>::lookup(uint32_t item) const {
-  const size_t size = 1 << lg_size;
-  const size_t mask = size - 1;
+uint32_t u32_table<A>::lookup(uint32_t item) const {
+  const uint32_t size = 1 << lg_size;
+  const uint32_t mask = size - 1;
   const uint8_t shift = num_valid_bits - lg_size;
-  size_t probe = item >> shift;
+  uint32_t probe = item >> shift;
   if (probe > mask) throw std::logic_error("probe out of range");
   while (slots[probe] != item && slots[probe] != UINT32_MAX) {
     probe = (probe + 1) & mask;
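 
[reviewer note — not part of the patch] lookup() is plain open addressing on a power-of-two table: the initial probe is the top lg_size bits of the item, then it walks linearly with wraparound. A worked example with small numbers:

    // num_valid_bits = 16, lg_size = 4  =>  shift = 12, mask = 0xF
    // item = 0xABCD: initial probe = 0xABCD >> 12 = 0xA
    // then (0xA + 1) & 0xF, (0xA + 2) & 0xF, ...
    // until the slot holds the item or is empty (UINT32_MAX)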
@@ -139,7 +139,7 @@
 // counts and resizing must be handled by the caller
 template<typename A>
 void u32_table<A>::must_insert(uint32_t item) {
-  const size_t index = lookup(item);
+  const uint32_t index = lookup(item);
   if (slots[index] == item) throw std::logic_error("item exists");
   if (slots[index] != UINT32_MAX) throw std::logic_error("could not insert");
   slots[index] = item;
@@ -148,13 +148,13 @@
 template<typename A>
 void u32_table<A>::rebuild(uint8_t new_lg_size) {
   if (new_lg_size < 2) throw std::logic_error("lg_size must be >= 2");
-  const size_t old_size = 1 << lg_size;
-  const size_t new_size = 1 << new_lg_size;
+  const uint32_t old_size = 1 << lg_size;
+  const uint32_t new_size = 1 << new_lg_size;
   if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
   vector_u32<A> old_slots = std::move(slots);
   slots = vector_u32<A>(new_size, UINT32_MAX, old_slots.get_allocator());
   lg_size = new_lg_size;
-  for (size_t i = 0; i < old_size; i++) {
+  for (uint32_t i = 0; i < old_size; i++) {
     if (old_slots[i] != UINT32_MAX) {
       must_insert(old_slots[i]);
     }
@@ -170,7 +170,7 @@
 template<typename A>
 vector_u32<A> u32_table<A>::unwrapping_get_items() const {
   if (num_items == 0) return vector_u32<A>(slots.get_allocator());
-  const size_t table_size = 1 << lg_size;
+  const uint32_t table_size = 1 << lg_size;
   vector_u32<A> result(num_items, 0, slots.get_allocator());
   size_t i = 0;
   size_t l = 0;
diff --git a/cpc/test/compression_test.cpp b/cpc/test/compression_test.cpp
index ed98274..9ce3e06 100644
--- a/cpc/test/compression_test.cpp
+++ b/cpc/test/compression_test.cpp
@@ -27,38 +27,38 @@
 typedef u32_table<std::allocator<void>> table;
 
 TEST_CASE("cpc sketch: compress and decompress pairs", "[cpc_sketch]") {
-  const int N = 200;
-  const int MAXWORDS = 1000;
+  const size_t N = 200;
+  const size_t MAXWORDS = 1000;
 
   HashState twoHashes;
   uint32_t pairArray[N];
   uint32_t pairArray2[N];
   uint64_t value = 35538947; // some arbitrary starting value
   const uint64_t golden64 = 0x9e3779b97f4a7c13ULL; // the golden ratio
-  for (int i = 0; i < N; i++) {
+  for (size_t i = 0; i < N; i++) {
     MurmurHash3_x64_128(&value, sizeof(value), 0, twoHashes);
     uint32_t rand = twoHashes.h1 & 0xffff;
     pairArray[i] = rand;
     value += golden64;
   }
   //table::knuth_shell_sort3(pairArray, 0, N - 1); // unsigned numerical sort
-  std::sort(pairArray, &pairArray[N]);
+  std::sort(pairArray, pairArray + N);
   uint32_t prev = UINT32_MAX;
-  int nxt = 0;
-  for (int i = 0; i < N; i++) { // uniquify
+  uint32_t nxt = 0;
+  for (size_t i = 0; i < N; i++) { // uniquify
     if (pairArray[i] != prev) {
       prev = pairArray[i];
       pairArray[nxt++] = pairArray[i];
     }
   }
-  int numPairs = nxt;
+  uint32_t numPairs = nxt;
 
   uint32_t compressedWords[MAXWORDS];
 
-  for (size_t numBaseBits = 0; numBaseBits <= 11; numBaseBits++) {
-    size_t numWordsWritten = get_compressor<std::allocator<void>>().low_level_compress_pairs(pairArray, numPairs, numBaseBits, compressedWords);
+  for (uint8_t numBaseBits = 0; numBaseBits <= 11; numBaseBits++) {
+    uint32_t numWordsWritten = get_compressor<std::allocator<void>>().low_level_compress_pairs(pairArray, numPairs, numBaseBits, compressedWords);
     get_compressor<std::allocator<void>>().low_level_uncompress_pairs(pairArray2, numPairs, numBaseBits, compressedWords, numWordsWritten);
-    for (int i = 0; i < numPairs; i++) {
+    for (size_t i = 0; i < numPairs; i++) {
       REQUIRE(pairArray[i] == pairArray2[i]);
     }
   }
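 
[reviewer note — not part of the patch] The sort-then-uniquify loop above can also be expressed with std::unique, assuming <algorithm> is already included for std::sort:

    std::sort(pairArray, pairArray + N);
    const uint32_t numPairs = static_cast<uint32_t>(std::unique(pairArray, pairArray + N) - pairArray);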
diff --git a/cpc/test/cpc_sketch_test.cpp b/cpc/test/cpc_sketch_test.cpp
index 837a61d..0a2ca74 100644
--- a/cpc/test/cpc_sketch_test.cpp
+++ b/cpc/test/cpc_sketch_test.cpp
@@ -283,6 +283,26 @@
   REQUIRE(deserialized.validate());
 }
 
+TEST_CASE("cpc sketch: serialize deserialize sliding huge", "[cpc_sketch]") {
+  cpc_sketch sketch(26);
+  const int n = 10000000;
+  for (int i = 0; i < n; i++) sketch.update(i);
+  REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.001));
+  auto bytes = sketch.serialize();
+  cpc_sketch deserialized = cpc_sketch::deserialize(bytes.data(), bytes.size());
+  REQUIRE(deserialized.is_empty() == sketch.is_empty());
+  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+  REQUIRE(deserialized.validate());
+  REQUIRE_THROWS_AS(cpc_sketch::deserialize(bytes.data(), 7), std::out_of_range);
+  REQUIRE_THROWS_AS(cpc_sketch::deserialize(bytes.data(), 15), std::out_of_range);
+  REQUIRE_THROWS_AS(cpc_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
+
+  // updating again with the same values should not change the sketch
+  for (int i = 0; i < n; i++) deserialized.update(i);
+  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+  REQUIRE(deserialized.validate());
+}
+
 TEST_CASE("cpc sketch: copy", "[cpc_sketch]") {
   cpc_sketch s1(11);
   s1.update(1);
diff --git a/cpc/test/cpc_union_test.cpp b/cpc/test/cpc_union_test.cpp
index e114cd0..688ea12 100644
--- a/cpc/test/cpc_union_test.cpp
+++ b/cpc/test/cpc_union_test.cpp
@@ -81,7 +81,7 @@
   cpc_union u(11);
   for (int i = 0; i < 1000; i++) {
     cpc_sketch tmp(11);
-    for (int i = 0; i < 10000; i++) {
+    for (int j = 0; j < 10000; j++) {
       s.update(key);
       tmp.update(key);
       key++;
diff --git a/fi/include/frequent_items_sketch_impl.hpp b/fi/include/frequent_items_sketch_impl.hpp
index b61ee55..593aa03 100644
--- a/fi/include/frequent_items_sketch_impl.hpp
+++ b/fi/include/frequent_items_sketch_impl.hpp
@@ -65,7 +65,7 @@
 void frequent_items_sketch<T, W, H, E, S, A>::merge(const frequent_items_sketch& other) {
   if (other.is_empty()) return;
   const W merged_total_weight = total_weight + other.get_total_weight(); // for correction at the end
-  for (auto &it: other.map) {
+  for (auto it: other.map) {
     update(it.first, it.second);
   }
   offset += other.offset;
@@ -76,7 +76,7 @@
 void frequent_items_sketch<T, W, H, E, S, A>::merge(frequent_items_sketch&& other) {
   if (other.is_empty()) return;
   const W merged_total_weight = total_weight + other.get_total_weight(); // for correction at the end
-  for (auto &it: other.map) {
+  for (auto it: other.map) {
     update(std::move(it.first), it.second);
   }
   offset += other.offset;
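 
[reviewer note — not part of the patch] The auto& -> auto changes in this file look deliberate: assuming this map's iterator returns its key/value pair by value (a proxy), a reference loop variable always binds to a temporary, which clang flags under -Wrange-loop-analysis:

    for (auto it: other.map) { /* ... */ }   // copies the proxy pair; what the patch uses
    // for (auto& it: other.map) { }         // warning: loop variable is always a copy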
@@ -147,7 +147,7 @@
 typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
 frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type, W threshold) const {
   vector_row items(map.get_allocator());
-  for (auto &it: map) {
+  for (auto it: map) {
     const W lb = it.second;
     const W ub = it.second + offset;
     if ((err_type == NO_FALSE_NEGATIVES && ub > threshold) || (err_type == NO_FALSE_POSITIVES && lb > threshold)) {
@@ -192,14 +192,14 @@
     A alloc(map.get_allocator());
     T* items = alloc.allocate(num_items);
     uint32_t i = 0;
-    for (auto &it: map) {
+    for (auto it: map) {
       new (&items[i]) T(it.first);
       weights[i++] = it.second;
     }
     write(os, weights, sizeof(W) * num_items);
     aw.deallocate(weights, num_items);
     S().serialize(os, items, num_items);
-    for (unsigned i = 0; i < num_items; i++) items[i].~T();
+    for (i = 0; i < num_items; i++) items[i].~T();
     alloc.deallocate(items, num_items);
   }
 }
@@ -208,7 +208,7 @@
 size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes() const {
   if (is_empty()) return PREAMBLE_LONGS_EMPTY * sizeof(uint64_t);
   size_t size = PREAMBLE_LONGS_NONEMPTY * sizeof(uint64_t) + map.get_num_active() * sizeof(W);
-  for (auto &it: map) size += S().size_of_item(it.first);
+  for (auto it: map) size += S().size_of_item(it.first);
   return size;
 }
 
@@ -248,7 +248,7 @@
     A alloc(map.get_allocator());
     T* items = alloc.allocate(num_items);
     uint32_t i = 0;
-    for (auto &it: map) {
+    for (auto it: map) {
       new (&items[i]) T(it.first);
       weights[i++] = it.second;
     }
@@ -256,7 +256,7 @@
     aw.deallocate(weights, num_items);
     const size_t bytes_remaining = end_ptr - ptr;
     ptr += S().serialize(ptr, bytes_remaining, items, num_items);
-    for (unsigned i = 0; i < num_items; i++) items[i].~T();
+    for (i = 0; i < num_items; i++) items[i].~T();
     alloc.deallocate(items, num_items);
   }
   return bytes;
@@ -266,20 +266,20 @@
 class frequent_items_sketch<T, W, H, E, S, A>::items_deleter {
 public:
   items_deleter(uint32_t num, bool destroy, const A& allocator):
-    allocator(allocator), num(num), destroy(destroy) {}
-  void set_destroy(bool destroy) { this->destroy = destroy; }
+    allocator_(allocator), num_(num), destroy_(destroy) {}
+  void set_destroy(bool destroy) { destroy_ = destroy; }
   void operator() (T* ptr) {
     if (ptr != nullptr) {
-      if (destroy) {
-        for (uint32_t i = 0; i < num; ++i) ptr[i].~T();
+      if (destroy_) {
+        for (uint32_t i = 0; i < num_; ++i) ptr[i].~T();
       }
-      allocator.deallocate(ptr, num);
+      allocator_.deallocate(ptr, num_);
     }
   }
 private:
-  A allocator;
-  uint32_t num;
-  bool destroy;
+  A allocator_;
+  uint32_t num_;
+  bool destroy_;
 };
 
 template<typename T, typename W, typename H, typename E, typename S, typename A>
@@ -350,7 +350,7 @@
   check_serial_version(serial_version);
   check_family_id(family_id);
   check_size(lg_cur_size, lg_max_size);
-  ensure_minimum_memory(size, 1 << preamble_longs);
+  ensure_minimum_memory(size, 1ULL << preamble_longs);
 
   frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
   if (!is_empty) {
@@ -431,14 +431,14 @@
   os << "### End sketch summary" << std::endl;
   if (print_items) {
     vector_row items;
-    for (auto &it: map) {
+    for (auto it: map) {
       items.push_back(row(&it.first, it.second, offset));
     }
     // sort by estimate in descending order
     std::sort(items.begin(), items.end(), [](row a, row b){ return a.get_estimate() > b.get_estimate(); });
     os << "### Items in descending order by estimate" << std::endl;
     os << "   item, estimate, lower bound, upper bound" << std::endl;
-    for (auto &it: items) {
+    for (auto it: items) {
       os << "   " << it.get_item() << ", " << it.get_estimate() << ", "
          << it.get_lower_bound() << ", " << it.get_upper_bound() << std::endl;
     }
diff --git a/fi/include/reverse_purge_hash_map_impl.hpp b/fi/include/reverse_purge_hash_map_impl.hpp
index beccea4..0b05d89 100644
--- a/fi/include/reverse_purge_hash_map_impl.hpp
+++ b/fi/include/reverse_purge_hash_map_impl.hpp
@@ -39,15 +39,15 @@
 lg_cur_size_(lg_cur_size),
 lg_max_size_(lg_max_size),
 num_active_(0),
-keys_(allocator_.allocate(1 << lg_cur_size)),
+keys_(allocator_.allocate(1ULL << lg_cur_size)),
 values_(nullptr),
 states_(nullptr)
 {
   AllocV av(allocator_);
-  values_ = av.allocate(1 << lg_cur_size);
+  values_ = av.allocate(1ULL << lg_cur_size);
   AllocU16 au16(allocator_);
-  states_ = au16.allocate(1 << lg_cur_size);
-  std::fill(states_, states_ + (1 << lg_cur_size), 0);
+  states_ = au16.allocate(1ULL << lg_cur_size);
+  std::fill(states_, states_ + (1ULL << lg_cur_size), static_cast<uint16_t>(0));
 }
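 
[reviewer note — not part of the patch] The 1 -> 1ULL changes matter because a plain literal shifts in (signed) int:

    uint8_t lg = 31;
    size_t bad  = 1 << lg;     // shift performed in 32-bit int: UB for lg >= 31
    size_t good = 1ULL << lg;  // shift performed in unsigned 64-bit: well-defined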
 
 template<typename K, typename V, typename H, typename E, typename A>
@@ -56,14 +56,14 @@
 lg_cur_size_(other.lg_cur_size_),
 lg_max_size_(other.lg_max_size_),
 num_active_(other.num_active_),
-keys_(allocator_.allocate(1 << lg_cur_size_)),
+keys_(allocator_.allocate(1ULL << lg_cur_size_)),
 values_(nullptr),
 states_(nullptr)
 {
   AllocV av(allocator_);
-  values_ = av.allocate(1 << lg_cur_size_);
+  values_ = av.allocate(1ULL << lg_cur_size_);
   AllocU16 au16(allocator_);
-  states_ = au16.allocate(1 << lg_cur_size_);
+  states_ = au16.allocate(1ULL << lg_cur_size_);
   const uint32_t size = 1 << lg_cur_size_;
   if (num_active_ > 0) {
     auto num = num_active_;
@@ -177,7 +177,7 @@
 
 template<typename K, typename V, typename H, typename E, typename A>
 uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const {
-  return (1 << lg_cur_size_) * LOAD_FACTOR;
+  return static_cast<uint32_t>((1 << lg_cur_size_) * LOAD_FACTOR);
 }
 
 template<typename K, typename V, typename H, typename E, typename A>
@@ -246,7 +246,7 @@
   // if none are found, the status is changed
   states_[delete_index] = 0; // mark as empty
   keys_[delete_index].~K();
-  uint32_t drift = 1;
+  uint16_t drift = 1;
   const uint32_t mask = (1 << lg_cur_size_) - 1;
   uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2
   // advance until we find a free location replacing locations as needed
@@ -322,7 +322,7 @@
   values_ = av.allocate(new_size);
   AllocU16 au16(allocator_);
   states_ = au16.allocate(new_size);
-  std::fill(states_, states_ + new_size, 0);
+  std::fill(states_, states_ + new_size, static_cast<uint16_t>(0));
   num_active_ = 0;
   lg_cur_size_ = lg_new_size;
   for (uint32_t i = 0; i < old_size; i++) {
diff --git a/fi/test/reverse_purge_hash_map_test.cpp b/fi/test/reverse_purge_hash_map_test.cpp
index a74345c..fedda6b 100644
--- a/fi/test/reverse_purge_hash_map_test.cpp
+++ b/fi/test/reverse_purge_hash_map_test.cpp
@@ -39,8 +39,8 @@
 TEST_CASE("reverse purge hash map: iterator", "[frequent_items_sketch]") {
   reverse_purge_hash_map<int> map(3, 4, std::allocator<int>());
   for (int i = 0; i < 11; i++) map.adjust_or_insert(i, 1); // this should fit with no purge
-  int sum = 0;
-  for (auto &it: map) sum += it.second;
+  uint64_t sum = 0;
+  for (auto it: map) sum += it.second;
   REQUIRE(sum == 11);
 }
 
diff --git a/kll/include/kll_sketch_impl.hpp b/kll/include/kll_sketch_impl.hpp
index 8109403..9caf14e 100644
--- a/kll/include/kll_sketch_impl.hpp
+++ b/kll/include/kll_sketch_impl.hpp
@@ -380,7 +380,7 @@
   size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
   size += S().size_of_item(*min_value_);
   size += S().size_of_item(*max_value_);
-  for (auto& it: *this) size += S().size_of_item(it.first);
+  for (auto it: *this) size += S().size_of_item(it.first);
   return size;
 }
 
diff --git a/kll/test/kll_sketch_test.cpp b/kll/test/kll_sketch_test.cpp
index f0c813e..b51f98b 100644
--- a/kll/test/kll_sketch_test.cpp
+++ b/kll/test/kll_sketch_test.cpp
@@ -71,7 +71,7 @@
     REQUIRE(sketch.get_CDF(split_points, 1).size() == 0);
 
     int count = 0;
-    for (auto& it: sketch) {
+    for (auto it: sketch) {
       (void) it; // to suppress "unused" warning
       FAIL("should be no iterations over an empty sketch");
     }
@@ -103,7 +103,7 @@
     REQUIRE(quantiles[2] == 1.0);
 
     int count = 0;
-    for (auto& it: sketch) {
+    for (auto it: sketch) {
       REQUIRE(it.second == 1);
       ++count;
     }
diff --git a/req/include/req_common.hpp b/req/include/req_common.hpp
index d2dc518..0e5b87a 100755
--- a/req/include/req_common.hpp
+++ b/req/include/req_common.hpp
@@ -29,7 +29,8 @@
 namespace datasketches {
 
 // TODO: have a common random bit with KLL
-static std::independent_bits_engine<std::mt19937, 1, unsigned> req_random_bit(std::chrono::system_clock::now().time_since_epoch().count());
+static std::independent_bits_engine<std::mt19937, 1, unsigned>
+  req_random_bit(static_cast<unsigned>(std::chrono::system_clock::now().time_since_epoch().count()));
 
 namespace req_constants {
   static const uint16_t MIN_K = 4;
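 
[reviewer note — not part of the patch] count() on the system clock returns a signed 64-bit value and the engine is seeded with 'unsigned', so the narrowing was implicit before; the cast just makes it explicit and silences the conversion warning:

    const auto ticks = std::chrono::system_clock::now().time_since_epoch().count(); // signed 64-bit
    std::mt19937 eng(static_cast<unsigned>(ticks)); // keeps the low 32 bits; adequate for a seed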
diff --git a/req/include/req_compactor.hpp b/req/include/req_compactor.hpp
index 2ca768b..682aa49 100755
--- a/req/include/req_compactor.hpp
+++ b/req/include/req_compactor.hpp
@@ -110,8 +110,8 @@
 
   bool ensure_enough_sections();
   std::pair<uint32_t, uint32_t> compute_compaction_range(uint32_t secs_to_compact) const;
-  void grow(size_t new_capacity);
-  void ensure_space(size_t num);
+  void grow(uint32_t new_capacity);
+  void ensure_space(uint32_t num);
 
   static uint32_t nearest_even(float value);
 
@@ -123,10 +123,10 @@
   req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::unique_ptr<T, items_deleter> items, uint32_t num_items, const Allocator& allocator);
 
   template<typename S>
-  static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator, size_t num);
+  static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator, uint32_t num);
 
   template<typename S>
-  static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, size_t num);
+  static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
 
 };
 
diff --git a/req/include/req_compactor_impl.hpp b/req/include/req_compactor_impl.hpp
index 3d5cce0..221768c 100755
--- a/req/include/req_compactor_impl.hpp
+++ b/req/include/req_compactor_impl.hpp
@@ -38,7 +38,7 @@
 hra_(hra),
 coin_(false),
 sorted_(sorted),
-section_size_raw_(section_size),
+section_size_raw_(static_cast<float>(section_size)),
 section_size_(section_size),
 num_sections_(req_constants::INIT_NUM_SECTIONS),
 state_(0),
@@ -72,9 +72,9 @@
 {
   if (other.items_ != nullptr) {
     items_ = allocator_.allocate(capacity_);
-    const size_t from = hra_ ? capacity_ - num_items_ : 0;
-    const size_t to = hra_ ? capacity_ : num_items_;
-    for (size_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
+    const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
+    const uint32_t to = hra_ ? capacity_ : num_items_;
+    for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
   }
 }
 
@@ -165,16 +165,16 @@
 template<typename FwdT>
 void req_compactor<T, C, A>::append(FwdT&& item) {
   if (num_items_ == capacity_) grow(capacity_ + get_nom_capacity());
-  const size_t i = hra_ ? capacity_ - num_items_ - 1 : num_items_;
+  const uint32_t i = hra_ ? capacity_ - num_items_ - 1 : num_items_;
   new (items_ + i) T(std::forward<FwdT>(item));
   ++num_items_;
   if (num_items_ > 1) sorted_ = false;
 }
 
 template<typename T, typename C, typename A>
-void req_compactor<T, C, A>::grow(size_t new_capacity) {
+void req_compactor<T, C, A>::grow(uint32_t new_capacity) {
   T* new_items = allocator_.allocate(new_capacity);
-  size_t new_i = hra_ ? new_capacity - num_items_ : 0;
+  uint32_t new_i = hra_ ? new_capacity - num_items_ : 0;
   for (auto it = begin(); it != end(); ++it, ++new_i) {
     new (new_items + new_i) T(std::move(*it));
     (*it).~T();
@@ -185,7 +185,7 @@
 }
 
 template<typename T, typename C, typename A>
-void req_compactor<T, C, A>::ensure_space(size_t num) {
+void req_compactor<T, C, A>::ensure_space(uint32_t num) {
   if (num_items_ + num > capacity_) grow(num_items_ + num + get_nom_capacity());
 }
 
@@ -218,13 +218,13 @@
   while (ensure_enough_sections()) {}
   ensure_space(other.get_num_items());
   sort();
-  auto middle = hra_ ? begin() : end();
+  auto offset = hra_ ? capacity_ - num_items_ : num_items_;
   auto from = hra_ ? begin() - other.get_num_items() : end();
   auto to = from + other.get_num_items();
   auto other_it = other.begin();
   for (auto it = from; it != to; ++it, ++other_it) new (it) T(conditional_forward<FwdC>(*other_it));
   if (!other.sorted_) std::sort(from, to, C());
-  if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(), middle, hra_ ? end() : to, C());
+  if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(), items_ + offset, hra_ ? end() : to, C());
   num_items_ += other.get_num_items();
 }
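 
[reviewer note — not part of the patch] The fix above replaces an iterator captured before the new items are constructed with an index-based midpoint: once the other compactor's items are placed, items_ + offset still marks the boundary between the two sorted runs, which is exactly what std::inplace_merge needs:

    std::vector<int> v{1, 3, 5,  2, 4, 6};
    std::inplace_merge(v.begin(), v.begin() + 3, v.end()); // v is now 1 2 3 4 5 6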
 
@@ -240,7 +240,7 @@
 std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& next) {
   const uint32_t starting_nom_capacity = get_nom_capacity();
   // choose a part of the buffer to compact
-  const uint32_t secs_to_compact = std::min(static_cast<uint32_t>(count_trailing_zeros_in_u32(~state_) + 1), static_cast<uint32_t>(num_sections_));
+  const uint32_t secs_to_compact = std::min<uint32_t>(count_trailing_zeros_in_u64(~state_) + 1, num_sections_);
   auto compaction_range = compute_compaction_range(secs_to_compact);
   if (compaction_range.second - compaction_range.first < 2) throw std::logic_error("compaction range error");
 
@@ -267,9 +267,9 @@
 
 template<typename T, typename C, typename A>
 bool req_compactor<T, C, A>::ensure_enough_sections() {
-  const float ssr = section_size_raw_ / sqrt(2);
+  const float ssr = section_size_raw_ / sqrtf(2);
   const uint32_t ne = nearest_even(ssr);
-  if (state_ >= static_cast<uint64_t>(1 << (num_sections_ - 1)) && ne >= req_constants::MIN_K) {
+  if (state_ >= static_cast<uint64_t>(1ULL << (num_sections_ - 1)) && ne >= req_constants::MIN_K) {
     section_size_raw_ = ssr;
     section_size_ = ne;
     num_sections_ <<= 1;
@@ -284,8 +284,8 @@
   uint32_t non_compact = get_nom_capacity() / 2 + (num_sections_ - secs_to_compact) * section_size_;
   // make compacted region even
   if (((num_items_ - non_compact) & 1) == 1) ++non_compact;
-  const size_t low = hra_ ? 0 : non_compact;
-  const size_t high = hra_ ? num_items_ - non_compact : num_items_;
+  const uint32_t low = hra_ ? 0 : non_compact;
+  const uint32_t high = hra_ ? num_items_ - non_compact : num_items_;
   return std::pair<uint32_t, uint32_t>(low, high);
 }
 
@@ -381,7 +381,7 @@
 
 template<typename T, typename C, typename A>
 template<typename S>
-auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator, size_t num)
+auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator, uint32_t num)
 -> std::unique_ptr<T, items_deleter> {
   A alloc(allocator);
   std::unique_ptr<T, items_deleter> items(alloc.allocate(num), items_deleter(allocator, false, num));
@@ -389,7 +389,7 @@
   // serde did not throw, enable destructors
   items.get_deleter().set_destroy(true);
   if (!is.good()) throw std::runtime_error("error reading from std::istream");
-  return std::move(items);
+  return items;
 }
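 
[reviewer note — not part of the patch] Returning a local std::unique_ptr by name already moves it; wrapping it in std::move disables copy elision and draws -Wredundant-move / -Wpessimizing-move:

    std::unique_ptr<int> make() {
      auto p = std::make_unique<int>(42);
      return p;               // implicit move (or elision)
      // return std::move(p); // flagged as redundant/pessimizing
    }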
 
 template<typename T, typename C, typename A>
@@ -430,7 +430,7 @@
 
 template<typename T, typename C, typename A>
 template<typename S>
-auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator, size_t num)
+auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator, uint32_t num)
 -> std::pair<std::unique_ptr<T, items_deleter>, size_t> {
   const char* ptr = static_cast<const char*>(bytes);
   const char* end_ptr = static_cast<const char*>(bytes) + size;
@@ -465,22 +465,22 @@
 template<typename T, typename C, typename A>
 class req_compactor<T, C, A>::items_deleter {
   public:
-  items_deleter(const A& allocator, bool destroy, uint32_t num): allocator(allocator), destroy(destroy), num(num) {}
+  items_deleter(const A& allocator, bool destroy, size_t num): allocator_(allocator), destroy_(destroy), num_(num) {}
   void operator() (T* ptr) {
     if (ptr != nullptr) {
-      if (destroy) {
-        for (uint32_t i = 0; i < num; ++i) {
+      if (destroy_) {
+        for (size_t i = 0; i < num_; ++i) {
           ptr[i].~T();
         }
       }
-      allocator.deallocate(ptr, num);
+      allocator_.deallocate(ptr, num_);
     }
   }
-  void set_destroy(bool destroy) { this->destroy = destroy; }
+  void set_destroy(bool destroy) { destroy_ = destroy; }
   private:
-  A allocator;
-  bool destroy;
-  uint32_t num;
+  A allocator_;
+  bool destroy_;
+  size_t num_;
 };
 
 } /* namespace datasketches */
diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index ca806cc..779caba 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -319,7 +319,7 @@
 
   // for deserialization
   class item_deleter;
-  req_sketch(uint32_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
+  req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
 
   static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
   static void check_serial_version(uint8_t serial_version);
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index 3c90908..ee6d9e1 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -28,7 +28,7 @@
 template<typename T, typename C, typename S, typename A>
 req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, const A& allocator):
 allocator_(allocator),
-k_(std::max(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
+k_(std::max<uint16_t>(static_cast<uint16_t>(k & 0xFFFE), req_constants::MIN_K)), //rounds down one if odd
 hra_(hra),
 max_nom_size_(0),
 num_retained_(0),
@@ -401,7 +401,7 @@
   write(os, k_);
   const uint8_t num_levels = is_empty() ? 0 : get_num_levels();
   write(os, num_levels);
-  const uint8_t num_raw_items = raw_items ? n_ : 0;
+  const uint8_t num_raw_items = raw_items ? static_cast<uint8_t>(n_) : 0;
   write(os, num_raw_items);
   if (is_empty()) return;
   if (is_estimation_mode()) {
@@ -440,7 +440,7 @@
   ptr += copy_to_mem(k_, ptr);
   const uint8_t num_levels = is_empty() ? 0 : get_num_levels();
   ptr += copy_to_mem(num_levels, ptr);
-  const uint8_t num_raw_items = raw_items ? n_ : 0;
+  const uint8_t num_raw_items = raw_items ? static_cast<uint8_t>(n_) : 0;
   ptr += copy_to_mem(num_raw_items, ptr);
   if (!is_empty()) {
     if (is_estimation_mode()) {
@@ -620,7 +620,7 @@
 
 template<typename T, typename C, typename S, typename A>
 uint8_t req_sketch<T, C, S, A>::get_num_levels() const {
-  return compactors_.size();
+  return static_cast<uint8_t>(compactors_.size());
 }
 
 template<typename T, typename C, typename S, typename A>
@@ -711,7 +711,7 @@
 };
 
 template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A>::req_sketch(uint32_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
+req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
 allocator_(compactors.get_allocator()),
 k_(k),
 hra_(hra),
@@ -766,9 +766,9 @@
 
 template<typename T, typename C, typename S, typename A>
 req_sketch<T, C, S, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
-levels_it_(begin),
-levels_end_(end),
-compactor_it_((*levels_it_).begin())
+    levels_it_(begin),
+    levels_end_(end),
+    compactor_it_(begin == end ? nullptr : (*levels_it_).begin())
 {}
 
 template<typename T, typename C, typename S, typename A>
@@ -802,7 +802,7 @@
 
 template<typename T, typename C, typename S, typename A>
 std::pair<const T&, const uint64_t> req_sketch<T, C, S, A>::const_iterator::operator*() const {
-  return std::pair<const T&, const uint64_t>(*compactor_it_, 1 << (*levels_it_).get_lg_weight());
+  return std::pair<const T&, const uint64_t>(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
 }
 
 } /* namespace datasketches */
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index 0be4aa5..a301a04 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -55,15 +55,15 @@
 
 TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
   req_sketch<float> sketch(12, false);
-  sketch.update(1);
+  sketch.update(1.0f);
   REQUIRE_FALSE(sketch.is_HRA());
   REQUIRE_FALSE(sketch.is_empty());
   REQUIRE_FALSE(sketch.is_estimation_mode());
   REQUIRE(sketch.get_n() == 1);
   REQUIRE(sketch.get_num_retained() == 1);
-  REQUIRE(sketch.get_rank(1) == 0);
-  REQUIRE(sketch.get_rank<true>(1) == 1);
-  REQUIRE(sketch.get_rank(1.1) == 1);
+  REQUIRE(sketch.get_rank(1.0f) == 0);
+  REQUIRE(sketch.get_rank<true>(1.0f) == 1);
+  REQUIRE(sketch.get_rank(1.1f) == 1);
   REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
   REQUIRE(sketch.get_quantile(0) == 1);
   REQUIRE(sketch.get_quantile(0.5) == 1);
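 
[reviewer note — not part of the patch] The 'f' suffixes avoid implicit conversions when T is float: an unsuffixed literal is a double, and an integer argument converts too, both of which warn under -Wconversion / -Wimplicit-float-conversion:

    float a = 1.1;    // double literal narrowed to float: warns
    float b = 1.1f;   // float literal: no conversion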
@@ -86,43 +86,43 @@
 
 TEST_CASE("req sketch: repeated values", "[req_sketch]") {
   req_sketch<float> sketch(12);
-  sketch.update(1);
-  sketch.update(1);
-  sketch.update(1);
-  sketch.update(2);
-  sketch.update(2);
-  sketch.update(2);
+  sketch.update(1.0f);
+  sketch.update(1.0f);
+  sketch.update(1.0f);
+  sketch.update(2.0f);
+  sketch.update(2.0f);
+  sketch.update(2.0f);
   REQUIRE_FALSE(sketch.is_empty());
   REQUIRE_FALSE(sketch.is_estimation_mode());
   REQUIRE(sketch.get_n() == 6);
   REQUIRE(sketch.get_num_retained() == 6);
-  REQUIRE(sketch.get_rank(1) == 0);
-  REQUIRE(sketch.get_rank<true>(1) == 0.5);
-  REQUIRE(sketch.get_rank(2) == 0.5);
-  REQUIRE(sketch.get_rank<true>(2) == 1);
+  REQUIRE(sketch.get_rank(1.0f) == 0);
+  REQUIRE(sketch.get_rank<true>(1.0f) == 0.5);
+  REQUIRE(sketch.get_rank(2.0f) == 0.5);
+  REQUIRE(sketch.get_rank<true>(2.0f) == 1);
 }
 
 TEST_CASE("req sketch: exact mode", "[req_sketch]") {
   req_sketch<float> sketch(12);
-  for (size_t i = 1; i <= 10; ++i) sketch.update(i);
+  for (size_t i = 1; i <= 10; ++i) sketch.update(static_cast<float>(i));
   REQUIRE_FALSE(sketch.is_empty());
   REQUIRE_FALSE(sketch.is_estimation_mode());
   REQUIRE(sketch.get_n() == 10);
   REQUIRE(sketch.get_num_retained() == 10);
 
   // like KLL
-  REQUIRE(sketch.get_rank(1) == 0);
-  REQUIRE(sketch.get_rank(2) == 0.1);
-  REQUIRE(sketch.get_rank(6) == 0.5);
-  REQUIRE(sketch.get_rank(9) == 0.8);
-  REQUIRE(sketch.get_rank(10) == 0.9);
+  REQUIRE(sketch.get_rank(1.0f) == 0);
+  REQUIRE(sketch.get_rank(2.0f) == 0.1);
+  REQUIRE(sketch.get_rank(6.0f) == 0.5);
+  REQUIRE(sketch.get_rank(9.0f) == 0.8);
+  REQUIRE(sketch.get_rank(10.0f) == 0.9);
 
   // inclusive
-  REQUIRE(sketch.get_rank<true>(1) == 0.1);
-  REQUIRE(sketch.get_rank<true>(2) == 0.2);
-  REQUIRE(sketch.get_rank<true>(5) == 0.5);
-  REQUIRE(sketch.get_rank<true>(9) == 0.9);
-  REQUIRE(sketch.get_rank<true>(10) == 1);
+  REQUIRE(sketch.get_rank<true>(1.0f) == 0.1);
+  REQUIRE(sketch.get_rank<true>(2.0f) == 0.2);
+  REQUIRE(sketch.get_rank<true>(5.0f) == 0.5);
+  REQUIRE(sketch.get_rank<true>(9.0f) == 0.9);
+  REQUIRE(sketch.get_rank<true>(10.0f) == 1);
 
   // like KLL
   REQUIRE(sketch.get_quantile(0) == 1);
@@ -164,16 +164,16 @@
 TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
   req_sketch<float> sketch(12);
   const size_t n = 100000;
-  for (size_t i = 0; i < n; ++i) sketch.update(i);
+  for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
   REQUIRE_FALSE(sketch.is_empty());
   REQUIRE(sketch.is_estimation_mode());
   REQUIRE(sketch.get_n() == n);
 //  std::cout << sketch.to_string(true);
   REQUIRE(sketch.get_num_retained() < n);
   REQUIRE(sketch.get_rank(0) == 0);
-  REQUIRE(sketch.get_rank(n) == 1);
-  REQUIRE(sketch.get_rank(n / 2) == Approx(0.5).margin(0.01));
-  REQUIRE(sketch.get_rank(n - 1) == Approx(1).margin(0.01));
+  REQUIRE(sketch.get_rank(static_cast<float>(n)) == 1);
+  REQUIRE(sketch.get_rank(n / 2.0f) == Approx(0.5).margin(0.01));
+  REQUIRE(sketch.get_rank(n - 1.0f) == Approx(1).margin(0.01));
   REQUIRE(sketch.get_min_value() == 0);
   REQUIRE(sketch.get_max_value() == n - 1);
   REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
@@ -219,7 +219,7 @@
 
 TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
   req_sketch<float> sketch(12);
-  sketch.update(1);
+  sketch.update(1.0f);
 
   std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
   sketch.serialize(s);
@@ -235,7 +235,7 @@
 
 TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
   req_sketch<float> sketch(12);
-  sketch.update(1);
+  sketch.update(1.0f);
 
   auto bytes = sketch.serialize();
   REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
@@ -253,7 +253,7 @@
 TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
   req_sketch<float> sketch(12);
   const size_t n = 50;
-  for (size_t i = 0; i < n; ++i) sketch.update(i);
+  for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
   REQUIRE_FALSE(sketch.is_estimation_mode());
 
   std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -271,7 +271,7 @@
 TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
   req_sketch<float> sketch(12);
   const size_t n = 50;
-  for (size_t i = 0; i < n; ++i) sketch.update(i);
+  for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
   REQUIRE_FALSE(sketch.is_estimation_mode());
 
   auto bytes = sketch.serialize();
@@ -290,7 +290,7 @@
 TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
   req_sketch<float> sketch(12);
   const size_t n = 100000;
-  for (size_t i = 0; i < n; ++i) sketch.update(i);
+  for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
   REQUIRE(sketch.is_estimation_mode());
 
   std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -308,7 +308,7 @@
 TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
   req_sketch<float> sketch(12);
   const size_t n = 100000;
-  for (size_t i = 0; i < n; ++i) sketch.update(i);
+  for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
   REQUIRE(sketch.is_estimation_mode());
 
   auto bytes = sketch.serialize();
@@ -326,7 +326,7 @@
 TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
   req_sketch<float> sketch(12);
   const size_t n = 100000;
-  for (size_t i = 0; i < n; ++i) sketch.update(i);
+  for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
   REQUIRE(sketch.is_estimation_mode());
 
   std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -373,8 +373,8 @@
   REQUIRE(sketch.get_num_retained() == 1);
   REQUIRE(sketch.get_min_value() == 1);
   REQUIRE(sketch.get_max_value() == 1);
-  REQUIRE(sketch.get_rank(1) == 0);
-  REQUIRE(sketch.get_rank<true>(1) == 1);
+  REQUIRE(sketch.get_rank(1.0f) == 0);
+  REQUIRE(sketch.get_rank<true>(1.0f) == 1);
 }
 
 TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
@@ -388,7 +388,7 @@
   REQUIRE(sketch.get_num_retained() == 4);
   REQUIRE(sketch.get_min_value() == 0);
   REQUIRE(sketch.get_max_value() == 3);
-  REQUIRE(sketch.get_rank(2) == 0.5);
+  REQUIRE(sketch.get_rank(2.0f) == 0.5);
 }
 
 TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
@@ -402,7 +402,7 @@
   REQUIRE(sketch.get_num_retained() == 100);
   REQUIRE(sketch.get_min_value() == 0);
   REQUIRE(sketch.get_max_value() == 99);
-  REQUIRE(sketch.get_rank(50) == 0.5);
+  REQUIRE(sketch.get_rank(50.0f) == 0.5);
 }
 
 TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
@@ -416,14 +416,14 @@
   REQUIRE(sketch.get_num_retained() == 2942);
   REQUIRE(sketch.get_min_value() == 0);
   REQUIRE(sketch.get_max_value() == 9999);
-  REQUIRE(sketch.get_rank(5000) == 0.5);
+  REQUIRE(sketch.get_rank(5000.0f) == 0.5);
 }
 
 TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
   req_sketch<float> sketch1(40);
 
   req_sketch<float> sketch2(40);
-  for (size_t i = 0; i < 1000; ++i) sketch2.update(i);
+  for (size_t i = 0; i < 1000; ++i) sketch2.update(static_cast<float>(i));
 
   sketch1.merge(sketch2);
   REQUIRE(sketch1.get_min_value() == 0);
@@ -431,15 +431,15 @@
   REQUIRE(sketch1.get_quantile(0.25) == Approx(250).margin(3));
   REQUIRE(sketch1.get_quantile(0.5) == Approx(500).margin(3));
   REQUIRE(sketch1.get_quantile(0.75) == Approx(750).margin(3));
-  REQUIRE(sketch1.get_rank(500) == Approx(0.5).margin(0.01));
+  REQUIRE(sketch1.get_rank(500.0f) == Approx(0.5).margin(0.01));
 }
 
 TEST_CASE("req sketch: merge", "[req_sketch]") {
   req_sketch<float> sketch1(100);
-  for (size_t i = 0; i < 1000; ++i) sketch1.update(i);
+  for (size_t i = 0; i < 1000; ++i) sketch1.update(static_cast<float>(i));
 
   req_sketch<float> sketch2(100);
-  for (size_t i = 1000; i < 2000; ++i) sketch2.update(i);
+  for (size_t i = 1000; i < 2000; ++i) sketch2.update(static_cast<float>(i));
 
   sketch1.merge(sketch2);
   REQUIRE(sketch1.get_min_value() == 0);
@@ -447,18 +447,18 @@
   REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
   REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
   REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
-  REQUIRE(sketch1.get_rank(1000) == Approx(0.5).margin(0.01));
+  REQUIRE(sketch1.get_rank(1000.0f) == Approx(0.5).margin(0.01));
 }
 
 TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
   req_sketch<float> sketch1(12);
-  for (size_t i = 0; i < 40; ++i) sketch1.update(i);
+  for (size_t i = 0; i < 40; ++i) sketch1.update(static_cast<float>(i));
 
   req_sketch<float> sketch2(12);
-  for (size_t i = 40; i < 80; ++i) sketch2.update(i);
+  for (size_t i = 40; i < 80; ++i) sketch2.update(static_cast<float>(i));
 
   req_sketch<float> sketch3(12);
-  for (size_t i = 80; i < 120; ++i) sketch3.update(i);
+  for (size_t i = 80; i < 120; ++i) sketch3.update(static_cast<float>(i));
 
   req_sketch<float> sketch(12);
   sketch.merge(sketch1);
@@ -467,15 +467,15 @@
   REQUIRE(sketch.get_min_value() == 0);
   REQUIRE(sketch.get_max_value() == 119);
   REQUIRE(sketch.get_quantile(0.5) == Approx(60).margin(3));
-  REQUIRE(sketch.get_rank(60) == Approx(0.5).margin(0.01));
+  REQUIRE(sketch.get_rank(60.0f) == Approx(0.5).margin(0.01));
 }
 
 TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
   req_sketch<float> sketch1(12);
-  sketch1.update(1);
+  sketch1.update(1.0f);
 
   req_sketch<float> sketch2(12, false);
-  sketch2.update(1);
+  sketch2.update(1.0f);
 
   REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
 }
diff --git a/sampling/include/var_opt_sketch_impl.hpp b/sampling/include/var_opt_sketch_impl.hpp
index 16f8216..33be587 100644
--- a/sampling/include/var_opt_sketch_impl.hpp
+++ b/sampling/include/var_opt_sketch_impl.hpp
@@ -334,7 +334,7 @@
     num_bytes += (h_ / 8) + (h_ % 8 > 0);
   }
   // must iterate over the items
-  for (auto& it: *this)
+  for (auto it: *this)
     num_bytes += S().size_of_item(it.first);
   return num_bytes;
 }
diff --git a/sampling/test/var_opt_sketch_test.cpp b/sampling/test/var_opt_sketch_test.cpp
index c1121c5..44560c9 100644
--- a/sampling/test/var_opt_sketch_test.cpp
+++ b/sampling/test/var_opt_sketch_test.cpp
@@ -220,7 +220,7 @@
   }
 
   double output_sum = 0.0;
-  for (auto& it : sk) { // std::pair<int, weight>
+  for (auto it : sk) { // std::pair<int, weight>
     output_sum += it.second;
   }
     
diff --git a/theta/include/bounds_on_ratios_in_sampled_sets.hpp b/theta/include/bounds_on_ratios_in_sampled_sets.hpp
index e2c5433..341319a 100644
--- a/theta/include/bounds_on_ratios_in_sampled_sets.hpp
+++ b/theta/include/bounds_on_ratios_in_sampled_sets.hpp
@@ -90,7 +90,7 @@
    * @param f the inclusion probability used to produce the set with size <i>a</i>.
    * @return the approximate lower bound
    */
-  static double estimate_of_a(uint64_t a, uint64_t f) {
+  static double estimate_of_a(uint64_t a, double f) {
     check_inputs(a, 1, f);
     return a / f;
   }
diff --git a/theta/include/theta_jaccard_similarity_base.hpp b/theta/include/theta_jaccard_similarity_base.hpp
index cb18601..783a184 100644
--- a/theta/include/theta_jaccard_similarity_base.hpp
+++ b/theta/include/theta_jaccard_similarity_base.hpp
@@ -131,9 +131,9 @@
 
   template<typename SketchA, typename SketchB>
   static typename Union::CompactSketch compute_union(const SketchA& sketch_a, const SketchB& sketch_b) {
-    const unsigned count_a = sketch_a.get_num_retained();
-    const unsigned count_b = sketch_b.get_num_retained();
-    const unsigned lg_k = std::min(std::max(log2(ceiling_power_of_2(count_a + count_b)), theta_constants::MIN_LG_K), theta_constants::MAX_LG_K);
+    const auto count_a = sketch_a.get_num_retained();
+    const auto count_b = sketch_b.get_num_retained();
+    const auto lg_k = std::min(std::max(log2(ceiling_power_of_2(count_a + count_b)), theta_constants::MIN_LG_K), theta_constants::MAX_LG_K);
     auto u = typename Union::builder().set_lg_k(lg_k).build();
     u.update(sketch_a);
     u.update(sketch_b);
diff --git a/theta/include/theta_sketch_impl.hpp b/theta/include/theta_sketch_impl.hpp
index 774f715..0653a70 100644
--- a/theta/include/theta_sketch_impl.hpp
+++ b/theta/include/theta_sketch_impl.hpp
@@ -290,7 +290,7 @@
 
 template<typename A>
 uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const {
-  return entries_.size();
+  return static_cast<uint32_t>(entries_.size());
 }
 
 template<typename A>
@@ -300,22 +300,22 @@
 
 template<typename A>
 auto compact_theta_sketch_alloc<A>::begin() -> iterator {
-  return iterator(entries_.data(), entries_.size(), 0);
+  return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
 }
 
 template<typename A>
 auto compact_theta_sketch_alloc<A>::end() -> iterator {
-  return iterator(nullptr, 0, entries_.size());
+  return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
 }
 
 template<typename A>
 auto compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
-  return const_iterator(entries_.data(), entries_.size(), 0);
+  return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
 }
 
 template<typename A>
 auto compact_theta_sketch_alloc<A>::end() const -> const_iterator {
-  return const_iterator(nullptr, 0, entries_.size());
+  return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
 }
 
 template<typename A>
@@ -343,7 +343,7 @@
   write(os, seed_hash);
   if (!this->is_empty()) {
     if (!is_single_item) {
-      const uint32_t num_entries = entries_.size();
+      const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
       write(os, num_entries);
       const uint32_t unused32 = 0;
       write(os, unused32);
@@ -381,7 +381,7 @@
   ptr += copy_to_mem(seed_hash, ptr);
   if (!this->is_empty()) {
     if (!is_single_item) {
-      const uint32_t num_entries = entries_.size();
+      const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
       ptr += copy_to_mem(num_entries, ptr);
       ptr += sizeof(uint32_t);
       if (this->is_estimation_mode()) {
diff --git a/theta/include/theta_update_sketch_base_impl.hpp b/theta/include/theta_update_sketch_base_impl.hpp
index a343c78..ce577e6 100644
--- a/theta/include/theta_update_sketch_base_impl.hpp
+++ b/theta/include/theta_update_sketch_base_impl.hpp
@@ -39,7 +39,7 @@
 entries_(nullptr)
 {
   if (lg_cur_size > 0) {
-    const size_t size = 1 << lg_cur_size;
+    const size_t size = 1ULL << lg_cur_size;
     entries_ = allocator_.allocate(size);
     for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
   }
@@ -58,7 +58,7 @@
 entries_(nullptr)
 {
   if (other.entries_ != nullptr) {
-    const size_t size = 1 << lg_cur_size_;
+    const size_t size = 1ULL << lg_cur_size_;
     entries_ = allocator_.allocate(size);
     for (size_t i = 0; i < size; ++i) {
       if (EK()(other.entries_[i]) != 0) {
@@ -89,7 +89,7 @@
 theta_update_sketch_base<EN, EK, A>::~theta_update_sketch_base()
 {
   if (entries_ != nullptr) {
-    const size_t size = 1 << lg_cur_size_;
+    const size_t size = 1ULL << lg_cur_size_;
     for (size_t i = 0; i < size; ++i) {
       if (EK()(entries_[i]) != 0) entries_[i].~EN();
     }
@@ -136,7 +136,7 @@
 
 template<typename EN, typename EK, typename A>
 auto theta_update_sketch_base<EN, EK, A>::find(uint64_t key) const -> std::pair<iterator, bool> {
-  const size_t size = 1 << lg_cur_size_;
+  const size_t size = 1ULL << lg_cur_size_;
   const size_t mask = size - 1;
   const uint32_t stride = get_stride(key, lg_cur_size_);
   uint32_t index = static_cast<uint32_t>(key) & mask;
@@ -175,13 +175,13 @@
 
 template<typename EN, typename EK, typename A>
 auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
-  return &entries_[1 << lg_cur_size_];
+  return &entries_[1ULL << lg_cur_size_];
 }
 
 template<typename EN, typename EK, typename A>
 uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
   const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
-  return std::floor(fraction * (1 << lg_cur_size));
+  return static_cast<uint32_t>(std::floor(fraction * (1 << lg_cur_size)));
 }
 
 template<typename EN, typename EK, typename A>
@@ -192,11 +192,11 @@
 
 template<typename EN, typename EK, typename A>
 void theta_update_sketch_base<EN, EK, A>::resize() {
-  const size_t old_size = 1 << lg_cur_size_;
+  const size_t old_size = 1ULL << lg_cur_size_;
   const uint8_t lg_tgt_size = lg_nom_size_ + 1;
-  const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
+  const uint8_t factor = std::max<uint8_t>(1, std::min<uint8_t>(static_cast<uint8_t>(rf_), static_cast<uint8_t>(lg_tgt_size - lg_cur_size_)));
   lg_cur_size_ += factor;
-  const size_t new_size = 1 << lg_cur_size_;
+  const size_t new_size = 1ULL << lg_cur_size_;
   EN* old_entries = entries_;
   entries_ = allocator_.allocate(new_size);
   for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
@@ -214,7 +214,7 @@
 // assumes number of entries > nominal size
 template<typename EN, typename EK, typename A>
 void theta_update_sketch_base<EN, EK, A>::rebuild() {
-  const size_t size = 1 << lg_cur_size_;
+  const size_t size = 1ULL << lg_cur_size_;
   const uint32_t nominal_size = 1 << lg_nom_size_;
 
   // empty entries have uninitialized payloads
@@ -301,7 +301,7 @@
 
 template<typename Derived, typename Allocator>
 uint64_t theta_base_builder<Derived, Allocator>::starting_theta() const {
-  if (p_ < 1) return theta_constants::MAX_THETA * p_;
+  if (p_ < 1) return static_cast<uint64_t>(theta_constants::MAX_THETA * p_);
   return theta_constants::MAX_THETA;
 }
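 
[reviewer note — not part of the patch] A quick reading of the sampling math above, assuming MAX_THETA is the largest valid theta: with p = 0.001f the starting theta is about MAX_THETA / 1000, so an incoming 64-bit hash is retained with probability roughly p; the cast truncates the float product back to uint64_t:

    const float p = 0.001f;
    const uint64_t theta = static_cast<uint64_t>(theta_constants::MAX_THETA * p); // ~MAX_THETA / 1000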
 
diff --git a/theta/test/theta_a_not_b_test.cpp b/theta/test/theta_a_not_b_test.cpp
index 1ef5255..4e6ff26 100644
--- a/theta/test/theta_a_not_b_test.cpp
+++ b/theta/test/theta_a_not_b_test.cpp
@@ -37,7 +37,7 @@
 TEST_CASE("theta a-not-b: non empty no retained keys", "[theta_a_not_b]") {
   update_theta_sketch a = update_theta_sketch::builder().build();
   a.update(1);
-  update_theta_sketch b = update_theta_sketch::builder().set_p(0.001).build();
+  update_theta_sketch b = update_theta_sketch::builder().set_p(0.001f).build();
   theta_a_not_b a_not_b;
 
   // B is still empty
diff --git a/theta/test/theta_intersection_test.cpp b/theta/test/theta_intersection_test.cpp
index 2c8d6c0..c8fb6e6 100644
--- a/theta/test/theta_intersection_test.cpp
+++ b/theta/test/theta_intersection_test.cpp
@@ -48,7 +48,7 @@
 }
 
 TEST_CASE("theta intersection: non empty no retained keys", "[theta_intersection]") {
-  update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001).build();
+  update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001f).build();
   sketch.update(1);
   theta_intersection intersection;
   intersection.update(sketch);
diff --git a/theta/test/theta_jaccard_similarity_test.cpp b/theta/test/theta_jaccard_similarity_test.cpp
index 9354d1c..d40a0ce 100644
--- a/theta/test/theta_jaccard_similarity_test.cpp
+++ b/theta/test/theta_jaccard_similarity_test.cpp
@@ -107,7 +107,7 @@
 TEST_CASE("theta jaccard: similarity test", "[theta_sketch]") {
   const int8_t min_lg_k = 12;
   const int u1 = 1 << 20;
-  const int u2 = u1 * 0.95;
+  const int u2 = static_cast<int>(u1 * 0.95);
   const double threshold = 0.943;
 
   auto expected = update_theta_sketch::builder().set_lg_k(min_lg_k).build();
@@ -127,7 +127,7 @@
 TEST_CASE("theta jaccard: dissimilarity test", "[theta_sketch]") {
   const int8_t min_lg_k = 12;
   const int u1 = 1 << 20;
-  const int u2 = u1 * 0.05;
+  const int u2 = static_cast<int>(u1 * 0.05);
   const double threshold = 0.061;
 
   auto expected = update_theta_sketch::builder().set_lg_k(min_lg_k).build();
diff --git a/theta/test/theta_sketch_test.cpp b/theta/test/theta_sketch_test.cpp
index f817a3e..eeb2a73 100644
--- a/theta/test/theta_sketch_test.cpp
+++ b/theta/test/theta_sketch_test.cpp
@@ -50,7 +50,7 @@
 }
 
 TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
-  update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001).build();
+  update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001f).build();
   update_sketch.update(1);
   //std::cerr << update_sketch.to_string();
   REQUIRE(update_sketch.get_num_retained() == 0);
diff --git a/theta/test/theta_union_test.cpp b/theta/test/theta_union_test.cpp
index e45862d..c170457 100644
--- a/theta/test/theta_union_test.cpp
+++ b/theta/test/theta_union_test.cpp
@@ -39,7 +39,7 @@
 }
 
 TEST_CASE("theta union: non empty no retained keys", "[theta_union]") {
-  update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001).build();
+  update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001f).build();
   update_sketch.update(1);
   theta_union u = theta_union::builder().build();
   u.update(update_sketch);
diff --git a/tuple/include/array_of_doubles_sketch_impl.hpp b/tuple/include/array_of_doubles_sketch_impl.hpp
index f81e544..b494d66 100644
--- a/tuple/include/array_of_doubles_sketch_impl.hpp
+++ b/tuple/include/array_of_doubles_sketch_impl.hpp
@@ -88,7 +88,7 @@
   write(os, seed_hash);
   write(os, this->theta_);
   if (this->get_num_retained() > 0) {
-    const uint32_t num_entries = this->entries_.size();
+    const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
     write(os, num_entries);
     const uint32_t unused32 = 0;
     write(os, unused32);
@@ -128,7 +128,7 @@
   ptr += copy_to_mem(seed_hash, ptr);
   ptr += copy_to_mem((this->theta_), ptr);
   if (this->get_num_retained() > 0) {
-    const uint32_t num_entries = this->entries_.size();
+    const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
     ptr += copy_to_mem(num_entries, ptr);
     ptr += sizeof(uint32_t); // unused
     for (const auto& it: this->entries_) {
diff --git a/tuple/include/tuple_sketch_impl.hpp b/tuple/include/tuple_sketch_impl.hpp
index 28e26e2..1eba1e6 100644
--- a/tuple/include/tuple_sketch_impl.hpp
+++ b/tuple/include/tuple_sketch_impl.hpp
@@ -315,7 +315,7 @@
 
 template<typename S, typename A>
 uint32_t compact_tuple_sketch<S, A>::get_num_retained() const {
-  return entries_.size();
+  return static_cast<uint32_t>(entries_.size());
 }
 
 template<typename S, typename A>
@@ -367,7 +367,7 @@
   write(os, seed_hash);
   if (!this->is_empty()) {
     if (!is_single_item) {
-      const uint32_t num_entries = entries_.size();
+      const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
       write(os, num_entries);
       const uint32_t unused32 = 0;
       write(os, unused32);
@@ -412,7 +412,7 @@
   ptr += copy_to_mem(seed_hash, ptr);
   if (!this->is_empty()) {
     if (!is_single_item) {
-      const uint32_t num_entries = entries_.size();
+      const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
       ptr += copy_to_mem(num_entries, ptr);
       ptr += sizeof(uint32_t); // unused
       if (this->is_estimation_mode()) {
@@ -535,22 +535,22 @@
 
 template<typename S, typename A>
 auto compact_tuple_sketch<S, A>::begin() -> iterator {
-  return iterator(entries_.data(), entries_.size(), 0);
+  return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
 }
 
 template<typename S, typename A>
 auto compact_tuple_sketch<S, A>::end() -> iterator {
-  return iterator(nullptr, 0, entries_.size());
+  return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
 }
 
 template<typename S, typename A>
 auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
-  return const_iterator(entries_.data(), entries_.size(), 0);
+  return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
 }
 
 template<typename S, typename A>
 auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
-  return const_iterator(nullptr, 0, entries_.size());
+  return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
 }
 
 template<typename S, typename A>
diff --git a/tuple/test/array_of_doubles_sketch_test.cpp b/tuple/test/array_of_doubles_sketch_test.cpp
index 7a5e359..d40491f 100644
--- a/tuple/test/array_of_doubles_sketch_test.cpp
+++ b/tuple/test/array_of_doubles_sketch_test.cpp
@@ -75,7 +75,7 @@
 }
 
 TEST_CASE("aod sketch: serialization compatibility with java - non-empty no entries", "[tuple_sketch]") {
-  auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.01).build();
+  auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.01f).build();
   std::vector<double> a = {1};
   update_sketch.update(1, a);
   REQUIRE_FALSE(update_sketch.is_empty());
diff --git a/tuple/test/tuple_a_not_b_test.cpp b/tuple/test/tuple_a_not_b_test.cpp
index 7c9446c..84c1881 100644
--- a/tuple/test/tuple_a_not_b_test.cpp
+++ b/tuple/test/tuple_a_not_b_test.cpp
@@ -38,8 +38,8 @@
 
 TEST_CASE("tuple a-not-b: non empty no retained keys", "[tuple_a_not_b]") {
   auto a = update_tuple_sketch<float>::builder().build();
-  a.update(1, 1);
-  auto b = update_tuple_sketch<float>::builder().set_p(0.001).build();
+  a.update(1, 1.0f);
+  auto b = update_tuple_sketch<float>::builder().set_p(0.001f).build();
   tuple_a_not_b<float> a_not_b;
 
   // B is still empty
@@ -51,7 +51,7 @@
   REQUIRE(result.get_estimate() == 1.0);
 
   // B is not empty in estimation mode and no entries
-  b.update(1, 1);
+  b.update(1, 1.0f);
   REQUIRE(b.get_num_retained() == 0);
 
   result = a_not_b.compute(a, b);
@@ -65,11 +65,11 @@
 TEST_CASE("tuple a-not-b: exact mode half overlap", "[tuple_a_not_b]") {
   auto a = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 1000; i++) a.update(value++, 1);
+  for (int i = 0; i < 1000; i++) a.update(value++, 1.0f);
 
   auto b = update_tuple_sketch<float>::builder().build();
   value = 500;
-  for (int i = 0; i < 1000; i++) b.update(value++, 1);
+  for (int i = 0; i < 1000; i++) b.update(value++, 1.0f);
 
   tuple_a_not_b<float> a_not_b;
 
@@ -105,7 +105,7 @@
 TEST_CASE("mixed a-not-b: exact mode half overlap", "[tuple_a_not_b]") {
   auto a = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 1000; i++) a.update(value++, 1);
+  for (int i = 0; i < 1000; i++) a.update(value++, 1.0f);
 
   auto b = update_theta_sketch::builder().build();
   value = 500;
@@ -145,10 +145,10 @@
 TEST_CASE("tuple a-not-b: exact mode disjoint", "[tuple_a_not_b]") {
   auto a = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 1000; i++) a.update(value++, 1);
+  for (int i = 0; i < 1000; i++) a.update(value++, 1.0f);
 
   auto b = update_tuple_sketch<float>::builder().build();
-  for (int i = 0; i < 1000; i++) b.update(value++, 1);
+  for (int i = 0; i < 1000; i++) b.update(value++, 1.0f);
 
   tuple_a_not_b<float> a_not_b;
 
@@ -168,7 +168,7 @@
 TEST_CASE("tuple a-not-b: exact mode full overlap", "[tuple_a_not_b]") {
   auto sketch = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 1000; i++) sketch.update(value++, 1);
+  for (int i = 0; i < 1000; i++) sketch.update(value++, 1.0f);
 
   tuple_a_not_b<float> a_not_b;
 
@@ -188,11 +188,11 @@
 TEST_CASE("tuple a-not-b: estimation mode half overlap", "[tuple_a_not_b]") {
   auto a = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 10000; i++) a.update(value++, 1);
+  for (int i = 0; i < 10000; i++) a.update(value++, 1.0f);
 
   auto b = update_tuple_sketch<float>::builder().build();
   value = 5000;
-  for (int i = 0; i < 10000; i++) b.update(value++, 1);
+  for (int i = 0; i < 10000; i++) b.update(value++, 1.0f);
 
   tuple_a_not_b<float> a_not_b;
 
@@ -212,10 +212,10 @@
 TEST_CASE("tuple a-not-b: estimation mode disjoint", "[tuple_a_not_b]") {
   auto a = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 10000; i++) a.update(value++, 1);
+  for (int i = 0; i < 10000; i++) a.update(value++, 1.0f);
 
   auto b = update_tuple_sketch<float>::builder().build();
-  for (int i = 0; i < 10000; i++) b.update(value++, 1);
+  for (int i = 0; i < 10000; i++) b.update(value++, 1.0f);
 
   tuple_a_not_b<float> a_not_b;
 
@@ -235,7 +235,7 @@
 TEST_CASE("tuple a-not-b: estimation mode full overlap", "[tuple_a_not_b]") {
   auto sketch = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 10000; i++) sketch.update(value++, 1);
+  for (int i = 0; i < 10000; i++) sketch.update(value++, 1.0f);
 
   tuple_a_not_b<float> a_not_b;
 
@@ -254,7 +254,7 @@
 
 TEST_CASE("tuple a-not-b: seed mismatch", "[tuple_a_not_b]") {
   auto sketch = update_tuple_sketch<float>::builder().build();
-  sketch.update(1, 1); // non-empty should not be ignored
+  sketch.update(1, 1.0f); // non-empty should not be ignored
   tuple_a_not_b<float> a_not_b(123);
   REQUIRE_THROWS_AS(a_not_b.compute(sketch, sketch), std::invalid_argument);
 }
@@ -262,11 +262,11 @@
 TEST_CASE("tuple a-not-b: issue #152", "[tuple_a_not_b]") {
   auto a = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 10000; i++) a.update(value++, 1);
+  for (int i = 0; i < 10000; i++) a.update(value++, 1.0f);
 
   auto b = update_tuple_sketch<float>::builder().build();
   value = 5000;
-  for (int i = 0; i < 25000; i++) b.update(value++, 1);
+  for (int i = 0; i < 25000; i++) b.update(value++, 1.0f);
 
   tuple_a_not_b<float> a_not_b;
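
The update() changes in these tests are the same family of warning: for update_tuple_sketch<float> the summary is a float, so update(value, 1) converts the int literal to float inside the sketch, which -Wconversion reports. Writing 1.0f keeps the summary in its own type. A sketch of the idea, where store_summary is a hypothetical stand-in for the per-entry summary assignment:

    #include <iostream>

    // Hypothetical stand-in for the float summary the sketch stores per entry.
    void store_summary(float s) { std::cout << s << '\n'; }

    int main() {
      store_summary(1);    // int literal -> float conversion, flagged by -Wconversion
      store_summary(1.0f); // float literal matches the parameter type exactly
    }
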
 
diff --git a/tuple/test/tuple_intersection_test.cpp b/tuple/test/tuple_intersection_test.cpp
index 06ccd76..d3a26b9 100644
--- a/tuple/test/tuple_intersection_test.cpp
+++ b/tuple/test/tuple_intersection_test.cpp
@@ -59,8 +59,8 @@
 }
 
 TEST_CASE("tuple intersection: non empty no retained keys", "[tuple_intersection]") {
-  auto sketch = update_tuple_sketch<float>::builder().set_p(0.001).build();
-  sketch.update(1, 1);
+  auto sketch = update_tuple_sketch<float>::builder().set_p(0.001f).build();
+  sketch.update(1, 1.0f);
   tuple_intersection_float intersection;
   intersection.update(sketch);
   auto result = intersection.get_result();
@@ -82,11 +82,11 @@
 TEST_CASE("tuple intersection: exact mode half overlap", "[tuple_intersection]") {
   auto sketch1 = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
+  for (int i = 0; i < 1000; i++) sketch1.update(value++, 1.0f);
 
   auto sketch2 = update_tuple_sketch<float>::builder().build();
   value = 500;
-  for (int i = 0; i < 1000; i++) sketch2.update(value++, 1);
+  for (int i = 0; i < 1000; i++) sketch2.update(value++, 1.0f);
 
   { // unordered
     tuple_intersection_float intersection;
@@ -111,10 +111,10 @@
 TEST_CASE("tuple intersection: exact mode disjoint", "[tuple_intersection]") {
   auto sketch1 = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
+  for (int i = 0; i < 1000; i++) sketch1.update(value++, 1.0f);
 
   auto sketch2 = update_tuple_sketch<float>::builder().build();
-  for (int i = 0; i < 1000; i++) sketch2.update(value++, 1);
+  for (int i = 0; i < 1000; i++) sketch2.update(value++, 1.0f);
 
   { // unordered
     tuple_intersection_float intersection;
@@ -139,7 +139,7 @@
 TEST_CASE("mixed intersection: exact mode half overlap", "[tuple_intersection]") {
   auto sketch1 = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
+  for (int i = 0; i < 1000; i++) sketch1.update(value++, 1.0f);
 
   auto sketch2 = update_theta_sketch::builder().build();
   value = 500;
@@ -168,11 +168,11 @@
 TEST_CASE("tuple intersection: estimation mode half overlap", "[tuple_intersection]") {
   auto sketch1 = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 10000; i++) sketch1.update(value++, 1);
+  for (int i = 0; i < 10000; i++) sketch1.update(value++, 1.0f);
 
   auto sketch2 = update_tuple_sketch<float>::builder().build();
   value = 5000;
-  for (int i = 0; i < 10000; i++) sketch2.update(value++, 1);
+  for (int i = 0; i < 10000; i++) sketch2.update(value++, 1.0f);
 
   { // unordered
     tuple_intersection_float intersection;
@@ -197,10 +197,10 @@
 TEST_CASE("tuple intersection: estimation mode disjoint", "[tuple_intersection]") {
   auto sketch1 = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 10000; i++) sketch1.update(value++, 1);
+  for (int i = 0; i < 10000; i++) sketch1.update(value++, 1.0f);
 
   auto sketch2 = update_tuple_sketch<float>::builder().build();
-  for (int i = 0; i < 10000; i++) sketch2.update(value++, 1);
+  for (int i = 0; i < 10000; i++) sketch2.update(value++, 1.0f);
 
   { // unordered
     tuple_intersection_float intersection;
@@ -224,7 +224,7 @@
 
 TEST_CASE("tuple intersection: seed mismatch", "[tuple_intersection]") {
   auto sketch = update_tuple_sketch<float>::builder().build();
-  sketch.update(1, 1); // non-empty should not be ignored
+  sketch.update(1, 1.0f); // non-empty should not be ignored
   tuple_intersection_float intersection(123);
   REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
 }
diff --git a/tuple/test/tuple_jaccard_similarity_test.cpp b/tuple/test/tuple_jaccard_similarity_test.cpp
index 2b3efbb..0c957db 100644
--- a/tuple/test/tuple_jaccard_similarity_test.cpp
+++ b/tuple/test/tuple_jaccard_similarity_test.cpp
@@ -44,7 +44,7 @@
 
 TEST_CASE("tuple jaccard: same sketch exact mode", "[tuple_sketch]") {
   auto sk = update_tuple_sketch<float>::builder().build();
-  for (int i = 0; i < 1000; ++i) sk.update(i, 1);
+  for (int i = 0; i < 1000; ++i) sk.update(i, 1.0f);
 
   // update sketch
   auto jc = tuple_jaccard_similarity_float::jaccard(sk, sk);
@@ -61,8 +61,8 @@
   auto sk_a = update_tuple_sketch<float>::builder().build();
   auto sk_b = update_tuple_sketch<float>::builder().build();
   for (int i = 0; i < 1000; ++i) {
-    sk_a.update(i, 1);
-    sk_b.update(i, 1);
+    sk_a.update(i, 1.0f);
+    sk_b.update(i, 1.0f);
   }
 
   // update sketches
@@ -83,8 +83,8 @@
   auto sk_a = update_tuple_sketch<float>::builder().build();
   auto sk_b = update_tuple_sketch<float>::builder().build();
   for (int i = 0; i < 1000; ++i) {
-    sk_a.update(i, 1);
-    sk_b.update(i + 1000, 1);
+    sk_a.update(i, 1.0f);
+    sk_b.update(i + 1000, 1.0f);
   }
 
   // update sketches
diff --git a/tuple/test/tuple_sketch_allocation_test.cpp b/tuple/test/tuple_sketch_allocation_test.cpp
index a8e279a..4e834e8 100644
--- a/tuple/test/tuple_sketch_allocation_test.cpp
+++ b/tuple/test/tuple_sketch_allocation_test.cpp
@@ -64,7 +64,7 @@
     REQUIRE(count == update_sketch.get_num_retained());
 
     update_sketch.trim();
-    REQUIRE(update_sketch.get_num_retained() == (1 << update_sketch.get_lg_k()));
+    REQUIRE(update_sketch.get_num_retained() == (1U << update_sketch.get_lg_k()));
 
     auto compact_sketch = update_sketch.compact();
     REQUIRE(!compact_sketch.is_empty());
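
The 1U change fixes a signed/unsigned mismatch: get_num_retained() returns an unsigned count, while (1 << lg_k) is a signed int, so the comparison triggers -Wsign-compare (and a signed shift would be undefined if lg_k could ever reach 31). Making the shifted literal unsigned keeps both the shift and the comparison in unsigned arithmetic. A minimal sketch mirroring the assertion:

    #include <cstdint>

    // Hypothetical check: the retained-entry count is unsigned, so the
    // nominal capacity 2^lg_k is computed as unsigned too.
    bool at_nominal_capacity(uint32_t num_retained, uint8_t lg_k) {
      return num_retained == (1U << lg_k); // (1 << lg_k) would be int: sign-compare warning
    }
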
diff --git a/tuple/test/tuple_sketch_test.cpp b/tuple/test/tuple_sketch_test.cpp
index ec5d959..e6a87f1 100644
--- a/tuple/test/tuple_sketch_test.cpp
+++ b/tuple/test/tuple_sketch_test.cpp
@@ -40,7 +40,7 @@
 
 TEST_CASE("tuple sketch float: builder", "[tuple_sketch]") {
   auto builder = update_tuple_sketch<float>::builder();
-  builder.set_lg_k(10).set_p(0.5).set_resize_factor(theta_constants::resize_factor::X2).set_seed(123);
+  builder.set_lg_k(10).set_p(0.5f).set_resize_factor(theta_constants::resize_factor::X2).set_seed(123);
   auto sketch = builder.build();
   REQUIRE(sketch.get_lg_k() == 10);
   REQUIRE(sketch.get_theta() == 0.5);
@@ -74,9 +74,9 @@
 
 TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
   auto update_sketch = update_tuple_sketch<float>::builder().build();
-  update_sketch.update(1, 1);
-  update_sketch.update(2, 2);
-  update_sketch.update(1, 1);
+  update_sketch.update(1, 1.0f);
+  update_sketch.update(2, 2.0f);
+  update_sketch.update(1, 1.0f);
 //  std::cout << update_sketch.to_string(true);
   REQUIRE(!update_sketch.is_empty());
   REQUIRE(!update_sketch.is_estimation_mode());
@@ -167,11 +167,11 @@
 
 TEST_CASE("tuple sketch: float, custom policy", "[tuple_sketch]") {
   auto update_sketch = max_float_update_tuple_sketch::builder(max_value_policy<float>(5)).build();
-  update_sketch.update(1, 1);
-  update_sketch.update(1, 2);
-  update_sketch.update(2, 10);
-  update_sketch.update(3, 3);
-  update_sketch.update(3, 7);
+  update_sketch.update(1, 1.0f);
+  update_sketch.update(1, 2.0f);
+  update_sketch.update(2, 10.0f);
+  update_sketch.update(3, 3.0f);
+  update_sketch.update(3, 7.0f);
 //  std::cout << update_sketch.to_string(true);
   int count = 0;
   float sum = 0;
@@ -212,37 +212,37 @@
 TEST_CASE("tuple sketch: float, update with different types of keys", "[tuple_sketch]") {
   auto sketch = update_tuple_sketch<float>::builder().build();
 
-  sketch.update(static_cast<uint64_t>(1), 1);
+  sketch.update(static_cast<uint64_t>(1), 1.0f);
   REQUIRE(sketch.get_num_retained() == 1);
 
-  sketch.update(static_cast<int64_t>(1), 1);
+  sketch.update(static_cast<int64_t>(1), 1.0f);
   REQUIRE(sketch.get_num_retained() == 1);
 
-  sketch.update(static_cast<uint32_t>(1), 1);
+  sketch.update(static_cast<uint32_t>(1), 1.0f);
   REQUIRE(sketch.get_num_retained() == 1);
 
-  sketch.update(static_cast<int32_t>(1), 1);
+  sketch.update(static_cast<int32_t>(1), 1.0f);
   REQUIRE(sketch.get_num_retained() == 1);
 
-  sketch.update(static_cast<uint16_t>(1), 1);
+  sketch.update(static_cast<uint16_t>(1), 1.0f);
   REQUIRE(sketch.get_num_retained() == 1);
 
-  sketch.update(static_cast<int16_t>(1), 1);
+  sketch.update(static_cast<int16_t>(1), 1.0f);
   REQUIRE(sketch.get_num_retained() == 1);
 
-  sketch.update(static_cast<uint8_t>(1), 1);
+  sketch.update(static_cast<uint8_t>(1), 1.0f);
   REQUIRE(sketch.get_num_retained() == 1);
 
-  sketch.update(static_cast<int8_t>(1), 1);
+  sketch.update(static_cast<int8_t>(1), 1.0f);
   REQUIRE(sketch.get_num_retained() == 1);
 
-  sketch.update(1.0, 1);
+  sketch.update(1.0, 1.0f);
   REQUIRE(sketch.get_num_retained() == 2);
 
-  sketch.update(static_cast<float>(1), 1);
+  sketch.update(static_cast<float>(1), 1.0f);
   REQUIRE(sketch.get_num_retained() == 2);
 
-  sketch.update("a", 1);
+  sketch.update("a", 1.0f);
   REQUIRE(sketch.get_num_retained() == 3);
 }
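
For reference, the retained-entry counts in the test above follow from how keys are hashed: integral keys of every width are widened to a 64-bit integer before hashing, so all the integer forms of 1 collide into a single entry; floating-point keys are canonicalized as doubles, so 1.0 and the float 1 collide with each other but not with integer 1; and a string hashes its bytes, giving the third entry. A minimal reproduction, assuming the datasketches-cpp headers are on the include path:

    #include <tuple_sketch.hpp>

    int main() {
      auto sk = datasketches::update_tuple_sketch<float>::builder().build();
      sk.update(static_cast<int8_t>(1), 1.0f);   // integral key, widened to 64 bits
      sk.update(static_cast<uint64_t>(1), 1.0f); // same 64-bit value: still 1 entry
      sk.update(1.0, 1.0f);                      // double key: distinct, 2 entries
      sk.update(1.0f, 1.0f);                     // float promotes to double: still 2
      sk.update("a", 1.0f);                      // string key: 3 entries
      return sk.get_num_retained() == 3 ? 0 : 1;
    }
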
 
diff --git a/tuple/test/tuple_union_test.cpp b/tuple/test/tuple_union_test.cpp
index 4088fa2..10c9a25 100644
--- a/tuple/test/tuple_union_test.cpp
+++ b/tuple/test/tuple_union_test.cpp
@@ -51,9 +51,9 @@
 }
 
 TEST_CASE("tuple_union float: non-empty no retained entries", "[tuple union]") {
-  auto update_sketch = update_tuple_sketch<float>::builder().set_p(0.001).build();
+  auto update_sketch = update_tuple_sketch<float>::builder().set_p(0.001f).build();
 //  std::cout << update_sketch.to_string();
-  update_sketch.update(1, 1);
+  update_sketch.update(1, 1.0f);
   REQUIRE(!update_sketch.is_empty());
   REQUIRE(update_sketch.get_num_retained() == 0);
   auto u = tuple_union<float>::builder().build();
@@ -69,12 +69,12 @@
 
 TEST_CASE("tuple_union float: simple case", "[tuple union]") {
   auto update_sketch1 = update_tuple_sketch<float>::builder().build();
-  update_sketch1.update(1, 1);
-  update_sketch1.update(2, 1);
+  update_sketch1.update(1, 1.0f);
+  update_sketch1.update(2, 1.0f);
 
   auto update_sketch2 = update_tuple_sketch<float>::builder().build();
-  update_sketch2.update(1, 1);
-  update_sketch2.update(3, 1);
+  update_sketch2.update(1, 1.0f);
+  update_sketch2.update(3, 1.0f);
 
   auto u = tuple_union<float>::builder().build();
   u.update(update_sketch1);
@@ -86,11 +86,11 @@
 TEST_CASE("tuple_union float: exact mode half overlap", "[tuple union]") {
   auto update_sketch1 = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 1000; ++i) update_sketch1.update(value++, 1);
+  for (int i = 0; i < 1000; ++i) update_sketch1.update(value++, 1.0f);
 
   auto update_sketch2 = update_tuple_sketch<float>::builder().build();
   value = 500;
-  for (int i = 0; i < 1000; ++i) update_sketch2.update(value++, 1);
+  for (int i = 0; i < 1000; ++i) update_sketch2.update(value++, 1.0f);
 
   { // unordered
     auto u = tuple_union<float>::builder().build();
@@ -115,11 +115,11 @@
 TEST_CASE("tuple_union float: estimation mode half overlap", "[tuple union]") {
   auto update_sketch1 = update_tuple_sketch<float>::builder().build();
   int value = 0;
-  for (int i = 0; i < 10000; ++i) update_sketch1.update(value++, 1);
+  for (int i = 0; i < 10000; ++i) update_sketch1.update(value++, 1.0f);
 
   auto update_sketch2 = update_tuple_sketch<float>::builder().build();
   value = 5000;
-  for (int i = 0; i < 10000; ++i) update_sketch2.update(value++, 1);
+  for (int i = 0; i < 10000; ++i) update_sketch2.update(value++, 1.0f);
 
   { // unordered
     auto u = tuple_union<float>::builder().build();
@@ -143,7 +143,7 @@
 
 TEST_CASE("tuple_union float: seed mismatch", "[tuple union]") {
   auto update_sketch = update_tuple_sketch<float>::builder().build();
-  update_sketch.update(1, 1); // non-empty should not be ignored
+  update_sketch.update(1, 1.0f); // non-empty should not be ignored
 
   auto u = tuple_union<float>::builder().set_seed(123).build();
   REQUIRE_THROWS_AS(u.update(update_sketch), std::invalid_argument);
@@ -154,7 +154,7 @@
 
   // tuple update
   auto update_tuple = update_tuple_sketch<float>::builder().build();
-  for (unsigned i = 0; i < 10; ++i) update_tuple.update(i, 1);
+  for (unsigned i = 0; i < 10; ++i) update_tuple.update(i, 1.0f);
   u.update(update_tuple);
 
   // tuple compact