theta/include/theta_sketch_impl.hpp - datasketches-cpp - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 #ifndef THETA_SKETCH_IMPL_HPP_
 #define THETA_SKETCH_IMPL_HPP_

 #include <sstream>
 #include <vector>
 #include <stdexcept>

 #include "serde.hpp"
 #include "binomial_bounds.hpp"
 #include "theta_helpers.hpp"
 #include "count_zeros.hpp"
 #include "bit_packing.hpp"

 namespace datasketches {

 template<typename A>
 bool base_theta_sketch_alloc<A>::is_estimation_mode() const {
   return get_theta64() < theta_constants::MAX_THETA && !is_empty();
 }

 template<typename A>
 double base_theta_sketch_alloc<A>::get_theta() const {
   return static_cast<double>(get_theta64()) /
          static_cast<double>(theta_constants::MAX_THETA);
 }

 template<typename A>
 double base_theta_sketch_alloc<A>::get_estimate() const {
   return get_num_retained() / get_theta();
 }

 template<typename A>
 double base_theta_sketch_alloc<A>::get_lower_bound(uint8_t num_std_devs) const {
   if (!is_estimation_mode()) return get_num_retained();
   return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
 }

 template<typename A>
 double base_theta_sketch_alloc<A>::get_upper_bound(uint8_t num_std_devs) const {
   if (!is_estimation_mode()) return get_num_retained();
   return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
 }

 template<typename A>
 string<A> base_theta_sketch_alloc<A>::to_string(bool print_details) const {
   // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
   // The stream does not support passing an allocator instance, and alternatives are complicated.
   std::ostringstream os;
   os << "### Theta sketch summary:" << std::endl;
   os << "   num retained entries : " << this->get_num_retained() << std::endl;
   os << "   seed hash            : " << this->get_seed_hash() << std::endl;
   os << "   empty?               : " << (this->is_empty() ? "true" : "false") << std::endl;
   os << "   ordered?             : " << (this->is_ordered() ? "true" : "false") << std::endl;
   os << "   estimation mode?     : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
   os << "   theta (fraction)     : " << this->get_theta() << std::endl;
   os << "   theta (raw 64-bit)   : " << this->get_theta64() << std::endl;
   os << "   estimate             : " << this->get_estimate() << std::endl;
   os << "   lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
   os << "   upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
   print_specifics(os);
   os << "### End sketch summary" << std::endl;
   if (print_details) {
       print_items(os);
   }
   return string<A>(os.str().c_str(), this->get_allocator());
 }

 template<typename A>
 void theta_sketch_alloc<A>::print_items(std::ostringstream& os) const {
     os << "### Retained entries" << std::endl;
     for (const auto& hash: *this) {
       os << hash << std::endl;
     }
     os << "### End retained entries" << std::endl;
 }


 // update sketch

 template<typename A, template<typename, typename, typename> class T>
 update_theta_sketch_alloc<A,T>::update_theta_sketch_alloc(theta_table&& table):
 table_{table}
 {}

 template<typename A, template<typename, typename, typename> class T>
 A update_theta_sketch_alloc<A,T>::get_allocator() const {
   return table_.allocator_;
 }

 template<typename A, template<typename, typename, typename> class T>
 bool update_theta_sketch_alloc<A,T>::is_empty() const {
   return table_.is_empty_;
 }

 template<typename A, template<typename, typename, typename> class T>
 bool update_theta_sketch_alloc<A,T>::is_ordered() const {
   return table_.num_entries_ > 1 ? false : true;
 }

 template<typename A, template<typename, typename, typename> class T>
 uint64_t update_theta_sketch_alloc<A,T>::get_theta64() const {
   return is_empty() ? theta_constants::MAX_THETA : table_.theta_;
 }

 template<typename A, template<typename, typename, typename> class T>
 uint32_t update_theta_sketch_alloc<A,T>::get_num_retained() const {
   return table_.num_entries_;
 }

 template<typename A, template<typename, typename, typename> class T>
 uint16_t update_theta_sketch_alloc<A,T>::get_seed_hash() const {
   return compute_seed_hash(table_.seed_);
 }

 template<typename A, template<typename, typename, typename> class T>
 uint8_t update_theta_sketch_alloc<A,T>::get_lg_k() const {
   return table_.lg_nom_size_;
 }

 template<typename A, template<typename, typename, typename> class T>
 auto update_theta_sketch_alloc<A,T>::get_rf() const -> resize_factor {
   return table_.rf_;
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(uint64_t value) {
   update(&value, sizeof(value));
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(int64_t value) {
   update(&value, sizeof(value));
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(uint32_t value) {
   update(static_cast<int32_t>(value));
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(int32_t value) {
   update(static_cast<int64_t>(value));
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(uint16_t value) {
   update(static_cast<int16_t>(value));
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(int16_t value) {
   update(static_cast<int64_t>(value));
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(uint8_t value) {
   update(static_cast<int8_t>(value));
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(int8_t value) {
   update(static_cast<int64_t>(value));
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(double value) {
   update(canonical_double(value));
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(float value) {
   update(static_cast<double>(value));
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(const std::string& value) {
   if (value.empty()) return;
   update(value.c_str(), value.length());
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::update(const void* data, size_t length) {
   const uint64_t hash = table_.hash_and_screen(data, length);
   if (hash == 0) return;
   auto result = table_.find(hash);
   if (!result.second) {
     table_.insert(result.first, hash);
   }
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::trim() {
   table_.trim();
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::reset() {
   table_.reset();
 }

 template<typename A, template<typename, typename, typename> class T>
 auto update_theta_sketch_alloc<A,T>::begin() -> iterator {
   return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }

 template<typename A, template<typename, typename, typename> class T>
 auto update_theta_sketch_alloc<A,T>::end() -> iterator {
   return iterator(nullptr, 0, 1 << table_.lg_cur_size_);
 }

 template<typename A, template<typename, typename, typename> class T>
 auto update_theta_sketch_alloc<A,T>::begin() const -> const_iterator {
   return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }

 template<typename A, template<typename, typename, typename> class T>
 auto update_theta_sketch_alloc<A,T>::end() const -> const_iterator {
   return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
 }

 template<typename A, template<typename, typename, typename> class T>
 compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A,T>::compact(bool ordered) const {
   return compact_theta_sketch_alloc<A>(*this, ordered);
 }

 template<typename A, template<typename, typename, typename> class T>
 void update_theta_sketch_alloc<A,T>::print_specifics(std::ostringstream& os) const {
   os << "   lg nominal size      : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
   os << "   lg current size      : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
   os << "   resize factor        : " << (1 << table_.rf_) << std::endl;
 }

 // builder

 template<typename A, template<typename, typename, typename> class T>
 update_theta_sketch_alloc<A,T>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}

 template<typename A, template<typename, typename, typename> class T>
 update_theta_sketch_alloc<A,T> update_theta_sketch_alloc<A,T>::builder::build() const {
   return update_theta_sketch_alloc(ThetaTable(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_));
 }

 // compact sketch

 template<typename A>
 template<typename Other>
 compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const Other& other, bool ordered):
 is_empty_(other.is_empty()),
 is_ordered_(other.is_ordered() || ordered),
 seed_hash_(other.get_seed_hash()),
 theta_(other.get_theta64()),
 entries_(other.get_allocator())
 {
   if (!other.is_empty()) {
     entries_.reserve(other.get_num_retained());
     std::copy(other.begin(), other.end(), std::back_inserter(entries_));
     if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
   }
 }

 template<typename A>
 compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
     std::vector<uint64_t, A>&& entries):
 is_empty_(is_empty),
 is_ordered_(is_ordered || (entries.size() <= 1ULL)),
 seed_hash_(seed_hash),
 theta_(theta),
 entries_(std::move(entries))
 {}

 template<typename A>
 A compact_theta_sketch_alloc<A>::get_allocator() const {
   return entries_.get_allocator();
 }

 template<typename A>
 bool compact_theta_sketch_alloc<A>::is_empty() const {
   return is_empty_;
 }

 template<typename A>
 bool compact_theta_sketch_alloc<A>::is_ordered() const {
   return is_ordered_;
 }

 template<typename A>
 uint64_t compact_theta_sketch_alloc<A>::get_theta64() const {
   return theta_;
 }

 template<typename A>
 uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const {
   return static_cast<uint32_t>(entries_.size());
 }

 template<typename A>
 uint16_t compact_theta_sketch_alloc<A>::get_seed_hash() const {
   return seed_hash_;
 }

 template<typename A>
 auto compact_theta_sketch_alloc<A>::begin() -> iterator {
   return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
 }

 template<typename A>
 auto compact_theta_sketch_alloc<A>::end() -> iterator {
   return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
 }

 template<typename A>
 auto compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
   return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
 }

 template<typename A>
 auto compact_theta_sketch_alloc<A>::end() const -> const_iterator {
   return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
 }

 template<typename A>
 void compact_theta_sketch_alloc<A>::print_specifics(std::ostringstream&) const {}

 template<typename A>
 void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
   const uint8_t preamble_longs = this->is_estimation_mode() ? 3 : this->is_empty() || entries_.size() == 1 ? 1 : 2;
   write(os, preamble_longs);
   write(os, UNCOMPRESSED_SERIAL_VERSION);
   write(os, SKETCH_TYPE);
   write<uint16_t>(os, 0); // unused
   const uint8_t flags_byte(
     (1 << flags::IS_COMPACT) |
     (1 << flags::IS_READ_ONLY) |
     (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
     (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
   );
   write(os, flags_byte);
   write(os, get_seed_hash());
   if (preamble_longs > 1) {
     write(os, static_cast<uint32_t>(entries_.size()));
     write<uint32_t>(os, 0); // unused
   }
   if (this->is_estimation_mode()) write(os, this->theta_);
   if (entries_.size() > 0) write(os, entries_.data(), entries_.size() * sizeof(uint64_t));
 }

 template<typename A>
 auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
   const uint8_t preamble_longs = this->is_estimation_mode() ? 3 : this->is_empty() || entries_.size() == 1 ? 1 : 2;
   const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
       + sizeof(uint64_t) * entries_.size();
   vector_bytes bytes(size, 0, entries_.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;

   *ptr++ = preamble_longs;
   *ptr++ = UNCOMPRESSED_SERIAL_VERSION;
   *ptr++ = SKETCH_TYPE;
   ptr += sizeof(uint16_t); // unused
   const uint8_t flags_byte(
     (1 << flags::IS_COMPACT) |
     (1 << flags::IS_READ_ONLY) |
     (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
     (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
   );
   *ptr++ = flags_byte;
   ptr += copy_to_mem(get_seed_hash(), ptr);
   if (preamble_longs > 1) {
     ptr += copy_to_mem(static_cast<uint32_t>(entries_.size()), ptr);
     ptr += sizeof(uint32_t); // unused
   }
   if (this->is_estimation_mode()) ptr += copy_to_mem(theta_, ptr);
   if (entries_.size() > 0) ptr += copy_to_mem(entries_.data(), ptr, entries_.size() * sizeof(uint64_t));
   return bytes;
 }

 template<typename A>
 bool compact_theta_sketch_alloc<A>::is_suitable_for_compression() const {
   if (!this->is_ordered() || entries_.size() == 0 ||
         (entries_.size() == 1 && !this->is_estimation_mode())) return false;
   return true;
 }

 template<typename A>
 void compact_theta_sketch_alloc<A>::serialize_compressed(std::ostream& os) const {
   if (is_suitable_for_compression()) return serialize_version_4(os);
   return serialize(os);
 }

 template<typename A>
 auto compact_theta_sketch_alloc<A>::serialize_compressed(unsigned header_size_bytes) const -> vector_bytes {
   if (is_suitable_for_compression()) return serialize_version_4(header_size_bytes);
   return serialize(header_size_bytes);
 }

 template<typename A>
 uint8_t compact_theta_sketch_alloc<A>::compute_min_leading_zeros() const {
   // compression is based on leading zeros in deltas between ordered hash values
   // assumes ordered sketch
   uint64_t previous = 0;
   uint64_t ored = 0;
   for (const uint64_t entry: entries_) {
     const uint64_t delta = entry - previous;
     ored |= delta;
     previous = entry;
   }
   return count_leading_zeros_in_u64(ored);
 }

 template<typename A>
 void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os) const {
   const uint8_t preamble_longs = this->is_estimation_mode() ? 2 : 1;
   const uint8_t entry_bits = 64 - compute_min_leading_zeros();

   // store num_entries as whole bytes since whole-byte blocks will follow (most probably)
   const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(static_cast<uint32_t>(entries_.size())));

   write(os, preamble_longs);
   write(os, COMPRESSED_SERIAL_VERSION);
   write(os, SKETCH_TYPE);
   write(os, entry_bits);
   write(os, num_entries_bytes);
   const uint8_t flags_byte(
     (1 << flags::IS_COMPACT) |
     (1 << flags::IS_READ_ONLY) |
     (1 << flags::IS_ORDERED)
   );
   write(os, flags_byte);
   write(os, get_seed_hash());
   if (this->is_estimation_mode()) write(os, this->theta_);
   uint32_t num_entries = static_cast<uint32_t>(entries_.size());
   for (unsigned i = 0; i < num_entries_bytes; ++i) {
     write<uint8_t>(os, num_entries & 0xff);
     num_entries >>= 8;
   }

   uint64_t previous = 0;
   uint64_t deltas[8];
   vector_bytes buffer(entry_bits, 0, entries_.get_allocator()); // block of 8 entries takes entry_bits bytes

   // pack blocks of 8 deltas
   unsigned i;
   for (i = 0; i + 7 < entries_.size(); i += 8) {
     for (unsigned j = 0; j < 8; ++j) {
       deltas[j] = entries_[i + j] - previous;
       previous = entries_[i + j];
     }
     pack_bits_block8(deltas, buffer.data(), entry_bits);
     write(os, buffer.data(), buffer.size());
   }

   // pack extra deltas if fewer than 8 of them left
   if (i < entries_.size()) {
     uint8_t offset = 0;
     uint8_t* ptr = buffer.data();
     for (; i < entries_.size(); ++i) {
       const uint64_t delta = entries_[i] - previous;
       previous = entries_[i];
       offset = pack_bits(delta, entry_bits, ptr, offset);
     }
     write(os, buffer.data(), ptr - buffer.data());
   }
 }

 template<typename A>
 auto compact_theta_sketch_alloc<A>::serialize_version_4(unsigned header_size_bytes) const -> vector_bytes {
   const uint8_t preamble_longs = this->is_estimation_mode() ? 2 : 1;
   const uint8_t entry_bits = 64 - compute_min_leading_zeros();
   const size_t compressed_bits = entry_bits * entries_.size();

   // store num_entries as whole bytes since whole-byte blocks will follow (most probably)
   const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(static_cast<uint32_t>(entries_.size())));

   const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + num_entries_bytes
       + whole_bytes_to_hold_bits(compressed_bits);
   vector_bytes bytes(size, 0, entries_.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;

   *ptr++ = preamble_longs;
   *ptr++ = COMPRESSED_SERIAL_VERSION;
   *ptr++ = SKETCH_TYPE;
   *ptr++ = entry_bits;
   *ptr++ = num_entries_bytes;
   const uint8_t flags_byte(
     (1 << flags::IS_COMPACT) |
     (1 << flags::IS_READ_ONLY) |
     (1 << flags::IS_ORDERED)
   );
   *ptr++ = flags_byte;
   ptr += copy_to_mem(get_seed_hash(), ptr);
   if (this->is_estimation_mode()) {
     ptr += copy_to_mem(theta_, ptr);
   }
   uint32_t num_entries = static_cast<uint32_t>(entries_.size());
   for (unsigned i = 0; i < num_entries_bytes; ++i) {
     *ptr++ = num_entries & 0xff;
     num_entries >>= 8;
   }

   uint64_t previous = 0;
   uint64_t deltas[8];

   // pack blocks of 8 deltas
   unsigned i;
   for (i = 0; i + 7 < entries_.size(); i += 8) {
     for (unsigned j = 0; j < 8; ++j) {
       deltas[j] = entries_[i + j] - previous;
       previous = entries_[i + j];
     }
     pack_bits_block8(deltas, ptr, entry_bits);
     ptr += entry_bits;
   }

   // pack extra deltas if fewer than 8 of them left
   uint8_t offset = 0;
   for (; i < entries_.size(); ++i) {
     const uint64_t delta = entries_[i] - previous;
     previous = entries_[i];
     offset = pack_bits(delta, entry_bits, ptr, offset);
   }
   return bytes;
 }

 template<typename A>
 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
   const auto preamble_longs = read<uint8_t>(is);
   const auto serial_version = read<uint8_t>(is);
   const auto type = read<uint8_t>(is);
   checker<true>::check_sketch_type(type, SKETCH_TYPE);
   switch (serial_version) {
     case 4:
       return deserialize_v4(preamble_longs, is, seed, allocator);
     case 3:
       return deserialize_v3(preamble_longs, is, seed, allocator);
     case 1:
       return deserialize_v1(preamble_longs, is, seed, allocator);
     case 2:
       return deserialize_v2(preamble_longs, is, seed, allocator);
     default:
       throw std::invalid_argument("unexpected sketch serialization version " + std::to_string(serial_version));
   }
 }

 template<typename A>
 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v1(
     uint8_t, std::istream& is, uint64_t seed, const A& allocator)
 {
   const auto seed_hash = compute_seed_hash(seed);
   read<uint8_t>(is); // unused
   read<uint32_t>(is); // unused
   const auto num_entries = read<uint32_t>(is);
   read<uint32_t>(is); //unused
   const auto theta = read<uint64_t>(is);
   std::vector<uint64_t, A> entries(num_entries, 0, allocator);
   bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
   if (!is_empty) read(is, entries.data(), sizeof(uint64_t) * entries.size());
   if (!is.good()) throw std::runtime_error("error reading from std::istream");
   return compact_theta_sketch_alloc(is_empty, true, seed_hash, theta, std::move(entries));
 }

 template<typename A>
 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v2(
     uint8_t preamble_longs, std::istream& is, uint64_t seed, const A& allocator)
 {
   read<uint8_t>(is); // unused
   read<uint16_t>(is); // unused
   const uint16_t seed_hash = read<uint16_t>(is);
   checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
   if (preamble_longs == 1) {
     if (!is.good()) throw std::runtime_error("error reading from std::istream");
     std::vector<uint64_t, A> entries(0, 0, allocator);
     return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
   } else if (preamble_longs == 2) {
     const uint32_t num_entries = read<uint32_t>(is);
     read<uint32_t>(is); // unused
     std::vector<uint64_t, A> entries(num_entries, 0, allocator);
     if (num_entries == 0) {
       return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
     }
     read(is, entries.data(), entries.size() * sizeof(uint64_t));
     if (!is.good()) throw std::runtime_error("error reading from std::istream");
     return compact_theta_sketch_alloc(false, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
   } else if (preamble_longs == 3) {
     const uint32_t num_entries = read<uint32_t>(is);
     read<uint32_t>(is); // unused
     const auto theta = read<uint64_t>(is);
     bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
     std::vector<uint64_t, A> entries(num_entries, 0, allocator);
     if (is_empty) {
       if (!is.good()) throw std::runtime_error("error reading from std::istream");
       return compact_theta_sketch_alloc(true, true, seed_hash, theta, std::move(entries));
     } else {
       read(is, entries.data(), sizeof(uint64_t) * entries.size());
       if (!is.good()) throw std::runtime_error("error reading from std::istream");
       return compact_theta_sketch_alloc(false, true, seed_hash, theta, std::move(entries));
     }
   } else {
     throw std::invalid_argument(std::to_string(preamble_longs) + " longs of premable, but expected 1, 2, or 3");
   }
 }

 template<typename A>
 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v3(
     uint8_t preamble_longs, std::istream& is, uint64_t seed, const A& allocator)
 {
   read<uint16_t>(is); // unused
   const auto flags_byte = read<uint8_t>(is);
   const auto seed_hash = read<uint16_t>(is);
   const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
   if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
   uint64_t theta = theta_constants::MAX_THETA;
   uint32_t num_entries = 0;
   if (!is_empty) {
     if (preamble_longs == 1) {
       num_entries = 1;
     } else {
       num_entries = read<uint32_t>(is);
       read<uint32_t>(is); // unused
       if (preamble_longs > 2) theta = read<uint64_t>(is);
     }
   }
   std::vector<uint64_t, A> entries(num_entries, 0, allocator);
   if (!is_empty) read(is, entries.data(), sizeof(uint64_t) * entries.size());
   const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
   if (!is.good()) throw std::runtime_error("error reading from std::istream");
   return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
 }

 template<typename A>
 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v4(
     uint8_t preamble_longs, std::istream& is, uint64_t seed, const A& allocator)
 {
   const auto entry_bits = read<uint8_t>(is);
   const auto num_entries_bytes = read<uint8_t>(is);
   const auto flags_byte = read<uint8_t>(is);
   const auto seed_hash = read<uint16_t>(is);
   const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
   if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
   uint64_t theta = theta_constants::MAX_THETA;
   if (preamble_longs > 1) theta = read<uint64_t>(is);
   uint32_t num_entries = 0;
   for (unsigned i = 0; i < num_entries_bytes; ++i) {
     num_entries |= read<uint8_t>(is) << (i << 3);
   }
   vector_bytes buffer(entry_bits, 0, allocator); // block of 8 entries takes entry_bits bytes
   std::vector<uint64_t, A> entries(num_entries, 0, allocator);

   // unpack blocks of 8 deltas
   unsigned i;
   for (i = 0; i + 7 < num_entries; i += 8) {
     read(is, buffer.data(), buffer.size());
     unpack_bits_block8(&entries[i], buffer.data(), entry_bits);
   }
   // unpack extra deltas if fewer than 8 of them left
   if (i < num_entries) read(is, buffer.data(), whole_bytes_to_hold_bits((num_entries - i) * entry_bits));
   if (!is.good()) throw std::runtime_error("error reading from std::istream");
   const uint8_t* ptr = buffer.data();
   uint8_t offset = 0;
   for (; i < num_entries; ++i) {
     offset = unpack_bits(entries[i], entry_bits, ptr, offset);
   }
   // undo deltas
   uint64_t previous = 0;
   for (i = 0; i < num_entries; ++i) {
     entries[i] += previous;
     previous = entries[i];
   }
   const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
   return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
 }

 template<typename A>
 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
   auto data = compact_theta_sketch_parser<true>::parse(bytes, size, seed, false);
   if (data.entry_bits == 64) { // versions 1 to 3
     const uint64_t* entries = reinterpret_cast<const uint64_t*>(data.entries_start_ptr);
     return compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.theta,
         std::vector<uint64_t, A>(entries, entries + data.num_entries, allocator));
   } else { // version 4
     std::vector<uint64_t, A> entries(data.num_entries, 0, allocator);
     const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data.entries_start_ptr);
     // unpack blocks of 8 deltas
     unsigned i;
     for (i = 0; i + 7 < data.num_entries; i += 8) {
       unpack_bits_block8(&entries[i], ptr, data.entry_bits);
       ptr += data.entry_bits;
     }
     // unpack extra deltas if fewer than 8 of them left
     uint8_t offset = 0;
     for (; i < data.num_entries; ++i) {
       offset = unpack_bits(entries[i], data.entry_bits, ptr, offset);
     }
     // undo deltas
     uint64_t previous = 0;
     for (i = 0; i < data.num_entries; ++i) {
       entries[i] += previous;
       previous = entries[i];
     }
     return compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.theta, std::move(entries));
   }
 }

 // wrapped compact sketch

 template<typename A>
 wrapped_compact_theta_sketch_alloc<A>::wrapped_compact_theta_sketch_alloc(const data_type& data):
 data_(data)
 {}

 template<typename A>
 const wrapped_compact_theta_sketch_alloc<A> wrapped_compact_theta_sketch_alloc<A>::wrap(const void* bytes, size_t size, uint64_t seed, bool dump_on_error) {
   return wrapped_compact_theta_sketch_alloc(compact_theta_sketch_parser<true>::parse(bytes, size, seed, dump_on_error));
 }

 template<typename A>
 A wrapped_compact_theta_sketch_alloc<A>::get_allocator() const {
   return A();
 }

 template<typename A>
 bool wrapped_compact_theta_sketch_alloc<A>::is_empty() const {
   return data_.is_empty;
 }

 template<typename A>
 bool wrapped_compact_theta_sketch_alloc<A>::is_ordered() const {
   return data_.is_ordered;
 }

 template<typename A>
 uint64_t wrapped_compact_theta_sketch_alloc<A>::get_theta64() const {
   return data_.theta;
 }

 template<typename A>
 uint32_t wrapped_compact_theta_sketch_alloc<A>::get_num_retained() const {
   return data_.num_entries;
 }

 template<typename A>
 uint16_t wrapped_compact_theta_sketch_alloc<A>::get_seed_hash() const {
   return data_.seed_hash;
 }

 template<typename A>
 auto wrapped_compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
   return const_iterator(data_.entries_start_ptr, data_.entry_bits, data_.num_entries, 0);
 }

 template<typename A>
 auto wrapped_compact_theta_sketch_alloc<A>::end() const -> const_iterator {
   return const_iterator(data_.entries_start_ptr, data_.entry_bits, data_.num_entries, data_.num_entries);
 }

 template<typename A>
 void wrapped_compact_theta_sketch_alloc<A>::print_specifics(std::ostringstream&) const {}

 template<typename A>
 void wrapped_compact_theta_sketch_alloc<A>::print_items(std::ostringstream& os) const {
     os << "### Retained entries" << std::endl;
     for (const auto hash: *this) {
       os << hash << std::endl;
     }
     os << "### End retained entries" << std::endl;
 }

 // assumes index == 0 or index == num_entries
 template<typename Allocator>
 wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::const_iterator(
     const void* ptr, uint8_t entry_bits, uint32_t num_entries, uint32_t index):
 ptr_(ptr),
 entry_bits_(entry_bits),
 num_entries_(num_entries),
 index_(index),
 previous_(0),
 is_block_mode_(num_entries_ >= 8),
 buf_i_(0),
 offset_(0)
 {
   if (entry_bits == 64) { // no compression
     ptr_ = reinterpret_cast<const uint64_t*>(ptr) + index;
   } else if (index < num_entries) {
     if (is_block_mode_) {
       unpack_bits_block8(buffer_, reinterpret_cast<const uint8_t*>(ptr_), entry_bits_);
       ptr_ = reinterpret_cast<const uint8_t*>(ptr_) + entry_bits_;
       for (int i = 0; i < 8; ++i) {
         buffer_[i] += previous_;
         previous_ = buffer_[i];
       }
     } else {
       offset_ = unpack_bits(buffer_[0], entry_bits_, reinterpret_cast<const uint8_t*&>(ptr_), offset_);
       buffer_[0] += previous_;
       previous_ = buffer_[0];
     }
   }
 }

 template<typename Allocator>
 auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator++() -> const_iterator& {
   if (entry_bits_ == 64) { // no compression
     ptr_ = reinterpret_cast<const uint64_t*>(ptr_) + 1;
     return *this;
   }
   ++index_;
   if (index_ < num_entries_) {
     if (is_block_mode_) {
       ++buf_i_;
       if (buf_i_ == 8) {
         buf_i_ = 0;
         if (index_ + 8 < num_entries_) {
           unpack_bits_block8(buffer_, reinterpret_cast<const uint8_t*>(ptr_), entry_bits_);
           ptr_ = reinterpret_cast<const uint8_t*>(ptr_) + entry_bits_;
           for (int i = 0; i < 8; ++i) {
             buffer_[i] += previous_;
             previous_ = buffer_[i];
           }
         } else {
           is_block_mode_ = false;
           offset_ = unpack_bits(buffer_[0], entry_bits_, reinterpret_cast<const uint8_t*&>(ptr_), offset_);
           buffer_[0] += previous_;
           previous_ = buffer_[0];
         }
       }
     } else {
       offset_ = unpack_bits(buffer_[0], entry_bits_, reinterpret_cast<const uint8_t*&>(ptr_), offset_);
       buffer_[0] += previous_;
       previous_ = buffer_[0];
     }
   }
   return *this;
 }

 template<typename Allocator>
 auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator++(int) -> const_iterator {
   const_iterator tmp(*this);
   operator++();
   return tmp;
 }

 template<typename Allocator>
 bool wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator!=(const const_iterator& other) const {
   if (entry_bits_ == 64) return ptr_ != other.ptr_;
   return index_ != other.index_;
 }

 template<typename Allocator>
 bool wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator==(const const_iterator& other) const {
   if (entry_bits_ == 64) return ptr_ == other.ptr_;
   return index_ == other.index_;
 }

 template<typename Allocator>
 auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator*() const -> reference {
   if (entry_bits_ == 64) return *reinterpret_cast<const uint64_t*>(ptr_);
   return buffer_[buf_i_];
 }

 template<typename Allocator>
 auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator->() const -> pointer {
   if (entry_bits_ == 64) return reinterpret_cast<const uint64_t*>(ptr_);
   return buffer_ + buf_i_;
 }

 } /* namespace datasketches */

 #endif