diff --git a/cpc/src/fm85.cpp b/cpc/src/fm85.cpp
index e7544e4..1055fb1 100644
--- a/cpc/src/fm85.cpp
+++ b/cpc/src/fm85.cpp
@@ -61,20 +61,22 @@
 // This is to support custom allocator and deallocator
 void fm85InitAD (void* (*alloc)(size_t), void (*dealloc)(void*)) {
   if (!fm85Initialized) {
-    fm85Initialized = 1;
     fm85alloc = alloc;
     fm85free = dealloc;
     fillByteLeadingZerosTable();
     fillByteTrailingZerosTable();
     makeTheDecodingTables();
-    fillInvPow2Tab ();
-    fillKxpByteLookup ();
+    fillInvPow2Tab();
+    fillKxpByteLookup();
+    fm85Initialized = 1; // set last: the flag flips only after every setup call above has returned
   }
 }
 
 void fm85Clean (void) {
-  freeTheDecodingTables();
-  fm85Initialized = 0;
+  if (fm85Initialized) { // skip the free entirely when the initialized flag is not set
+    freeTheDecodingTables();
+    fm85Initialized = 0; // cleared so that fm85InitAD() runs its setup again on the next call
+  }
 }
 
 /*******************************************************/
diff --git a/kll/include/kll_sketch.hpp b/kll/include/kll_sketch.hpp
index 4663af5..c7ff7a6 100644
--- a/kll/include/kll_sketch.hpp
+++ b/kll/include/kll_sketch.hpp
@@ -281,7 +281,7 @@
     kll_sketch(uint16_t k, uint8_t flags_byte, const void* bytes, size_t size);
 
     // common update code
-    uint32_t internal_update(const T& value);
+    inline uint32_t internal_update(const T& value);
 
     // The following code is only valid in the special case of exactly reaching capacity while updating.
     // It cannot be used while merging, while reducing k, or anything else.
@@ -416,14 +416,14 @@
 
 template<typename T, typename C, typename S, typename A>
 void kll_sketch<T, C, S, A>::update(const T& value) {
-  const uint32_t next_pos = internal_update(value);
-  new (&items_[next_pos]) T(value);
+  const uint32_t index = internal_update(value); // position in items_ where the new item goes
+  new (&items_[index]) T(value); // copy-construct the item in place at that position
 }
 
 template<typename T, typename C, typename S, typename A>
 void kll_sketch<T, C, S, A>::update(T&& value) {
-  const uint32_t next_pos = internal_update(value);
-  new (&items_[next_pos]) T(std::move(value));
+  const uint32_t index = internal_update(value); // value is a named reference here, so it is passed as an lvalue and not moved yet
+  new (&items_[index]) T(std::move(value)); // move-construct the item in place at the returned position
 }
 
 template<typename T, typename C, typename S, typename A>
@@ -438,9 +438,7 @@
   if (levels_[0] == 0) compress_while_updating();
   n_++;
   is_level_zero_sorted_ = false;
-  const uint32_t next_pos(levels_[0] - 1);
-  levels_[0] = next_pos;
-  return next_pos;
+  return --levels_[0];
 }
 
 template<typename T, typename C, typename S, typename A>
@@ -647,7 +645,8 @@
     }
     ptr += S().serialize(ptr, &items_[levels_[0]], get_num_retained());
   }
-  if (ptr != static_cast<char*>(data_ptr.get()) + size) throw std::logic_error("serialized size mismatch");
+  const size_t delta = ptr - static_cast<const char*>(data_ptr.get());
+  if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
   return std::make_pair(std::move(data_ptr), size);
 }
 
@@ -804,7 +803,8 @@
     new (max_value_) T(items_[levels_[0]]);
   }
   is_level_zero_sorted_ = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
-  if (ptr != static_cast<const char*>(bytes) + size) throw std::logic_error("deserialized size mismatch");
+  const size_t delta = ptr - static_cast<const char*>(bytes);
+  if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
 }
 
 // The following code is only valid in the special case of exactly reaching capacity while updating.
diff --git a/kll/test/kll_sketch_test.cpp b/kll/test/kll_sketch_test.cpp
index 8e12791..c14bb03 100644
--- a/kll/test/kll_sketch_test.cpp
+++ b/kll/test/kll_sketch_test.cpp
@@ -67,7 +67,9 @@
   CPPUNIT_TEST(merge_min_value_from_other);
   CPPUNIT_TEST(merge_min_and_max_from_other);
   CPPUNIT_TEST(sketch_of_ints);
-  CPPUNIT_TEST(sketch_of_strings);
+  CPPUNIT_TEST(sketch_of_strings_stream);
+  CPPUNIT_TEST(sketch_of_strings_bytes);
+  CPPUNIT_TEST(sketch_of_strings_single_item_bytes);
   CPPUNIT_TEST(copy);
   CPPUNIT_TEST_SUITE_END();
 
@@ -502,43 +504,83 @@
     CPPUNIT_ASSERT_EQUAL(sketch.get_rank(n), sketch2.get_rank(n));
   }
 
-  void sketch_of_strings() {
-    kll_string_sketch sketch;
-    CPPUNIT_ASSERT_THROW(sketch.get_quantile(0), std::runtime_error);
-    CPPUNIT_ASSERT_THROW(sketch.get_min_value(), std::runtime_error);
-    CPPUNIT_ASSERT_THROW(sketch.get_max_value(), std::runtime_error);
-    CPPUNIT_ASSERT_EQUAL(8u, sketch.get_serialized_size_bytes());
+  void sketch_of_strings_stream() {
+    kll_string_sketch sketch1;
+    CPPUNIT_ASSERT_THROW(sketch1.get_quantile(0), std::runtime_error);
+    CPPUNIT_ASSERT_THROW(sketch1.get_min_value(), std::runtime_error);
+    CPPUNIT_ASSERT_THROW(sketch1.get_max_value(), std::runtime_error);
+    CPPUNIT_ASSERT_EQUAL(8u, sketch1.get_serialized_size_bytes()); // no updates yet: the reported serialized size is expected to be 8 bytes
 
-    const int n(1000);
-    for (int i = 0; i < n; i++) sketch.update(std::to_string(i));
+    const int n = 1000;
+    for (int i = 0; i < n; i++) sketch1.update(std::to_string(i)); // items are the decimal strings "0" .. "999"
 
-    CPPUNIT_ASSERT_EQUAL(std::string("0"), sketch.get_min_value());
-    CPPUNIT_ASSERT_EQUAL(std::string("999"), sketch.get_max_value());
+    CPPUNIT_ASSERT_EQUAL(std::string("0"), sketch1.get_min_value());
+    CPPUNIT_ASSERT_EQUAL(std::string("999"), sketch1.get_max_value());
 
     std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
-    sketch.serialize(s);
-    CPPUNIT_ASSERT_EQUAL(sketch.get_serialized_size_bytes(), (uint32_t) s.tellp());
+    sketch1.serialize(s);
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_serialized_size_bytes(), (uint32_t) s.tellp()); // bytes written to the stream must equal the size the sketch reports
     auto sketch2 = kll_string_sketch::deserialize(s);
     CPPUNIT_ASSERT_EQUAL(sketch2.get_serialized_size_bytes(), (uint32_t) s.tellg());
     CPPUNIT_ASSERT_EQUAL(s.tellp(), s.tellg());
-    CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch2.is_empty());
-    CPPUNIT_ASSERT_EQUAL(sketch.is_estimation_mode(), sketch2.is_estimation_mode());
-    CPPUNIT_ASSERT_EQUAL(sketch.get_n(), sketch2.get_n());
-    CPPUNIT_ASSERT_EQUAL(sketch.get_num_retained(), sketch2.get_num_retained());
-    CPPUNIT_ASSERT_EQUAL(sketch.get_min_value(), sketch2.get_min_value());
-    CPPUNIT_ASSERT_EQUAL(sketch.get_max_value(), sketch2.get_max_value());
-    CPPUNIT_ASSERT_EQUAL(sketch.get_normalized_rank_error(false), sketch2.get_normalized_rank_error(false));
-    CPPUNIT_ASSERT_EQUAL(sketch.get_normalized_rank_error(true), sketch2.get_normalized_rank_error(true));
-    CPPUNIT_ASSERT_EQUAL(sketch.get_quantile(0.5), sketch2.get_quantile(0.5));
-    CPPUNIT_ASSERT_EQUAL(sketch.get_rank(std::to_string(0)), sketch2.get_rank(std::to_string(0)));
-    CPPUNIT_ASSERT_EQUAL(sketch.get_rank(std::to_string(n)), sketch2.get_rank(std::to_string(n)));
+    CPPUNIT_ASSERT_EQUAL(sketch1.is_empty(), sketch2.is_empty()); // from here on: the deserialized copy must answer every query like the original
+    CPPUNIT_ASSERT_EQUAL(sketch1.is_estimation_mode(), sketch2.is_estimation_mode());
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_n(), sketch2.get_n());
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_num_retained(), sketch2.get_num_retained());
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_min_value(), sketch2.get_min_value());
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_max_value(), sketch2.get_max_value());
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(false), sketch2.get_normalized_rank_error(false));
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(true), sketch2.get_normalized_rank_error(true));
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_quantile(0.5), sketch2.get_quantile(0.5));
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(0)), sketch2.get_rank(std::to_string(0)));
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(n)), sketch2.get_rank(std::to_string(n))); // std::to_string(n) was never inserted (the loop stops at n - 1)
 
     // to take a look using hexdump
-    std::ofstream os("kll-string.bin");
-    sketch.serialize(os);
+    //std::ofstream os("kll-string.bin");
+    //sketch1.serialize(os);
 
     // debug print
-    //sketch.to_stream(std::cout);
+    //sketch1.to_stream(std::cout);
+  }
+
+  void sketch_of_strings_bytes() {
+    kll_string_sketch sketch1;
+    CPPUNIT_ASSERT_THROW(sketch1.get_quantile(0), std::runtime_error);
+    CPPUNIT_ASSERT_THROW(sketch1.get_min_value(), std::runtime_error);
+    CPPUNIT_ASSERT_THROW(sketch1.get_max_value(), std::runtime_error);
+    CPPUNIT_ASSERT_EQUAL(8u, sketch1.get_serialized_size_bytes()); // no updates yet: the reported serialized size is expected to be 8 bytes
+
+    const int n = 1000;
+    for (int i = 0; i < n; i++) sketch1.update(std::to_string(i)); // items are the decimal strings "0" .. "999"
+
+    CPPUNIT_ASSERT_EQUAL(std::string("0"), sketch1.get_min_value());
+    CPPUNIT_ASSERT_EQUAL(std::string("999"), sketch1.get_max_value());
+
+    auto data = sketch1.serialize(); // pair: first holds the byte buffer, second is its size in bytes
+    CPPUNIT_ASSERT_EQUAL((size_t) sketch1.get_serialized_size_bytes(), data.second);
+    auto sketch2 = kll_string_sketch::deserialize(data.first.get(), data.second); // rebuild a sketch from the raw bytes
+    CPPUNIT_ASSERT_EQUAL(sketch2.get_serialized_size_bytes(), (uint32_t) data.second);
+    CPPUNIT_ASSERT_EQUAL(sketch1.is_empty(), sketch2.is_empty()); // from here on: the deserialized copy must answer every query like the original
+    CPPUNIT_ASSERT_EQUAL(sketch1.is_estimation_mode(), sketch2.is_estimation_mode());
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_n(), sketch2.get_n());
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_num_retained(), sketch2.get_num_retained());
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_min_value(), sketch2.get_min_value());
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_max_value(), sketch2.get_max_value());
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(false), sketch2.get_normalized_rank_error(false));
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_normalized_rank_error(true), sketch2.get_normalized_rank_error(true));
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_quantile(0.5), sketch2.get_quantile(0.5));
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(0)), sketch2.get_rank(std::to_string(0)));
+    CPPUNIT_ASSERT_EQUAL(sketch1.get_rank(std::to_string(n)), sketch2.get_rank(std::to_string(n))); // std::to_string(n) was never inserted (the loop stops at n - 1)
+  }
+
+
+  void sketch_of_strings_single_item_bytes() {
+    kll_string_sketch sketch1;
+    sketch1.update("a"); // the sketch holds exactly one item when it is serialized
+    auto data = sketch1.serialize(); // pair: first holds the byte buffer, second is its size in bytes
+    CPPUNIT_ASSERT_EQUAL((size_t) sketch1.get_serialized_size_bytes(), data.second); // buffer size must match the size the sketch reports
+    auto sketch2 = kll_string_sketch::deserialize(data.first.get(), data.second);
+    CPPUNIT_ASSERT_EQUAL(sketch2.get_serialized_size_bytes(), (uint32_t) data.second); // the deserialized sketch must report the same size
   }
 
   void copy() {
