Merge pull request #427 from apache/tdigest

removed unused code
diff --git a/.github/workflows/sanitize.yml b/.github/workflows/sanitize.yml
new file mode 100644
index 0000000..6659b79
--- /dev/null
+++ b/.github/workflows/sanitize.yml
@@ -0,0 +1,28 @@
+name: Sanitize
+
+on:
+  pull_request:
+    branches:
+      - master
+  workflow_dispatch:
+
+env:
+  BUILD_TYPE: Release
+
+jobs:
+  build:
+    name: Address Sanitizer
+    runs-on: ubuntu-latest
+            
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          persist-credentials: false
+      - name: Configure
+        run: cmake -B build -S . -DSANITIZE=address
+      - name: Build C++ unit tests
+        run: cmake --build build --config Release
+      - name: Run C++ tests
+        run: cmake --build build --config Release --target test
diff --git a/NOTICE b/NOTICE
index 2f06d9b..977cbb5 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,5 +1,5 @@
 Apache DataSketches C++
-Copyright 2023 The Apache Software Foundation
+Copyright 2024 The Apache Software Foundation
 
 Copyright 2015-2018 Yahoo Inc.
 Copyright 2019-2020 Verizon Media
diff --git a/cpc/include/cpc_compressor.hpp b/cpc/include/cpc_compressor.hpp
index ffcf776..40b84f1 100644
--- a/cpc/include/cpc_compressor.hpp
+++ b/cpc/include/cpc_compressor.hpp
@@ -44,6 +44,10 @@
 template<typename A>
 inline cpc_compressor<A>& get_compressor();
 
+// function called atexit to clean up compression tables
+template<typename A>
+void destroy_compressor();
+
 template<typename A>
 class cpc_compressor {
 public:
@@ -109,8 +113,10 @@
   };
 
   cpc_compressor();
-  template<typename T> friend cpc_compressor<T>& get_compressor();
+  friend cpc_compressor& get_compressor<A>();
+  
   ~cpc_compressor();
+  friend void destroy_compressor<A>();
 
   void make_decoding_tables(); // call this at startup
   void free_decoding_tables(); // call this at the end
diff --git a/cpc/include/cpc_compressor_impl.hpp b/cpc/include/cpc_compressor_impl.hpp
index e1e75d3..062e2e0 100644
--- a/cpc/include/cpc_compressor_impl.hpp
+++ b/cpc/include/cpc_compressor_impl.hpp
@@ -22,9 +22,11 @@
 #ifndef CPC_COMPRESSOR_IMPL_HPP_
 #define CPC_COMPRESSOR_IMPL_HPP_
 
+#include <cstdlib>
 #include <memory>
 #include <stdexcept>
 
+#include "common_defs.hpp"
 #include "compression_data.hpp"
 #include "cpc_util.hpp"
 #include "cpc_common.hpp"
@@ -36,9 +38,17 @@
 template<typename A>
 cpc_compressor<A>& get_compressor() {
   static cpc_compressor<A>* instance = new cpc_compressor<A>(); // use new for global initialization
+  static int reg_result = std::atexit(destroy_compressor<A>); // just to clean up a little more nicely; don't worry if it fails
+  unused(reg_result);
   return *instance;
 }
 
+// register to call compressor destructor at exit
+template<typename A>
+void destroy_compressor() {
+  delete std::addressof(get_compressor<A>());
+}
+
 template<typename A>
 cpc_compressor<A>::cpc_compressor() {
   make_decoding_tables();
diff --git a/sampling/include/ebpps_sample.hpp b/sampling/include/ebpps_sample.hpp
index 97c8b56..7ddb282 100644
--- a/sampling/include/ebpps_sample.hpp
+++ b/sampling/include/ebpps_sample.hpp
@@ -37,14 +37,14 @@
   public:
     explicit ebpps_sample(uint32_t k, const A& allocator = A());
 
-    // constructor used to create a sample to merge one item
-    template<typename TT>
-    ebpps_sample(TT&& item, double theta, const A& allocator = A());
-
     // for deserialization
     class items_deleter;
     ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item, double c, const A& allocator = A());
 
+    // used instead of having a single-item constructor for update/merge calls
+    template<typename TT>
+    void replace_content(TT&& item, double theta);
+
     void reset();
     void downsample(double theta);
 
diff --git a/sampling/include/ebpps_sample_impl.hpp b/sampling/include/ebpps_sample_impl.hpp
index f379841..88a86ae 100644
--- a/sampling/include/ebpps_sample_impl.hpp
+++ b/sampling/include/ebpps_sample_impl.hpp
@@ -42,22 +42,6 @@
   }
 
 template<typename T, typename A>
-template<typename TT>
-ebpps_sample<T,A>::ebpps_sample(TT&& item, double theta, const A& allocator) :
-  allocator_(allocator),
-  c_(theta),
-  partial_item_(),
-  data_(allocator)
-  {
-    if (theta == 1.0) {
-      data_.reserve(1);
-      data_.emplace_back(std::forward<TT>(item));
-    } else {
-      partial_item_.emplace(std::forward<TT>(item));
-    }
-  }
-
-template<typename T, typename A>
 ebpps_sample<T,A>::ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item, double c, const A& allocator) :
   allocator_(allocator),
   c_(c),
@@ -66,6 +50,19 @@
   {}
 
 template<typename T, typename A>
+template<typename TT>
+void ebpps_sample<T,A>::replace_content(TT&& item, double theta) {
+  c_ = theta;
+  data_.clear();
+  partial_item_.reset();
+  if (theta == 1.0) {
+    data_.emplace_back(std::forward<TT>(item));
+  } else {
+    partial_item_.emplace(std::forward<TT>(item));
+  }
+}
+
+template<typename T, typename A>
 auto ebpps_sample<T,A>::get_sample() const -> result_type {
   double unused;
   const double c_frac = std::modf(c_, &unused);
diff --git a/sampling/include/ebpps_sketch.hpp b/sampling/include/ebpps_sketch.hpp
index 51bcc4f..038b5a3 100644
--- a/sampling/include/ebpps_sketch.hpp
+++ b/sampling/include/ebpps_sketch.hpp
@@ -256,6 +256,8 @@
 
     ebpps_sample<T,A> sample_;      // Object holding the current state of the sample
 
+    ebpps_sample<T,A> tmp_;         // Temporary sample of size 1 used in updates
+
     // handles merge after ensuring other.cumulative_wt_ <= this->cumulative_wt_
     // so we can send items in individually
     template<typename O>
diff --git a/sampling/include/ebpps_sketch_impl.hpp b/sampling/include/ebpps_sketch_impl.hpp
index e4dc019..299b7f5 100644
--- a/sampling/include/ebpps_sketch_impl.hpp
+++ b/sampling/include/ebpps_sketch_impl.hpp
@@ -40,7 +40,8 @@
   cumulative_wt_(0.0),
   wt_max_(0.0),
   rho_(1.0),
-  sample_(check_k(k), allocator)
+  sample_(check_k(k), allocator),
+  tmp_(1, allocator)
   {}
 
 template<typename T, typename A>
@@ -53,7 +54,8 @@
   cumulative_wt_(cumulative_wt),
   wt_max_(wt_max),
   rho_(rho),
-  sample_(sample)
+  sample_(sample),
+  tmp_(1, allocator)
   {}
 
 template<typename T, typename A>
@@ -148,9 +150,8 @@
   if (cumulative_wt_ > 0.0)
     sample_.downsample(new_rho / rho_);
   
-  ebpps_sample<T,A> tmp(conditional_forward<FwdItem>(item), new_rho * weight, allocator_);
-
-  sample_.merge(tmp);
+  tmp_.replace_content(conditional_forward<FwdItem>(item), new_rho * weight);
+  sample_.merge(tmp_);
 
   cumulative_wt_ = new_cum_wt;
   wt_max_ = new_wt_max;
@@ -240,9 +241,8 @@
     if (cumulative_wt_ > 0.0)
       sample_.downsample(new_rho / rho_);
   
-    ebpps_sample<T,A> tmp(conditional_forward<O>(items[i]), new_rho * avg_wt, allocator_);
-
-    sample_.merge(tmp);
+    tmp_.replace_content(conditional_forward<O>(items[i]), new_rho * avg_wt);
+    sample_.merge(tmp_);
 
     cumulative_wt_ = new_cum_wt;
     rho_ = new_rho;
@@ -259,9 +259,8 @@
     if (cumulative_wt_ > 0.0)
       sample_.downsample(new_rho / rho_);
   
-    ebpps_sample<T,A> tmp(conditional_forward<O>(other_sample.get_partial_item()), new_rho * other_c_frac * avg_wt, allocator_);
-
-    sample_.merge(tmp);
+    tmp_.replace_content(conditional_forward<O>(other_sample.get_partial_item()), new_rho * other_c_frac * avg_wt);
+    sample_.merge(tmp_);
 
     cumulative_wt_ = new_cum_wt;
     rho_ = new_rho;
diff --git a/sampling/test/ebpps_sample_test.cpp b/sampling/test/ebpps_sample_test.cpp
index c83cded..e39cba7 100644
--- a/sampling/test/ebpps_sample_test.cpp
+++ b/sampling/test/ebpps_sample_test.cpp
@@ -42,14 +42,15 @@
 
 TEST_CASE("ebpps sample: pre-initialized", "[ebpps_sketch]") {
   double theta = 1.0;
-  ebpps_sample<int> sample = ebpps_sample<int>(-1, theta);
+  ebpps_sample<int> sample(1);
+  sample.replace_content(-1, theta);
   REQUIRE(sample.get_c() == theta);
   REQUIRE(sample.get_num_retained_items() == 1);
   REQUIRE(sample.get_sample().size() == 1);
   REQUIRE(sample.has_partial_item() == false);
   
   theta = 1e-300;
-  sample = ebpps_sample<int>(-1, theta);
+  sample.replace_content(-1, theta);
   REQUIRE(sample.get_c() == theta);
   REQUIRE(sample.get_num_retained_items() == 1);
   REQUIRE(sample.get_sample().size() == 0); // assuming the random number is > 1e-300
@@ -57,7 +58,8 @@
 }
 
 TEST_CASE("ebpps sample: downsampling", "[ebpps_sketch]") {
-  ebpps_sample<char> sample = ebpps_sample<char>('a', 1.0);
+  ebpps_sample<char> sample(1);
+  sample.replace_content('a', 1.0);
 
   sample.downsample(2.0); // no-op
   REQUIRE(sample.get_c() == 1.0);
@@ -121,8 +123,9 @@
   uint32_t k = 8;
   ebpps_sample<int> sample = ebpps_sample<int>(k);
   
+  ebpps_sample<int> s(1);
   for (uint32_t i = 1; i <= k; ++i) {
-    ebpps_sample<int> s = ebpps_sample<int>(i, 1.0);
+    s.replace_content(i, 1.0);
     sample.merge(s);
     REQUIRE(sample.get_c() == static_cast<double>(i));
     REQUIRE(sample.get_num_retained_items() == i);