Merge pull request #427 from apache/tdigest
removed unused code
diff --git a/.github/workflows/sanitize.yml b/.github/workflows/sanitize.yml
new file mode 100644
index 0000000..6659b79
--- /dev/null
+++ b/.github/workflows/sanitize.yml
@@ -0,0 +1,28 @@
+name: Sanitize
+
+on:
+ pull_request:
+ branches:
+ - master
+ workflow_dispatch:
+
+env:
+ BUILD_TYPE: Release
+
+jobs:
+ build:
+ name: Address Sanitizer
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v3
+ with:
+ submodules: true
+ persist-credentials: false
+ - name: Configure
+ run: cmake -B build -S . -DSANITIZE=address
+ - name: Build C++ unit tests
+ run: cmake --build build --config Release
+ - name: Run C++ tests
+ run: cmake --build build --config Release --target test
diff --git a/NOTICE b/NOTICE
index 2f06d9b..977cbb5 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,5 +1,5 @@
Apache DataSketches C++
-Copyright 2023 The Apache Software Foundation
+Copyright 2024 The Apache Software Foundation
Copyright 2015-2018 Yahoo Inc.
Copyright 2019-2020 Verizon Media
diff --git a/cpc/include/cpc_compressor.hpp b/cpc/include/cpc_compressor.hpp
index ffcf776..40b84f1 100644
--- a/cpc/include/cpc_compressor.hpp
+++ b/cpc/include/cpc_compressor.hpp
@@ -44,6 +44,10 @@
template<typename A>
inline cpc_compressor<A>& get_compressor();
+// function called atexit to clean up compression tables
+template<typename A>
+void destroy_compressor();
+
template<typename A>
class cpc_compressor {
public:
@@ -109,8 +113,10 @@
};
cpc_compressor();
- template<typename T> friend cpc_compressor<T>& get_compressor();
+ friend cpc_compressor& get_compressor<A>();
+
~cpc_compressor();
+ friend void destroy_compressor<A>();
void make_decoding_tables(); // call this at startup
void free_decoding_tables(); // call this at the end
diff --git a/cpc/include/cpc_compressor_impl.hpp b/cpc/include/cpc_compressor_impl.hpp
index e1e75d3..062e2e0 100644
--- a/cpc/include/cpc_compressor_impl.hpp
+++ b/cpc/include/cpc_compressor_impl.hpp
@@ -22,9 +22,11 @@
#ifndef CPC_COMPRESSOR_IMPL_HPP_
#define CPC_COMPRESSOR_IMPL_HPP_
+#include <cstdlib>
#include <memory>
#include <stdexcept>
+#include "common_defs.hpp"
#include "compression_data.hpp"
#include "cpc_util.hpp"
#include "cpc_common.hpp"
@@ -36,9 +38,17 @@
template<typename A>
cpc_compressor<A>& get_compressor() {
static cpc_compressor<A>* instance = new cpc_compressor<A>(); // use new for global initialization
+ static int reg_result = std::atexit(destroy_compressor<A>); // just to clean up a little more nicely; don't worry if it fails
+ unused(reg_result);
return *instance;
}
+// register to call compressor destructor at exit
+template<typename A>
+void destroy_compressor() {
+ delete std::addressof(get_compressor<A>());
+}
+
template<typename A>
cpc_compressor<A>::cpc_compressor() {
make_decoding_tables();
diff --git a/sampling/include/ebpps_sample.hpp b/sampling/include/ebpps_sample.hpp
index 97c8b56..7ddb282 100644
--- a/sampling/include/ebpps_sample.hpp
+++ b/sampling/include/ebpps_sample.hpp
@@ -37,14 +37,14 @@
public:
explicit ebpps_sample(uint32_t k, const A& allocator = A());
- // constructor used to create a sample to merge one item
- template<typename TT>
- ebpps_sample(TT&& item, double theta, const A& allocator = A());
-
// for deserialization
class items_deleter;
ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item, double c, const A& allocator = A());
+ // used instead of having a single-item constructor for update/merge calls
+ template<typename TT>
+ void replace_content(TT&& item, double theta);
+
void reset();
void downsample(double theta);
diff --git a/sampling/include/ebpps_sample_impl.hpp b/sampling/include/ebpps_sample_impl.hpp
index f379841..88a86ae 100644
--- a/sampling/include/ebpps_sample_impl.hpp
+++ b/sampling/include/ebpps_sample_impl.hpp
@@ -42,22 +42,6 @@
}
template<typename T, typename A>
-template<typename TT>
-ebpps_sample<T,A>::ebpps_sample(TT&& item, double theta, const A& allocator) :
- allocator_(allocator),
- c_(theta),
- partial_item_(),
- data_(allocator)
- {
- if (theta == 1.0) {
- data_.reserve(1);
- data_.emplace_back(std::forward<TT>(item));
- } else {
- partial_item_.emplace(std::forward<TT>(item));
- }
- }
-
-template<typename T, typename A>
ebpps_sample<T,A>::ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item, double c, const A& allocator) :
allocator_(allocator),
c_(c),
@@ -66,6 +50,19 @@
{}
template<typename T, typename A>
+template<typename TT>
+void ebpps_sample<T,A>::replace_content(TT&& item, double theta) {
+ c_ = theta;
+ data_.clear();
+ partial_item_.reset();
+ if (theta == 1.0) {
+ data_.emplace_back(std::forward<TT>(item));
+ } else {
+ partial_item_.emplace(std::forward<TT>(item));
+ }
+}
+
+template<typename T, typename A>
auto ebpps_sample<T,A>::get_sample() const -> result_type {
double unused;
const double c_frac = std::modf(c_, &unused);
diff --git a/sampling/include/ebpps_sketch.hpp b/sampling/include/ebpps_sketch.hpp
index 51bcc4f..038b5a3 100644
--- a/sampling/include/ebpps_sketch.hpp
+++ b/sampling/include/ebpps_sketch.hpp
@@ -256,6 +256,8 @@
ebpps_sample<T,A> sample_; // Object holding the current state of the sample
+ ebpps_sample<T,A> tmp_; // Temporary sample of size 1 used in updates
+
// handles merge after ensuring other.cumulative_wt_ <= this->cumulative_wt_
// so we can send items in individually
template<typename O>
diff --git a/sampling/include/ebpps_sketch_impl.hpp b/sampling/include/ebpps_sketch_impl.hpp
index e4dc019..299b7f5 100644
--- a/sampling/include/ebpps_sketch_impl.hpp
+++ b/sampling/include/ebpps_sketch_impl.hpp
@@ -40,7 +40,8 @@
cumulative_wt_(0.0),
wt_max_(0.0),
rho_(1.0),
- sample_(check_k(k), allocator)
+ sample_(check_k(k), allocator),
+ tmp_(1, allocator)
{}
template<typename T, typename A>
@@ -53,7 +54,8 @@
cumulative_wt_(cumulative_wt),
wt_max_(wt_max),
rho_(rho),
- sample_(sample)
+ sample_(sample),
+ tmp_(1, allocator)
{}
template<typename T, typename A>
@@ -148,9 +150,8 @@
if (cumulative_wt_ > 0.0)
sample_.downsample(new_rho / rho_);
- ebpps_sample<T,A> tmp(conditional_forward<FwdItem>(item), new_rho * weight, allocator_);
-
- sample_.merge(tmp);
+ tmp_.replace_content(conditional_forward<FwdItem>(item), new_rho * weight);
+ sample_.merge(tmp_);
cumulative_wt_ = new_cum_wt;
wt_max_ = new_wt_max;
@@ -240,9 +241,8 @@
if (cumulative_wt_ > 0.0)
sample_.downsample(new_rho / rho_);
- ebpps_sample<T,A> tmp(conditional_forward<O>(items[i]), new_rho * avg_wt, allocator_);
-
- sample_.merge(tmp);
+ tmp_.replace_content(conditional_forward<O>(items[i]), new_rho * avg_wt);
+ sample_.merge(tmp_);
cumulative_wt_ = new_cum_wt;
rho_ = new_rho;
@@ -259,9 +259,8 @@
if (cumulative_wt_ > 0.0)
sample_.downsample(new_rho / rho_);
- ebpps_sample<T,A> tmp(conditional_forward<O>(other_sample.get_partial_item()), new_rho * other_c_frac * avg_wt, allocator_);
-
- sample_.merge(tmp);
+ tmp_.replace_content(conditional_forward<O>(other_sample.get_partial_item()), new_rho * other_c_frac * avg_wt);
+ sample_.merge(tmp_);
cumulative_wt_ = new_cum_wt;
rho_ = new_rho;
diff --git a/sampling/test/ebpps_sample_test.cpp b/sampling/test/ebpps_sample_test.cpp
index c83cded..e39cba7 100644
--- a/sampling/test/ebpps_sample_test.cpp
+++ b/sampling/test/ebpps_sample_test.cpp
@@ -42,14 +42,15 @@
TEST_CASE("ebpps sample: pre-initialized", "[ebpps_sketch]") {
double theta = 1.0;
- ebpps_sample<int> sample = ebpps_sample<int>(-1, theta);
+ ebpps_sample<int> sample(1);
+ sample.replace_content(-1, theta);
REQUIRE(sample.get_c() == theta);
REQUIRE(sample.get_num_retained_items() == 1);
REQUIRE(sample.get_sample().size() == 1);
REQUIRE(sample.has_partial_item() == false);
theta = 1e-300;
- sample = ebpps_sample<int>(-1, theta);
+ sample.replace_content(-1, theta);
REQUIRE(sample.get_c() == theta);
REQUIRE(sample.get_num_retained_items() == 1);
REQUIRE(sample.get_sample().size() == 0); // assuming the random number is > 1e-300
@@ -57,7 +58,8 @@
}
TEST_CASE("ebpps sample: downsampling", "[ebpps_sketch]") {
- ebpps_sample<char> sample = ebpps_sample<char>('a', 1.0);
+ ebpps_sample<char> sample(1);
+ sample.replace_content('a', 1.0);
sample.downsample(2.0); // no-op
REQUIRE(sample.get_c() == 1.0);
@@ -121,8 +123,9 @@
uint32_t k = 8;
ebpps_sample<int> sample = ebpps_sample<int>(k);
+ ebpps_sample<int> s(1);
for (uint32_t i = 1; i <= k; ++i) {
- ebpps_sample<int> s = ebpps_sample<int>(i, 1.0);
+ s.replace_content(i, 1.0);
sample.merge(s);
REQUIRE(sample.get_c() == static_cast<double>(i));
REQUIRE(sample.get_num_retained_items() == i);