Merge pull request #177 from apache/kll_minor_cleanup
KLL minor cleanup
diff --git a/kll/include/kll_quantile_calculator_impl.hpp b/kll/include/kll_quantile_calculator_impl.hpp
index cb92674..f580819 100644
--- a/kll/include/kll_quantile_calculator_impl.hpp
+++ b/kll/include/kll_quantile_calculator_impl.hpp
@@ -81,7 +81,7 @@
void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
uint64_t subtotal = 0;
for (auto& entry: entries_) {
- const uint32_t new_subtotal = subtotal + entry.second;
+ const uint64_t new_subtotal = subtotal + entry.second;
entry.second = subtotal;
subtotal = new_subtotal;
}
diff --git a/kll/include/kll_sketch.hpp b/kll/include/kll_sketch.hpp
index 8b3409c..a4530c9 100644
--- a/kll/include/kll_sketch.hpp
+++ b/kll/include/kll_sketch.hpp
@@ -315,8 +315,8 @@
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param split_points an array of <i>m</i> unique, monotonically increasing float values
- * that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
+ * @param split_points an array of <i>m</i> unique, monotonically increasing values
+ * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
* The definition of an "interval" is inclusive of the left split point (or minimum value) and
* exclusive of the right split point, with the exception that the last interval will include
* the maximum value.
@@ -338,8 +338,8 @@
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param split_points an array of <i>m</i> unique, monotonically increasing float values
- * that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
+ * @param split_points an array of <i>m</i> unique, monotonically increasing values
+ * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
* The definition of an "interval" is inclusive of the left split point (or minimum value) and
* exclusive of the right split point, with the exception that the last interval will include
* the maximum value.
@@ -401,7 +401,7 @@
* @param is input stream
* @return an instance of a sketch
*/
- static kll_sketch<T, C, S, A> deserialize(std::istream& is);
+ static kll_sketch deserialize(std::istream& is);
/**
* This method deserializes a sketch from a given array of bytes.
@@ -409,7 +409,7 @@
* @param size the size of the array
* @return an instance of a sketch
*/
- static kll_sketch<T, C, S, A> deserialize(const void* bytes, size_t size);
+ static kll_sketch deserialize(const void* bytes, size_t size);
/*
* Gets the normalized rank error given k and pmf.
diff --git a/kll/include/kll_sketch_impl.hpp b/kll/include/kll_sketch_impl.hpp
index d051135..f0c5ff3 100644
--- a/kll/include/kll_sketch_impl.hpp
+++ b/kll/include/kll_sketch_impl.hpp
@@ -91,7 +91,7 @@
template<typename T, typename C, typename S, typename A>
kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& other) {
- kll_sketch<T, C, S, A> copy(other);
+ kll_sketch copy(other);
std::swap(k_, copy.k_);
std::swap(m_, copy.m_);
std::swap(min_k_, copy.min_k_);
@@ -271,6 +271,7 @@
template<typename T, typename C, typename S, typename A>
std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* fractions, uint32_t size) const {
std::vector<T, A> quantiles;
+ quantiles.reserve(size);
if (is_empty()) return quantiles;
std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator;
quantiles.reserve(size);
@@ -469,9 +470,9 @@
check_serial_version(serial_version);
check_family_id(family_id);
+ if (!is.good()) throw std::runtime_error("error reading from std::istream");
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
- if (!is.good()) throw std::runtime_error("error reading from std::istream");
- if (is_empty) return kll_sketch<T, C, S, A>(k);
+ if (is_empty) return kll_sketch(k);
uint64_t n;
uint16_t min_k;
@@ -524,8 +525,7 @@
// copy did not throw, repackage with destrtuctor
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
}
- if (!is.good())
- throw std::runtime_error("error reading from std::istream");
+ if (!is.good()) throw std::runtime_error("error reading from std::istream");
return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
std::move(min_value), std::move(max_value), is_level_zero_sorted);
}
diff --git a/kll/test/kll_sketch_test.cpp b/kll/test/kll_sketch_test.cpp
index ac71757..08e9abe 100644
--- a/kll/test/kll_sketch_test.cpp
+++ b/kll/test/kll_sketch_test.cpp
@@ -156,6 +156,34 @@
REQUIRE(quantiles[2] == quantiles2[2]);
}
+ SECTION("10 items") {
+ kll_float_sketch sketch;
+ sketch.update(1);
+ sketch.update(2);
+ sketch.update(3);
+ sketch.update(4);
+ sketch.update(5);
+ sketch.update(6);
+ sketch.update(7);
+ sketch.update(8);
+ sketch.update(9);
+ sketch.update(10);
+ REQUIRE(sketch.get_quantile(0) == 1.0);
+ REQUIRE(sketch.get_quantile(0.5) == 6.0);
+ REQUIRE(sketch.get_quantile(0.99) == 10.0);
+ REQUIRE(sketch.get_quantile(1) == 10.0);
+ }
+
+ SECTION("100 items") {
+ kll_float_sketch sketch;
+ for (int i = 0; i < 100; ++i) sketch.update(i);
+ REQUIRE(sketch.get_quantile(0) == 0);
+ REQUIRE(sketch.get_quantile(0.01) == 1);
+ REQUIRE(sketch.get_quantile(0.5) == 50);
+ REQUIRE(sketch.get_quantile(0.99) == 99.0);
+ REQUIRE(sketch.get_quantile(1) == 99.0);
+ }
+
SECTION("many items, estimation mode") {
kll_float_sketch sketch;
const int n(1000000);