quantile calculator
diff --git a/req/CMakeLists.txt b/req/CMakeLists.txt
index 95bfb0a..36e7d90 100755
--- a/req/CMakeLists.txt
+++ b/req/CMakeLists.txt
@@ -38,6 +38,8 @@
list(APPEND req_HEADERS "include/req_sketch_impl.hpp")
list(APPEND req_HEADERS "include/req_compactor.hpp")
list(APPEND req_HEADERS "include/req_compactor_impl.hpp")
+list(APPEND req_HEADERS "include/req_quantile_calculator.hpp")
+list(APPEND req_HEADERS "include/req_quantile_calculator_impl.hpp")
install(TARGETS req
EXPORT ${PROJECT_NAME}
@@ -53,4 +55,6 @@
${CMAKE_CURRENT_SOURCE_DIR}/include/req_sketch_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/req_compactor.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/req_compactor_impl.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/req_quantile_calculator.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/req_quantile_calculator_impl.hpp
)
diff --git a/req/include/req_compactor.hpp b/req/include/req_compactor.hpp
index 8da2c44..939dd5e 100755
--- a/req/include/req_compactor.hpp
+++ b/req/include/req_compactor.hpp
@@ -35,6 +35,8 @@
bool is_sorted() const;
uint32_t get_num_items() const;
uint32_t get_nom_capacity() const;
+ uint8_t get_lg_weight() const;
+ const std::vector<T, Allocator>& get_items() const;
template<bool inclusive>
uint64_t compute_weight(const T& item) const;
diff --git a/req/include/req_compactor_impl.hpp b/req/include/req_compactor_impl.hpp
index 170edf0..9e62970 100755
--- a/req/include/req_compactor_impl.hpp
+++ b/req/include/req_compactor_impl.hpp
@@ -57,6 +57,16 @@
}
template<typename T, bool H, typename C, typename A>
+uint8_t req_compactor<T, H, C, A>::get_lg_weight() const {
+ return lg_weight_;
+}
+
+template<typename T, bool H, typename C, typename A>
+const std::vector<T, A>& req_compactor<T, H, C, A>::get_items() const {
+ return items_;
+}
+
+template<typename T, bool H, typename C, typename A>
template<bool inclusive>
uint64_t req_compactor<T, H, C, A>::compute_weight(const T& item) const {
if (!sorted_) const_cast<req_compactor*>(this)->sort(); // allow sorting as a side effect
@@ -85,7 +95,7 @@
if (items_.capacity() < items_.size() + items.size()) items_.reserve(items_.size() + items.size());
auto middle = items_.end();
std::move(items.begin(), items.end(), std::back_inserter(items_));
- std::inplace_merge(items_.begin(), middle, items_.end());
+ std::inplace_merge(items_.begin(), middle, items_.end(), C());
}
template<typename T, bool H, typename C, typename A>
diff --git a/req/include/req_quantile_calculator.hpp b/req/include/req_quantile_calculator.hpp
new file mode 100755
index 0000000..ccf901c
--- /dev/null
+++ b/req/include/req_quantile_calculator.hpp
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef REQ_QUANTILE_CALCULATOR_HPP_
+#define REQ_QUANTILE_CALCULATOR_HPP_
+
+#include <functional>
+
+namespace datasketches {
+
+template<
+ typename T,
+ typename Allocator
+>
+class req_quantile_calculator {
+public:
+ req_quantile_calculator(uint64_t n, const Allocator& allocator);
+
+ void add(const std::vector<T, Allocator>& items, uint8_t lg_weight);
+
+ template<bool inclusive>
+ void convert_to_cummulative();
+
+ const T& get_quantile(double rank) const;
+
+private:
+ using Entry = std::pair<const T*, uint64_t>;
+ using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
+ using Container = std::vector<Entry, AllocEntry>;
+
+ struct compare_pairs_by_second {
+ bool operator()(const Entry& a, const Entry& b) {
+ return a.second < b.second;
+ }
+ };
+
+ uint64_t n_;
+ Container entries_;
+};
+
+} /* namespace datasketches */
+
+#include "req_quantile_calculator_impl.hpp"
+
+#endif
diff --git a/req/include/req_quantile_calculator_impl.hpp b/req/include/req_quantile_calculator_impl.hpp
new file mode 100755
index 0000000..bc143bf
--- /dev/null
+++ b/req/include/req_quantile_calculator_impl.hpp
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef REQ_QUANTILE_CALCULATOR_IMPL_HPP_
+#define REQ_QUANTILE_CALCULATOR_IMPL_HPP_
+
+namespace datasketches {
+
+template<typename T, typename A>
+req_quantile_calculator<T, A>::req_quantile_calculator(uint64_t n, const A& allocator):
+n_(n),
+entries_(allocator)
+{}
+
+template<typename T, typename A>
+void req_quantile_calculator<T, A>::add(const std::vector<T, A>& items, uint8_t lg_weight) {
+ if (entries_.capacity() < entries_.size() + items.size()) entries_.reserve(entries_.size() + items.size());
+ auto middle = entries_.end();
+ for (const auto& item: items) entries_.push_back(Entry(&item, 1 << lg_weight));
+ std::inplace_merge(entries_.begin(), middle, entries_.end(), compare_pairs_by_second());
+}
+
+template<typename T, typename A>
+template<bool inclusive>
+void req_quantile_calculator<T, A>::convert_to_cummulative() {
+ uint64_t subtotal = 0;
+ for (auto& entry: entries_) {
+ const uint64_t new_subtotal = subtotal + entry.second;
+ entry.second = inclusive ? new_subtotal : subtotal;
+ subtotal = new_subtotal;
+ }
+}
+
+template<typename T, typename A>
+const T& req_quantile_calculator<T, A>::get_quantile(double rank) const {
+ uint64_t weight = rank * n_;
+ auto it = std::lower_bound(entries_.begin(), entries_.end(), Entry(nullptr, weight), compare_pairs_by_second());
+ if (it == entries_.end()) return *(entries_[entries_.size() - 1].first);
+ return *(it->first);
+}
+
+} /* namespace datasketches */
+
+#endif
diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index d99f687..30841c3 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -22,6 +22,7 @@
#include "req_common.hpp"
#include "req_compactor.hpp"
+#include "req_quantile_calculator.hpp"
namespace datasketches {
@@ -78,6 +79,9 @@
template<bool inclusive = false>
double get_rank(const T& item) const;
+ template<bool inclusive = false>
+ const T& get_quantile(double rank) const;
+
/**
* Prints a summary of the sketch.
* @param print_levels if true include information about levels
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index 6137e99..f8f592a 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -88,6 +88,25 @@
}
template<typename T, bool H, typename C, typename S, typename A>
+template<bool inclusive>
+const T& req_sketch<T, H, C, S, A>::get_quantile(double rank) const {
+ if (is_empty()) throw new std::invalid_argument("sketch is empty");
+ if ((rank < 0.0) || (rank > 1.0)) {
+ throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
+ }
+ // TODO: min and max
+ if (!compactors_[0].is_sorted()) {
+ const_cast<req_compactor<T, H, C, A>&>(compactors_[0]).sort(); // allow this side effect
+ }
+ req_quantile_calculator<T, A> quantile_calculator(n_, allocator_);
+ for (auto& compactor: compactors_) {
+ quantile_calculator.add(compactor.get_items(), compactor.get_lg_weight());
+ }
+ quantile_calculator.template convert_to_cummulative<inclusive>();
+ return quantile_calculator.get_quantile(rank);
+}
+
+template<typename T, bool H, typename C, typename S, typename A>
void req_sketch<T, H, C, S, A>::grow() {
const uint8_t lg_weight = get_num_levels();
compactors_.push_back(Compactor(lg_weight, k_, allocator_));
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index 5451fcc..d55907f 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -52,6 +52,9 @@
REQUIRE(sketch.get_rank<true>(1) == 1);
REQUIRE(sketch.get_rank(1.1) == 1);
REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
+ REQUIRE(sketch.get_quantile(0) == 1);
+ REQUIRE(sketch.get_quantile(0.5) == 1);
+ REQUIRE(sketch.get_quantile(1) == 1);
}
TEST_CASE("req sketch: repeated values", "[req_sketch]") {
@@ -79,8 +82,34 @@
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 100);
REQUIRE(sketch.get_num_retained() == 100);
+
+ // like KLL
+ REQUIRE(sketch.get_rank(0) == 0);
+ REQUIRE(sketch.get_rank(1) == 0.01);
REQUIRE(sketch.get_rank(50) == 0.5);
+ REQUIRE(sketch.get_rank(98) == 0.98);
+ REQUIRE(sketch.get_rank(99) == 0.99);
+
+ // inclusive
+ REQUIRE(sketch.get_rank<true>(0) == 0.01);
+ REQUIRE(sketch.get_rank<true>(1) == 0.02);
REQUIRE(sketch.get_rank<true>(49) == 0.5);
+ REQUIRE(sketch.get_rank<true>(98) == 0.99);
+ REQUIRE(sketch.get_rank<true>(99) == 1);
+
+ // like KLL
+ REQUIRE(sketch.get_quantile(0) == 0);
+ REQUIRE(sketch.get_quantile(0.01) == 1);
+ REQUIRE(sketch.get_quantile(0.5) == 50);
+ REQUIRE(sketch.get_quantile(.99) == 99);
+ REQUIRE(sketch.get_quantile(1) == 99);
+
+ // inclusive
+ REQUIRE(sketch.get_quantile<true>(0) == 0);
+ REQUIRE(sketch.get_quantile<true>(0.01) == 0);
+ REQUIRE(sketch.get_quantile<true>(0.5) == 49);
+ REQUIRE(sketch.get_quantile<true>(0.99) == 98);
+ REQUIRE(sketch.get_quantile<true>(1) == 99);
}
TEST_CASE("req sketch: estimation mode", "[req_sketch]") {