blob: aac736c7742f612bafcccc03116c0c97d492c9e3 [file]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __PROCESS_STATISTICS_HPP__
#define __PROCESS_STATISTICS_HPP__
#include <glog/logging.h>
#include <algorithm>
#include <cmath>
#include <iterator>
#include <type_traits>
#include <utility>
#include <vector>
#include <process/timeseries.hpp>
#include <stout/foreach.hpp>
#include <stout/option.hpp>
namespace process {
// Represents statistics for a `TimeSeries` of data or a standard container.
// Summary statistics (sample count, extrema and a fixed set of percentiles)
// for a `TimeSeries` of data or a standard container of `T`.
//
// Instances are only produced by the static `from()` factories, which
// return `None` when fewer than 2 datapoints are available.
template <typename T>
struct Statistics
{
  // Returns `Statistics` for the given `TimeSeries`, or `None` if the
  // `TimeSeries` has less than 2 datapoints.
  //
  // TODO(dhamon): Consider adding a histogram abstraction for better
  // performance.
  //
  // Remove this specification once we can construct directly from
  // `TimeSeries<T>::Value`, e.g., by using an iterator adaptor, see
  // https://www.boost.org/doc/libs/1_51_0/libs/range/doc/html/range/reference/adaptors/reference/map_values.html // NOLINT(whitespace/line_length)
  static Option<Statistics<T>> from(const TimeSeries<T>& timeseries)
  {
    std::vector<typename TimeSeries<T>::Value> values_ = timeseries.get();

    // Keep only the `data` member of every entry; the remaining
    // fields of `Value` are not needed to compute the aggregates.
    std::vector<T> values;
    values.reserve(values_.size());

    foreach (const typename TimeSeries<T>::Value& value, values_) {
      values.push_back(value.data);
    }

    return from(std::move(values));
  }

  // Returns `Statistics` for the given container, or `None` if the container
  // has less than 2 datapoints. The container is represented as a pair of
  // [first, last) iterators.
  //
  // This overload only participates in overload resolution if the
  // iterator's `value_type` is exactly `T` and the iterator is at least a
  // forward iterator.
  //
  // TODO(alexr): Consider relaxing the collection type requirement to
  // `std::is_convertible<std::iterator_traits<It>::value_type, T>`.
  template <
      typename It,
      typename = typename std::enable_if<
          std::is_same<
              typename std::iterator_traits<It>::value_type,
              T>::value &&
          std::is_convertible<
              typename std::iterator_traits<It>::iterator_category,
              std::forward_iterator_tag>::value>::type>
  static Option<Statistics<T>> from(It first, It last)
  {
    // Copy the values into a vector we own, since computing the
    // statistics sorts the data in place.
    std::vector<T> values(first, last);

    return from(std::move(values));
  }

  // Number of datapoints the statistics were computed from.
  size_t count;

  // Smallest and largest datapoint.
  T min;
  T max;

  // TODO(dhamon): Consider making the percentiles we store dynamic.
  T p25;
  T p50;
  T p75;
  T p90;
  T p95;
  T p99;
  T p999;
  T p9999;

private:
  // Calculates `Statistics` from the provided vector. The vector is taken
  // by rvalue reference because it is sorted in place; callers hand over
  // ownership via `std::move`.
  static Option<Statistics<T>> from(std::vector<T>&& values)
  {
    // We need at least 2 values to compute aggregates.
    if (values.size() < 2) {
      return None();
    }

    // `percentile()` and the min / max lookups below rely on sorted data.
    std::sort(values.begin(), values.end());

    Statistics statistics;

    statistics.count = values.size();

    statistics.min = values.front();
    statistics.max = values.back();

    statistics.p25 = percentile(values, 0.25);
    statistics.p50 = percentile(values, 0.5);
    statistics.p75 = percentile(values, 0.75);
    statistics.p90 = percentile(values, 0.90);
    statistics.p95 = percentile(values, 0.95);
    statistics.p99 = percentile(values, 0.99);
    statistics.p999 = percentile(values, 0.999);
    statistics.p9999 = percentile(values, 0.9999);

    return statistics;
  }

  // Returns the requested percentile from the sorted values.
  // Note that we need at least two values to compute percentiles!
  //
  // `percentile` is a fraction: values <= 0.0 yield the first element and
  // values >= 1.0 yield the last element. Anything in between is linearly
  // interpolated between the two neighboring elements; the interpolated
  // value is computed in `double` and converted to `T` on return.
  //
  // TODO(dhamon): Use a 'Percentage' abstraction.
  static T percentile(const std::vector<T>& values, double percentile)
  {
    CHECK_GE(values.size(), 2u);

    if (percentile <= 0.0) {
      return values.front();
    }

    if (percentile >= 1.0) {
      return values.back();
    }

    // Use linear interpolation.
    const double position = percentile * (values.size() - 1);
    const size_t index = static_cast<size_t>(std::floor(position));
    const double delta = position - index;

    // `index + 1` is dereferenced below, so `index` must not refer to the
    // last element. No lower bound check is needed: `index` is unsigned.
    CHECK_LT(index, values.size() - 1);

    return values[index] + (values[index + 1] - values[index]) * delta;
  }
};
} // namespace process {
#endif // __PROCESS_STATISTICS_HPP__