blob: 3055c75994fd9d7015f04e8e90888cf88d492d45 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stddef.h>
#include <stdint.h>
#include <atomic>
#include <map>
#include <string>
#include <vector>
#include "common/logging.h"
namespace doris {
// Histogram data structure implementation:
//
// After construction, the 'value_index_map' will be set to:
//
// BucketValue: | 1 | 2 | 2*1.5 |2*1.5^2|2*1.5^3| ... |2*1.5^n| ... |UINT64MAX|
// Index: | 0 | 1 | 2 | 3 | 4 | ... | n-1 | ... | 108 |
//
// The width of bucket is growing by 1.5 times and its initial values are 1 and 2.
// The input value will be add to the bucket which lower bound is just greater than
// input value. For example, input value A > 2*1.5^n and A <= 2*1.5^(n+1), A will be added
// to the latter bucket.
class HistogramBucketMapper {
public:
HistogramBucketMapper();
// converts a value to the bucket index.
size_t index_for_value(const uint64_t& value) const;
// number of buckets required.
size_t bucket_count() const { return _bucket_values.size(); }
uint64_t last_value() const { return _max_bucket_value; }
uint64_t first_value() const { return _min_bucket_value; }
uint64_t bucket_limit(const size_t bucket_number) const {
DCHECK(bucket_number < bucket_count());
return _bucket_values[bucket_number];
}
private:
std::vector<uint64_t> _bucket_values;
uint64_t _max_bucket_value;
uint64_t _min_bucket_value;
std::map<uint64_t, uint64_t> _value_index_map;
};
struct HistogramStat {
HistogramStat();
~HistogramStat() {}
HistogramStat(const HistogramStat&) = delete;
HistogramStat& operator=(const HistogramStat&) = delete;
void clear();
bool is_empty() const;
void add(const uint64_t& value);
void merge(const HistogramStat& other);
uint64_t min() const { return _min.load(std::memory_order_relaxed); }
uint64_t max() const { return _max.load(std::memory_order_relaxed); }
uint64_t num() const { return _num.load(std::memory_order_relaxed); }
uint64_t sum() const { return _sum.load(std::memory_order_relaxed); }
uint64_t sum_squares() const { return _sum_squares.load(std::memory_order_relaxed); }
uint64_t bucket_at(size_t b) const { return _buckets[b].load(std::memory_order_relaxed); }
double median() const;
double percentile(double p) const;
double average() const;
double standard_deviation() const;
std::string to_string() const;
// To be able to use HistogramStat as thread local variable, it
// cannot have dynamic allocated member. That's why we're
// using manually values from BucketMapper
std::atomic<uint64_t> _min;
std::atomic<uint64_t> _max;
std::atomic<uint64_t> _num;
std::atomic<uint64_t> _sum;
std::atomic<uint64_t> _sum_squares;
std::atomic<uint64_t> _buckets[109]; // 109==BucketMapper::bucket_count()
const uint64_t _num_buckets;
};
} // namespace doris