// blob: 2f39a7ecc825127f2469f25cceb2ca203e50b5af
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef DISTINCT_COUNT_ACCURACY_PROFILE_HPP_
#define DISTINCT_COUNT_ACCURACY_PROFILE_HPP_
#include "job_profile.hpp"
#include "kll_sketch.hpp"
namespace datasketches {
// Quantile fractions taken from the standard normal cumulative distribution
// at -3, -2, -1, +1, +2 and +3 standard deviations from the mean.
static const double M3SD = 0.0013498980316301; // minus 3 StdDev
static const double M2SD = 0.0227501319481792; // minus 2 StdDev
static const double M1SD = 0.1586552539314570; // minus 1 StdDev
static const double P1SD = 0.8413447460685430; // plus 1 StdDev
static const double P2SD = 0.9772498680518210; // plus 2 StdDev
static const double P3SD = 0.9986501019683700; // plus 3 StdDev
// Ascending list of fractions: minimum (0.0), the six StdDev points around
// the median (0.5), and maximum (1.0).
static const double FRACTIONS[] = {0.0, M3SD, M2SD, M1SD, 0.5, P1SD, P2SD, P3SD, 1.0};
// Number of entries in FRACTIONS. Derived from the array itself so the count
// cannot drift out of sync when fractions are added or removed.
static const size_t FRACT_LEN = sizeof(FRACTIONS) / sizeof(FRACTIONS[0]);
// Accumulator of accuracy statistics for estimates associated with one true value.
// NOTE(review): only the declarations are visible in this header; the member
// definitions live elsewhere. Descriptions marked "presumably" are inferred from
// the names and must be confirmed against the implementation.
class accuracy_stats {
public:
// k: presumably the size/accuracy parameter of the internal kll_sketch - TODO confirm.
// true_value: presumably the exact value reported by get_true_value() - TODO confirm.
accuracy_stats(size_t k, size_t true_value);
// Feeds one estimate into the accumulator (non-const, so it mutates state).
// Presumably updates the running sums, the count and the error sketch - TODO confirm.
void update(double estimate);
// Read-only accessors; none of them modifies the object (all are const).
// Presumably returns the true_value member - TODO confirm.
size_t get_true_value() const;
// Presumably sum_est / count - TODO confirm, including behavior when count is 0.
double get_mean_est() const;
// Presumably sum_rel_err / count - TODO confirm.
double get_mean_rel_err() const;
// Presumably sqrt(sum_sq_rel_err / count) - TODO confirm.
double get_rms_rel_err() const;
// Presumably the number of update() calls so far - TODO confirm.
size_t get_count() const;
// Takes a pointer to `size` fractions and returns a vector of doubles.
// Presumably the quantiles of rel_err_distribution at those fractions, one per
// input fraction - TODO confirm ordering and length against the definition.
std::vector<double> get_quantiles(const double* fractions, size_t size) const;
private:
// NOTE: declaration order below is also the member initialization order.
size_t true_value;
double sum_est; // presumably running sum of estimates
double sum_rel_err; // presumably running sum of relative errors
double sum_sq_rel_err; // presumably running sum of squared relative errors
size_t count;
kll_sketch<double> rel_err_distribution; // sketch over double values; presumably of relative errors
};
// Abstract job profile for distinct-count accuracy measurements.
// The class is abstract because run_trial() is pure virtual; concrete profiles
// derive from it and supply the trial logic.
// NOTE(review): the base class job_profile and the definitions of run() and
// print_stats() are not visible in this header; confirm the details marked
// "presumably" against them.
class distinct_count_accuracy_profile: public job_profile {
public:
// Entry point of the profile. Presumably drives repeated run_trial() calls and
// then reports via print_stats() - TODO confirm. Whether this overrides a
// virtual in job_profile cannot be determined from this header.
void run();
// One trial; must be implemented by every concrete subclass.
virtual void run_trial() = 0;
protected:
// State shared with subclasses (protected access).
// Presumably the next unique input value to feed into a sketch - TODO confirm.
// Note: no initializer here; its initial value must be set by the implementation.
uint64_t key;
// Collection of accuracy_stats accumulators; presumably one per measured
// true value - TODO confirm.
std::vector<accuracy_stats> stats;
private:
// Outputs the collected statistics without modifying the object (const).
// Output format and destination are defined by the implementation.
void print_stats() const;
};
}
#endif