blob: 6ffbf002d6d4f84d874e8bcb2888fbecce0393d6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <cmath>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include "util/benchmark.h"
#include "util/cpu-info.h"
#include "util/debug-util.h"
#include "util/stopwatch.h"
#include "common/names.h"
namespace impala {
// Private measurement function. This function is a bit unusual in that it
// throws exceptions; the intention is to abort the measurement when an unreliable
// result is detected, but to also provide some useful context as to which benchmark
// was being run. The method used to determine whether the measurement is unreliable
// is detecting context switches using getrusage(). Pass micro_heuristics=false to
// the class constructor if you do not want this behavior.
double Benchmark::Measure(BenchmarkFunction function, void* args,
int max_time, int batch_size, bool micro_heuristics) {
int64_t target_cycles = CpuInfo::cycles_per_ms() * max_time;
int64_t iters = 0;
struct rusage ru_start;
struct rusage ru_stop;
StopWatch sw;
int64_t lost_time = 0;
// Run it with the default batch size to roughly estimate how many iterations
// it will take.
for (;;) {
int64 begin_time = sw.ElapsedTime();
if (micro_heuristics) getrusage(RUSAGE_THREAD, &ru_start);
sw.Start();
function(batch_size, args);
sw.Stop();
if (micro_heuristics) getrusage(RUSAGE_THREAD, &ru_stop);
if (!micro_heuristics || ru_stop.ru_nivcsw == ru_start.ru_nivcsw) {
iters = batch_size;
break;
}
// Taking too long and we keep getting switched out; either the machine is busy,
// or the benchmark takes too long to run and should not be used with this
// microbenchmark suite. Bail.
if (sw.ElapsedTime() > target_cycles) {
throw std::runtime_error("Benchmark failed to complete due to context switching.");
}
// Divide the batch size by the number of context switches until we find a size
// small enough that we don't switch
batch_size = max<int>(1, batch_size / (1+(ru_stop.ru_nivcsw - ru_start.ru_nivcsw)));
lost_time += sw.ElapsedTime() - begin_time;
}
double iters_guess = (target_cycles / (sw.ElapsedTime() - lost_time)) * batch_size;
// Shoot for 110% of the guess. Going a little over is not a big deal.
iters_guess *= 1.1;
// Modify the batch size based on the guess. We ran the function a small number
// of times to estimate how fast the function is. Run the remaining iterations at
// in 20% increments.
batch_size = max<int>(1, (iters_guess - iters) / 5);
while (sw.ElapsedTime() < target_cycles) {
int64 begin_time = sw.ElapsedTime();
if (micro_heuristics) getrusage(RUSAGE_THREAD, &ru_start);
sw.Start();
function(batch_size, args);
sw.Stop();
if (micro_heuristics) getrusage(RUSAGE_THREAD, &ru_stop);
if (!micro_heuristics || ru_stop.ru_nivcsw == ru_start.ru_nivcsw) {
iters += batch_size;
} else {
// We could have a vastly different estimate for batch size now and might have
// started context switching again. Divide down by 1 + the number of context
// switches as a guess of the number of iterations to perform with each batch.
lost_time += sw.ElapsedTime() - begin_time;
batch_size = max<int>(1, batch_size / (1+(ru_stop.ru_nivcsw - ru_start.ru_nivcsw)));
}
}
// Arbitrary fudge factor for throwing in the towel - we give up if > 90% of
// measurements were dropped.
if (lost_time > 10 * sw.ElapsedTime()) {
throw std::runtime_error("Benchmark failed to complete due to noisy measurements.");
}
if (lost_time > sw.ElapsedTime()) {
LOG(WARNING) << "More than 50% of benchmark time lost due to context switching.";
}
double ms_elapsed = (sw.ElapsedTime() - lost_time) / CpuInfo::cycles_per_ms();
return iters / ms_elapsed;
}
Benchmark::Benchmark(const string& name, bool micro_heuristics) : name_(name),
micro_heuristics_(micro_heuristics) {
#ifndef NDEBUG
LOG(ERROR) << "WARNING: Running benchmark in DEBUG mode.";
#endif
CpuInfo::VerifyPerformanceGovernor();
CpuInfo::VerifyTurboDisabled();
}
int Benchmark::AddBenchmark(const string& name, BenchmarkFunction fn, void* args,
int baseline_idx) {
if (baseline_idx == -1) baseline_idx = benchmarks_.size();
CHECK_LE(baseline_idx, benchmarks_.size());
BenchmarkResult benchmark;
benchmark.name = name;
benchmark.fn = fn;
benchmark.args = args;
benchmark.baseline_idx = baseline_idx;
benchmarks_.push_back(benchmark);
return benchmarks_.size() - 1;
}
string Benchmark::Measure(int max_time, int initial_batch_size) {
if (benchmarks_.empty()) return "";
// Run a warmup to iterate through the data
benchmarks_[0].fn(10, benchmarks_[0].args);
// The number of times a benchmark is repeated
const int NUM_REPS = 60;
// Which percentiles of the benchmark to report. Reports the LO_PERCENT, MID_PERCENT,
// and HI_PERCENT percentile result.
const int LO_PERCENT = 10;
const int MID_PERCENT = 50;
const int HI_PERCENT = 100 - LO_PERCENT;
const size_t LO_IDX =
floor(((LO_PERCENT / 100.0) * static_cast<double>(NUM_REPS)) - 0.5);
const size_t MID_IDX =
floor(((MID_PERCENT / 100.0) * static_cast<double>(NUM_REPS)) - 0.5);
const size_t HI_IDX =
floor(((HI_PERCENT / 100.0) * static_cast<double>(NUM_REPS)) - 0.5);
const int function_out_width = 35;
const int rate_out_width = 10;
const int percentile_out_width = 9;
const int comparison_out_width = 11;
const int padding = 0;
const int total_width = function_out_width + rate_out_width + 3 * comparison_out_width +
3 * percentile_out_width + padding;
stringstream ss;
try {
for (int j = 0; j < NUM_REPS; ++j) {
for (int i = 0; i < benchmarks_.size(); ++i) {
benchmarks_[i].rates.push_back(
Measure(benchmarks_[i].fn, benchmarks_[i].args, max_time, initial_batch_size,
micro_heuristics_));
}
}
} catch (std::exception& e) {
LOG(ERROR) << e.what();
LOG(WARNING) << "Exiting silently from " << name_ <<
" to avoid spurious test failure.";
_exit(0);
}
ss << name_ << ":"
<< setw(function_out_width - name_.size() - 1) << "Function"
<< setw(rate_out_width) << "iters/ms"
<< setw(percentile_out_width -4) << LO_PERCENT << "%ile"
<< setw(percentile_out_width - 4) << MID_PERCENT << "%ile"
<< setw(percentile_out_width - 4) << HI_PERCENT << "%ile"
<< setw(comparison_out_width - 4) << LO_PERCENT << "%ile"
<< setw(comparison_out_width - 4) << MID_PERCENT << "%ile"
<< setw(comparison_out_width - 4) << HI_PERCENT << "%ile" << endl;
ss << setw(function_out_width + rate_out_width + 3 * percentile_out_width +
comparison_out_width) << "(relative)"
<< setw(comparison_out_width) << "(relative)"
<< setw(comparison_out_width) << "(relative)" << endl;
for (int i = 0; i < total_width; ++i) {
ss << '-';
}
ss << endl;
int previous_baseline_idx = -1;
for (int i = 0; i < benchmarks_.size(); ++i) {
sort(benchmarks_[i].rates.begin(), benchmarks_[i].rates.end());
const double base_line_lo = benchmarks_[benchmarks_[i].baseline_idx].rates[LO_IDX];
const double base_line_mid = benchmarks_[benchmarks_[i].baseline_idx].rates[MID_IDX];
const double base_line_hi = benchmarks_[benchmarks_[i].baseline_idx].rates[HI_IDX];
if (previous_baseline_idx != benchmarks_[i].baseline_idx && i > 0) ss << endl;
ss << setw(function_out_width) << benchmarks_[i].name
<< setw(rate_out_width + percentile_out_width) << setprecision(3)
<< benchmarks_[i].rates[LO_IDX]
<< setw(percentile_out_width) << setprecision(3)
<< benchmarks_[i].rates[MID_IDX]
<< setw(percentile_out_width) << setprecision(3)
<< benchmarks_[i].rates[HI_IDX]
<< setw(comparison_out_width - 1) << setprecision(3)
<< (benchmarks_[i].rates[LO_IDX] / base_line_lo) << "X"
<< setw(comparison_out_width - 1) << setprecision(3)
<< (benchmarks_[i].rates[MID_IDX] / base_line_mid) << "X"
<< setw(comparison_out_width - 1) << setprecision(3)
<< (benchmarks_[i].rates[HI_IDX] / base_line_hi) << "X" << endl;
previous_baseline_idx = benchmarks_[i].baseline_idx;
}
return ss.str();
}
// TODO: maybe add other things like amount of RAM, etc
string Benchmark::GetMachineInfo() {
stringstream ss;
ss << "Machine Info: " << CpuInfo::model_name();
return ss.str();
}
}