| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
| |
| #include <sys/time.h> |
| #include <sys/resource.h> |
| #include <unistd.h> |
| |
| #include "util/benchmark.h" |
| #include "util/cpu-info.h" |
| #include "util/debug-util.h" |
| #include "util/stopwatch.h" |
| |
| #include "common/names.h" |
| |
| namespace impala { |
| |
// Private measurement function. This function is a bit unusual in that it
// throws exceptions: the intention is to abort the measurement when an
// unreliable result is detected, while still providing useful context about
// which benchmark was being run. A measurement is considered unreliable when
// the thread was involuntarily context-switched during it, which is detected
// via getrusage(). Pass micro_heuristics=false to the class constructor if
// you do not want this behavior.
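//
// For reference, the detection idiom in isolation (an illustrative sketch
// only; RUSAGE_THREAD is Linux-specific):
//
//   struct rusage before, after;
//   getrusage(RUSAGE_THREAD, &before);
//   RunOneBatch();  // hypothetical workload
//   getrusage(RUSAGE_THREAD, &after);
//   if (after.ru_nivcsw != before.ru_nivcsw) {
//     // The thread was switched out mid-measurement, so the wall time
//     // includes time spent off-CPU; discard this sample.
//   }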
| double Benchmark::Measure(BenchmarkFunction function, void* args, |
| int max_time, int batch_size, bool micro_heuristics) { |
| int64_t target_cycles = CpuInfo::cycles_per_ms() * max_time; |
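  // For example (numbers illustrative): on a 3 GHz machine cycles_per_ms() is
  // roughly 3e6, so max_time == 50 yields target_cycles == 1.5e8.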
| int64_t iters = 0; |
| struct rusage ru_start; |
| struct rusage ru_stop; |
| StopWatch sw; |
| int64_t lost_time = 0; |
| |
| // Run it with the default batch size to roughly estimate how many iterations |
| // it will take. |
| for (;;) { |
    int64_t begin_time = sw.ElapsedTime();
| if (micro_heuristics) getrusage(RUSAGE_THREAD, &ru_start); |
| sw.Start(); |
| function(batch_size, args); |
| sw.Stop(); |
| if (micro_heuristics) getrusage(RUSAGE_THREAD, &ru_stop); |
| if (!micro_heuristics || ru_stop.ru_nivcsw == ru_start.ru_nivcsw) { |
| iters = batch_size; |
| break; |
| } |
| |
    // We are taking too long and keep getting switched out; either the machine is
    // busy, or the benchmark takes too long to run and should not be used with
    // this microbenchmark suite. Bail.
| if (sw.ElapsedTime() > target_cycles) { |
| throw std::runtime_error("Benchmark failed to complete due to context switching."); |
| } |
| |
    // Divide the batch size by the number of context switches until we find a
    // size small enough that we don't get switched out.
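    // For example (numbers illustrative): three involuntary switches during a
    // batch of 1000 shrink the next attempt to 1000 / (1 + 3) == 250 iterations.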
    batch_size = max<int>(1, batch_size / (1 + (ru_stop.ru_nivcsw - ru_start.ru_nivcsw)));
| lost_time += sw.ElapsedTime() - begin_time; |
| } |
| |
  double iters_guess =
      (static_cast<double>(target_cycles) / (sw.ElapsedTime() - lost_time)) * batch_size;
| // Shoot for 110% of the guess. Going a little over is not a big deal. |
| iters_guess *= 1.1; |
  // Modify the batch size based on the guess. We ran the function a small number
  // of times to estimate how fast the function is. Run the remaining iterations
  // in 20% increments.
  batch_size = max<int>(1, (iters_guess - iters) / 5);
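  // For example (numbers illustrative): if the estimation batch of 10 iterations
  // used 1% of target_cycles, then iters_guess is about 1.1 * 1000 == 1100 and
  // the remaining work runs in batches of (1100 - 10) / 5 == 218 iterations.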
| |
| while (sw.ElapsedTime() < target_cycles) { |
    int64_t begin_time = sw.ElapsedTime();
| if (micro_heuristics) getrusage(RUSAGE_THREAD, &ru_start); |
| sw.Start(); |
| function(batch_size, args); |
| sw.Stop(); |
| if (micro_heuristics) getrusage(RUSAGE_THREAD, &ru_stop); |
| if (!micro_heuristics || ru_stop.ru_nivcsw == ru_start.ru_nivcsw) { |
| iters += batch_size; |
| } else { |
      // The batch size estimate may now be vastly different, and we might have
      // started context switching again. Divide the batch size by 1 + the number
      // of context switches as a guess at a batch that can run uninterrupted.
| lost_time += sw.ElapsedTime() - begin_time; |
      batch_size = max<int>(1, batch_size / (1 + (ru_stop.ru_nivcsw - ru_start.ru_nivcsw)));
| } |
| } |
| |
| // Arbitrary fudge factor for throwing in the towel - we give up if > 90% of |
| // measurements were dropped. |
  if (lost_time > 10 * (sw.ElapsedTime() - lost_time)) {
| throw std::runtime_error("Benchmark failed to complete due to noisy measurements."); |
| } |
  if (lost_time > sw.ElapsedTime() - lost_time) {
| LOG(WARNING) << "More than 50% of benchmark time lost due to context switching."; |
| } |
| |
  double ms_elapsed =
      (sw.ElapsedTime() - lost_time) / static_cast<double>(CpuInfo::cycles_per_ms());
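  // Report throughput over useful (non-discarded) time only. For example
  // (numbers illustrative): 120000 iterations in 40 ms gives 3000 iters/ms.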
| return iters / ms_elapsed; |
| } |
| |
| Benchmark::Benchmark(const string& name, bool micro_heuristics) : name_(name), |
| micro_heuristics_(micro_heuristics) { |
| #ifndef NDEBUG |
| LOG(ERROR) << "WARNING: Running benchmark in DEBUG mode."; |
| #endif |
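  // Benchmark results are only comparable when the CPU clock is fixed, so check
  // for the 'performance' cpufreq governor and that turbo boost is disabled.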
| CpuInfo::VerifyPerformanceGovernor(); |
| CpuInfo::VerifyTurboDisabled(); |
| } |
| |
| int Benchmark::AddBenchmark(const string& name, BenchmarkFunction fn, void* args, |
| int baseline_idx) { |
| if (baseline_idx == -1) baseline_idx = benchmarks_.size(); |
| CHECK_LE(baseline_idx, benchmarks_.size()); |
| BenchmarkResult benchmark; |
| benchmark.name = name; |
| benchmark.fn = fn; |
| benchmark.args = args; |
| benchmark.baseline_idx = baseline_idx; |
| benchmarks_.push_back(benchmark); |
| return benchmarks_.size() - 1; |
| } |
| |
| string Benchmark::Measure(int max_time, int initial_batch_size) { |
| if (benchmarks_.empty()) return ""; |
| |
| // Run a warmup to iterate through the data |
| benchmarks_[0].fn(10, benchmarks_[0].args); |
| |
| // The number of times a benchmark is repeated |
| const int NUM_REPS = 60; |
| // Which percentiles of the benchmark to report. Reports the LO_PERCENT, MID_PERCENT, |
| // and HI_PERCENT percentile result. |
| const int LO_PERCENT = 10; |
| const int MID_PERCENT = 50; |
| const int HI_PERCENT = 100 - LO_PERCENT; |
| const size_t LO_IDX = |
| floor(((LO_PERCENT / 100.0) * static_cast<double>(NUM_REPS)) - 0.5); |
| const size_t MID_IDX = |
| floor(((MID_PERCENT / 100.0) * static_cast<double>(NUM_REPS)) - 0.5); |
| const size_t HI_IDX = |
| floor(((HI_PERCENT / 100.0) * static_cast<double>(NUM_REPS)) - 0.5); |
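  // With NUM_REPS == 60 these come out to the 5th, 29th, and 53rd (0-based)
  // entries of each sorted rate vector: floor(0.10 * 60 - 0.5) == 5,
  // floor(0.50 * 60 - 0.5) == 29, and floor(0.90 * 60 - 0.5) == 53.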
| |
| const int function_out_width = 35; |
| const int rate_out_width = 10; |
| const int percentile_out_width = 9; |
| const int comparison_out_width = 11; |
| const int padding = 0; |
| const int total_width = function_out_width + rate_out_width + 3 * comparison_out_width + |
| 3 * percentile_out_width + padding; |
| |
| stringstream ss; |
| try { |
| for (int j = 0; j < NUM_REPS; ++j) { |
| for (int i = 0; i < benchmarks_.size(); ++i) { |
| benchmarks_[i].rates.push_back( |
| Measure(benchmarks_[i].fn, benchmarks_[i].args, max_time, initial_batch_size, |
| micro_heuristics_)); |
| } |
| } |
  } catch (const std::exception& e) {
| LOG(ERROR) << e.what(); |
| LOG(WARNING) << "Exiting silently from " << name_ << |
| " to avoid spurious test failure."; |
| _exit(0); |
| } |
| |
| ss << name_ << ":" |
| << setw(function_out_width - name_.size() - 1) << "Function" |
| << setw(rate_out_width) << "iters/ms" |
     << setw(percentile_out_width - 4) << LO_PERCENT << "%ile"
| << setw(percentile_out_width - 4) << MID_PERCENT << "%ile" |
| << setw(percentile_out_width - 4) << HI_PERCENT << "%ile" |
| << setw(comparison_out_width - 4) << LO_PERCENT << "%ile" |
| << setw(comparison_out_width - 4) << MID_PERCENT << "%ile" |
| << setw(comparison_out_width - 4) << HI_PERCENT << "%ile" << endl; |
| ss << setw(function_out_width + rate_out_width + 3 * percentile_out_width + |
| comparison_out_width) << "(relative)" |
| << setw(comparison_out_width) << "(relative)" |
| << setw(comparison_out_width) << "(relative)" << endl; |
| for (int i = 0; i < total_width; ++i) { |
| ss << '-'; |
| } |
| ss << endl; |
| |
| int previous_baseline_idx = -1; |
| for (int i = 0; i < benchmarks_.size(); ++i) { |
| sort(benchmarks_[i].rates.begin(), benchmarks_[i].rates.end()); |
| const double base_line_lo = benchmarks_[benchmarks_[i].baseline_idx].rates[LO_IDX]; |
| const double base_line_mid = benchmarks_[benchmarks_[i].baseline_idx].rates[MID_IDX]; |
| const double base_line_hi = benchmarks_[benchmarks_[i].baseline_idx].rates[HI_IDX]; |
| if (previous_baseline_idx != benchmarks_[i].baseline_idx && i > 0) ss << endl; |
| ss << setw(function_out_width) << benchmarks_[i].name |
| << setw(rate_out_width + percentile_out_width) << setprecision(3) |
| << benchmarks_[i].rates[LO_IDX] |
| << setw(percentile_out_width) << setprecision(3) |
| << benchmarks_[i].rates[MID_IDX] |
| << setw(percentile_out_width) << setprecision(3) |
| << benchmarks_[i].rates[HI_IDX] |
| << setw(comparison_out_width - 1) << setprecision(3) |
| << (benchmarks_[i].rates[LO_IDX] / base_line_lo) << "X" |
| << setw(comparison_out_width - 1) << setprecision(3) |
| << (benchmarks_[i].rates[MID_IDX] / base_line_mid) << "X" |
| << setw(comparison_out_width - 1) << setprecision(3) |
| << (benchmarks_[i].rates[HI_IDX] / base_line_hi) << "X" << endl; |
| previous_baseline_idx = benchmarks_[i].baseline_idx; |
| } |
| |
| return ss.str(); |
| } |
| |
| // TODO: maybe add other things like amount of RAM, etc |
| string Benchmark::GetMachineInfo() { |
| stringstream ss; |
| ss << "Machine Info: " << CpuInfo::model_name(); |
| return ss.str(); |
| } |
| |
| } |