|  | // Copyright 2015 Google Inc. All rights reserved. | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
|  | #include "benchmark/benchmark.h" | 
|  | #include "arraysize.h" | 
|  | #include "check.h" | 
|  | #include "colorprint.h" | 
|  | #include "commandlineflags.h" | 
|  | #include "internal_macros.h" | 
|  | #include "log.h" | 
|  | #include "re.h" | 
|  | #include "sleep.h" | 
|  | #include "stat.h" | 
|  | #include "string_util.h" | 
|  | #include "sysinfo.h" | 
|  | #include "walltime.h" | 
|  |  | 
|  | #include <sys/time.h> | 
|  | #include <string.h> | 
|  |  | 
|  | #include <algorithm> | 
|  | #include <atomic> | 
|  | #include <condition_variable> | 
|  | #include <iostream> | 
|  | #include <memory> | 
|  | #include <mutex> | 
|  | #include <thread> | 
|  | #include <sstream> | 
|  |  | 
// Command-line flags controlling benchmark selection, duration and output.
// They are consumed by ParseCommandLineFlags() below.
DEFINE_string(benchmark_filter, ".",
              "A regular expression that specifies the set of benchmarks "
              "to execute.  If this flag is empty, no benchmarks are run.  "
              "If this flag is the string \"all\", all benchmarks linked "
              "into the process are run.");

DEFINE_int32(benchmark_iterations, 0,
             "Total number of iterations per benchmark. 0 means the benchmarks "
             "are time-based.");

DEFINE_double(benchmark_min_time, 0.5,
              "Minimum number of seconds we should run benchmark before "
              "results are considered significant.  For cpu-time based "
              "tests, this is the lower bound on the total cpu time "
              "used by all threads that make up the test.  For real-time "
              "based tests, this is the lower bound on the elapsed time "
              "of the benchmark execution, regardless of number of "
              "threads.");

DEFINE_bool(benchmark_memory_usage, false,
            "Report memory usage for all benchmarks");

DEFINE_int32(benchmark_repetitions, 1,
             "The number of runs of each benchmark. If greater than 1, the "
             "mean and standard deviation of the runs will be reported.");

DEFINE_int32(v, 0, "The level of verbose logging to output");
DEFINE_bool(color_print, true, "Enables colorized logging.");

// Will be non-empty if heap checking is turned on, which would
// invalidate any benchmarks.
DECLARE_string(heap_check);
|  |  | 
// Compile-time length of a string literal (excluding the terminating NUL).
// The ""'s catch people who don't pass in a literal for "str"
#define strliterallen(str) (sizeof("" str "") - 1)

// Must use a string literal for prefix.
// Evaluates to a pointer just past "prefix" within "str" when the first
// strliterallen(prefix) bytes of "str" equal "prefix" (and "len" is large
// enough); otherwise evaluates to NULL.
#define memprefix(str, len, prefix)                  \
  ((((len) >= strliterallen(prefix)) &&              \
    memcmp(str, prefix, strliterallen(prefix)) == 0) \
       ? str + strliterallen(prefix)                 \
       : NULL)
|  |  | 
namespace benchmark {
namespace {
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
static const int kRangeMultiplier = 8;

// NOTE(review): not referenced anywhere in this chunk; presumably used to
// coordinate benchmark-thread startup elsewhere in the file — confirm.
std::mutex starting_mutex;
std::condition_variable starting_cv;

// True while RunInstance()/MeasureOverhead() is executing a benchmark.
bool running_benchmark = false;

// Should this benchmark report memory usage?
bool get_memory_usage;

// Should this benchmark base decisions off of real time rather than
// cpu time?
bool use_real_time;

// Overhead of an empty benchmark loop, measured by
// Benchmark::MeasureOverhead() in seconds per iteration.
double overhead = 0.0;
|  |  | 
// Return prefix to print in front of each reported line.  Debug builds tag
// their output so the numbers are not mistaken for optimized results.
const char* Prefix() {
#ifndef NDEBUG
  return "DEBUG: ";
#else
  return "";
#endif
}
|  |  | 
|  | // TODO | 
|  | // static internal::MallocCounter *benchmark_mc; | 
|  |  | 
// Returns true when any CPU's cpufreq governor is not "performance",
// i.e. frequency scaling may distort timings.  Returns false when the
// cpufreq files cannot be read (e.g. not running on Linux).
bool CpuScalingEnabled() {
  // On Linux, the CPUfreq subsystem exposes CPU information as files on the
  // local file system. If reading the exported files fails, then we may not be
  // running on Linux, so we silently ignore all the read errors.
  for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) {
    std::stringstream ss;
    ss << "/sys/devices/system/cpu/cpu" << cpu << "/cpufreq/scaling_governor";
    std::string governor_file = ss.str();
    FILE* file = fopen(governor_file.c_str(), "r");
    // NOTE(review): a missing file stops the whole scan (break, not
    // continue) — assumes cpufreq is exposed for either all CPUs or none.
    if (!file) break;
    char buff[16];
    size_t bytes_read = fread(buff, 1, sizeof(buff), file);
    fclose(file);
    // One non-"performance" governor is enough to flag scaling as enabled.
    if (memprefix(buff, bytes_read, "performance") == NULL) return true;
  }
  return false;
}
|  |  | 
|  | // Given a collection of reports, computes their mean and stddev. | 
|  | // REQUIRES: all runs in "reports" must be from the same benchmark. | 
|  | void ComputeStats(const std::vector<BenchmarkReporter::Run>& reports, | 
|  | BenchmarkReporter::Run* mean_data, | 
|  | BenchmarkReporter::Run* stddev_data) { | 
|  | // Accumulators. | 
|  | Stat1_d real_accumulated_time_stat; | 
|  | Stat1_d cpu_accumulated_time_stat; | 
|  | Stat1_d items_per_second_stat; | 
|  | Stat1_d bytes_per_second_stat; | 
|  | Stat1_d iterations_stat; | 
|  | Stat1MinMax_d max_heapbytes_used_stat; | 
|  |  | 
|  | // Populate the accumulators. | 
|  | for (std::vector<BenchmarkReporter::Run>::const_iterator it = reports.begin(); | 
|  | it != reports.end(); ++it) { | 
|  | CHECK_EQ(reports[0].benchmark_name, it->benchmark_name); | 
|  | real_accumulated_time_stat += | 
|  | Stat1_d(it->real_accumulated_time / it->iterations, it->iterations); | 
|  | cpu_accumulated_time_stat += | 
|  | Stat1_d(it->cpu_accumulated_time / it->iterations, it->iterations); | 
|  | items_per_second_stat += Stat1_d(it->items_per_second, it->iterations); | 
|  | bytes_per_second_stat += Stat1_d(it->bytes_per_second, it->iterations); | 
|  | iterations_stat += Stat1_d(it->iterations, it->iterations); | 
|  | max_heapbytes_used_stat += | 
|  | Stat1MinMax_d(it->max_heapbytes_used, it->iterations); | 
|  | } | 
|  |  | 
|  | // Get the data from the accumulator to BenchmarkRunData's.  In the | 
|  | // computations below we must multiply by the number of iterations since | 
|  | // PrintRunData will divide by it. | 
|  | mean_data->benchmark_name = reports[0].benchmark_name + "_mean"; | 
|  | mean_data->iterations = iterations_stat.Mean(); | 
|  | mean_data->real_accumulated_time = real_accumulated_time_stat.Mean() * | 
|  | mean_data->iterations; | 
|  | mean_data->cpu_accumulated_time = cpu_accumulated_time_stat.Mean() * | 
|  | mean_data->iterations; | 
|  | mean_data->bytes_per_second = bytes_per_second_stat.Mean(); | 
|  | mean_data->items_per_second = items_per_second_stat.Mean(); | 
|  | mean_data->max_heapbytes_used = max_heapbytes_used_stat.Max(); | 
|  |  | 
|  | // Only add label to mean/stddev if it is same for all runs | 
|  | mean_data->report_label = reports[0].report_label; | 
|  | for (size_t i = 1; i < reports.size(); i++) { | 
|  | if (reports[i].report_label != reports[0].report_label) { | 
|  | mean_data->report_label = ""; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | stddev_data->benchmark_name = reports[0].benchmark_name + "_stddev"; | 
|  | stddev_data->report_label = mean_data->report_label; | 
|  | stddev_data->iterations = iterations_stat.StdDev(); | 
|  | // The value of iterations_stat.StdDev() above may be 0 if all the repetitions | 
|  | // have the same number of iterations.  Blindly multiplying by 0 in the | 
|  | // computation of real/cpu_accumulated_time below would lead to 0/0 in | 
|  | // PrintRunData.  So we skip the multiplication in this case and PrintRunData | 
|  | // skips the division. | 
|  | if (stddev_data->iterations == 0) { | 
|  | stddev_data->real_accumulated_time = real_accumulated_time_stat.StdDev(); | 
|  | stddev_data->cpu_accumulated_time = cpu_accumulated_time_stat.StdDev(); | 
|  | } else { | 
|  | stddev_data->real_accumulated_time = real_accumulated_time_stat.StdDev() * | 
|  | stddev_data->iterations; | 
|  | stddev_data->cpu_accumulated_time = cpu_accumulated_time_stat.StdDev() * | 
|  | stddev_data->iterations; | 
|  | } | 
|  | stddev_data->bytes_per_second = bytes_per_second_stat.StdDev(); | 
|  | stddev_data->items_per_second = items_per_second_stat.StdDev(); | 
|  | stddev_data->max_heapbytes_used = max_heapbytes_used_stat.StdDev(); | 
|  | } | 
|  | }  // namespace | 
|  |  | 
|  | namespace internal { | 
|  |  | 
// Class for managing registered benchmarks.  Note that each registered
// benchmark identifies a family of related benchmarks to run.
class BenchmarkFamilies {
 public:
  // Process-lifetime singleton accessor.
  static BenchmarkFamilies* GetInstance();

  // Registers a benchmark family and returns the index assigned to it.
  size_t AddBenchmark(Benchmark* family);

  // Unregisters a family at the given index.
  void RemoveBenchmark(size_t index);

  // Extract the list of benchmark instances that match the specified
  // regular expression.
  void FindBenchmarks(const std::string& re,
                      std::vector<Benchmark::Instance>* benchmarks);

 private:
  BenchmarkFamilies();
  ~BenchmarkFamilies();

  // Registered families; an entry may be null after RemoveBenchmark.
  // Guarded by mutex_.
  std::vector<Benchmark*> families_;
  std::mutex mutex_;
};
|  |  | 
|  | BenchmarkFamilies* BenchmarkFamilies::GetInstance() { | 
|  | static BenchmarkFamilies instance; | 
|  | return &instance; | 
|  | } | 
|  |  | 
BenchmarkFamilies::BenchmarkFamilies() { }

// The registry owns any Benchmark objects still present at shutdown.
// Deleting a slot that was nulled out by RemoveBenchmark is a no-op.
BenchmarkFamilies::~BenchmarkFamilies() {
  for (internal::Benchmark* family : families_) {
    delete family;
  }
}
|  |  | 
|  | size_t BenchmarkFamilies::AddBenchmark(Benchmark* family) { | 
|  | std::lock_guard<std::mutex> l(mutex_); | 
|  | // This loop attempts to reuse an entry that was previously removed to avoid | 
|  | // unncessary growth of the vector. | 
|  | for (size_t index = 0; index < families_.size(); ++index) { | 
|  | if (families_[index] == nullptr) { | 
|  | families_[index] = family; | 
|  | return index; | 
|  | } | 
|  | } | 
|  | size_t index = families_.size(); | 
|  | families_.push_back(family); | 
|  | return index; | 
|  | } | 
|  |  | 
|  | void BenchmarkFamilies::RemoveBenchmark(size_t index) { | 
|  | std::lock_guard<std::mutex> l(mutex_); | 
|  | families_[index] = NULL; | 
|  | // Don't shrink families_ here, we might be called by the destructor of | 
|  | // BenchmarkFamilies which iterates over the vector. | 
|  | } | 
|  |  | 
// Appends to *benchmarks every instance of every registered family whose
// name matches "spec".  A bad regular expression is reported to stderr and
// produces no matches.
void BenchmarkFamilies::FindBenchmarks(
    const std::string& spec,
    std::vector<Benchmark::Instance>* benchmarks) {
  // Make regular expression out of command-line flag
  Regex re;
  std::string re_error;
  if (!re.Init(spec, &re_error)) {
    std::cerr << "Could not compile benchmark re: " << re_error << std::endl;
    return;
  }

  std::lock_guard<std::mutex> l(mutex_);
  for (internal::Benchmark* family : families_) {
    if (family == nullptr) continue;  // Family was deleted

    // Match against filter.
    if (!re.Match(family->name_)) {
      VLOG(1) << "Skipping " << family->name_ << "\n";
      continue;
    }

    // Expand the family into concrete instances: no arguments, X-only, or
    // the full cross product of the X and Y argument ranges.
    std::vector<Benchmark::Instance> instances;
    if (family->rangeX_.empty() && family->rangeY_.empty()) {
      instances = family->CreateBenchmarkInstances(
          Benchmark::kNoRangeIndex, Benchmark::kNoRangeIndex);
      std::copy(instances.begin(), instances.end(),
                std::back_inserter(*benchmarks));
    } else if (family->rangeY_.empty()) {
      // One set of instances per X value.
      for (size_t x = 0; x < family->rangeX_.size(); ++x) {
        instances = family->CreateBenchmarkInstances(
            x, Benchmark::kNoRangeIndex);
        std::copy(instances.begin(), instances.end(),
                  std::back_inserter(*benchmarks));
      }
    } else {
      // One set of instances per (X, Y) pair.
      for (size_t x = 0; x < family->rangeX_.size(); ++x) {
        for (size_t y = 0; y < family->rangeY_.size(); ++y) {
          instances = family->CreateBenchmarkInstances(x, y);
          std::copy(instances.begin(), instances.end(),
                    std::back_inserter(*benchmarks));
        }
      }
    }
  }
}
|  |  | 
|  | std::string ConsoleReporter::PrintMemoryUsage(double bytes) const { | 
|  | if (!get_memory_usage || bytes < 0.0) return ""; | 
|  |  | 
|  | std::stringstream ss; | 
|  | ss << " " << HumanReadableNumber(bytes) << "B peak-mem"; | 
|  | return ss.str(); | 
|  | } | 
|  |  | 
// Prints the run header: CPU count/speed, timestamp, an optional CPU-scaling
// warning, and the column titles.  Always returns true so reporting proceeds.
bool ConsoleReporter::ReportContext(const BenchmarkReporter::Context& context)
    const {
  // NOTE(review): assigned inside a const member function, so
  // name_field_width_ is presumably declared mutable — confirm in the header.
  name_field_width_ = context.name_field_width;

  std::cout << "Benchmarking on " << context.num_cpus << " X "
            << context.mhz_per_cpu << " MHz CPU"
            << ((context.num_cpus > 1) ? "s" : "") << "\n";

  int remainder_ms;
  std::cout << walltime::Print(walltime::Now(), "%Y/%m/%d-%H:%M:%S",
                               true,  // use local timezone
                               &remainder_ms) << "\n";

  // Show details of CPU model, caches, TLBs etc.
  //  if (!context.cpu_info.empty())
  //    std::cout << "CPU: " << context.cpu_info.c_str();

  if (context.cpu_scaling_enabled) {
    std::cerr << "CPU scaling is enabled: Benchmark timings may be noisy.\n";
  }

  // fprintf returns the number of characters written; reuse that to size the
  // dashed separator line under the column titles.
  int output_width = fprintf(stdout, "%s%-*s %10s %10s %10s\n",
                             Prefix(), int(name_field_width_), "Benchmark",
                             "Time(ns)", "CPU(ns)", "Iterations");
  std::cout << std::string(output_width - 1, '-').c_str() << "\n";

  return true;
}
|  |  | 
|  | void ConsoleReporter::ReportRuns( | 
|  | const std::vector<BenchmarkReporter::Run>& reports) const { | 
|  | for (std::vector<BenchmarkReporter::Run>::const_iterator it = reports.begin(); | 
|  | it != reports.end(); ++it) { | 
|  | CHECK_EQ(reports[0].benchmark_name, it->benchmark_name); | 
|  | PrintRunData(*it); | 
|  | } | 
|  |  | 
|  | // We don't report aggregated data if there was a single run. | 
|  | if (reports.size() < 2) return; | 
|  |  | 
|  | BenchmarkReporter::Run mean_data; | 
|  | BenchmarkReporter::Run stddev_data; | 
|  | ComputeStats(reports, &mean_data, &stddev_data); | 
|  |  | 
|  | PrintRunData(mean_data); | 
|  | PrintRunData(stddev_data); | 
|  | } | 
|  |  | 
// Prints a single result line: name, per-iteration real/CPU times in ns,
// iteration count, then optional throughput, label and peak-memory columns.
void ConsoleReporter::PrintRunData(const BenchmarkReporter::Run& result) const {
  // Format bytes per second
  std::string rate;
  if (result.bytes_per_second > 0) {
    std::stringstream ss;
    ss << " " << HumanReadableNumber(result.bytes_per_second) << "B/s";
    rate = ss.str();
  }

  // Format items per second
  std::string items;
  if (result.items_per_second > 0) {
    std::stringstream ss;
    ss << " " << HumanReadableNumber(result.items_per_second) << " items/s";
    items = ss.str();
  }

  ColorPrintf(COLOR_DEFAULT, "%s", Prefix());
  ColorPrintf(COLOR_GREEN, "%-*s ",
              name_field_width_, result.benchmark_name.c_str());
  // iterations == 0 marks aggregate rows whose times are already
  // per-iteration (see ComputeStats); print them directly instead of
  // dividing by zero.
  if (result.iterations == 0) {
    ColorPrintf(COLOR_YELLOW, "%10.0f %10.0f ",
                result.real_accumulated_time * 1e9,
                result.cpu_accumulated_time * 1e9);
  } else {
    ColorPrintf(COLOR_YELLOW, "%10.0f %10.0f ",
                (result.real_accumulated_time * 1e9) /
                    (static_cast<double>(result.iterations)),
                (result.cpu_accumulated_time * 1e9) /
                    (static_cast<double>(result.iterations)));
  }
  ColorPrintf(COLOR_CYAN, "%10lld", result.iterations);
  ColorPrintf(COLOR_DEFAULT, "%*s %*s %s %s\n",
              13, rate.c_str(),
              18, items.c_str(),
              result.report_label.c_str(),
              PrintMemoryUsage(result.max_heapbytes_used).c_str());
}
|  |  | 
|  | /* TODO(dominic) | 
|  | void MemoryUsage() { | 
|  | // if (benchmark_mc) { | 
|  | //  benchmark_mc->Reset(); | 
|  | //} else { | 
|  | get_memory_usage = true; | 
|  | //} | 
|  | } | 
|  | */ | 
|  |  | 
// Prints the flag summary for --help and terminates the process.
// Keep this text in sync with the flags handled by ParseCommandLineFlags.
void PrintUsageAndExit() {
  fputs("benchmark [--benchmark_filter=<regex>]\n"
        "          [--benchmark_iterations=<iterations>]\n"
        "          [--benchmark_min_time=<min_time>]\n"
        //"          [--benchmark_memory_usage]\n"
        "          [--benchmark_repetitions=<num_repetitions>]\n"
        "          [--color_print={true|false}]\n"
        "          [--v=<verbosity>]\n",
        stdout);
  exit(0);
}
|  |  | 
// Consumes benchmark-related flags from argv, compacting the array in place
// so the caller sees only unrecognized arguments; *argc is decremented for
// each consumed flag.  "--help" prints usage and exits.
void ParseCommandLineFlags(int* argc, const char** argv) {
  for (int i = 1; i < *argc; ++i) {
    if (ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
        ParseInt32Flag(argv[i], "benchmark_iterations",
                       &FLAGS_benchmark_iterations) ||
        ParseDoubleFlag(argv[i], "benchmark_min_time",
                        &FLAGS_benchmark_min_time) ||
        // TODO(dominic)
        //        ParseBoolFlag(argv[i], "gbenchmark_memory_usage",
        //                      &FLAGS_gbenchmark_memory_usage) ||
        ParseInt32Flag(argv[i], "benchmark_repetitions",
                       &FLAGS_benchmark_repetitions) ||
        ParseBoolFlag(argv[i], "color_print", &FLAGS_color_print) ||
        ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
      // Shift the remaining arguments (including the element at argv[*argc])
      // left by one, then re-examine the same index.
      for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];

      --(*argc);
      --i;
    } else if (IsFlag(argv[i], "help"))
      PrintUsageAndExit();
  }
}
|  |  | 
|  | }  // end namespace internal | 
|  |  | 
|  | // A clock that provides a fast mechanism to check if we're nearly done. | 
|  | class State::FastClock { | 
|  | public: | 
|  | enum Type { | 
|  | REAL_TIME, | 
|  | CPU_TIME | 
|  | }; | 
|  | explicit FastClock(Type type) | 
|  | : type_(type), | 
|  | approx_time_(NowMicros()), | 
|  | bg_done_(false), | 
|  | bg_(BGThreadWrapper, this) { } | 
|  |  | 
|  | ~FastClock() { | 
|  | { | 
|  | std::unique_lock<std::mutex> l(bg_mutex_); | 
|  | bg_done_ = true; | 
|  | bg_cond_.notify_one(); | 
|  | } | 
|  | bg_.join(); | 
|  | } | 
|  |  | 
|  | // Returns true if the current time is guaranteed to be past "when_micros". | 
|  | // This method is very fast. | 
|  | inline bool HasReached(int64_t when_micros) { | 
|  | return std::atomic_load(&approx_time_) >= when_micros; | 
|  | } | 
|  |  | 
|  | // Returns the current time in microseconds past the epoch. | 
|  | int64_t NowMicros() const { | 
|  | double t = 0; | 
|  | switch (type_) { | 
|  | case REAL_TIME: | 
|  | t = walltime::Now(); | 
|  | break; | 
|  | case CPU_TIME: | 
|  | t = MyCPUUsage() + ChildrenCPUUsage(); | 
|  | break; | 
|  | } | 
|  | return static_cast<int64_t>(t * kNumMicrosPerSecond); | 
|  | } | 
|  |  | 
|  | // Reinitialize if necessary (since clock type may be change once benchmark | 
|  | // function starts running - see UseRealTime). | 
|  | void InitType(Type type) { | 
|  | type_ = type; | 
|  | std::lock_guard<std::mutex> l(bg_mutex_); | 
|  | std::atomic_store(&approx_time_, NowMicros()); | 
|  | } | 
|  |  | 
|  | private: | 
|  | Type type_; | 
|  | std::atomic<int64_t> approx_time_;  // Last time measurement taken by bg_ | 
|  | bool bg_done_;  // This is used to signal background thread to exit | 
|  | std::mutex bg_mutex_; | 
|  | std::condition_variable bg_cond_; | 
|  | std::thread bg_;  // Background thread that updates last_time_ once every ms | 
|  |  | 
|  | static void* BGThreadWrapper(void* that) { | 
|  | ((FastClock*)that)->BGThread(); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | void BGThread() { | 
|  | std::unique_lock<std::mutex> l(bg_mutex_); | 
|  | while (!bg_done_) | 
|  | { | 
|  | // Set timeout to 1 ms. | 
|  | bg_cond_.wait_for(l, std::chrono::milliseconds(1)); | 
|  | std::atomic_store(&approx_time_, NowMicros()); | 
|  | } | 
|  | } | 
|  |  | 
|  | BENCHMARK_DISALLOW_COPY_AND_ASSIGN(FastClock); | 
|  | }; | 
|  |  | 
|  | struct State::ThreadStats { | 
|  | int64_t bytes_processed; | 
|  | int64_t items_processed; | 
|  |  | 
|  | ThreadStats() { Reset(); } | 
|  |  | 
|  | void Reset() { | 
|  | bytes_processed = 0; | 
|  | items_processed = 0; | 
|  | } | 
|  |  | 
|  | void Add(const ThreadStats& other) { | 
|  | bytes_processed += other.bytes_processed; | 
|  | items_processed += other.items_processed; | 
|  | } | 
|  | }; | 
|  |  | 
|  | namespace internal { | 
|  |  | 
|  | // Information kept per benchmark we may want to run | 
|  | struct Benchmark::Instance { | 
|  | Instance() | 
|  | : bm(nullptr), | 
|  | threads(1), | 
|  | rangeXset(false), | 
|  | rangeX(kNoRange), | 
|  | rangeYset(false), | 
|  | rangeY(kNoRange) {} | 
|  |  | 
|  | std::string name; | 
|  | Benchmark* bm; | 
|  | int threads;  // Number of concurrent threads to use | 
|  |  | 
|  | bool rangeXset; | 
|  | int rangeX; | 
|  | bool rangeYset; | 
|  | int rangeY; | 
|  |  | 
|  | bool multithreaded() const { return !bm->thread_counts_.empty(); } | 
|  | }; | 
|  |  | 
|  | }  // end namespace internal | 
|  |  | 
|  | struct State::SharedState { | 
|  | const internal::Benchmark::Instance* instance; | 
|  | std::mutex mu; | 
|  | std::condition_variable cond; | 
|  | int starting;  // Number of threads that have entered STARTING state | 
|  | int stopping;  // Number of threads that have entered STOPPING state | 
|  | int exited;    // Number of threads that have complete exited | 
|  | int threads;   // Number of total threads that are running concurrently | 
|  | ThreadStats stats; | 
|  | std::vector<BenchmarkReporter::Run> runs;  // accumulated runs | 
|  | std::string label; | 
|  |  | 
|  | explicit SharedState(const internal::Benchmark::Instance* b) | 
|  | : instance(b), | 
|  | starting(0), | 
|  | stopping(0), | 
|  | exited(0), | 
|  | threads(b == nullptr ? 1 : b->threads) { } | 
|  |  | 
|  | BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SharedState); | 
|  | }; | 
|  |  | 
|  | namespace internal { | 
|  |  | 
// Registers this family with the global registry on construction...
Benchmark::Benchmark(const char* name, BenchmarkFunction f)
    : name_(name), function_(f) {
  registration_index_ = BenchmarkFamilies::GetInstance()->AddBenchmark(this);
}

// ...and unregisters it (by the remembered index) on destruction.
Benchmark::~Benchmark() {
  BenchmarkFamilies::GetInstance()->RemoveBenchmark(registration_index_);
}
|  |  | 
// Appends a single argument value to this family's X range.
// Returns "this" so calls can be chained.
Benchmark* Benchmark::Arg(int x) {
  std::lock_guard<std::mutex> l(mutex_);
  rangeX_.push_back(x);
  return this;
}
|  |  | 
|  | Benchmark* Benchmark::Range(int start, int limit) { | 
|  | std::vector<int> arglist; | 
|  | AddRange(&arglist, start, limit, kRangeMultiplier); | 
|  |  | 
|  | std::lock_guard<std::mutex> l(mutex_); | 
|  | for (size_t i = 0; i < arglist.size(); ++i) rangeX_.push_back(arglist[i]); | 
|  | return this; | 
|  | } | 
|  |  | 
// Appends every integer in [start, limit] to the X range.
Benchmark* Benchmark::DenseRange(int start, int limit) {
  CHECK_GE(start, 0);
  CHECK_LE(start, limit);
  std::lock_guard<std::mutex> l(mutex_);
  for (int arg = start; arg <= limit; ++arg) rangeX_.push_back(arg);
  return this;
}

// Appends one (x, y) argument pair.
Benchmark* Benchmark::ArgPair(int x, int y) {
  std::lock_guard<std::mutex> l(mutex_);
  rangeX_.push_back(x);
  rangeY_.push_back(y);
  return this;
}
|  |  | 
|  | Benchmark* Benchmark::RangePair(int lo1, int hi1, int lo2, int hi2) { | 
|  | std::vector<int> arglist1, arglist2; | 
|  | AddRange(&arglist1, lo1, hi1, kRangeMultiplier); | 
|  | AddRange(&arglist2, lo2, hi2, kRangeMultiplier); | 
|  |  | 
|  | std::lock_guard<std::mutex> l(mutex_); | 
|  | rangeX_.resize(arglist1.size()); | 
|  | std::copy(arglist1.begin(), arglist1.end(), rangeX_.begin()); | 
|  | rangeY_.resize(arglist2.size()); | 
|  | std::copy(arglist2.begin(), arglist2.end(), rangeY_.begin()); | 
|  | return this; | 
|  | } | 
|  |  | 
// Lets the caller add arbitrary argument combinations via a callback.
Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
  custom_arguments(this);
  return this;
}

// Requests an additional run of this family with t concurrent threads.
Benchmark* Benchmark::Threads(int t) {
  CHECK_GT(t, 0);
  std::lock_guard<std::mutex> l(mutex_);
  thread_counts_.push_back(t);
  return this;
}

// Requests runs at min_threads, powers of two between, and max_threads.
Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
  CHECK_GT(min_threads, 0);
  CHECK_GE(max_threads, min_threads);

  std::lock_guard<std::mutex> l(mutex_);
  AddRange(&thread_counts_, min_threads, max_threads, 2);
  return this;
}

// Requests a run with one thread per logical CPU.
Benchmark* Benchmark::ThreadPerCpu() {
  std::lock_guard<std::mutex> l(mutex_);
  thread_counts_.push_back(NumCPUs());
  return this;
}
|  |  | 
|  | void Benchmark::AddRange(std::vector<int>* dst, int lo, int hi, int mult) { | 
|  | CHECK_GE(lo, 0); | 
|  | CHECK_GE(hi, lo); | 
|  |  | 
|  | // Add "lo" | 
|  | dst->push_back(lo); | 
|  |  | 
|  | // Now space out the benchmarks in multiples of "mult" | 
|  | for (int32_t i = 1; i < std::numeric_limits<int32_t>::max() / mult; | 
|  | i *= mult) { | 
|  | if (i >= hi) break; | 
|  | if (i > lo) dst->push_back(i); | 
|  | } | 
|  | // Add "hi" (if different from "lo") | 
|  | if (hi != lo) dst->push_back(hi); | 
|  | } | 
|  |  | 
// Builds the concrete Instance list for this family at the given argument
// indices: one Instance per configured thread count, or a single
// single-threaded Instance when no thread counts were configured.
// kNoRangeIndex means "no argument on this axis".
std::vector<Benchmark::Instance> Benchmark::CreateBenchmarkInstances(
    size_t rangeXindex, size_t rangeYindex) {
  // Special list of thread counts to use when none are specified
  std::vector<int> one_thread;
  one_thread.push_back(1);

  std::vector<Benchmark::Instance> instances;

  const bool is_multithreaded = (!thread_counts_.empty());
  const std::vector<int>& thread_counts =
      (is_multithreaded ? thread_counts_ : one_thread);
  for (int num_threads : thread_counts) {
    Instance instance;
    instance.name = name_;
    instance.bm = this;
    instance.threads = num_threads;

    // Encode each selected argument into the instance name (e.g. "BM_foo/8").
    if (rangeXindex != kNoRangeIndex) {
      instance.rangeX = rangeX_[rangeXindex];
      instance.rangeXset = true;
      AppendHumanReadable(instance.rangeX, &instance.name);
    }
    if (rangeYindex != kNoRangeIndex) {
      instance.rangeY = rangeY_[rangeYindex];
      instance.rangeYset = true;
      AppendHumanReadable(instance.rangeY, &instance.name);
    }

    // Add the number of threads used to the name
    if (is_multithreaded) {
      std::stringstream ss;
      ss << "/threads:" << instance.threads;
      instance.name += ss.str();
    }

    instances.push_back(instance);
  }

  return instances;
}
|  |  | 
// Runs an empty benchmark loop on CPU time and stores the measured
// per-iteration cost in the file-level "overhead" global.
void Benchmark::MeasureOverhead() {
  State::FastClock clock(State::FastClock::CPU_TIME);
  State::SharedState state(nullptr);
  State runner(&clock, &state, 0);
  while (runner.KeepRunning()) {
  }
  // NOTE(review): assumes the finished runner recorded at least one entry in
  // state.runs — confirm KeepRunning() guarantees this before returning false.
  overhead = state.runs[0].real_accumulated_time /
             static_cast<double>(state.runs[0].iterations);
  VLOG(1) << "Per-iteration overhead for doing nothing: " << overhead << "\n";
}
|  |  | 
// Runs one benchmark instance end to end: creates b.threads runner States,
// executes them (as threads when the family is multithreaded), then fills in
// the accumulated reports and hands them to the reporter.
void Benchmark::RunInstance(const Instance& b, const BenchmarkReporter* br) {
  use_real_time = false;
  running_benchmark = true;
  // get_memory_usage = FLAGS_gbenchmark_memory_usage;
  State::FastClock clock(State::FastClock::CPU_TIME);

  // Initialize the test runners.
  State::SharedState state(&b);
  {
    std::vector<std::unique_ptr<State>> runners;
    for (int i = 0; i < b.threads; ++i)
      runners.push_back(std::unique_ptr<State>(new State(&clock, &state, i)));

    // Run them all.
    for (int i = 0; i < b.threads; ++i) {
      if (b.multithreaded())
        runners[i]->RunAsThread();
      else
        runners[i]->Run();
    }
    if (b.multithreaded()) {
      for (int i = 0; i < b.threads; ++i) runners[i]->Wait();
    }
  }
/*
  double mem_usage = 0;
  if (get_memory_usage) {
    // Measure memory usage
    Notification mem_done;
    BenchmarkRun mem_run;
    BenchmarkRun::SharedState mem_shared(&b, 1);
    mem_run.Init(&clock, &mem_shared, 0);
    {
      testing::MallocCounter mc(testing::MallocCounter::THIS_THREAD_ONLY);
      benchmark_mc = &mc;
      mem_run.Run(&mem_done);
      mem_done.WaitForNotification();
      benchmark_mc = NULL;
      mem_usage = mc.PeakHeapGrowth();
    }
  }
*/
  running_benchmark = false;

  // Finalize each accumulated run with the instance name, shared label and
  // derived throughput figures.
  for (BenchmarkReporter::Run& report : state.runs) {
    // NOTE(review): "seconds" can be zero for extremely fast runs, which
    // would make the throughput fields infinite — confirm callers tolerate
    // that.
    double seconds = (use_real_time ? report.real_accumulated_time
                                    : report.cpu_accumulated_time);
    report.benchmark_name = b.name;
    report.report_label = state.label;
    report.bytes_per_second = state.stats.bytes_processed / seconds;
    report.items_per_second = state.stats.items_processed / seconds;
    report.max_heapbytes_used = MeasurePeakHeapMemory(b);
  }

  br->ReportRuns(state.runs);
}
|  |  | 
// Run the specified benchmark, measure its peak memory usage, and
// return the peak memory usage.
// NOTE(review): measurement is currently disabled — the body below is
// commented out, so this always returns 0.0 (even when get_memory_usage
// is set).
double Benchmark::MeasurePeakHeapMemory(const Instance&) {
  if (!get_memory_usage) return 0.0;
  double bytes = 0.0;
  /*  TODO(dominich)
  // Should we do multi-threaded runs?
  const int num_threads = 1;
  const int num_iters = 1;
  {
  //    internal::MallocCounter mc(internal::MallocCounter::THIS_THREAD_ONLY);
    running_benchmark = true;
    timer_manager = new TimerManager(1, NULL);
  //    benchmark_mc = &mc;
    timer_manager->StartTimer();

    b.Run(num_iters);

    running_benchmark = false;
    delete timer_manager;
    timer_manager = NULL;
  //    benchmark_mc = NULL;
  //    bytes = mc.PeakHeapGrowth();
  }
  */
  return bytes;
}
|  |  | 
|  | }  // end namespace internal | 
|  |  | 
// Per-thread runner state.  "t" is this thread's index (0 is the
// controlling thread); "clock" and "s" are shared by all threads of the
// benchmark instance and must be non-null.
State::State(FastClock* clock, SharedState* s, int t)
    : thread_index(t),
      state_(STATE_INITIAL),
      clock_(clock),
      shared_(s),
      iterations_(0),
      start_cpu_(0.0),
      start_time_(0.0),
      stop_time_micros_(0.0),
      start_pause_cpu_(0.0),
      pause_cpu_time_(0.0),
      start_pause_real_(0.0),
      pause_real_time_(0.0),
      total_iterations_(0),
      // Each repetition gets an equal slice of the requested minimum time.
      interval_micros_(static_cast<int64_t>(kNumMicrosPerSecond *
                                            FLAGS_benchmark_min_time /
                                            FLAGS_benchmark_repetitions)),
      is_continuation_(false),
      stats_(new ThreadStats()) {
  CHECK(clock != nullptr);
  CHECK(s != nullptr);
}
|  |  | 
// Returns true while the benchmark body should execute another iteration.
// The fast path handles the common case; the slow path advances the
// per-thread state machine and coordinates multi-threaded shutdown.
bool State::KeepRunning() {
  // Fast path: either time-bounded and the deadline (shifted by paused wall
  // time) has not been reached, or iteration-bounded and below the count.
  if ((FLAGS_benchmark_iterations == 0 &&
       !clock_->HasReached(stop_time_micros_ +
                           kNumMicrosPerSecond * pause_real_time_)) ||
      iterations_ < FLAGS_benchmark_iterations) {
    ++iterations_;
    return true;
  }

  // To block thread 0 until all other threads exit, we have a signal exit
  // point for KeepRunning() to return false.  The fast path above always
  // returns true.
  bool ret = false;
  switch (state_) {
    case STATE_INITIAL:
      ret = StartRunning();
      break;
    case STATE_STARTING:
      // NOTE(review): presumably unreachable — STARTING appears to be
      // transient within StartRunning(); confirm.
      CHECK(false);
      ret = true;
      break;
    case STATE_RUNNING:
      ret = FinishInterval();
      break;
    case STATE_STOPPING:
      ret = MaybeStop();
      break;
    case STATE_STOPPED:
      // NOTE(review): presumably unreachable once the run has stopped.
      CHECK(false);
      ret = true;
      break;
  }

  if (!ret && shared_->threads > 1 && thread_index == 0){
    std::unique_lock<std::mutex> l(shared_->mu);

    // Block until all other threads have exited.  We can then safely cleanup
    // without other threads continuing to access shared variables inside the
    // user-provided run function.
    while (shared_->exited < shared_->threads - 1) {
      shared_->cond.wait(l);
    }
  }

  if (ret) {
    ++iterations_;
  }
  return ret;
}
|  |  | 
|  | void State::PauseTiming() { | 
|  | start_pause_cpu_ = MyCPUUsage() + ChildrenCPUUsage(); | 
|  | start_pause_real_ = walltime::Now(); | 
|  | } | 
|  |  | 
|  | void State::ResumeTiming() { | 
|  | pause_cpu_time_ += MyCPUUsage() + ChildrenCPUUsage() - start_pause_cpu_; | 
|  | pause_real_time_ += walltime::Now() - start_pause_real_; | 
|  | } | 
|  |  | 
|  | void State::SetBytesProcessed(int64_t bytes) { | 
|  | CHECK_EQ(STATE_STOPPED, state_); | 
|  | std::lock_guard<std::mutex> l(shared_->mu); | 
|  | stats_->bytes_processed = bytes; | 
|  | } | 
|  |  | 
|  | void State::SetItemsProcessed(int64_t items) { | 
|  | CHECK_EQ(STATE_STOPPED, state_); | 
|  | std::lock_guard<std::mutex> l(shared_->mu); | 
|  | stats_->items_processed = items; | 
|  | } | 
|  |  | 
|  | void State::SetLabel(const std::string& label) { | 
|  | CHECK_EQ(STATE_STOPPED, state_); | 
|  | std::lock_guard<std::mutex> l(shared_->mu); | 
|  | shared_->label = label; | 
|  | } | 
|  |  | 
|  | int State::range_x() const { | 
|  | CHECK(shared_->instance->rangeXset); | 
|  | /* | 
|  | << | 
|  | "Failed to get range_x as it was not set. Did you register your " | 
|  | "benchmark with a range parameter?"; | 
|  | */ | 
|  | return shared_->instance->rangeX; | 
|  | } | 
|  |  | 
|  | int State::range_y() const { | 
|  | CHECK(shared_->instance->rangeYset); | 
|  | /* << | 
|  | "Failed to get range_y as it was not set. Did you register your " | 
|  | "benchmark with a range parameter?"; | 
|  | */ | 
|  | return shared_->instance->rangeY; | 
|  | } | 
|  |  | 
|  | bool State::StartRunning() { | 
|  | bool last_thread = false; | 
|  | { | 
|  | std::lock_guard<std::mutex> l(shared_->mu); | 
|  | CHECK_EQ(state_, STATE_INITIAL); | 
|  | state_ = STATE_STARTING; | 
|  | is_continuation_ = false; | 
|  | CHECK_LT(shared_->starting, shared_->threads); | 
|  | ++shared_->starting; | 
|  | last_thread = shared_->starting == shared_->threads; | 
|  | } | 
|  |  | 
|  | if (last_thread) { | 
|  | clock_->InitType(use_real_time ? FastClock::REAL_TIME | 
|  | : FastClock::CPU_TIME); | 
|  | { | 
|  | std::lock_guard<std::mutex> l(starting_mutex); | 
|  | starting_cv.notify_all(); | 
|  | } | 
|  | } else { | 
|  | std::unique_lock<std::mutex> l(starting_mutex); | 
|  | starting_cv.wait(l); | 
|  | } | 
|  | CHECK_EQ(state_, STATE_STARTING); | 
|  | state_ = STATE_RUNNING; | 
|  |  | 
|  | NewInterval(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | void State::NewInterval() { | 
|  | stop_time_micros_ = clock_->NowMicros() + interval_micros_; | 
|  | if (!is_continuation_) { | 
|  | VLOG(1) << "Starting new interval; stopping in " << interval_micros_ | 
|  | << "\n"; | 
|  | iterations_ = 0; | 
|  | pause_cpu_time_ = 0; | 
|  | pause_real_time_ = 0; | 
|  | start_cpu_ = MyCPUUsage() + ChildrenCPUUsage(); | 
|  | start_time_ = walltime::Now(); | 
|  | } else { | 
|  | VLOG(1) << "Continuing interval; stopping in " << interval_micros_ | 
|  | << "\n"; | 
|  | } | 
|  | } | 
|  |  | 
// Called when an interval's deadline is reached.  Either retries with a
// longer interval (too few iterations), or records a data point and decides
// whether this thread keeps measuring, degrades to background load
// (STATE_STOPPING), or stops.  Returns whether to keep iterating.
bool State::FinishInterval() {
  // Too few iterations to be meaningful: either below the per-repetition
  // quota in fixed-iteration mode, or literally none.  Double the interval
  // and retry without recording anything.
  if ((FLAGS_benchmark_iterations != 0 &&
       iterations_ <
           FLAGS_benchmark_iterations / FLAGS_benchmark_repetitions) ||
      iterations_ < 1) {
    interval_micros_ *= 2;
    VLOG(1) << "Not enough iterations in interval; "
            << "Trying again for " << interval_micros_ << " useconds.\n";
    is_continuation_ = false;
    NewInterval();
    return true;
  }

  BenchmarkReporter::Run data;
  data.iterations = iterations_;
  data.thread_index = thread_index;

  // Net out user-requested pauses and the calibrated per-iteration timing
  // overhead from the elapsed wall time; CPU time nets out pauses only.
  const double accumulated_time = walltime::Now() - start_time_;
  const double total_overhead = overhead * iterations_;
  CHECK_LT(pause_real_time_, accumulated_time);
  CHECK_LT(pause_real_time_ + total_overhead, accumulated_time);
  data.real_accumulated_time =
      accumulated_time - (pause_real_time_ + total_overhead);
  data.cpu_accumulated_time = (MyCPUUsage() + ChildrenCPUUsage()) -
                              (pause_cpu_time_ + start_cpu_);
  total_iterations_ += iterations_;

  bool keep_going = false;
  {
    std::lock_guard<std::mutex> l(shared_->mu);

    // Either replace the last or add a new data point.
    if (is_continuation_)
      shared_->runs.back() = data;
    else
      shared_->runs.push_back(data);

    if (FLAGS_benchmark_iterations != 0) {
      // If we need more iterations, run another interval as a continuation.
      keep_going = total_iterations_ < FLAGS_benchmark_iterations;
      is_continuation_ = keep_going;
    } else {
      // If this is a repetition, run another interval as a new data point.
      keep_going = shared_->runs.size() <
                   static_cast<size_t>(FLAGS_benchmark_repetitions);
      is_continuation_ = !keep_going;
    }

    if (!keep_going) {
      ++shared_->stopping;
      if (shared_->stopping < shared_->threads) {
        // Other threads are still running, so continue running but without
        // timing to present an expected background load to the other threads.
        state_ = STATE_STOPPING;
        keep_going = true;
      } else {
        state_ = STATE_STOPPED;
      }
    }
  }

  // Only arm a new deadline if we are still actively measuring; STOPPING
  // threads iterate untimed and STOPPED threads are done.
  if (state_ == STATE_RUNNING) NewInterval();
  return keep_going;
}
|  |  | 
|  | bool State::MaybeStop() { | 
|  | std::lock_guard<std::mutex> l(shared_->mu); | 
|  | if (shared_->stopping < shared_->threads) { | 
|  | CHECK_EQ(state_, STATE_STOPPING); | 
|  | return true; | 
|  | } | 
|  | state_ = STATE_STOPPED; | 
|  | return false; | 
|  | } | 
|  |  | 
|  | void State::Run() { | 
|  | stats_->Reset(); | 
|  | shared_->instance->bm->function_(*this); | 
|  | { | 
|  | std::lock_guard<std::mutex> l(shared_->mu); | 
|  | shared_->stats.Add(*stats_); | 
|  | } | 
|  | } | 
|  |  | 
|  | void State::RunAsThread() { | 
|  | thread_ = std::thread(State::RunWrapper, this); | 
|  | } | 
|  |  | 
|  | void State::Wait() { | 
|  | if (thread_.joinable()) { | 
|  | thread_.join(); | 
|  | } | 
|  | } | 
|  |  | 
|  | // static | 
|  | void* State::RunWrapper(void* arg) { | 
|  | State* that = (State*)arg; | 
|  | CHECK(that != nullptr); | 
|  | that->Run(); | 
|  |  | 
|  | std::lock_guard<std::mutex> l(that->shared_->mu); | 
|  |  | 
|  | that->shared_->exited++; | 
|  | if (that->thread_index > 0 && | 
|  | that->shared_->exited == that->shared_->threads - 1) { | 
|  | // All threads but thread 0 have exited the user-provided run function. | 
|  | // Thread 0 can now wake up and exit. | 
|  | that->shared_->cond.notify_one(); | 
|  | } | 
|  |  | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | namespace internal { | 
|  |  | 
|  | void RunMatchingBenchmarks(const std::string& spec, | 
|  | const BenchmarkReporter* reporter) { | 
|  | if (spec.empty()) return; | 
|  |  | 
|  | std::vector<internal::Benchmark::Instance> benchmarks; | 
|  | BenchmarkFamilies::GetInstance()->FindBenchmarks(spec, &benchmarks); | 
|  |  | 
|  | // Determine the width of the name field using a minimum width of 10. | 
|  | // Also determine max number of threads needed. | 
|  | size_t name_field_width = 10; | 
|  | for (const internal::Benchmark::Instance& benchmark : benchmarks) { | 
|  | // Add width for _stddev and threads:XX | 
|  | if (benchmark.threads > 1 && FLAGS_benchmark_repetitions > 1) { | 
|  | name_field_width = | 
|  | std::max<size_t>(name_field_width, benchmark.name.size() + 17); | 
|  | } else if (benchmark.threads > 1) { | 
|  | name_field_width = | 
|  | std::max<size_t>(name_field_width, benchmark.name.size() + 10); | 
|  | } else if (FLAGS_benchmark_repetitions > 1) { | 
|  | name_field_width = | 
|  | std::max<size_t>(name_field_width, benchmark.name.size() + 7); | 
|  | } else { | 
|  | name_field_width = | 
|  | std::max<size_t>(name_field_width, benchmark.name.size()); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Print header here | 
|  | BenchmarkReporter::Context context; | 
|  | context.num_cpus = NumCPUs(); | 
|  | context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f; | 
|  | //  context.cpu_info = base::CompactCPUIDInfoString(); | 
|  | context.cpu_scaling_enabled = CpuScalingEnabled(); | 
|  | context.name_field_width = name_field_width; | 
|  |  | 
|  | if (reporter->ReportContext(context)) | 
|  | for (internal::Benchmark::Instance& benchmark : benchmarks) | 
|  | Benchmark::RunInstance(benchmark, reporter); | 
|  | } | 
|  |  | 
|  | void FindMatchingBenchmarkNames(const std::string& spec, | 
|  | std::vector<std::string>* benchmark_names) { | 
|  | if (spec.empty()) return; | 
|  |  | 
|  | std::vector<internal::Benchmark::Instance> benchmarks; | 
|  | BenchmarkFamilies::GetInstance()->FindBenchmarks(spec, &benchmarks); | 
|  | std::transform(benchmarks.begin(), benchmarks.end(), benchmark_names->begin(), | 
|  | [](const internal::Benchmark::Instance& b) { return b.name; }); | 
|  | } | 
|  |  | 
|  | }  // end namespace internal | 
|  |  | 
|  | void RunSpecifiedBenchmarks(const BenchmarkReporter* reporter /*= nullptr*/) { | 
|  | std::string spec = FLAGS_benchmark_filter; | 
|  | if (spec.empty() || spec == "all") | 
|  | spec = ".";  // Regexp that matches all benchmarks | 
|  | internal::ConsoleReporter default_reporter; | 
|  | internal::RunMatchingBenchmarks( | 
|  | spec, reporter == nullptr ? &default_reporter : reporter); | 
|  | } | 
|  |  | 
// Makes benchmark intervals measure wall-clock time instead of CPU time.
void UseRealTime() { use_real_time = true; }
|  |  | 
// Library initialization: parses benchmark flags out of argc/argv, sets the
// log verbosity, and performs one-time setup that must happen before any
// benchmark runs.  Call once from main() before RunSpecifiedBenchmarks().
void Initialize(int* argc, const char** argv) {
  internal::ParseCommandLineFlags(argc, argv);
  internal::SetLogLevel(FLAGS_v);
  // Ensure walltime is initialized by a single thread by forcing the
  // initialization.
  walltime::Now();
  // Calibrate the per-iteration timing overhead that FinishInterval() later
  // subtracts from measured results.
  internal::Benchmark::MeasureOverhead();
}
|  |  | 
|  | }  // end namespace benchmark |