blob: 01f2597374484b31b8f74f03ab4179ace204bfa6 [file] [log] [blame]
// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32)
# define PLATFORM_WINDOWS 1
#endif
#include <ctype.h> // for isspace()
#include <fcntl.h> // for open()
#ifdef HAVE_UNISTD_H
#include <unistd.h> // for read()
#endif
#if defined __MACH__ // Mac OS X, almost certainly
#include <sys/sysctl.h> // how we figure out numcpu's on OS X
#include <sys/types.h>
#elif defined __FreeBSD__
#include <sys/sysctl.h>
#elif defined __sun__ // Solaris
#include <procfs.h> // for, e.g., prmap_t
#elif defined(PLATFORM_WINDOWS)
#include <process.h> // for getpid() (actually, _getpid())
#include <shlwapi.h> // for SHGetValueA()
#include <tlhelp32.h> // for Module32First()
#endif
#include "gutil/sysinfo.h"
#include <algorithm>
#include <cerrno> // for errno
#include <cstdio> // for snprintf(), sscanf()
#include <cstdlib> // for getenv()
#include <cstring> // for memmove(), memchr(), etc.
#include <ctime>
#include <limits>
#include <ostream>
#include "gutil/dynamic_annotations.h" // for RunningOnValgrind
#include "gutil/macros.h"
#include "gutil/walltime.h"
#include <common/logging.h>
using std::numeric_limits;
// This isn't in the 'base' namespace in tcmallc. But, tcmalloc
// exports these functions, so we need to namespace them to avoid
// the conflict.
namespace base {
// ----------------------------------------------------------------------
// CyclesPerSecond()
// NumCPUs()
// It's important this not call malloc! -- they may be called at
// global-construct time, before we've set up all our proper malloc
// hooks and such.
// ----------------------------------------------------------------------
static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous
static int cpuinfo_num_cpus = 1; // Conservative guess
static int cpuinfo_max_cpu_index = -1;
void SleepForNanoseconds(int64_t nanoseconds) {
// Sleep for nanosecond duration
struct timespec sleep_time;
sleep_time.tv_sec = nanoseconds / 1000 / 1000 / 1000;
sleep_time.tv_nsec = (nanoseconds % (1000 * 1000 * 1000));
while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
; // Ignore signals and wait for the full interval to elapse.
}
void SleepForMilliseconds(int64_t milliseconds) {
SleepForNanoseconds(milliseconds * 1000 * 1000);
}
// Helper function estimates cycles/sec by observing cycles elapsed during
// sleep(). Using small sleep time decreases accuracy significantly.
static int64 EstimateCyclesPerSecond(const int estimate_time_ms) {
CHECK(estimate_time_ms > 0);
if (estimate_time_ms <= 0)
return 1;
double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much
const int64 start_ticks = CycleClock::Now();
SleepForMilliseconds(estimate_time_ms);
const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks));
return guess;
}
// ReadIntFromFile is only called on linux and cygwin platforms.
#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
// Slurp a file with a single read() call into 'buf'. This is only safe to use on small
// files in places like /proc where we are guaranteed not to get a partial read.
// Any remaining bytes in the buffer are zeroed.
//
// 'buflen' must be more than large enough to hold the whole file, or else this will
// issue a FATAL error.
static bool SlurpSmallTextFile(const char* file, char* buf, int buflen) {
bool ret = false;
int fd;
RETRY_ON_EINTR(fd, open(file, O_RDONLY));
if (fd == -1) return ret;
memset(buf, '\0', buflen);
int n;
RETRY_ON_EINTR(n, read(fd, buf, buflen - 1));
CHECK_NE(n, buflen - 1) << "buffer of len " << buflen << " not large enough to store "
<< "contents of " << file;
if (n > 0) {
ret = true;
}
int close_ret;
RETRY_ON_EINTR(close_ret, close(fd));
if (PREDICT_FALSE(close_ret != 0)) {
PLOG(WARNING) << "Failed to close fd " << fd;
}
return ret;
}
// Helper function for reading an int from a file. Returns true if successful
// and the memory location pointed to by value is set to the value read.
static bool ReadIntFromFile(const char *file, int *value) {
char line[1024];
if (!SlurpSmallTextFile(file, line, arraysize(line))) {
return false;
}
char* err;
const int temp_value = strtol(line, &err, 10);
if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
*value = temp_value;
return true;
}
return false;
}
static int ReadMaxCPUIndex() {
char buf[1024];
// TODO(tarmstrong): KUDU-2730: 'present' doesn't include CPUs that could be hotplugged
// in the future. 'possible' does, but using it instead could result in a blow-up in the
// number of per-CPU data structures.
CHECK(SlurpSmallTextFile("/sys/devices/system/cpu/present", buf, arraysize(buf)));
int max_idx = ParseMaxCpuIndex(buf);
CHECK_GE(max_idx, 0) << "unable to parse max CPU index from: " << buf;
return max_idx;
}
int ParseMaxCpuIndex(const char* str) {
DCHECK(str != nullptr);
const char* pos = str;
// Initialize max_idx to invalid so we can just return if we find zero ranges.
int max_idx = -1;
while (true) {
const char* range_start = pos;
const char* dash = nullptr;
// Scan forward until we find the separator indicating end of range, which is always a
// newline or comma if the input is valid.
for (; *pos != ',' && *pos != '\n'; pos++) {
// Check for early end of string - bail here to avoid advancing past end.
if (*pos == '\0') return -1;
if (*pos == '-') {
// Multiple dashes in range is invalid.
if (dash != nullptr) return -1;
dash = pos;
} else if (!isdigit(*pos)) {
return -1;
}
}
// At this point we found a range [range_start, pos) comprised of digits and an
// optional dash.
const char* num_start = dash == nullptr ? range_start : dash + 1;
// Check for ranges with missing numbers, e.g. "", "3-", "-3".
if (num_start == pos || dash == range_start) return -1;
// The numbers are comprised only of digits, so it can only fail if it is out of
// range of int (the return type of this function).
unsigned long start_idx = strtoul(range_start, nullptr, 10);
if (start_idx > numeric_limits<int>::max()) return -1;
unsigned long end_idx = strtoul(num_start, nullptr, 10);
if (end_idx > numeric_limits<int>::max() || start_idx > end_idx) {
return -1;
}
// Keep track of the max index we've seen so far.
max_idx = std::max(static_cast<int>(end_idx), max_idx);
// End of line, expect no more input.
if (*pos == '\n') break;
++pos;
}
// String must have a single newline at the very end.
if (*pos != '\n' || *(pos + 1) != '\0') return -1;
return max_idx;
}
#endif
// WARNING: logging calls back to InitializeSystemInfo() so it must
// not invoke any logging code. Also, InitializeSystemInfo() can be
// called before main() -- in fact it *must* be since already_called
// isn't protected -- before malloc hooks are properly set up, so
// we make an effort not to call any routines which might allocate
// memory.
static void InitializeSystemInfo() {
static bool already_called = false; // safe if we run before threads
if (already_called) return;
already_called = true;
bool saw_mhz = false;
if (RunningOnValgrind()) {
// Valgrind may slow the progress of time artificially (--scale-time=N
// option). We thus can't rely on CPU Mhz info stored in /sys or /proc
// files. Thus, actually measure the cps.
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(100);
saw_mhz = true;
}
#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
char line[1024];
char* err;
int freq;
// If the kernel is exporting the tsc frequency use that. There are issues
// where cpuinfo_max_freq cannot be relied on because the BIOS may be
// exporintg an invalid p-state (on x86) or p-states may be used to put the
// processor in a new mode (turbo mode). Essentially, those frequencies
// cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
// well.
if (!saw_mhz &&
ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
// The value is in kHz (as the file name suggests). For example, on a
// 2GHz warpstation, the file contains the value "2000000".
cpuinfo_cycles_per_second = freq * 1000.0;
saw_mhz = true;
}
// If CPU scaling is in effect, we want to use the *maximum* frequency,
// not whatever CPU speed some random processor happens to be using now.
if (!saw_mhz &&
ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
&freq)) {
// The value is in kHz. For example, on a 2GHz machine, the file
// contains the value "2000000".
cpuinfo_cycles_per_second = freq * 1000.0;
saw_mhz = true;
}
// Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
const char* pname = "/proc/cpuinfo";
int fd;
RETRY_ON_EINTR(fd, open(pname, O_RDONLY));
if (fd == -1) {
PLOG(FATAL) << "Unable to read CPU info from /proc. procfs must be mounted.";
}
double bogo_clock = 1.0;
bool saw_bogo = false;
int num_cpus = 0;
line[0] = line[1] = '\0';
int chars_read = 0;
do { // we'll exit when the last read didn't read anything
// Move the next line to the beginning of the buffer
const int oldlinelen = strlen(line);
if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
line[0] = '\0';
else // still other lines left to save
memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1));
// Terminate the new line, reading more if we can't find the newline
char* newline = strchr(line, '\n');
if (newline == NULL) {
const int linelen = strlen(line);
const int bytes_to_read = sizeof(line)-1 - linelen;
CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes
RETRY_ON_EINTR(chars_read, read(fd, line + linelen, bytes_to_read));
line[linelen + chars_read] = '\0';
newline = strchr(line, '\n');
}
if (newline != NULL)
*newline = '\0';
#if defined(__powerpc__) || defined(__ppc__)
// PowerPC cpus report the frequency in "clock" line
if (strncasecmp(line, "clock", sizeof("clock")-1) == 0) {
const char* freqstr = strchr(line, ':');
if (freqstr) {
// PowerPC frequencies are only reported as MHz (check 'show_cpuinfo'
// function at arch/powerpc/kernel/setup-common.c)
char *endp = strstr(line, "MHz");
if (endp) {
*endp = 0;
cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
saw_mhz = true;
}
}
#else
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
// accept postive values. Some environments (virtual machines) report zero,
// which would cause infinite looping in WallTime_Init.
if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
const char* freqstr = strchr(line, ':');
if (freqstr) {
cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
saw_mhz = true;
}
} else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
const char* freqstr = strchr(line, ':');
if (freqstr) {
bogo_clock = strtod(freqstr+1, &err) * 1000000.0;
if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
saw_bogo = true;
}
#endif
} else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) {
num_cpus++; // count up every time we see an "processor :" entry
}
} while (chars_read > 0);
int ret;
RETRY_ON_EINTR(ret, close(fd));
if (PREDICT_FALSE(ret != 0)) {
PLOG(WARNING) << "Failed to close fd " << fd;
}
if (!saw_mhz) {
if (saw_bogo) {
// If we didn't find anything better, we'll use bogomips, but
// we're not happy about it.
cpuinfo_cycles_per_second = bogo_clock;
} else {
// If we don't even have bogomips, we'll use the slow estimation.
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
}
}
if (cpuinfo_cycles_per_second == 0.0) {
cpuinfo_cycles_per_second = 1.0; // maybe unnecessary, but safe
}
if (num_cpus > 0) {
cpuinfo_num_cpus = num_cpus;
}
cpuinfo_max_cpu_index = ReadMaxCPUIndex();
#elif defined __FreeBSD__
// For this sysctl to work, the machine must be configured without
// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
// and later. Before that, it's a 32-bit quantity (and gives the
// wrong answer on machines faster than 2^32 Hz). See
// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
// But also compare FreeBSD 7.0:
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
// 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
// To FreeBSD 6.3 (it's the same in 6-STABLE):
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
#if __FreeBSD__ >= 7
uint64_t hz = 0;
#else
unsigned int hz = 0;
#endif
size_t sz = sizeof(hz);
const char *sysctl_path = "machdep.tsc_freq";
if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) {
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
sysctl_path, strerror(errno));
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
} else {
cpuinfo_cycles_per_second = hz;
}
// TODO(csilvers): also figure out cpuinfo_num_cpus
#elif defined(PLATFORM_WINDOWS)
# pragma comment(lib, "shlwapi.lib") // for SHGetValue()
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
// then make a crude estimate.
OSVERSIONINFO os;
os.dwOSVersionInfoSize = sizeof(os);
DWORD data, data_size = sizeof(data);
if (GetVersionEx(&os) &&
os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"~MHz", NULL, &data, &data_size)))
cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz
else
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500?
// Get the number of processors.
SYSTEM_INFO info;
GetSystemInfo(&info);
cpuinfo_num_cpus = info.dwNumberOfProcessors;
#elif defined(__MACH__) && defined(__APPLE__)
// returning "mach time units" per second. the current number of elapsed
// mach time units can be found by calling uint64 mach_absolute_time();
// while not as precise as actual CPU cycles, it is accurate in the face
// of CPU frequency scaling and multi-cpu/core machines.
// Our mac users have these types of machines, and accuracy
// (i.e. correctness) trumps precision.
// See cycleclock.h: CycleClock::Now(), which returns number of mach time
// units on Mac OS X.
mach_timebase_info_data_t timebase_info;
mach_timebase_info(&timebase_info);
double mach_time_units_per_nanosecond =
static_cast<double>(timebase_info.denom) /
static_cast<double>(timebase_info.numer);
cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;
int num_cpus = 0;
size_t size = sizeof(num_cpus);
int numcpus_name[] = { CTL_HW, HW_NCPU };
if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0)
== 0
&& (size == sizeof(num_cpus)))
cpuinfo_num_cpus = num_cpus;
#else
// Generic cycles per second counter
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
#endif
// On platforms where we can't determine the max CPU index, just use the
// number of CPUs. This might break if CPUs are taken offline, but
// better than a wild guess.
if (cpuinfo_max_cpu_index < 0) {
cpuinfo_max_cpu_index = cpuinfo_num_cpus - 1;
}
}
double CyclesPerSecond(void) {
InitializeSystemInfo();
return cpuinfo_cycles_per_second;
}
int NumCPUs(void) {
InitializeSystemInfo();
return cpuinfo_num_cpus;
}
int MaxCPUIndex(void) {
InitializeSystemInfo();
return cpuinfo_max_cpu_index;
}
} // namespace base