blob: c7f7d1952fc3d59246524e8197891a6cd1db41f6 [file] [log] [blame]
// Copyright 2010 Google Inc. All Rights Reserved.
// Refactored from contributions of various authors in strings/strutil.cc
//
// This file contains string processing functions related to
// numeric values.
#include "gutil/strings/numbers.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <float.h> // for DBL_DIG and FLT_DIG
#include <math.h> // for HUGE_VAL
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits>
using std::numeric_limits;
#include <string>
using std::string;
#include "gutil/int128.h"
#include "gutil/integral_types.h"
#include <common/logging.h>
#include "gutil/logging-inl.h"
#include "gutil/gscoped_ptr.h"
#include "gutil/stringprintf.h"
#include "gutil/strtoint.h"
#include "gutil/strings/ascii_ctype.h"
// Reads a <double> in *text, which may not be whitespace-initiated.
// *len is the length, or -1 if text is '\0'-terminated, which is more
// efficient. Sets *text to the end of the double, and val to the
// converted value, and the length of the double is subtracted from
// *len. <double> may also be a '?', in which case val will be
// unchanged. Returns true upon success. If initial_minus is
// non-NULL, then *initial_minus will indicate whether the first
// symbol seen was a '-', which will be ignored. Similarly, if
// final_period is non-NULL, then *final_period will indicate whether
// the last symbol seen was a '.', which will be ignored. This is
// useful in case that an initial '-' or final '.' would have another
// meaning (as a separator, e.g.).
static inline bool EatADouble(const char** text, int* len, bool allow_question,
double* val, bool* initial_minus,
bool* final_period) {
const char* pos = *text;
int rem = *len; // remaining length, or -1 if null-terminated
if (pos == nullptr || rem == 0)
return false;
if (allow_question && (*pos == '?')) {
*text = pos + 1;
if (rem != -1)
*len = rem - 1;
return true;
}
if (initial_minus) {
if ((*initial_minus = (*pos == '-'))) { // Yes, we want assignment.
if (rem == 1)
return false;
++pos;
if (rem != -1)
--rem;
}
}
// a double has to begin one of these (we don't allow 'inf' or whitespace)
// this also serves as an optimization.
if (!strchr("-+.0123456789", *pos))
return false;
// strtod is evil in that the second param is a non-const char**
char* end_nonconst;
double retval;
if (rem == -1) {
retval = strtod(pos, &end_nonconst);
} else {
// not '\0'-terminated & no obvious terminator found. must copy.
gscoped_array<char> buf(new char[rem + 1]);
memcpy(buf.get(), pos, rem);
buf[rem] = '\0';
retval = strtod(buf.get(), &end_nonconst);
end_nonconst = const_cast<char*>(pos) + (end_nonconst - buf.get());
}
if (pos == end_nonconst)
return false;
if (final_period) {
*final_period = (end_nonconst[-1] == '.');
if (*final_period) {
--end_nonconst;
}
}
*text = end_nonconst;
*val = retval;
if (rem != -1)
*len = rem - (end_nonconst - pos);
return true;
}
// If update, consume one of acceptable_chars from string *text of
// length len and return that char, or '\0' otherwise. If len is -1,
// *text is null-terminated. If update is false, don't alter *text and
// *len. If null_ok, then update must be false, and, if text has no
// more chars, then return '\1' (arbitrary nonzero).
static inline char EatAChar(const char** text, int* len,
const char* acceptable_chars,
bool update, bool null_ok) {
assert(!(update && null_ok));
if ((*len == 0) || (**text == '\0'))
return (null_ok ? '\1' : '\0'); // if null_ok, we're in predicate mode.
if (strchr(acceptable_chars, **text)) {
char result = **text;
if (update) {
++(*text);
if (*len != -1)
--(*len);
}
return result;
}
return '\0'; // no match; no update
}
// Parse an expression in 'text' of the form: <comparator><double> or
// <double><sep><double> See full comments in header file.
bool ParseDoubleRange(const char* text, int len, const char** end,
double* from, double* to, bool* is_currency,
const DoubleRangeOptions& opts) {
const double from_default = opts.dont_modify_unbounded ? *from : -HUGE_VAL;
if (!opts.dont_modify_unbounded) {
*from = -HUGE_VAL;
*to = HUGE_VAL;
}
if (opts.allow_currency && (is_currency != nullptr))
*is_currency = false;
assert(len >= -1);
assert(opts.separators && (*opts.separators != '\0'));
// these aren't valid separators
assert(strlen(opts.separators) ==
strcspn(opts.separators, "+0123456789eE$"));
assert(opts.num_required_bounds <= 2);
// Handle easier cases of comparators (<, >) first
if (opts.allow_comparators) {
char comparator = EatAChar(&text, &len, "<>", true, false);
if (comparator) {
double* dest = (comparator == '>') ? from : to;
EatAChar(&text, &len, "=", true, false);
if (opts.allow_currency && EatAChar(&text, &len, "$", true, false))
if (is_currency != nullptr)
*is_currency = true;
if (!EatADouble(&text, &len, opts.allow_unbounded_markers, dest, nullptr,
nullptr))
return false;
*end = text;
return EatAChar(&text, &len, opts.acceptable_terminators, false,
opts.null_terminator_ok);
}
}
bool seen_dollar = (opts.allow_currency &&
EatAChar(&text, &len, "$", true, false));
// If we see a '-', two things could be happening: -<to> or
// <from>... where <from> is negative. Treat initial minus sign as a
// separator if '-' is a valid separator.
// Similarly, we prepare for the possibility of seeing a '.' at the
// end of the number, in case '.' (which really means '..') is a
// separator.
bool initial_minus_sign = false;
bool final_period = false;
bool* check_initial_minus = (strchr(opts.separators, '-') && !seen_dollar
&& (opts.num_required_bounds < 2)) ?
(&initial_minus_sign) : nullptr;
bool* check_final_period = strchr(opts.separators, '.') ? (&final_period)
: nullptr;
bool double_seen = EatADouble(&text, &len, opts.allow_unbounded_markers,
from, check_initial_minus, check_final_period);
// if 2 bounds required, must see a double (or '?' if allowed)
if ((opts.num_required_bounds == 2) && !double_seen) return false;
if (seen_dollar && !double_seen) {
--text;
if (len != -1)
++len;
seen_dollar = false;
}
// If we're here, we've read the first double and now expect a
// separator and another <double>.
char separator = EatAChar(&text, &len, opts.separators, true, false);
if (separator == '.') {
// seen one '.' as separator; must check for another; perhaps set seplen=2
if (EatAChar(&text, &len, ".", true, false)) {
if (final_period) {
// We may have three periods in a row. The first is part of the
// first number, the others are a separator. Policy: 234...567
// is "234." to "567", not "234" to ".567".
EatAChar(&text, &len, ".", true, false);
}
} else if (!EatAChar(&text, &len, opts.separators, true, false)) {
// just one '.' and no other separator; uneat the first '.' we saw
--text;
if (len != -1)
++len;
separator = '\0';
}
}
// By now, we've consumed whatever separator there may have been,
// and separator is true iff there was one.
if (!separator) {
if (final_period) // final period now considered part of first double
EatAChar(&text, &len, ".", true, false);
if (initial_minus_sign && double_seen) {
*to = *from;
*from = from_default;
} else if (opts.require_separator ||
(opts.num_required_bounds > 0 && !double_seen) ||
(opts.num_required_bounds > 1) ) {
return false;
}
} else {
if (initial_minus_sign && double_seen)
*from = -(*from);
// read second <double>
bool second_dollar_seen = (seen_dollar
|| (opts.allow_currency && !double_seen))
&& EatAChar(&text, &len, "$", true, false);
bool second_double_seen = EatADouble(
&text, &len, opts.allow_unbounded_markers, to, nullptr, nullptr);
if (opts.num_required_bounds > double_seen + second_double_seen)
return false;
if (second_dollar_seen && !second_double_seen) {
--text;
if (len != -1)
++len;
second_dollar_seen = false;
}
seen_dollar = seen_dollar || second_dollar_seen;
}
if (seen_dollar && (is_currency != nullptr))
*is_currency = true;
// We're done. But we have to check that the next char is a proper
// terminator.
*end = text;
char terminator = EatAChar(&text, &len, opts.acceptable_terminators, false,
opts.null_terminator_ok);
if (terminator == '.')
--(*end);
return terminator;
}
// ----------------------------------------------------------------------
// ConsumeStrayLeadingZeroes
// Eliminates all leading zeroes (unless the string itself is composed
// of nothing but zeroes, in which case one is kept: 0...0 becomes 0).
// --------------------------------------------------------------------
void ConsumeStrayLeadingZeroes(string *const str) {
const string::size_type len(str->size());
if (len > 1 && (*str)[0] == '0') {
const char
*const begin(str->c_str()),
*const end(begin + len),
*ptr(begin + 1);
while (ptr != end && *ptr == '0') {
++ptr;
}
string::size_type remove(ptr - begin);
DCHECK_GT(ptr, begin);
if (remove == len) {
--remove; // if they are all zero, leave one...
}
str->erase(0, remove);
}
}
// ----------------------------------------------------------------------
// ParseLeadingInt32Value()
// ParseLeadingUInt32Value()
// A simple parser for [u]int32 values. Returns the parsed value
// if a valid value is found; else returns deflt
// This cannot handle decimal numbers with leading 0s.
// --------------------------------------------------------------------
int32 ParseLeadingInt32Value(const char *str, int32 deflt) {
char *error = nullptr;
long value = strtol(str, &error, 0);
// Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits.
if (value > numeric_limits<int32>::max()) {
value = numeric_limits<int32>::max();
} else if (value < numeric_limits<int32>::min()) {
value = numeric_limits<int32>::min();
}
return (error == str) ? deflt : value;
}
uint32 ParseLeadingUInt32Value(const char *str, uint32 deflt) {
if (numeric_limits<unsigned long>::max() == numeric_limits<uint32>::max()) {
// When long is 32 bits, we can use strtoul.
char *error = nullptr;
const uint32 value = strtoul(str, &error, 0);
return (error == str) ? deflt : value;
} else {
// When long is 64 bits, we must use strto64 and handle limits
// by hand. The reason we cannot use a 64-bit strtoul is that
// it would be impossible to differentiate "-2" (that should wrap
// around to the value UINT_MAX-1) from a string with ULONG_MAX-1
// (that should be pegged to UINT_MAX due to overflow).
char *error = nullptr;
int64 value = strto64(str, &error, 0);
if (value > numeric_limits<uint32>::max() ||
value < -static_cast<int64>(numeric_limits<uint32>::max())) {
value = numeric_limits<uint32>::max();
}
// Within these limits, truncation to 32 bits handles negatives correctly.
return (error == str) ? deflt : value;
}
}
// ----------------------------------------------------------------------
// ParseLeadingDec32Value
// ParseLeadingUDec32Value
// A simple parser for [u]int32 values. Returns the parsed value
// if a valid value is found; else returns deflt
// The string passed in is treated as *10 based*.
// This can handle strings with leading 0s.
// --------------------------------------------------------------------
int32 ParseLeadingDec32Value(const char *str, int32 deflt) {
char *error = nullptr;
long value = strtol(str, &error, 10);
// Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits.
if (value > numeric_limits<int32>::max()) {
value = numeric_limits<int32>::max();
} else if (value < numeric_limits<int32>::min()) {
value = numeric_limits<int32>::min();
}
return (error == str) ? deflt : value;
}
uint32 ParseLeadingUDec32Value(const char *str, uint32 deflt) {
if (numeric_limits<unsigned long>::max() == numeric_limits<uint32>::max()) {
// When long is 32 bits, we can use strtoul.
char *error = nullptr;
const uint32 value = strtoul(str, &error, 10);
return (error == str) ? deflt : value;
} else {
// When long is 64 bits, we must use strto64 and handle limits
// by hand. The reason we cannot use a 64-bit strtoul is that
// it would be impossible to differentiate "-2" (that should wrap
// around to the value UINT_MAX-1) from a string with ULONG_MAX-1
// (that should be pegged to UINT_MAX due to overflow).
char *error = nullptr;
int64 value = strto64(str, &error, 10);
if (value > numeric_limits<uint32>::max() ||
value < -static_cast<int64>(numeric_limits<uint32>::max())) {
value = numeric_limits<uint32>::max();
}
// Within these limits, truncation to 32 bits handles negatives correctly.
return (error == str) ? deflt : value;
}
}
// ----------------------------------------------------------------------
// ParseLeadingUInt64Value
// ParseLeadingInt64Value
// ParseLeadingHex64Value
// A simple parser for 64-bit values. Returns the parsed value if a
// valid integer is found; else returns deflt
// UInt64 and Int64 cannot handle decimal numbers with leading 0s.
// --------------------------------------------------------------------
uint64 ParseLeadingUInt64Value(const char *str, uint64 deflt) {
char *error = nullptr;
const uint64 value = strtou64(str, &error, 0);
return (error == str) ? deflt : value;
}
int64 ParseLeadingInt64Value(const char *str, int64 deflt) {
char *error = nullptr;
const int64 value = strto64(str, &error, 0);
return (error == str) ? deflt : value;
}
uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {
char *error = nullptr;
const uint64 value = strtou64(str, &error, 16);
return (error == str) ? deflt : value;
}
// ----------------------------------------------------------------------
// ParseLeadingDec64Value
// ParseLeadingUDec64Value
// A simple parser for [u]int64 values. Returns the parsed value
// if a valid value is found; else returns deflt
// The string passed in is treated as *10 based*.
// This can handle strings with leading 0s.
// --------------------------------------------------------------------
int64 ParseLeadingDec64Value(const char *str, int64 deflt) {
char *error = nullptr;
const int64 value = strto64(str, &error, 10);
return (error == str) ? deflt : value;
}
uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) {
char *error = nullptr;
const uint64 value = strtou64(str, &error, 10);
return (error == str) ? deflt : value;
}
// ----------------------------------------------------------------------
// ParseLeadingDoubleValue()
// A simple parser for double values. Returns the parsed value
// if a valid value is found; else returns deflt
// --------------------------------------------------------------------
double ParseLeadingDoubleValue(const char *str, double deflt) {
char *error = nullptr;
errno = 0;
const double value = strtod(str, &error);
if (errno != 0 || // overflow/underflow happened
error == str) { // no valid parse
return deflt;
} else {
return value;
}
}
// ----------------------------------------------------------------------
// ParseLeadingBoolValue()
// A recognizer of boolean string values. Returns the parsed value
// if a valid value is found; else returns deflt. This skips leading
// whitespace, is case insensitive, and recognizes these forms:
// 0/1, false/true, no/yes, n/y
// --------------------------------------------------------------------
bool ParseLeadingBoolValue(const char *str, bool deflt) {
static const int kMaxLen = 5;
char value[kMaxLen + 1];
// Skip whitespace
while (ascii_isspace(*str)) {
++str;
}
int len = 0;
for (; len <= kMaxLen && ascii_isalnum(*str); ++str)
value[len++] = ascii_tolower(*str);
if (len == 0 || len > kMaxLen)
return deflt;
value[len] = '\0';
switch (len) {
case 1:
if (value[0] == '0' || value[0] == 'n')
return false;
if (value[0] == '1' || value[0] == 'y')
return true;
break;
case 2:
if (!strcmp(value, "no"))
return false;
break;
case 3:
if (!strcmp(value, "yes"))
return true;
break;
case 4:
if (!strcmp(value, "true"))
return true;
break;
case 5:
if (!strcmp(value, "false"))
return false;
break;
}
return deflt;
}
// ----------------------------------------------------------------------
// FpToString()
// FloatToString()
// IntToString()
// Convert various types to their string representation, possibly padded
// with spaces, using snprintf format specifiers.
// ----------------------------------------------------------------------
string FpToString(Fprint fp) {
char buf[17];
snprintf(buf, sizeof(buf), "%016" PRIx64, fp);
return string(buf);
}
// Default arguments
string Uint128ToHexString(uint128 ui128) {
char buf[33];
snprintf(buf, sizeof(buf), "%016" PRIx64,
Uint128High64(ui128));
snprintf(buf + 16, sizeof(buf) - 16, "%016" PRIx64,
Uint128Low64(ui128));
return string(buf);
}
namespace {
// Represents integer values of digits.
// Uses 36 to indicate an invalid character since we support
// bases up to 36.
static const int8 kAsciiToInt[256] = {
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
36, 36, 36, 36, 36, 36, 36,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 36, 36, 36, 36, 36,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36 };
// Input format based on POSIX.1-2008 strtol
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
template<typename IntType>
bool safe_int_internal(const char* start, const char* end, int base,
IntType* value_p) {
// Consume whitespace.
while (start < end && ascii_isspace(start[0])) {
++start;
}
while (start < end && ascii_isspace(end[-1])) {
--end;
}
if (start >= end) {
return false;
}
// Consume sign.
const bool negative = (start[0] == '-');
if (negative || start[0] == '+') {
++start;
if (start >= end) {
return false;
}
}
// Consume base-dependent prefix.
// base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
// base 16: "0x" -> base 16
// Also validate the base.
if (base == 0) {
if (end - start >= 2 && start[0] == '0' &&
(start[1] == 'x' || start[1] == 'X')) {
base = 16;
start += 2;
} else if (end - start >= 1 && start[0] == '0') {
base = 8;
start += 1;
} else {
base = 10;
}
} else if (base == 16) {
if (end - start >= 2 && start[0] == '0' &&
(start[1] == 'x' || start[1] == 'X')) {
start += 2;
}
} else if (base >= 2 && base <= 36) {
// okay
} else {
return false;
}
// Consume digits.
//
// The classic loop:
//
// for each digit
// value = value * base + digit
// value *= sign
//
// The classic loop needs overflow checking. It also fails on the most
// negative integer, -2147483648 in 32-bit two's complement representation.
//
// My improved loop:
//
// if (!negative)
// for each digit
// value = value * base
// value = value + digit
// else
// for each digit
// value = value * base
// value = value - digit
//
// Overflow checking becomes simple.
//
// I present the positive code first for easier reading.
IntType value = 0;
if (!negative) {
const IntType vmax = std::numeric_limits<IntType>::max();
assert(vmax > 0);
assert(vmax >= base);
const IntType vmax_over_base = vmax / base;
// loop over digits
// loop body is interleaved for perf, not readability
for (; start < end; ++start) {
unsigned char c = static_cast<unsigned char>(start[0]);
int digit = kAsciiToInt[c];
if (value > vmax_over_base) return false;
value *= base;
if (digit >= base) return false;
if (value > vmax - digit) return false;
value += digit;
}
} else {
const IntType vmin = std::numeric_limits<IntType>::min();
assert(vmin < 0);
assert(vmin <= 0 - base);
IntType vmin_over_base = vmin / base;
// 2003 c++ standard [expr.mul]
// "... the sign of the remainder is implementation-defined."
// Although (vmin/base)*base + vmin%base is always vmin.
// 2011 c++ standard tightens the spec but we cannot rely on it.
if (vmin % base > 0) {
vmin_over_base += 1;
}
// loop over digits
// loop body is interleaved for perf, not readability
for (; start < end; ++start) {
unsigned char c = static_cast<unsigned char>(start[0]);
int digit = kAsciiToInt[c];
if (value < vmin_over_base) return false;
value *= base;
if (digit >= base) return false;
if (value < vmin + digit) return false;
value -= digit;
}
}
// Store output.
*value_p = value;
return true;
}
} // anonymous namespace
bool safe_strto32_base(const char* startptr, const int buffer_size,
int32* v, int base) {
return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v);
}
bool safe_strto64_base(const char* startptr, const int buffer_size,
int64* v, int base) {
return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v);
}
bool safe_strto32(const char* startptr, const int buffer_size, int32* value) {
return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value);
}
bool safe_strto64(const char* startptr, const int buffer_size, int64* value) {
return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value);
}
bool safe_strto32_base(const char* str, int32* value, int base) {
char* endptr;
errno = 0; // errno only gets set on errors
*value = strto32(str, &endptr, base);
if (endptr != str) {
while (ascii_isspace(*endptr)) ++endptr;
}
return *str != '\0' && *endptr == '\0' && errno == 0;
}
bool safe_strto64_base(const char* str, int64* value, int base) {
char* endptr;
errno = 0; // errno only gets set on errors
*value = strto64(str, &endptr, base);
if (endptr != str) {
while (ascii_isspace(*endptr)) ++endptr;
}
return *str != '\0' && *endptr == '\0' && errno == 0;
}
bool safe_strtou32_base(const char* str, uint32* value, int base) {
// strtoul does not give any errors on negative numbers, so we have to
// search the string for '-' manually.
while (ascii_isspace(*str)) ++str;
if (*str == '-') return false;
char* endptr;
errno = 0; // errno only gets set on errors
*value = strtou32(str, &endptr, base);
if (endptr != str) {
while (ascii_isspace(*endptr)) ++endptr;
}
return *str != '\0' && *endptr == '\0' && errno == 0;
}
bool safe_strtou64_base(const char* str, uint64* value, int base) {
// strtou64 does not give any errors on negative numbers, so we have to
// search the string for '-' manually.
while (ascii_isspace(*str)) ++str;
if (*str == '-') return false;
char* endptr;
errno = 0; // errno only gets set on errors
*value = strtou64(str, &endptr, base);
if (endptr != str) {
while (ascii_isspace(*endptr)) ++endptr;
}
return *str != '\0' && *endptr == '\0' && errno == 0;
}
// ----------------------------------------------------------------------
// u64tostr_base36()
// Converts unsigned number to string representation in base-36.
// --------------------------------------------------------------------
size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) {
CHECK_GT(buf_size, 0);
CHECK(buffer);
static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz";
buffer[buf_size - 1] = '\0';
size_t result_size = 1;
do {
if (buf_size == result_size) { // Ran out of space.
return 0;
}
int remainder = number % 36;
number /= 36;
buffer[buf_size - result_size - 1] = kAlphabet[remainder];
result_size++;
} while (number);
memmove(buffer, buffer + buf_size - result_size, result_size);
return result_size - 1;
}
// Generate functions that wrap safe_strtoXXX_base.
#define GEN_SAFE_STRTO(name, type) \
bool name##_base(const string& str, type* value, int base) { \
return name##_base(str.c_str(), value, base); \
} \
bool name(const char* str, type* value) { \
return name##_base(str, value, 10); \
} \
bool name(const string& str, type* value) { \
return name##_base(str.c_str(), value, 10); \
}
GEN_SAFE_STRTO(safe_strto32, int32);
GEN_SAFE_STRTO(safe_strtou32, uint32);
GEN_SAFE_STRTO(safe_strto64, int64);
GEN_SAFE_STRTO(safe_strtou64, uint64);
#undef GEN_SAFE_STRTO
bool safe_strtof(const char* str, float* value) {
char* endptr;
#ifdef _MSC_VER // has no strtof()
*value = strtod(str, &endptr);
#else
*value = strtof(str, &endptr);
#endif
if (endptr != str) {
while (ascii_isspace(*endptr)) ++endptr;
}
// Ignore range errors from strtod/strtof.
// The values it returns on underflow and
// overflow are the right fallback in a
// robust setting.
return *str != '\0' && *endptr == '\0';
}
bool safe_strtod(const char* str, double* value) {
char* endptr;
*value = strtod(str, &endptr);
if (endptr != str) {
while (ascii_isspace(*endptr)) ++endptr;
}
// Ignore range errors from strtod. The values it
// returns on underflow and overflow are the right
// fallback in a robust setting.
return *str != '\0' && *endptr == '\0';
}
bool safe_strtof(const string& str, float* value) {
return safe_strtof(str.c_str(), value);
}
bool safe_strtod(const string& str, double* value) {
return safe_strtod(str.c_str(), value);
}
uint64 atoi_kmgt(const char* s) {
char* endptr;
uint64 n = strtou64(s, &endptr, 10);
uint64 scale = 1;
char c = *endptr;
if (c != '\0') {
c = ascii_toupper(c);
switch (c) {
case 'K':
scale = GG_ULONGLONG(1) << 10;
break;
case 'M':
scale = GG_ULONGLONG(1) << 20;
break;
case 'G':
scale = GG_ULONGLONG(1) << 30;
break;
case 'T':
scale = GG_ULONGLONG(1) << 40;
break;
default:
LOG(FATAL) << "Invalid mnemonic: `" << c << "';"
<< " should be one of `K', `M', `G', and `T'.";
}
}
return n * scale;
}
// ----------------------------------------------------------------------
// FastIntToBuffer()
// FastInt64ToBuffer()
// FastHexToBuffer()
// FastHex64ToBuffer()
// FastHex32ToBuffer()
// FastTimeToBuffer()
// These are intended for speed. FastHexToBuffer() assumes the
// integer is non-negative. FastHexToBuffer() puts output in
// hex rather than decimal. FastTimeToBuffer() puts the output
// into RFC822 format. If time is 0, uses the current time.
//
// FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
// padded to exactly 16 bytes (plus one byte for '\0')
//
// FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
// padded to exactly 8 bytes (plus one byte for '\0')
//
// All functions take the output buffer as an arg. FastInt()
// uses at most 22 bytes, FastTime() uses exactly 30 bytes.
// They all return a pointer to the beginning of the output,
// which may not be the beginning of the input buffer. (Though
// for FastTimeToBuffer(), we guarantee that it is.)
// ----------------------------------------------------------------------
char *FastInt64ToBuffer(int64 i, char* buffer) {
FastInt64ToBufferLeft(i, buffer);
return buffer;
}
char *FastInt32ToBuffer(int32 i, char* buffer) {
FastInt32ToBufferLeft(i, buffer);
return buffer;
}
char *FastHexToBuffer(int i, char* buffer) {
CHECK_GE(i, 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
static const char *hexdigits = "0123456789abcdef";
char *p = buffer + 21;
*p-- = '\0';
do {
*p-- = hexdigits[i & 15]; // mod by 16
i >>= 4; // divide by 16
} while (i > 0);
return p + 1;
}
char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
static const char *hexdigits = "0123456789abcdef";
buffer[num_byte] = '\0';
for (int i = num_byte - 1; i >= 0; i--) {
buffer[i] = hexdigits[value & 0xf];
value >>= 4;
}
return buffer;
}
char *FastHex64ToBuffer(uint64 value, char* buffer) {
return InternalFastHexToBuffer(value, buffer, 16);
}
char *FastHex32ToBuffer(uint32 value, char* buffer) {
return InternalFastHexToBuffer(value, buffer, 8);
}
// TODO(user): revisit the two_ASCII_digits optimization.
//
// Several converters use this table to reduce
// division and modulo operations.
extern const char two_ASCII_digits[100][2]; // from strutil.cc
// ----------------------------------------------------------------------
// FastInt32ToBufferLeft()
// FastUInt32ToBufferLeft()
// FastInt64ToBufferLeft()
// FastUInt64ToBufferLeft()
//
// Like the Fast*ToBuffer() functions above, these are intended for speed.
// Unlike the Fast*ToBuffer() functions, however, these functions write
// their output to the beginning of the buffer (hence the name, as the
// output is left-aligned). The caller is responsible for ensuring that
// the buffer has enough space to hold the output.
//
// Returns a pointer to the end of the string (i.e. the null character
// terminating the string).
// ----------------------------------------------------------------------
char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
uint digits;
const char *ASCII_digits = nullptr;
// The idea of this implementation is to trim the number of divides to as few
// as possible by using multiplication and subtraction rather than mod (%),
// and by outputting two digits at a time rather than one.
// The huge-number case is first, in the hopes that the compiler will output
// that case in one branch-free block of code, and only output conditional
// branches into it from below.
if (u >= 1000000000) { // >= 1,000,000,000
digits = u / 100000000; // 100,000,000
ASCII_digits = two_ASCII_digits[digits];
buffer[0] = ASCII_digits[0];
buffer[1] = ASCII_digits[1];
buffer += 2;
sublt100_000_000:
u -= digits * 100000000; // 100,000,000
lt100_000_000:
digits = u / 1000000; // 1,000,000
ASCII_digits = two_ASCII_digits[digits];
buffer[0] = ASCII_digits[0];
buffer[1] = ASCII_digits[1];
buffer += 2;
sublt1_000_000:
u -= digits * 1000000; // 1,000,000
lt1_000_000:
digits = u / 10000; // 10,000
ASCII_digits = two_ASCII_digits[digits];
buffer[0] = ASCII_digits[0];
buffer[1] = ASCII_digits[1];
buffer += 2;
sublt10_000:
u -= digits * 10000; // 10,000
lt10_000:
digits = u / 100;
ASCII_digits = two_ASCII_digits[digits];
buffer[0] = ASCII_digits[0];
buffer[1] = ASCII_digits[1];
buffer += 2;
sublt100:
u -= digits * 100;
lt100:
digits = u;
ASCII_digits = two_ASCII_digits[digits];
buffer[0] = ASCII_digits[0];
buffer[1] = ASCII_digits[1];
buffer += 2;
done:
*buffer = 0;
return buffer;
}
if (u < 100) {
digits = u;
if (u >= 10) goto lt100;
*buffer++ = '0' + digits;
goto done;
}
if (u < 10000) { // 10,000
if (u >= 1000) goto lt10_000;
digits = u / 100;
*buffer++ = '0' + digits;
goto sublt100;
}
if (u < 1000000) { // 1,000,000
if (u >= 100000) goto lt1_000_000;
digits = u / 10000; // 10,000
*buffer++ = '0' + digits;
goto sublt10_000;
}
if (u < 100000000) { // 100,000,000
if (u >= 10000000) goto lt100_000_000;
digits = u / 1000000; // 1,000,000
*buffer++ = '0' + digits;
goto sublt1_000_000;
}
// we already know that u < 1,000,000,000
digits = u / 100000000; // 100,000,000
*buffer++ = '0' + digits;
goto sublt100_000_000;
}
char* FastInt32ToBufferLeft(int32 i, char* buffer) {
uint32 u = i;
if (i < 0) {
*buffer++ = '-';
// We need to do the negation in modular (i.e., "unsigned")
// arithmetic; MSVC++ apprently warns for plain "-u", so
// we write the equivalent expression "0 - u" instead.
u = 0 - u;
}
return FastUInt32ToBufferLeft(u, buffer);
}
char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
uint digits;
const char *ASCII_digits = nullptr;
uint32 u = static_cast<uint32>(u64);
if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
uint64 top_11_digits = u64 / 1000000000;
buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
u = u64 - (top_11_digits * 1000000000);
digits = u / 10000000; // 10,000,000
DCHECK_LT(digits, 100);
ASCII_digits = two_ASCII_digits[digits];
buffer[0] = ASCII_digits[0];
buffer[1] = ASCII_digits[1];
buffer += 2;
u -= digits * 10000000; // 10,000,000
digits = u / 100000; // 100,000
ASCII_digits = two_ASCII_digits[digits];
buffer[0] = ASCII_digits[0];
buffer[1] = ASCII_digits[1];
buffer += 2;
u -= digits * 100000; // 100,000
digits = u / 1000; // 1,000
ASCII_digits = two_ASCII_digits[digits];
buffer[0] = ASCII_digits[0];
buffer[1] = ASCII_digits[1];
buffer += 2;
u -= digits * 1000; // 1,000
digits = u / 10;
ASCII_digits = two_ASCII_digits[digits];
buffer[0] = ASCII_digits[0];
buffer[1] = ASCII_digits[1];
buffer += 2;
u -= digits * 10;
digits = u;
*buffer++ = '0' + digits;
*buffer = 0;
return buffer;
}
char* FastInt64ToBufferLeft(int64 i, char* buffer) {
uint64 u = i;
if (i < 0) {
*buffer++ = '-';
u = 0 - u;
}
return FastUInt64ToBufferLeft(u, buffer);
}
int HexDigitsPrefix(const char* buf, int num_digits) {
for (int i = 0; i < num_digits; i++)
if (!ascii_isxdigit(buf[i]))
return 0; // This also detects end of string as '\0' is not xdigit.
return 1;
}
// ----------------------------------------------------------------------
// AutoDigitStrCmp
// AutoDigitLessThan
// StrictAutoDigitLessThan
// autodigit_less
// autodigit_greater
// strict_autodigit_less
// strict_autodigit_greater
// These are like less<string> and greater<string>, except when a
// run of digits is encountered at corresponding points in the two
// arguments. Such digit strings are compared numerically instead
// of lexicographically. Therefore if you sort by
// "autodigit_less", some machine names might get sorted as:
// exaf1
// exaf2
// exaf10
// When using "strict" comparison (AutoDigitStrCmp with the strict flag
// set to true, or the strict version of the other functions),
// strings that represent equal numbers will not be considered equal if
// the string representations are not identical. That is, "01" < "1" in
// strict mode, but "01" == "1" otherwise.
// ----------------------------------------------------------------------
int AutoDigitStrCmp(const char* a, int alen,
const char* b, int blen,
bool strict) {
int aindex = 0;
int bindex = 0;
while ((aindex < alen) && (bindex < blen)) {
if (isdigit(a[aindex]) && isdigit(b[bindex])) {
// Compare runs of digits. Instead of extracting numbers, we
// just skip leading zeroes, and then get the run-lengths. This
// allows us to handle arbitrary precision numbers. We remember
// how many zeroes we found so that we can differentiate between
// "1" and "01" in strict mode.
// Skip leading zeroes, but remember how many we found
int azeroes = aindex;
int bzeroes = bindex;
while ((aindex < alen) && (a[aindex] == '0')) aindex++;
while ((bindex < blen) && (b[bindex] == '0')) bindex++;
azeroes = aindex - azeroes;
bzeroes = bindex - bzeroes;
// Count digit lengths
int astart = aindex;
int bstart = bindex;
while ((aindex < alen) && isdigit(a[aindex])) aindex++;
while ((bindex < blen) && isdigit(b[bindex])) bindex++;
if (aindex - astart < bindex - bstart) {
// a has shorter run of digits: so smaller
return -1;
} else if (aindex - astart > bindex - bstart) {
// a has longer run of digits: so larger
return 1;
} else {
// Same lengths, so compare digit by digit
for (int i = 0; i < aindex-astart; i++) {
if (a[astart+i] < b[bstart+i]) {
return -1;
} else if (a[astart+i] > b[bstart+i]) {
return 1;
}
}
// Equal: did one have more leading zeroes?
if (strict && azeroes != bzeroes) {
if (azeroes > bzeroes) {
// a has more leading zeroes: a < b
return -1;
} else {
// b has more leading zeroes: a > b
return 1;
}
}
// Equal: so continue scanning
}
} else if (a[aindex] < b[bindex]) {
return -1;
} else if (a[aindex] > b[bindex]) {
return 1;
} else {
aindex++;
bindex++;
}
}
if (aindex < alen) {
// b is prefix of a
return 1;
} else if (bindex < blen) {
// a is prefix of b
return -1;
} else {
// a is equal to b
return 0;
}
}
bool AutoDigitLessThan(const char* a, int alen, const char* b, int blen) {
return AutoDigitStrCmp(a, alen, b, blen, false) < 0;
}
bool StrictAutoDigitLessThan(const char* a, int alen,
const char* b, int blen) {
return AutoDigitStrCmp(a, alen, b, blen, true) < 0;
}
// ----------------------------------------------------------------------
// SimpleDtoa()
// SimpleFtoa()
// DoubleToBuffer()
// FloatToBuffer()
// We want to print the value without losing precision, but we also do
// not want to print more digits than necessary. This turns out to be
// trickier than it sounds. Numbers like 0.2 cannot be represented
// exactly in binary. If we print 0.2 with a very large precision,
// e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
// On the other hand, if we set the precision too low, we lose
// significant digits when printing numbers that actually need them.
// It turns out there is no precision value that does the right thing
// for all numbers.
//
// Our strategy is to first try printing with a precision that is never
// over-precise, then parse the result with strtod() to see if it
// matches. If not, we print again with a precision that will always
// give a precise result, but may use more digits than necessary.
//
// An arguably better strategy would be to use the algorithm described
// in "How to Print Floating-Point Numbers Accurately" by Steele &
// White, e.g. as implemented by David M. Gay's dtoa(). It turns out,
// however, that the following implementation is about as fast as
// DMG's code. Furthermore, DMG's code locks mutexes, which means it
// will not scale well on multi-core machines. DMG's code is slightly
// more accurate (in that it will never use more digits than
// necessary), but this is probably irrelevant for most users.
//
// Rob Pike and Ken Thompson also have an implementation of dtoa() in
// third_party/fmt/fltfmt.cc. Their implementation is similar to this
// one in that it makes guesses and then uses strtod() to check them.
// Their implementation is faster because they use their own code to
// generate the digits in the first place rather than use snprintf(),
// thus avoiding format string parsing overhead. However, this makes
// it considerably more complicated than the following implementation,
// and it is embedded in a larger library. If speed turns out to be
// an issue, we could re-implement this in terms of their
// implementation.
// ----------------------------------------------------------------------
string SimpleDtoa(double value) {
char buffer[kDoubleToBufferSize];
return DoubleToBuffer(value, buffer);
}
string SimpleFtoa(float value) {
char buffer[kFloatToBufferSize];
return FloatToBuffer(value, buffer);
}
char* DoubleToBuffer(double value, char* buffer) {
// DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
// platforms these days. Just in case some system exists where DBL_DIG
// is significantly larger -- and risks overflowing our buffer -- we have
// this assert.
COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
int snprintf_result =
snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
// The snprintf should never overflow because the buffer is significantly
// larger than the precision we asked for.
DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
if (strtod(buffer, nullptr) != value) {
snprintf_result =
snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
// Should never overflow; see above.
DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
}
return buffer;
}
char* FloatToBuffer(float value, char* buffer) {
// FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
// platforms these days. Just in case some system exists where FLT_DIG
// is significantly larger -- and risks overflowing our buffer -- we have
// this assert.
COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
int snprintf_result =
snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
// The snprintf should never overflow because the buffer is significantly
// larger than the precision we asked for.
DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
float parsed_value;
if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
snprintf_result =
snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value);
// Should never overflow; see above.
DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
}
return buffer;
}
// ----------------------------------------------------------------------
// SimpleItoaWithCommas()
// Description: converts an integer to a string.
// Puts commas every 3 spaces.
// Faster than printf("%d")?
//
// Return value: string
// ----------------------------------------------------------------------
string SimpleItoaWithCommas(int32 i) {
// 10 digits, 3 commas, and sign are good for 32-bit or smaller ints.
// Longest is -2,147,483,648.
char local[14];
char *p = local + sizeof(local);
// Need to use uint32 instead of int32 to correctly handle
// -2,147,483,648.
uint32 n = i;
if (i < 0)
n = 0 - n; // negate the unsigned value to avoid overflow
*--p = '0' + n % 10; // this case deals with the number "0"
n /= 10;
while (n) {
*--p = '0' + n % 10;
n /= 10;
if (n == 0) break;
*--p = '0' + n % 10;
n /= 10;
if (n == 0) break;
*--p = ',';
*--p = '0' + n % 10;
n /= 10;
// For this unrolling, we check if n == 0 in the main while loop
}
if (i < 0)
*--p = '-';
return string(p, local + sizeof(local));
}
// We need this overload because otherwise SimpleItoaWithCommas(5U) wouldn't
// compile.
string SimpleItoaWithCommas(uint32 i) {
// 10 digits and 3 commas are good for 32-bit or smaller ints.
// Longest is 4,294,967,295.
char local[13];
char *p = local + sizeof(local);
*--p = '0' + i % 10; // this case deals with the number "0"
i /= 10;
while (i) {
*--p = '0' + i % 10;
i /= 10;
if (i == 0) break;
*--p = '0' + i % 10;
i /= 10;
if (i == 0) break;
*--p = ',';
*--p = '0' + i % 10;
i /= 10;
// For this unrolling, we check if i == 0 in the main while loop
}
return string(p, local + sizeof(local));
}
string SimpleItoaWithCommas(int64 i) {
// 19 digits, 6 commas, and sign are good for 64-bit or smaller ints.
char local[26];
char *p = local + sizeof(local);
// Need to use uint64 instead of int64 to correctly handle
// -9,223,372,036,854,775,808.
uint64 n = i;
if (i < 0)
n = 0 - n;
*--p = '0' + n % 10; // this case deals with the number "0"
n /= 10;
while (n) {
*--p = '0' + n % 10;
n /= 10;
if (n == 0) break;
*--p = '0' + n % 10;
n /= 10;
if (n == 0) break;
*--p = ',';
*--p = '0' + n % 10;
n /= 10;
// For this unrolling, we check if n == 0 in the main while loop
}
if (i < 0)
*--p = '-';
return string(p, local + sizeof(local));
}
// We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't
// compile.
string SimpleItoaWithCommas(uint64 i) {
// 20 digits and 6 commas are good for 64-bit or smaller ints.
// Longest is 18,446,744,073,709,551,615.
char local[26];
char *p = local + sizeof(local);
*--p = '0' + i % 10; // this case deals with the number "0"
i /= 10;
while (i) {
*--p = '0' + i % 10;
i /= 10;
if (i == 0) break;
*--p = '0' + i % 10;
i /= 10;
if (i == 0) break;
*--p = ',';
*--p = '0' + i % 10;
i /= 10;
// For this unrolling, we check if i == 0 in the main while loop
}
return string(p, local + sizeof(local));
}
// ----------------------------------------------------------------------
// ItoaKMGT()
// Description: converts an integer to a string
// Truncates values to a readable unit: K, G, M or T
// Opposite of atoi_kmgt()
// e.g. 100 -> "100" 1500 -> "1500" 4000 -> "3K" 57185920 -> "45M"
//
// Return value: string
// ----------------------------------------------------------------------
string ItoaKMGT(int64 i) {
const char *sign = "", *suffix = "";
if (i < 0) {
// We lose some accuracy if the caller passes LONG_LONG_MIN, but
// that's OK as this function is only for human readability
if (i == numeric_limits<int64>::min()) i++;
sign = "-";
i = -i;
}
int64 val;
if ((val = (i >> 40)) > 1) {
suffix = "T";
} else if ((val = (i >> 30)) > 1) {
suffix = "G";
} else if ((val = (i >> 20)) > 1) {
suffix = "M";
} else if ((val = (i >> 10)) > 1) {
suffix = "K";
} else {
val = i;
}
return StringPrintf("%s%" PRId64 "%s", sign, val, suffix);
}
// DEPRECATED(wadetregaskis).
// These are non-inline because some BUILD files turn on -Wformat-non-literal.
string FloatToString(float f, const char* format) {
return StringPrintf(format, f);
}
string IntToString(int i, const char* format) {
return StringPrintf(format, i);
}
string Int64ToString(int64 i64, const char* format) {
return StringPrintf(format, i64);
}
string UInt64ToString(uint64 ui64, const char* format) {
return StringPrintf(format, ui64);
}